Merge branch 'feature/Lanczos' into ckelly_develop4

2026-02-28 09:36:13 +00:00 · 2017-10-10 13:41:43 -04:00
parent bf58557fb1 3006663b9c
commit ef61b549e6
14 changed files with 3374 additions and 2 deletions
--- a/bootstrap.sh
+++ b/bootstrap.sh
@@ -5,7 +5,7 @@ EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2'
 echo "-- deploying Eigen source..."
 wget ${EIGEN_URL} --no-check-certificate
 ./scripts/update_eigen.sh `basename ${EIGEN_URL}`
-rm `basename ${EIGEN_URL}`
+#rm `basename ${EIGEN_URL}`

 echo '-- generating Make.inc files...'
 ./scripts/filelist
--- a/lib/algorithms/Algorithms.h
+++ b/lib/algorithms/Algorithms.h
@@ -39,6 +39,10 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <Grid/algorithms/approx/MultiShiftFunction.h>
 #include <Grid/algorithms/approx/Forecast.h>

+#include <Grid/algorithms/densematrix/DenseMatrix.h>
+#include <Grid/algorithms/densematrix/Francis.h>
+#include <Grid/algorithms/densematrix/Householder.h>
+
 #include <Grid/algorithms/iterative/ConjugateGradient.h>
 #include <Grid/algorithms/iterative/ConjugateResidual.h>
 #include <Grid/algorithms/iterative/NormalEquations.h>
@@ -48,6 +52,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <Grid/algorithms/iterative/BlockConjugateGradient.h>
 #include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>
 #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
+#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczosCJ.h>
+#include <Grid/algorithms/iterative/SimpleLanczos.h>
 #include <Grid/algorithms/CoarsenedMatrix.h>
 #include <Grid/algorithms/FFT.h>

--- a/lib/algorithms/LinearOperator.h
+++ b/lib/algorithms/LinearOperator.h
@@ -332,6 +332,75 @@ namespace Grid {
    };
    template<class Matrix,class Field> using SchurStagOperator = SchurStaggeredOperator<Matrix,Field>;

+#if 0
+  // This is specific to (Z)mobius fermions
+  template<class Matrix, class Field>
+    class KappaSimilarityTransform {
+  public:
+//    INHERIT_IMPL_TYPES(Matrix);
+    typedef typename Matrix::Coeff_t                     Coeff_t;
+    std::vector<Coeff_t> kappa, kappaDag, kappaInv, kappaInvDag;
+
+    KappaSimilarityTransform (Matrix &zmob) {
+      for (int i=0;i<(int)zmob.bs.size();i++) {
+	Coeff_t k = 1.0 / ( 2.0 * (zmob.bs[i] *(4 - zmob.M5) + 1.0) );
+	kappa.push_back( k );
+	kappaDag.push_back( conj(k) );
+	kappaInv.push_back( 1.0 / k );
+	kappaInvDag.push_back( 1.0 / conj(k) );
+      }
+    }
+
+  template<typename vobj>
+    void sscale(const Lattice<vobj>& in, Lattice<vobj>& out, Coeff_t* s) {
+    GridBase *grid=out._grid;
+    out.checkerboard = in.checkerboard;
+    assert(grid->_simd_layout[0] == 1); // should be fine for ZMobius for now
+    int Ls = grid->_rdimensions[0];
+    parallel_for(int ss=0;ss<grid->oSites();ss++){
+      vobj tmp = s[ss % Ls]*in._odata[ss];
+      vstream(out._odata[ss],tmp);
+    }
+  }
+
+  RealD sscale_norm(const Field& in, Field& out, Coeff_t* s) {
+    sscale(in,out,s);
+    return norm2(out);
+  }
+
+  virtual RealD M       (const Field& in, Field& out) { return sscale_norm(in,out,&kappa[0]);   }
+  virtual RealD MDag    (const Field& in, Field& out) { return sscale_norm(in,out,&kappaDag[0]);}
+  virtual RealD MInv    (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInv[0]);}
+  virtual RealD MInvDag (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInvDag[0]);}
+
+  };
+
+  template<class Matrix,class Field>
+    class SchurDiagTwoKappaOperator :  public SchurOperatorBase<Field> {
+  public:
+    KappaSimilarityTransform<Matrix, Field> _S;
+    SchurDiagTwoOperator<Matrix, Field> _Mat;
+
+    SchurDiagTwoKappaOperator (Matrix &Mat): _S(Mat), _Mat(Mat) {};
+
+    virtual  RealD Mpc      (const Field &in, Field &out) {
+      Field tmp(in._grid);
+
+      _S.MInv(in,out);
+      _Mat.Mpc(out,tmp);
+      return _S.M(tmp,out);
+
+    }
+    virtual  RealD MpcDag   (const Field &in, Field &out){
+      Field tmp(in._grid);
+
+      _S.MDag(in,out);
+      _Mat.MpcDag(out,tmp);
+      return _S.MInvDag(tmp,out);
+    }
+  };
+#endif
+

    /////////////////////////////////////////////////////////////
    // Base classes for functions of operators
--- a/lib/algorithms/densematrix/DenseMatrix.h
+++ b/lib/algorithms/densematrix/DenseMatrix.h
@@ -0,0 +1,137 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./lib/algorithms/iterative/DenseMatrix.h
+
+    Copyright (C) 2015
+
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+Author: paboyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#ifndef GRID_DENSE_MATRIX_H
+#define GRID_DENSE_MATRIX_H
+
+namespace Grid {
+    /////////////////////////////////////////////////////////////
+    // Matrix untils
+    /////////////////////////////////////////////////////////////
+
+template<class T> using DenseVector = std::vector<T>;
+template<class T> using DenseMatrix = DenseVector<DenseVector<T> >;
+
+template<class T> void Size(DenseVector<T> & vec, int &N) 
+{ 
+  N= vec.size();
+}
+template<class T> void Size(DenseMatrix<T> & mat, int &N,int &M) 
+{ 
+  N= mat.size();
+  M= mat[0].size();
+}
+
+template<class T> void SizeSquare(DenseMatrix<T> & mat, int &N) 
+{ 
+  int M; Size(mat,N,M);
+  assert(N==M);
+}
+
+template<class T> void Resize(DenseVector<T > & mat, int N) { 
+  mat.resize(N);
+}
+template<class T> void Resize(DenseMatrix<T > & mat, int N, int M) { 
+  mat.resize(N);
+  for(int i=0;i<N;i++){
+    mat[i].resize(M);
+  }
+}
+template<class T> void Fill(DenseMatrix<T> & mat, T&val) { 
+  int N,M;
+  Size(mat,N,M);
+  for(int i=0;i<N;i++){
+  for(int j=0;j<M;j++){
+    mat[i][j] = val;
+  }}
+}
+
+/** Transpose of a matrix **/
+template<class T> DenseMatrix<T> Transpose(DenseMatrix<T> & mat){
+  int N,M;
+  Size(mat,N,M);
+  DenseMatrix<T> C; Resize(C,M,N);
+  for(int i=0;i<M;i++){
+  for(int j=0;j<N;j++){
+    C[i][j] = mat[j][i];
+  }} 
+  return C;
+}
+/** Set DenseMatrix to unit matrix **/
+template<class T> void Unity(DenseMatrix<T> &A){
+  int N;  SizeSquare(A,N);
+  for(int i=0;i<N;i++){
+    for(int j=0;j<N;j++){
+      if ( i==j ) A[i][j] = 1;
+      else        A[i][j] = 0;
+    } 
+  } 
+}
+
+/** Add C * I to matrix **/
+template<class T>
+void PlusUnit(DenseMatrix<T> & A,T c){
+  int dim;  SizeSquare(A,dim);
+  for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;} 
+}
+
+/** return the Hermitian conjugate of matrix **/
+template<class T>
+DenseMatrix<T> HermitianConj(DenseMatrix<T> &mat){
+
+  int dim; SizeSquare(mat,dim);
+
+  DenseMatrix<T> C; Resize(C,dim,dim);
+
+  for(int i=0;i<dim;i++){
+    for(int j=0;j<dim;j++){
+      C[i][j] = conj(mat[j][i]);
+    } 
+  } 
+  return C;
+}
+/**Get a square submatrix**/
+template <class T>
+DenseMatrix<T> GetSubMtx(DenseMatrix<T> &A,int row_st, int row_end, int col_st, int col_end)
+{
+  DenseMatrix<T> H; Resize(H,row_end - row_st,col_end-col_st);
+
+  for(int i = row_st; i<row_end; i++){
+  for(int j = col_st; j<col_end; j++){
+    H[i-row_st][j-col_st]=A[i][j];
+  }}
+  return H;
+}
+
+}
+
+#include "Householder.h"
+#include "Francis.h"
+
+#endif
+
--- a/lib/algorithms/densematrix/Francis.h
+++ b/lib/algorithms/densematrix/Francis.h
@@ -0,0 +1,525 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./lib/algorithms/iterative/Francis.h
+
+    Copyright (C) 2015
+
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#ifndef FRANCIS_H
+#define FRANCIS_H
+
+#include <cstdlib>
+#include <string>
+#include <cmath>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <fstream>
+#include <complex>
+#include <algorithm>
+
+//#include <timer.h>
+//#include <lapacke.h>
+//#include <Eigen/Dense>
+
+namespace Grid {
+
+template <class T> int SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small);
+template <class T> int     Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small);
+
+/**
+  Find the eigenvalues of an upper hessenberg matrix using the Francis QR algorithm.
+H =
+      x  x  x  x  x  x  x  x  x
+      x  x  x  x  x  x  x  x  x
+      0  x  x  x  x  x  x  x  x
+      0  0  x  x  x  x  x  x  x
+      0  0  0  x  x  x  x  x  x
+      0  0  0  0  x  x  x  x  x
+      0  0  0  0  0  x  x  x  x
+      0  0  0  0  0  0  x  x  x
+      0  0  0  0  0  0  0  x  x
+Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary.
+**/
+template <class T>
+int QReigensystem(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small)
+{
+  DenseMatrix<T> H = Hin; 
+
+  int N ; SizeSquare(H,N);
+  int M = N;
+
+  Fill(evals,0);
+  Fill(evecs,0);
+
+  T s,t,x=0,y=0,z=0;
+  T u,d;
+  T apd,amd,bc;
+  DenseVector<T> p(N,0);
+  T nrm = Norm(H);    ///DenseMatrix Norm
+  int n, m;
+  int e = 0;
+  int it = 0;
+  int tot_it = 0;
+  int l = 0;
+  int r = 0;
+  DenseMatrix<T> P; Resize(P,N,N); Unity(P);
+  DenseVector<int> trows(N,0);
+
+  /// Check if the matrix is really hessenberg, if not abort
+  RealD sth = 0;
+  for(int j=0;j<N;j++){
+    for(int i=j+2;i<N;i++){
+      sth = abs(H[i][j]);
+      if(sth > small){
+	std::cout << "Non hessenberg H = " << sth << " > " << small << std::endl;
+	exit(1);
+      }
+    }
+  }
+
+  do{
+    std::cout << "Francis QR Step N = " << N << std::endl;
+    /** Check for convergence
+      x  x  x  x  x
+      0  x  x  x  x
+      0  0  x  x  x
+      0  0  x  x  x
+      0  0  0  0  x
+      for this matrix l = 4
+     **/
+    do{
+      l = Chop_subdiag(H,nrm,e,small);
+      r = 0;    ///May have converged on more than one eval
+      ///Single eval
+      if(l == N-1){
+        evals[e] = H[l][l];
+        N--; e++; r++; it = 0;
+      }
+      ///RealD eval
+      if(l == N-2){
+        trows[l+1] = 1;    ///Needed for UTSolve
+        apd = H[l][l] + H[l+1][l+1];
+        amd = H[l][l] - H[l+1][l+1];
+        bc =  (T)4.0*H[l+1][l]*H[l][l+1];
+        evals[e]   = (T)0.5*( apd + sqrt(amd*amd + bc) );
+        evals[e+1] = (T)0.5*( apd - sqrt(amd*amd + bc) );
+        N-=2; e+=2; r++; it = 0;
+      }
+    } while(r>0);
+
+    if(N ==0) break;
+
+    DenseVector<T > ck; Resize(ck,3);
+    DenseVector<T> v;   Resize(v,3);
+
+    for(int m = N-3; m >= l; m--){
+      ///Starting vector essentially random shift.
+      if(it%10 == 0 && N >= 3 && it > 0){
+        s = (T)1.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) );
+        t = (T)0.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) );
+        x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t;
+        y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s);
+        z = H[m+1][m]*H[m+2][m+1];
+      }
+      ///Starting vector implicit Q theorem
+      else{
+        s = (H[N-2][N-2] + H[N-1][N-1]);
+        t = (H[N-2][N-2]*H[N-1][N-1] - H[N-2][N-1]*H[N-1][N-2]);
+        x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t;
+        y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s);
+        z = H[m+1][m]*H[m+2][m+1];
+      }
+      ck[0] = x; ck[1] = y; ck[2] = z;
+
+      if(m == l) break;
+
+      /** Some stupid thing from numerical recipies, seems to work**/
+      // PAB.. for heaven's sake quote page, purpose, evidence it works.
+      //       what sort of comment is that!?!?!?
+      u=abs(H[m][m-1])*(abs(y)+abs(z));
+      d=abs(x)*(abs(H[m-1][m-1])+abs(H[m][m])+abs(H[m+1][m+1]));
+      if ((T)abs(u+d) == (T)abs(d) ){
+	l = m; break;
+      }
+
+      //if (u < small){l = m; break;}
+    }
+    if(it > 100000){
+     std::cout << "QReigensystem: bugger it got stuck after 100000 iterations" << std::endl;
+     std::cout << "got " << e << " evals " << l << " " << N << std::endl;
+      exit(1);
+    }
+    normalize(ck);    ///Normalization cancels in PHP anyway
+    T beta;
+    Householder_vector<T >(ck, 0, 2, v, beta);
+    Householder_mult<T >(H,v,beta,0,l,l+2,0);
+    Householder_mult<T >(H,v,beta,0,l,l+2,1);
+    ///Accumulate eigenvector
+    Householder_mult<T >(P,v,beta,0,l,l+2,1);
+    int sw = 0;      ///Are we on the last row?
+    for(int k=l;k<N-2;k++){
+      x = H[k+1][k];
+      y = H[k+2][k];
+      z = (T)0.0;
+      if(k+3 <= N-1){
+	z = H[k+3][k];
+      } else{
+	sw = 1; 
+	v[2] = (T)0.0;
+      }
+      ck[0] = x; ck[1] = y; ck[2] = z;
+      normalize(ck);
+      Householder_vector<T >(ck, 0, 2-sw, v, beta);
+      Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,0);
+      Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,1);
+      ///Accumulate eigenvector
+      Householder_mult<T >(P,v, beta,0,k+1,k+3-sw,1);
+    }
+    it++;
+    tot_it++;
+  }while(N > 1);
+  N = evals.size();
+  ///Annoying - UT solves in reverse order;
+  DenseVector<T> tmp; Resize(tmp,N);
+  for(int i=0;i<N;i++){
+    tmp[i] = evals[N-i-1];
+  } 
+  evals = tmp;
+  UTeigenvectors(H, trows, evals, evecs);
+  for(int i=0;i<evals.size();i++){evecs[i] = P*evecs[i]; normalize(evecs[i]);}
+  return tot_it;
+}
+
+template <class T>
+int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small)
+{
+  /**
+  Find the eigenvalues of an upper Hessenberg matrix using the Wilkinson QR algorithm.
+  H =
+  x  x  0  0  0  0
+  x  x  x  0  0  0
+  0  x  x  x  0  0
+  0  0  x  x  x  0
+  0  0  0  x  x  x
+  0  0  0  0  x  x
+  Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary.  **/
+  return my_Wilkinson(Hin, evals, evecs, small, small);
+}
+
+template <class T>
+int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small, RealD tol)
+{
+  int N; SizeSquare(Hin,N);
+  int M = N;
+
+  ///I don't want to modify the input but matricies must be passed by reference
+  //Scale a matrix by its "norm"
+  //RealD Hnorm = abs( Hin.LargestDiag() ); H =  H*(1.0/Hnorm);
+  DenseMatrix<T> H;  H = Hin;
+  
+  RealD Hnorm = abs(Norm(Hin));
+  H = H * (1.0 / Hnorm);
+
+  // TODO use openmp and memset
+  Fill(evals,0);
+  Fill(evecs,0);
+
+  T s, t, x = 0, y = 0, z = 0;
+  T u, d;
+  T apd, amd, bc;
+  DenseVector<T> p; Resize(p,N); Fill(p,0);
+
+  T nrm = Norm(H);    ///DenseMatrix Norm
+  int n, m;
+  int e = 0;
+  int it = 0;
+  int tot_it = 0;
+  int l = 0;
+  int r = 0;
+  DenseMatrix<T> P; Resize(P,N,N);
+  Unity(P);
+  DenseVector<int> trows(N, 0);
+  /// Check if the matrix is really symm tridiag
+  RealD sth = 0;
+  for(int j = 0; j < N; ++j)
+  {
+    for(int i = j + 2; i < N; ++i)
+    {
+      if(abs(H[i][j]) > tol || abs(H[j][i]) > tol)
+      {
+	std::cout << "Non Tridiagonal H(" << i << ","<< j << ") = |" << Real( real( H[j][i] ) ) << "| > " << tol << std::endl;
+	std::cout << "Warning tridiagonalize and call again" << std::endl;
+        // exit(1); // see what is going on
+        //return;
+      }
+    }
+  }
+
+  do{
+    do{
+      //Jasper
+      //Check if the subdiagonal term is small enough (<small)
+      //if true then it is converged.
+      //check start from H.dim - e - 1
+      //How to deal with more than 2 are converged?
+      //What if Chop_symm_subdiag return something int the middle?
+      //--------------
+      l = Chop_symm_subdiag(H,nrm, e, small);
+      r = 0;    ///May have converged on more than one eval
+      //Jasper
+      //In this case
+      // x  x  0  0  0  0
+      // x  x  x  0  0  0
+      // 0  x  x  x  0  0
+      // 0  0  x  x  x  0
+      // 0  0  0  x  x  0
+      // 0  0  0  0  0  x  <- l
+      //--------------
+      ///Single eval
+      if(l == N - 1)
+      {
+        evals[e] = H[l][l];
+        N--;
+        e++;
+        r++;
+        it = 0;
+      }
+      //Jasper
+      // x  x  0  0  0  0
+      // x  x  x  0  0  0
+      // 0  x  x  x  0  0
+      // 0  0  x  x  0  0
+      // 0  0  0  0  x  x  <- l
+      // 0  0  0  0  x  x
+      //--------------
+      ///RealD eval
+      if(l == N - 2)
+      {
+        trows[l + 1] = 1;    ///Needed for UTSolve
+        apd = H[l][l] + H[l + 1][ l + 1];
+        amd = H[l][l] - H[l + 1][l + 1];
+        bc =  (T) 4.0 * H[l + 1][l] * H[l][l + 1];
+        evals[e] = (T) 0.5 * (apd + sqrt(amd * amd + bc));
+        evals[e + 1] = (T) 0.5 * (apd - sqrt(amd * amd + bc));
+        N -= 2;
+        e += 2;
+        r++;
+        it = 0;
+      }
+    }while(r > 0);
+    //Jasper
+    //Already converged
+    //--------------
+    if(N == 0) break;
+
+    DenseVector<T> ck,v; Resize(ck,2); Resize(v,2);
+
+    for(int m = N - 3; m >= l; m--)
+    {
+      ///Starting vector essentially random shift.
+      if(it%10 == 0 && N >= 3 && it > 0)
+      {
+        t = abs(H[N - 1][N - 2]) + abs(H[N - 2][N - 3]);
+        x = H[m][m] - t;
+        z = H[m + 1][m];
+      } else {
+      ///Starting vector implicit Q theorem
+        d = (H[N - 2][N - 2] - H[N - 1][N - 1]) * (T) 0.5;
+        t =  H[N - 1][N - 1] - H[N - 1][N - 2] * H[N - 1][N - 2] 
+	  / (d + sign(d) * sqrt(d * d + H[N - 1][N - 2] * H[N - 1][N - 2]));
+        x = H[m][m] - t;
+        z = H[m + 1][m];
+      }
+      //Jasper
+      //why it is here????
+      //-----------------------
+      if(m == l)
+        break;
+
+      u = abs(H[m][m - 1]) * (abs(y) + abs(z));
+      d = abs(x) * (abs(H[m - 1][m - 1]) + abs(H[m][m]) + abs(H[m + 1][m + 1]));
+      if ((T)abs(u + d) == (T)abs(d))
+      {
+        l = m;
+        break;
+      }
+    }
+    //Jasper
+    if(it > 1000000)
+    {
+      std::cout << "Wilkinson: bugger it got stuck after 100000 iterations" << std::endl;
+      std::cout << "got " << e << " evals " << l << " " << N << std::endl;
+      exit(1);
+    }
+    //
+    T s, c;
+    Givens_calc<T>(x, z, c, s);
+    Givens_mult<T>(H, l, l + 1, c, -s, 0);
+    Givens_mult<T>(H, l, l + 1, c,  s, 1);
+    Givens_mult<T>(P, l, l + 1, c,  s, 1);
+    //
+    for(int k = l; k < N - 2; ++k)
+    {
+      x = H.A[k + 1][k];
+      z = H.A[k + 2][k];
+      Givens_calc<T>(x, z, c, s);
+      Givens_mult<T>(H, k + 1, k + 2, c, -s, 0);
+      Givens_mult<T>(H, k + 1, k + 2, c,  s, 1);
+      Givens_mult<T>(P, k + 1, k + 2, c,  s, 1);
+    }
+    it++;
+    tot_it++;
+  }while(N > 1);
+
+  N = evals.size();
+  ///Annoying - UT solves in reverse order;
+  DenseVector<T> tmp(N);
+  for(int i = 0; i < N; ++i)
+    tmp[i] = evals[N-i-1];
+  evals = tmp;
+  //
+  UTeigenvectors(H, trows, evals, evecs);
+  //UTSymmEigenvectors(H, trows, evals, evecs);
+  for(int i = 0; i < evals.size(); ++i)
+  {
+    evecs[i] = P * evecs[i];
+    normalize(evecs[i]);
+    evals[i] = evals[i] * Hnorm;
+  }
+  // // FIXME this is to test
+  // Hin.write("evecs3", evecs);
+  // Hin.write("evals3", evals);
+  // // check rsd
+  // for(int i = 0; i < M; i++) {
+  //   vector<T> Aevec = Hin * evecs[i];
+  //   RealD norm2(0.);
+  //   for(int j = 0; j < M; j++) {
+  //     norm2 += (Aevec[j] - evals[i] * evecs[i][j]) * (Aevec[j] - evals[i] * evecs[i][j]);
+  //   }
+  // }
+  return tot_it;
+}
+
+template <class T>
+void Hess(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){
+
+  /**
+  turn a matrix A =
+  x  x  x  x  x
+  x  x  x  x  x
+  x  x  x  x  x
+  x  x  x  x  x
+  x  x  x  x  x
+  into
+  x  x  x  x  x
+  x  x  x  x  x
+  0  x  x  x  x
+  0  0  x  x  x
+  0  0  0  x  x
+  with householder rotations
+  Slow.
+  */
+  int N ; SizeSquare(A,N);
+  DenseVector<T > p; Resize(p,N); Fill(p,0);
+
+  for(int k=start;k<N-2;k++){
+    //cerr << "hess" << k << std::endl;
+    DenseVector<T > ck,v; Resize(ck,N-k-1); Resize(v,N-k-1);
+    for(int i=k+1;i<N;i++){ck[i-k-1] = A(i,k);}  ///kth column
+    normalize(ck);    ///Normalization cancels in PHP anyway
+    T beta;
+    Householder_vector<T >(ck, 0, ck.size()-1, v, beta);  ///Householder vector
+    Householder_mult<T>(A,v,beta,start,k+1,N-1,0);  ///A -> PA
+    Householder_mult<T >(A,v,beta,start,k+1,N-1,1);  ///PA -> PAP^H
+    ///Accumulate eigenvector
+    Householder_mult<T >(Q,v,beta,start,k+1,N-1,1);  ///Q -> QP^H
+  }
+  /*for(int l=0;l<N-2;l++){
+    for(int k=l+2;k<N;k++){
+    A(0,k,l);
+    }
+    }*/
+}
+
+template <class T>
+void Tri(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){
+///Tridiagonalize a matrix
+  int N; SizeSquare(A,N);
+  Hess(A,Q,start);
+  /*for(int l=0;l<N-2;l++){
+    for(int k=l+2;k<N;k++){
+    A(0,l,k);
+    }
+    }*/
+}
+
+template <class T>
+void ForceTridiagonal(DenseMatrix<T> &A){
+///Tridiagonalize a matrix
+  int N ; SizeSquare(A,N);
+  for(int l=0;l<N-2;l++){
+    for(int k=l+2;k<N;k++){
+      A[l][k]=0;
+      A[k][l]=0;
+    }
+  }
+}
+
+template <class T>
+int my_SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
+  ///Solve a symmetric eigensystem, not necessarily in tridiagonal form
+  int N; SizeSquare(Ain,N);
+  DenseMatrix<T > A; A = Ain;
+  DenseMatrix<T > Q; Resize(Q,N,N); Unity(Q);
+  Tri(A,Q,0);
+  int it = my_Wilkinson<T>(A, evals, evecs, small);
+  for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];}
+  return it;
+}
+
+
+template <class T>
+int Wilkinson(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
+  return my_Wilkinson(Ain, evals, evecs, small);
+}
+
+template <class T>
+int SymmEigensystem(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
+  return my_SymmEigensystem(Ain, evals, evecs, small);
+}
+
+template <class T>
+int Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
+///Solve a general eigensystem, not necessarily in tridiagonal form
+  int N = Ain.dim;
+  DenseMatrix<T > A(N); A = Ain;
+  DenseMatrix<T > Q(N);Q.Unity();
+  Hess(A,Q,0);
+  int it = QReigensystem<T>(A, evals, evecs, small);
+  for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];}
+  return it;
+}
+
+}
+#endif
--- a/lib/algorithms/densematrix/Householder.h
+++ b/lib/algorithms/densematrix/Householder.h
@@ -0,0 +1,242 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./lib/algorithms/iterative/Householder.h
+
+    Copyright (C) 2015
+
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#ifndef HOUSEHOLDER_H
+#define HOUSEHOLDER_H
+
+#define TIMER(A) std::cout << GridLogMessage << __FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
+#define ENTER()  std::cout << GridLogMessage << "ENTRY "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
+#define LEAVE()  std::cout << GridLogMessage << "EXIT  "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
+
+#include <cstdlib>
+#include <string>
+#include <cmath>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <fstream>
+#include <complex>
+#include <algorithm>
+
+namespace Grid {
+/** Comparison function for finding the max element in a vector **/
+template <class T> bool cf(T i, T j) { 
+  return abs(i) < abs(j); 
+}
+
+/** 
+	Calculate a real Givens angle 
+ **/
+template <class T> inline void Givens_calc(T y, T z, T &c, T &s){
+
+  RealD mz = (RealD)abs(z);
+  
+  if(mz==0.0){
+    c = 1; s = 0;
+  }
+  if(mz >= (RealD)abs(y)){
+    T t = -y/z;
+    s = (T)1.0 / sqrt ((T)1.0 + t * t);
+    c = s * t;
+  } else {
+    T t = -z/y;
+    c = (T)1.0 / sqrt ((T)1.0 + t * t);
+    s = c * t;
+  }
+}
+
+template <class T> inline void Givens_mult(DenseMatrix<T> &A,  int i, int k, T c, T s, int dir)
+{
+  int q ; SizeSquare(A,q);
+
+  if(dir == 0){
+    for(int j=0;j<q;j++){
+      T nu = A[i][j];
+      T w  = A[k][j];
+      A[i][j] = (c*nu + s*w);
+      A[k][j] = (-s*nu + c*w);
+    }
+  }
+
+  if(dir == 1){
+    for(int j=0;j<q;j++){
+      T nu = A[j][i];
+      T w  = A[j][k];
+      A[j][i] = (c*nu - s*w);
+      A[j][k] = (s*nu + c*w);
+    }
+  }
+}
+
+/**
+	from input = x;
+	Compute the complex Householder vector, v, such that
+	P = (I - b v transpose(v) )
+	b = 2/v.v
+
+	P | x |    | x | k = 0
+	| x |    | 0 | 
+	| x | =  | 0 |
+	| x |    | 0 | j = 3
+	| x |	   | x |
+
+	These are the "Unreduced" Householder vectors.
+
+ **/
+template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, DenseVector<T> &v, T &beta)
+{
+  int N ; Size(input,N);
+  T m = *max_element(input.begin() + k, input.begin() + j + 1, cf<T> );
+
+  if(abs(m) > 0.0){
+    T alpha = 0;
+
+    for(int i=k; i<j+1; i++){
+      v[i] = input[i]/m;
+      alpha = alpha + v[i]*conj(v[i]);
+    }
+    alpha = sqrt(alpha);
+    beta = (T)1.0/(alpha*(alpha + abs(v[k]) ));
+
+    if(abs(v[k]) > 0.0)  v[k] = v[k] + (v[k]/abs(v[k]))*alpha;
+    else                 v[k] = -alpha;
+  } else{
+    for(int i=k; i<j+1; i++){
+      v[i] = 0.0;
+    } 
+  }
+}
+
+/**
+	from input = x;
+	Compute the complex Householder vector, v, such that
+	P = (I - b v transpose(v) )
+	b = 2/v.v
+
+	Px = alpha*e_dir
+
+	These are the "Unreduced" Householder vectors.
+
+ **/
+
+template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, int dir, DenseVector<T> &v, T &beta)
+{
+  int N = input.size();
+  T m = *max_element(input.begin() + k, input.begin() + j + 1, cf);
+  
+  if(abs(m) > 0.0){
+    T alpha = 0;
+
+    for(int i=k; i<j+1; i++){
+      v[i] = input[i]/m;
+      alpha = alpha + v[i]*conj(v[i]);
+    }
+    
+    alpha = sqrt(alpha);
+    beta = 1.0/(alpha*(alpha + abs(v[dir]) ));
+	
+    if(abs(v[dir]) > 0.0) v[dir] = v[dir] + (v[dir]/abs(v[dir]))*alpha;
+    else                  v[dir] = -alpha;
+  }else{
+    for(int i=k; i<j+1; i++){
+      v[i] = 0.0;
+    } 
+  }
+}
+
+/**
+	Compute the product PA if trans = 0
+	AP if trans = 1
+	P = (I - b v transpose(v) )
+	b = 2/v.v
+	start at element l of matrix A
+	v is of length j - k + 1 of v are nonzero
+ **/
+
+template <class T> inline void Householder_mult(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int k, int j, int trans)
+{
+  int N ; SizeSquare(A,N);
+
+  if(abs(beta) > 0.0){
+    for(int p=l; p<N; p++){
+      T s = 0;
+      if(trans==0){
+	for(int i=k;i<j+1;i++) s += conj(v[i-k])*A[i][p];
+	s *= beta;
+	for(int i=k;i<j+1;i++){ A[i][p] = A[i][p]-s*conj(v[i-k]);}
+      } else {
+	for(int i=k;i<j+1;i++){ s += conj(v[i-k])*A[p][i];}
+	s *= beta;
+	for(int i=k;i<j+1;i++){ A[p][i]=A[p][i]-s*conj(v[i-k]);}
+      }
+    }
+  }
+}
+
+/**
+	Compute the product PA if trans = 0
+	AP if trans = 1
+	P = (I - b v transpose(v) )
+	b = 2/v.v
+	start at element l of matrix A
+	v is of length j - k + 1 of v are nonzero
+	A is tridiagonal
+ **/
+template <class T> inline void Householder_mult_tri(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int M, int k, int j, int trans)
+{
+  if(abs(beta) > 0.0){
+
+    int N ; SizeSquare(A,N);
+
+    DenseMatrix<T> tmp; Resize(tmp,N,N); Fill(tmp,0); 
+
+    T s;
+    for(int p=l; p<M; p++){
+      s = 0;
+      if(trans==0){
+	for(int i=k;i<j+1;i++) s = s + conj(v[i-k])*A[i][p];
+      }else{
+	for(int i=k;i<j+1;i++) s = s + v[i-k]*A[p][i];
+      }
+      s = beta*s;
+      if(trans==0){
+	for(int i=k;i<j+1;i++) tmp[i][p] = tmp(i,p) - s*v[i-k];
+      }else{
+	for(int i=k;i<j+1;i++) tmp[p][i] = tmp[p][i] - s*conj(v[i-k]);
+      }
+    }
+    for(int p=l; p<M; p++){
+      if(trans==0){
+	for(int i=k;i<j+1;i++) A[i][p] = A[i][p] + tmp[i][p];
+      }else{
+	for(int i=k;i<j+1;i++) A[p][i] = A[p][i] + tmp[p][i];
+      }
+    }
+  }
+}
+}
+#endif
--- a/lib/algorithms/iterative/ConjugateGradientMixedPrec.h
+++ b/lib/algorithms/iterative/ConjugateGradientMixedPrec.h
@@ -60,6 +60,7 @@ namespace Grid {
    }
  
    void operator() (const FieldD &src_d_in, FieldD &sol_d){
+
      TotalInnerIterations = 0;
 	
      GridStopWatch TotalTimer;
--- a/lib/algorithms/iterative/ImplicitlyRestartedLanczosCJ.h
+++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczosCJ.h
--- a/lib/algorithms/iterative/SimpleLanczos.h
+++ b/lib/algorithms/iterative/SimpleLanczos.h
@@ -0,0 +1,933 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
+
+    Copyright (C) 2015
+
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+Author: paboyle <paboyle@ph.ed.ac.uk>
+Author: Chulwoo Jung <chulwoo@bnl.gov>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#ifndef GRID_LANC_H
+#define GRID_LANC_H
+
+#include <string.h>		//memset
+
+#ifdef USE_LAPACK
+#ifdef USE_MKL
+#include<mkl_lapack.h>
+#else
+void LAPACK_dstegr (char *jobz, char *range, int *n, double *d, double *e,
+		    double *vl, double *vu, int *il, int *iu, double *abstol,
+		    int *m, double *w, double *z, int *ldz, int *isuppz,
+		    double *work, int *lwork, int *iwork, int *liwork,
+		    int *info);
+//#include <lapacke/lapacke.h>
+#endif
+#endif
+
+#include <Grid/algorithms/densematrix/DenseMatrix.h>
+//#include <Grid/algorithms/iterative/EigenSort.h>
+
+// eliminate temorary vector in calc()
+#define MEM_SAVE
+
+namespace Grid
+{
+
+  struct Bisection
+  {
+
+#if 0
+    static void get_eig2 (int row_num, std::vector < RealD > &ALPHA,
+			  std::vector < RealD > &BETA,
+			  std::vector < RealD > &eig)
+    {
+      int i, j;
+        std::vector < RealD > evec1 (row_num + 3);
+        std::vector < RealD > evec2 (row_num + 3);
+      RealD eps2;
+        ALPHA[1] = 0.;
+        BETHA[1] = 0.;
+      for (i = 0; i < row_num - 1; i++)
+	{
+	  ALPHA[i + 1] = A[i * (row_num + 1)].real ();
+	  BETHA[i + 2] = A[i * (row_num + 1) + 1].real ();
+	}
+      ALPHA[row_num] = A[(row_num - 1) * (row_num + 1)].real ();
+        bisec (ALPHA, BETHA, row_num, 1, row_num, 1e-10, 1e-10, evec1, eps2);
+        bisec (ALPHA, BETHA, row_num, 1, row_num, 1e-16, 1e-16, evec2, eps2);
+
+      // Do we really need to sort here?
+      int begin = 1;
+      int end = row_num;
+      int swapped = 1;
+      while (swapped)
+	{
+	  swapped = 0;
+	  for (i = begin; i < end; i++)
+	    {
+	      if (mag (evec2[i]) > mag (evec2[i + 1]))
+		{
+		  swap (evec2 + i, evec2 + i + 1);
+		  swapped = 1;
+		}
+	    }
+	  end--;
+	  for (i = end - 1; i >= begin; i--)
+	    {
+	      if (mag (evec2[i]) > mag (evec2[i + 1]))
+		{
+		  swap (evec2 + i, evec2 + i + 1);
+		  swapped = 1;
+		}
+	    }
+	  begin++;
+	}
+
+      for (i = 0; i < row_num; i++)
+	{
+	  for (j = 0; j < row_num; j++)
+	    {
+	      if (i == j)
+		H[i * row_num + j] = evec2[i + 1];
+	      else
+		H[i * row_num + j] = 0.;
+	    }
+	}
+    }
+#endif
+
+    static void bisec (std::vector < RealD > &c,
+		       std::vector < RealD > &b,
+		       int n,
+		       int m1,
+		       int m2,
+		       RealD eps1,
+		       RealD relfeh, std::vector < RealD > &x, RealD & eps2)
+    {
+      std::vector < RealD > wu (n + 2);
+
+      RealD h, q, x1, xu, x0, xmin, xmax;
+      int i, a, k;
+
+      b[1] = 0.0;
+      xmin = c[n] - fabs (b[n]);
+      xmax = c[n] + fabs (b[n]);
+      for (i = 1; i < n; i++)
+	{
+	  h = fabs (b[i]) + fabs (b[i + 1]);
+	  if (c[i] + h > xmax)
+	    xmax = c[i] + h;
+	  if (c[i] - h < xmin)
+	    xmin = c[i] - h;
+	}
+      xmax *= 2.;
+
+      eps2 = relfeh * ((xmin + xmax) > 0.0 ? xmax : -xmin);
+      if (eps1 <= 0.0)
+	eps1 = eps2;
+      eps2 = 0.5 * eps1 + 7.0 * (eps2);
+      x0 = xmax;
+      for (i = m1; i <= m2; i++)
+	{
+	  x[i] = xmax;
+	  wu[i] = xmin;
+	}
+
+      for (k = m2; k >= m1; k--)
+	{
+	  xu = xmin;
+	  i = k;
+	  do
+	    {
+	      if (xu < wu[i])
+		{
+		  xu = wu[i];
+		  i = m1 - 1;
+		}
+	      i--;
+	    }
+	  while (i >= m1);
+	  if (x0 > x[k])
+	    x0 = x[k];
+	  while ((x0 - xu) > 2 * relfeh * (fabs (xu) + fabs (x0)) + eps1)
+	    {
+	      x1 = (xu + x0) / 2;
+
+	      a = 0;
+	      q = 1.0;
+	      for (i = 1; i <= n; i++)
+		{
+		  q =
+		    c[i] - x1 -
+		    ((q != 0.0) ? b[i] * b[i] / q : fabs (b[i]) / relfeh);
+		  if (q < 0)
+		    a++;
+		}
+//      printf("x1=%0.14e a=%d\n",x1,a);
+	      if (a < k)
+		{
+		  if (a < m1)
+		    {
+		      xu = x1;
+		      wu[m1] = x1;
+		    }
+		  else
+		    {
+		      xu = x1;
+		      wu[a + 1] = x1;
+		      if (x[a] > x1)
+			x[a] = x1;
+		    }
+		}
+	      else
+		x0 = x1;
+	    }
+	  printf ("x0=%0.14e xu=%0.14e k=%d\n", x0, xu, k);
+	  x[k] = (x0 + xu) / 2;
+	}
+    }
+  };
+
+/////////////////////////////////////////////////////////////
+// Implicitly restarted lanczos
+/////////////////////////////////////////////////////////////
+
+
+  template < class Field > class SimpleLanczos
+  {
+
+    const RealD small = 1.0e-16;
+  public:
+    int lock;
+    int get;
+    int Niter;
+    int converged;
+
+    int Nstop;			// Number of evecs checked for convergence
+    int Nk;			// Number of converged sought
+    int Np;			// Np -- Number of spare vecs in kryloc space
+    int Nm;			// Nm -- total number of vectors
+
+
+    RealD OrthoTime;
+
+    RealD eresid;
+
+    SortEigen < Field > _sort;
+
+    LinearOperatorBase < Field > &_Linop;
+
+    OperatorFunction < Field > &_poly;
+
+    /////////////////////////
+    // Constructor
+    /////////////////////////
+    void init (void)
+    {
+    };
+    void Abort (int ff, DenseVector < RealD > &evals,
+		DenseVector < DenseVector < RealD > >&evecs);
+
+    SimpleLanczos (LinearOperatorBase < Field > &Linop,	// op
+		   OperatorFunction < Field > &poly,	// polynmial
+		   int _Nstop,	// sought vecs
+		   int _Nk,	// sought vecs
+		   int _Nm,	// spare vecs
+		   RealD _eresid,	// resid in lmdue deficit 
+		   int _Niter):	// Max iterations
+     
+      _Linop (Linop),
+      _poly (poly),
+      Nstop (_Nstop), Nk (_Nk), Nm (_Nm), eresid (_eresid), Niter (_Niter)
+    {
+      Np = Nm - Nk;
+      assert (Np > 0);
+    };
+
+    /////////////////////////
+    // Sanity checked this routine (step) against Saad.
+    /////////////////////////
+    void RitzMatrix (DenseVector < Field > &evec, int k)
+    {
+
+      if (1)
+	return;
+
+      GridBase *grid = evec[0]._grid;
+      Field w (grid);
+      std::cout << GridLogMessage << "RitzMatrix " << std::endl;
+      for (int i = 0; i < k; i++)
+	{
+	  _Linop.HermOp (evec[i], w);
+//      _poly(_Linop,evec[i],w);
+	  std::cout << GridLogMessage << "[" << i << "] ";
+	  for (int j = 0; j < k; j++)
+	    {
+	      ComplexD in = innerProduct (evec[j], w);
+	      if (fabs ((double) i - j) > 1)
+		{
+		  if (abs (in) > 1.0e-9)
+		    {
+		      std::cout << GridLogMessage << "oops" << std::endl;
+		      abort ();
+		    }
+		  else
+		    std::cout << GridLogMessage << " 0 ";
+		}
+	      else
+		{
+		  std::cout << GridLogMessage << " " << in << " ";
+		}
+	    }
+	  std::cout << GridLogMessage << std::endl;
+	}
+    }
+
+    void step (DenseVector < RealD > &lmd,
+	       DenseVector < RealD > &lme,
+	       Field & last, Field & current, Field & next, uint64_t k)
+    {
+      if (lmd.size () <= k)
+	lmd.resize (k + Nm);
+      if (lme.size () <= k)
+	lme.resize (k + Nm);
+
+
+//      _poly(_Linop,current,next );   // 3. wk:=Avk−βkv_{k−1}
+      _Linop.HermOp (current, next);	// 3. wk:=Avk−βkv_{k−1}
+      if (k > 0)
+	{
+	  next -= lme[k - 1] * last;
+	}
+//      std::cout<<GridLogMessage << "<last|next>" << innerProduct(last,next) <<std::endl;
+
+      ComplexD zalph = innerProduct (current, next);	// 4. αk:=(wk,vk)
+      RealD alph = real (zalph);
+
+      next = next - alph * current;	// 5. wk:=wk−αkvk
+//      std::cout<<GridLogMessage << "<current|next>" << innerProduct(current,next) <<std::endl;
+
+      RealD beta = normalise (next);	// 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
+      // 7. vk+1 := wk/βk+1
+//       norm=beta;
+
+      int interval = Nm / 100 + 1;
+      if ((k % interval) == 0)
+	std::
+	  cout << GridLogMessage << k << " : alpha = " << zalph << " beta " <<
+	  beta << std::endl;
+      const RealD tiny = 1.0e-20;
+      if (beta < tiny)
+	{
+	  std::cout << GridLogMessage << " beta is tiny " << beta << std::
+	    endl;
+	}
+      lmd[k] = alph;
+      lme[k] = beta;
+
+    }
+
+    void qr_decomp (DenseVector < RealD > &lmd,
+		    DenseVector < RealD > &lme,
+		    int Nk,
+		    int Nm,
+		    DenseVector < RealD > &Qt, RealD Dsh, int kmin, int kmax)
+    {
+      int k = kmin - 1;
+      RealD x;
+
+      RealD Fden = 1.0 / hypot (lmd[k] - Dsh, lme[k]);
+      RealD c = (lmd[k] - Dsh) * Fden;
+      RealD s = -lme[k] * Fden;
+
+      RealD tmpa1 = lmd[k];
+      RealD tmpa2 = lmd[k + 1];
+      RealD tmpb = lme[k];
+
+      lmd[k] = c * c * tmpa1 + s * s * tmpa2 - 2.0 * c * s * tmpb;
+      lmd[k + 1] = s * s * tmpa1 + c * c * tmpa2 + 2.0 * c * s * tmpb;
+      lme[k] = c * s * (tmpa1 - tmpa2) + (c * c - s * s) * tmpb;
+      x = -s * lme[k + 1];
+      lme[k + 1] = c * lme[k + 1];
+
+      for (int i = 0; i < Nk; ++i)
+	{
+	  RealD Qtmp1 = Qt[i + Nm * k];
+	  RealD Qtmp2 = Qt[i + Nm * (k + 1)];
+	  Qt[i + Nm * k] = c * Qtmp1 - s * Qtmp2;
+	  Qt[i + Nm * (k + 1)] = s * Qtmp1 + c * Qtmp2;
+	}
+
+      // Givens transformations
+      for (int k = kmin; k < kmax - 1; ++k)
+	{
+
+	  RealD Fden = 1.0 / hypot (x, lme[k - 1]);
+	  RealD c = lme[k - 1] * Fden;
+	  RealD s = -x * Fden;
+
+	  RealD tmpa1 = lmd[k];
+	  RealD tmpa2 = lmd[k + 1];
+	  RealD tmpb = lme[k];
+
+	  lmd[k] = c * c * tmpa1 + s * s * tmpa2 - 2.0 * c * s * tmpb;
+	  lmd[k + 1] = s * s * tmpa1 + c * c * tmpa2 + 2.0 * c * s * tmpb;
+	  lme[k] = c * s * (tmpa1 - tmpa2) + (c * c - s * s) * tmpb;
+	  lme[k - 1] = c * lme[k - 1] - s * x;
+
+	  if (k != kmax - 2)
+	    {
+	      x = -s * lme[k + 1];
+	      lme[k + 1] = c * lme[k + 1];
+	    }
+
+	  for (int i = 0; i < Nk; ++i)
+	    {
+	      RealD Qtmp1 = Qt[i + Nm * k];
+	      RealD Qtmp2 = Qt[i + Nm * (k + 1)];
+	      Qt[i + Nm * k] = c * Qtmp1 - s * Qtmp2;
+	      Qt[i + Nm * (k + 1)] = s * Qtmp1 + c * Qtmp2;
+	    }
+	}
+    }
+
+#ifdef USE_LAPACK
+#ifdef USE_MKL
+#define LAPACK_INT MKL_INT
+#else
+#define LAPACK_INT long long
+#endif
+    void diagonalize_lapack (DenseVector < RealD > &lmd, DenseVector < RealD > &lme, int N1,	// all
+			     int N2,	// get
+			     GridBase * grid)
+    {
+      const int size = Nm;
+      LAPACK_INT NN = N1;
+      double evals_tmp[NN];
+      double DD[NN];
+      double EE[NN];
+      for (int i = 0; i < NN; i++)
+	for (int j = i - 1; j <= i + 1; j++)
+	  if (j < NN && j >= 0)
+	    {
+	      if (i == j)
+		DD[i] = lmd[i];
+	      if (i == j)
+		evals_tmp[i] = lmd[i];
+	      if (j == (i - 1))
+		EE[j] = lme[j];
+	    }
+      LAPACK_INT evals_found;
+      LAPACK_INT lwork =
+	((18 * NN) >
+	 (1 + 4 * NN + NN * NN) ? (18 * NN) : (1 + 4 * NN + NN * NN));
+      LAPACK_INT liwork = 3 + NN * 10;
+      LAPACK_INT iwork[liwork];
+      double work[lwork];
+      LAPACK_INT isuppz[2 * NN];
+      char jobz = 'N';		// calculate evals only
+      char range = 'I';		// calculate il-th to iu-th evals
+      //    char range = 'A'; // calculate all evals
+      char uplo = 'U';		// refer to upper half of original matrix
+      char compz = 'I';		// Compute eigenvectors of tridiagonal matrix
+      int ifail[NN];
+      LAPACK_INT info;
+//  int total = QMP_get_number_of_nodes();
+//  int node = QMP_get_node_number();
+//  GridBase *grid = evec[0]._grid;
+      int total = grid->_Nprocessors;
+      int node = grid->_processor;
+      int interval = (NN / total) + 1;
+      double vl = 0.0, vu = 0.0;
+      LAPACK_INT il = interval * node + 1, iu = interval * (node + 1);
+      if (iu > NN)
+	iu = NN;
+      double tol = 0.0;
+      if (1)
+	{
+	  memset (evals_tmp, 0, sizeof (double) * NN);
+	  if (il <= NN)
+	    {
+	      printf ("total=%d node=%d il=%d iu=%d\n", total, node, il, iu);
+#ifdef USE_MKL
+	      dstegr (&jobz, &range, &NN,
+#else
+	      LAPACK_dstegr (&jobz, &range, &NN,
+#endif
+			     (double *) DD, (double *) EE, &vl, &vu, &il, &iu,	// these four are ignored if second parameteris 'A'
+			     &tol,	// tolerance
+			     &evals_found, evals_tmp, (double *) NULL, &NN,
+			     isuppz, work, &lwork, iwork, &liwork, &info);
+	      for (int i = iu - 1; i >= il - 1; i--)
+		{
+		  printf ("node=%d evals_found=%d evals_tmp[%d] = %g\n", node,
+			  evals_found, i - (il - 1), evals_tmp[i - (il - 1)]);
+		  evals_tmp[i] = evals_tmp[i - (il - 1)];
+		  if (il > 1)
+		    evals_tmp[i - (il - 1)] = 0.;
+		}
+	    }
+	  {
+	    grid->GlobalSumVector (evals_tmp, NN);
+	  }
+	}
+// cheating a bit. It is better to sort instead of just reversing it, but the document of the routine says evals are sorted in increasing order. qr gives evals in decreasing order.
+    }
+#undef LAPACK_INT
+#endif
+
+
+    void diagonalize (DenseVector < RealD > &lmd,
+		      DenseVector < RealD > &lme,
+		      int N2, int N1, GridBase * grid)
+    {
+
+#ifdef USE_LAPACK
+      const int check_lapack = 0;	// just use lapack if 0, check against lapack if 1
+
+      if (!check_lapack)
+	return diagonalize_lapack (lmd, lme, N2, N1, grid);
+
+//      diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid);
+#endif
+    }
+
+#if 1
+    static RealD normalise (Field & v)
+    {
+      RealD nn = norm2 (v);
+      nn = sqrt (nn);
+      v = v * (1.0 / nn);
+      return nn;
+    }
+
+    void orthogonalize (Field & w, DenseVector < Field > &evec, int k)
+    {
+      double t0 = -usecond () / 1e6;
+      typedef typename Field::scalar_type MyComplex;
+      MyComplex ip;
+
+      if (0)
+	{
+	  for (int j = 0; j < k; ++j)
+	    {
+	      normalise (evec[j]);
+	      for (int i = 0; i < j; i++)
+		{
+		  ip = innerProduct (evec[i], evec[j]);	// are the evecs normalised? ; this assumes so.
+		  evec[j] = evec[j] - ip * evec[i];
+		}
+	    }
+	}
+
+      for (int j = 0; j < k; ++j)
+	{
+	  ip = innerProduct (evec[j], w);	// are the evecs normalised? ; this assumes so.
+	  w = w - ip * evec[j];
+	}
+      normalise (w);
+      t0 += usecond () / 1e6;
+      OrthoTime += t0;
+    }
+
+    void setUnit_Qt (int Nm, DenseVector < RealD > &Qt)
+    {
+      for (int i = 0; i < Qt.size (); ++i)
+	Qt[i] = 0.0;
+      for (int k = 0; k < Nm; ++k)
+	Qt[k + k * Nm] = 1.0;
+    }
+
+
+    void calc (DenseVector < RealD > &eval, const Field & src, int &Nconv)
+    {
+
+      GridBase *grid = src._grid;
+//      assert(grid == src._grid);
+
+      std::
+	cout << GridLogMessage << " -- Nk = " << Nk << " Np = " << Np << std::
+	endl;
+      std::cout << GridLogMessage << " -- Nm = " << Nm << std::endl;
+      std::cout << GridLogMessage << " -- size of eval   = " << eval.
+	size () << std::endl;
+
+//      assert(c.size() && Nm == eval.size());
+
+      DenseVector < RealD > lme (Nm);
+      DenseVector < RealD > lmd (Nm);
+
+
+      Field current (grid);
+      Field last (grid);
+      Field next (grid);
+
+      Nconv = 0;
+
+      RealD beta_k;
+
+      // Set initial vector
+      // (uniform vector) Why not src??
+      //      evec[0] = 1.0;
+      current = src;
+      std::cout << GridLogMessage << "norm2(src)= " << norm2 (src) << std::
+	endl;
+      normalise (current);
+      std::
+	cout << GridLogMessage << "norm2(evec[0])= " << norm2 (current) <<
+	std::endl;
+
+      // Initial Nk steps
+      OrthoTime = 0.;
+      double t0 = usecond () / 1e6;
+      RealD norm;		// sqrt norm of last vector
+
+      uint64_t iter = 0;
+
+      bool initted = false;
+      std::vector < RealD > low (Nstop * 10);
+      std::vector < RealD > high (Nstop * 10);
+      RealD cont = 0.;
+      while (1) {
+	  cont = 0.;
+	  std::vector < RealD > lme2 (Nm);
+	  std::vector < RealD > lmd2 (Nm);
+	  for (uint64_t k = 0; k < Nm; ++k, iter++) {
+	      step (lmd, lme, last, current, next, iter);
+	      last = current;
+	      current = next;
+	    }
+	  double t1 = usecond () / 1e6;
+	  std::cout << GridLogMessage << "IRL::Initial steps: " << t1 -
+	    t0 << "seconds" << std::endl;
+	  t0 = t1;
+	  std::
+	    cout << GridLogMessage << "IRL::Initial steps:OrthoTime " <<
+	    OrthoTime << "seconds" << std::endl;
+
+	  // getting eigenvalues
+	  lmd2.resize (iter + 2);
+	  lme2.resize (iter + 2);
+	  for (uint64_t k = 0; k < iter; ++k) {
+	      lmd2[k + 1] = lmd[k];
+	      lme2[k + 2] = lme[k];
+	    }
+	  t1 = usecond () / 1e6;
+	  std::cout << GridLogMessage << "IRL:: copy: " << t1 -
+	    t0 << "seconds" << std::endl;
+	  t0 = t1;
+	  {
+	    int total = grid->_Nprocessors;
+	    int node = grid->_processor;
+	    int interval = (Nstop / total) + 1;
+	    int iu = (iter + 1) - (interval * node + 1);
+	    int il = (iter + 1) - (interval * (node + 1));
+	    std::vector < RealD > eval2 (iter + 3);
+	    RealD eps2;
+	    Bisection::bisec (lmd2, lme2, iter, il, iu, 1e-16, 1e-10, eval2,
+			      eps2);
+//        diagonalize(eval2,lme2,iter,Nk,grid);
+	    RealD diff = 0.;
+	    for (int i = il; i <= iu; i++) {
+		if (initted)
+		  diff =
+		    fabs (eval2[i] - high[iu-i]) / (fabs (eval2[i]) +
+						      fabs (high[iu-i]));
+		if (initted && (diff > eresid))
+		  cont = 1.;
+		if (initted)
+		  printf ("eval[%d]=%0.14e %0.14e, %0.14e\n", i, eval2[i],
+			  high[iu-i], diff);
+		high[iu-i] = eval2[i];
+	      }
+	    il = (interval * node + 1);
+	    iu = (interval * (node + 1));
+	    Bisection::bisec (lmd2, lme2, iter, il, iu, 1e-16, 1e-10, eval2,
+			      eps2);
+	    for (int i = il; i <= iu; i++) {
+		if (initted)
+		  diff =
+		    fabs (eval2[i] - low[i]) / (fabs (eval2[i]) +
+						fabs (low[i]));
+		if (initted && (diff > eresid))
+		  cont = 1.;
+		if (initted)
+		  printf ("eval[%d]=%0.14e %0.14e, %0.14e\n", i, eval2[i],
+			  low[i], diff);
+		low[i] = eval2[i];
+	      }
+	    t1 = usecond () / 1e6;
+	    std::cout << GridLogMessage << "IRL:: diagonalize: " << t1 -
+	      t0 << "seconds" << std::endl;
+	    t0 = t1;
+	  }
+
+	  for (uint64_t k = 0; k < Nk; ++k) {
+//          eval[k] = eval2[k];
+	    }
+	  if (initted)
+	    {
+	      grid->GlobalSumVector (&cont, 1);
+	      if (cont < 1.) return;
+	    }
+	  initted = true;
+	}
+
+    }
+
+
+
+
+
+
+/**
+   There is some matrix Q such that for any vector y
+   Q.e_1 = y and Q is unitary.
+**/
+    template < class T >
+      static T orthQ (DenseMatrix < T > &Q, DenseVector < T > y)
+    {
+      int N = y.size ();	//Matrix Size
+      Fill (Q, 0.0);
+      T tau;
+      for (int i = 0; i < N; i++)
+	{
+	  Q[i][0] = y[i];
+	}
+      T sig = conj (y[0]) * y[0];
+      T tau0 = fabs (sqrt (sig));
+
+      for (int j = 1; j < N; j++)
+	{
+	  sig += conj (y[j]) * y[j];
+	  tau = abs (sqrt (sig));
+
+	  if (abs (tau0) > 0.0)
+	    {
+
+	      T gam = conj ((y[j] / tau) / tau0);
+	      for (int k = 0; k <= j - 1; k++)
+		{
+		  Q[k][j] = -gam * y[k];
+		}
+	      Q[j][j] = tau0 / tau;
+	    }
+	  else
+	    {
+	      Q[j - 1][j] = 1.0;
+	    }
+	  tau0 = tau;
+	}
+      return tau;
+    }
+
+/**
+	There is some matrix Q such that for any vector y
+	Q.e_k = y and Q is unitary.
+**/
+    template < class T >
+      static T orthU (DenseMatrix < T > &Q, DenseVector < T > y)
+    {
+      T tau = orthQ (Q, y);
+      SL (Q);
+      return tau;
+    }
+
+
+/**
+	Wind up with a matrix with the first con rows untouched
+
+say con = 2
+	Q is such that Qdag H Q has {x, x, val, 0, 0, 0, 0, ...} as 1st colum
+	and the matrix is upper hessenberg
+	and with f and Q appropriately modidied with Q is the arnoldi factorization
+
+**/
+
+    template < class T > static void Lock (DenseMatrix < T > &H,	///Hess mtx     
+					   DenseMatrix < T > &Q,	///Lock Transform
+					   T val,	///value to be locked
+					   int con,	///number already locked
+					   RealD small, int dfg, bool herm)
+    {
+      //ForceTridiagonal(H);
+
+      int M = H.dim;
+      DenseVector < T > vec;
+      Resize (vec, M - con);
+
+      DenseMatrix < T > AH;
+      Resize (AH, M - con, M - con);
+      AH = GetSubMtx (H, con, M, con, M);
+
+      DenseMatrix < T > QQ;
+      Resize (QQ, M - con, M - con);
+
+      Unity (Q);
+      Unity (QQ);
+
+      DenseVector < T > evals;
+      Resize (evals, M - con);
+      DenseMatrix < T > evecs;
+      Resize (evecs, M - con, M - con);
+
+      Wilkinson < T > (AH, evals, evecs, small);
+
+      int k = 0;
+      RealD cold = abs (val - evals[k]);
+      for (int i = 1; i < M - con; i++)
+	{
+	  RealD cnew = abs (val - evals[i]);
+	  if (cnew < cold)
+	    {
+	      k = i;
+	      cold = cnew;
+	    }
+	}
+      vec = evecs[k];
+
+      ComplexD tau;
+      orthQ (QQ, vec);
+      //orthQM(QQ,AH,vec);
+
+      AH = Hermitian (QQ) * AH;
+      AH = AH * QQ;
+
+      for (int i = con; i < M; i++)
+	{
+	  for (int j = con; j < M; j++)
+	    {
+	      Q[i][j] = QQ[i - con][j - con];
+	      H[i][j] = AH[i - con][j - con];
+	    }
+	}
+
+      for (int j = M - 1; j > con + 2; j--)
+	{
+
+	  DenseMatrix < T > U;
+	  Resize (U, j - 1 - con, j - 1 - con);
+	  DenseVector < T > z;
+	  Resize (z, j - 1 - con);
+	  T nm = norm (z);
+	  for (int k = con + 0; k < j - 1; k++)
+	    {
+	      z[k - con] = conj (H (j, k + 1));
+	    }
+	  normalise (z);
+
+	  RealD tmp = 0;
+	  for (int i = 0; i < z.size () - 1; i++)
+	    {
+	      tmp = tmp + abs (z[i]);
+	    }
+
+	  if (tmp < small / ((RealD) z.size () - 1.0))
+	    {
+	      continue;
+	    }
+
+	  tau = orthU (U, z);
+
+	  DenseMatrix < T > Hb;
+	  Resize (Hb, j - 1 - con, M);
+
+	  for (int a = 0; a < M; a++)
+	    {
+	      for (int b = 0; b < j - 1 - con; b++)
+		{
+		  T sum = 0;
+		  for (int c = 0; c < j - 1 - con; c++)
+		    {
+		      sum += H[a][con + 1 + c] * U[c][b];
+		    }		//sum += H(a,con+1+c)*U(c,b);}
+		  Hb[b][a] = sum;
+		}
+	    }
+
+	  for (int k = con + 1; k < j; k++)
+	    {
+	      for (int l = 0; l < M; l++)
+		{
+		  H[l][k] = Hb[k - 1 - con][l];
+		}
+	    }			//H(Hb[k-1-con][l] , l,k);}}
+
+	  DenseMatrix < T > Qb;
+	  Resize (Qb, M, M);
+
+	  for (int a = 0; a < M; a++)
+	    {
+	      for (int b = 0; b < j - 1 - con; b++)
+		{
+		  T sum = 0;
+		  for (int c = 0; c < j - 1 - con; c++)
+		    {
+		      sum += Q[a][con + 1 + c] * U[c][b];
+		    }		//sum += Q(a,con+1+c)*U(c,b);}
+		  Qb[b][a] = sum;
+		}
+	    }
+
+	  for (int k = con + 1; k < j; k++)
+	    {
+	      for (int l = 0; l < M; l++)
+		{
+		  Q[l][k] = Qb[k - 1 - con][l];
+		}
+	    }			//Q(Qb[k-1-con][l] , l,k);}}
+
+	  DenseMatrix < T > Hc;
+	  Resize (Hc, M, M);
+
+	  for (int a = 0; a < j - 1 - con; a++)
+	    {
+	      for (int b = 0; b < M; b++)
+		{
+		  T sum = 0;
+		  for (int c = 0; c < j - 1 - con; c++)
+		    {
+		      sum += conj (U[c][a]) * H[con + 1 + c][b];
+		    }		//sum += conj( U(c,a) )*H(con+1+c,b);}
+		  Hc[b][a] = sum;
+		}
+	    }
+
+	  for (int k = 0; k < M; k++)
+	    {
+	      for (int l = con + 1; l < j; l++)
+		{
+		  H[l][k] = Hc[k][l - 1 - con];
+		}
+	    }			//H(Hc[k][l-1-con] , l,k);}}
+
+	}
+    }
+#endif
+
+
+  };
+
+}
+#endif
--- a/lib/algorithms/iterative/bisec.c
+++ b/lib/algorithms/iterative/bisec.c
@@ -0,0 +1,122 @@
+#include <math.h>
+#include <stdlib.h>
+#include <vector>
+
+struct Bisection {
+
+static void get_eig2(int row_num,std::vector<RealD> &ALPHA,std::vector<RealD> &BETA, std::vector<RealD> & eig)
+{
+  int i,j;
+  std::vector<RealD> evec1(row_num+3);
+  std::vector<RealD> evec2(row_num+3);
+  RealD eps2;
+  ALPHA[1]=0.;
+  BETHA[1]=0.;
+  for(i=0;i<row_num-1;i++) {
+    ALPHA[i+1] = A[i*(row_num+1)].real();
+    BETHA[i+2] = A[i*(row_num+1)+1].real();
+  }
+  ALPHA[row_num] = A[(row_num-1)*(row_num+1)].real();
+  bisec(ALPHA,BETHA,row_num,1,row_num,1e-10,1e-10,evec1,eps2);
+  bisec(ALPHA,BETHA,row_num,1,row_num,1e-16,1e-16,evec2,eps2);
+
+  // Do we really need to sort here?
+  int begin=1;
+  int end = row_num;
+  int swapped=1;
+  while(swapped) {
+    swapped=0;
+    for(i=begin;i<end;i++){
+      if(mag(evec2[i])>mag(evec2[i+1]))	{
+	swap(evec2+i,evec2+i+1);
+	swapped=1;
+      }
+    }
+    end--;
+    for(i=end-1;i>=begin;i--){
+      if(mag(evec2[i])>mag(evec2[i+1]))	{
+	swap(evec2+i,evec2+i+1);
+	swapped=1;
+      }
+    }
+    begin++;
+  }
+
+  for(i=0;i<row_num;i++){
+    for(j=0;j<row_num;j++) {
+      if(i==j) H[i*row_num+j]=evec2[i+1];
+      else H[i*row_num+j]=0.;
+    }
+  }
+}
+
+static void bisec(std::vector<RealD> &c,   
+		  std::vector<RealD> &b,
+		  int n,
+		  int m1,
+		  int m2,
+		  RealD eps1,
+		  RealD relfeh,
+		  std::vector<RealD> &x,
+		  RealD &eps2)
+{
+  std::vector<RealD> wu(n+2);
+
+  RealD h,q,x1,xu,x0,xmin,xmax; 
+  int i,a,k;
+
+  b[1]=0.0;
+  xmin=c[n]-fabs(b[n]);
+  xmax=c[n]+fabs(b[n]);
+  for(i=1;i<n;i++){
+    h=fabs(b[i])+fabs(b[i+1]);
+    if(c[i]+h>xmax) xmax= c[i]+h;
+    if(c[i]-h<xmin) xmin= c[i]-h;
+  }
+  xmax *=2.;
+
+  eps2=relfeh*((xmin+xmax)>0.0 ? xmax : -xmin);
+  if(eps1<=0.0) eps1=eps2;
+  eps2=0.5*eps1+7.0*(eps2);
+  x0=xmax;
+  for(i=m1;i<=m2;i++){
+    x[i]=xmax;
+    wu[i]=xmin;
+  }
+
+  for(k=m2;k>=m1;k--){
+    xu=xmin;
+    i=k;
+    do{
+      if(xu<wu[i]){
+	xu=wu[i];
+	i=m1-1;
+      }
+      i--;
+    }while(i>=m1);
+    if(x0>x[k]) x0=x[k];
+    while((x0-xu)>2*relfeh*(fabs(xu)+fabs(x0))+eps1){
+      x1=(xu+x0)/2;
+
+      a=0;
+      q=1.0;
+      for(i=1;i<=n;i++){
+	q=c[i]-x1-((q!=0.0)? b[i]*b[i]/q:fabs(b[i])/relfeh);
+	if(q<0) a++;
+      }
+      //			printf("x1=%e a=%d\n",x1,a);
+      if(a<k){
+	if(a<m1){
+	  xu=x1;
+	  wu[m1]=x1;
+	}else {
+	  xu=x1;
+	  wu[a+1]=x1;
+	  if(x[a]>x1) x[a]=x1;
+	}
+      }else x0=x1;
+    }
+    x[k]=(x0+xu)/2;
+  }
+}
+}
--- a/lib/qcd/action/fermion/CayleyFermion5D.cc
+++ b/lib/qcd/action/fermion/CayleyFermion5D.cc
@@ -415,6 +415,8 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co
    assert(omega[i]!=Coeff_t(0.0));
    bs[i] = 0.5*(bpc/omega[i] + bmc);
    cs[i] = 0.5*(bpc/omega[i] - bmc);
+    std::cout<<GridLogMessage << "CayleyFermion5D "<<i<<" bs="<<bs[i]<<" cs="<<cs[i]<< std::endl;
+
  }

  ////////////////////////////////////////////////////////
--- a/lib/qcd/action/fermion/Fermion.h
+++ b/lib/qcd/action/fermion/Fermion.h
@@ -61,8 +61,8 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 #include <Grid/qcd/action/fermion/MobiusFermion.h>
 #include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
 #include <Grid/qcd/action/fermion/ZMobiusFermion.h>
-#include <Grid/qcd/action/fermion/SchurDiagTwoKappa.h>
 #include <Grid/qcd/action/fermion/ScaledShamirFermion.h>
+//#include <Grid/qcd/action/fermion/SchurDiagTwoKappa.h>
 #include <Grid/qcd/action/fermion/MobiusZolotarevFermion.h>
 #include <Grid/qcd/action/fermion/ShamirZolotarevFermion.h>
 #include <Grid/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h>
--- a/lib/qcd/action/fermion/SchurDiagTwoKappa.h
+++ b/lib/qcd/action/fermion/SchurDiagTwoKappa.h
@@ -1,3 +1,4 @@
+#if 1
    /*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 
@@ -97,6 +98,117 @@ namespace Grid {
    }
  };

+#if 0
+  ///////////////////////////////////////////////////////////////////////////////////////////////////////
+  // Copied from DiagTwoSolve
+  ///////////////////////////////////////////////////////////////////////////////////////////////////////
+  template<class Field> class SchurRedBlackDiagTwoSolve {
+  private:
+    OperatorFunction<Field> & _HermitianRBSolver;
+    int CBfactorise;
+  public:
+
+    /////////////////////////////////////////////////////
+    // Wrap the usual normal equations Schur trick
+    /////////////////////////////////////////////////////
+  SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver)  :
+     _HermitianRBSolver(HermitianRBSolver) 
+    { 
+      CBfactorise=0;
+    };
+
+    template<class Matrix>
+      void operator() (Matrix & _Matrix,const Field &in, Field &out){
+
+      // FIXME CGdiagonalMee not implemented virtual function
+      // FIXME use CBfactorise to control schur decomp
+      GridBase *grid = _Matrix.RedBlackGrid();
+      GridBase *fgrid= _Matrix.Grid();
+
+      SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
+ 
+      Field src_e(grid);
+      Field src_o(grid);
+      Field sol_e(grid);
+      Field sol_o(grid);
+      Field   tmp(grid);
+      Field  Mtmp(grid);
+      Field resid(fgrid);
+
+      pickCheckerboard(Even,src_e,in);
+      pickCheckerboard(Odd ,src_o,in);
+      pickCheckerboard(Even,sol_e,out);
+      pickCheckerboard(Odd ,sol_o,out);
+    
+      /////////////////////////////////////////////////////
+      // src_o = Mdag * (source_o - Moe MeeInv source_e)
+      /////////////////////////////////////////////////////
+      _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even);
+      _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);     
+      tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);     
+
+      // get the right MpcDag
+      _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);       
+
+      //////////////////////////////////////////////////////////////
+      // Call the red-black solver
+      //////////////////////////////////////////////////////////////
+      std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
+//      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);
+      _HermitianRBSolver(_HermOpEO,src_o,tmp);  assert(tmp.checkerboard==Odd);
+      _Matrix.MooeeInv(tmp,sol_o);        assert(  sol_o.checkerboard   ==Odd);
+
+      ///////////////////////////////////////////////////
+      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
+      ///////////////////////////////////////////////////
+      _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard   ==Even);
+      src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even);
+      _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even);
+     
+      setCheckerboard(out,sol_e); assert(  sol_e.checkerboard ==Even);
+      setCheckerboard(out,sol_o); assert(  sol_o.checkerboard ==Odd );
+
+      // Verify the unprec residual
+      _Matrix.M(out,resid); 
+      resid = resid-in;
+      RealD ns = norm2(in);
+      RealD nr = norm2(resid);
+
+      std::cout<<GridLogMessage << "SchurRedBlackDiagTwoKappa solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
+    }     
+  };
+#endif
+namespace QCD{
+    //
+    // Determinant is det of middle factor
+    // This assumes Mee is indept of U.
+    //
+    //
+    template<class Impl>
+    class SchurDifferentiableDiagTwo:  public SchurDiagTwoOperator<FermionOperator<Impl>,typename Impl::FermionField> 
+      {
+      public:
+      INHERIT_IMPL_TYPES(Impl);
+
+ 	typedef FermionOperator<Impl> Matrix;
+
+	SchurDifferentiableDiagTwo (Matrix &Mat) : SchurDiagTwoOperator<Matrix,FermionField>(Mat) {};
+    };
+#if 0
+    template<class Impl>
+    class SchurDifferentiableDiagTwoKappa :  public SchurDiagTwoKappaOperator<FermionOperator<Impl>,typename Impl::FermionField> 
+      {
+      public:
+      INHERIT_IMPL_TYPES(Impl);
+
+ 	typedef FermionOperator<Impl> Matrix;
+
+	SchurDifferentiableDiagTwoKappa (Matrix &Mat) : SchurDiagTwoKappaOperator<Matrix,FermionField>(Mat) {};
+    };
+#endif
+}
+
 }

 #endif
+#endif
--- a/lib/qcd/action/pseudofermion/EvenOddSchurDifferentiable.h
+++ b/lib/qcd/action/pseudofermion/EvenOddSchurDifferentiable.h
@@ -140,6 +140,7 @@ namespace Grid{

    };

+
  }
 }
 #endif