1
0
mirror of https://github.com/paboyle/Grid.git synced 2026-04-02 01:56:10 +01:00

Added eigensolver code for Arnoldi and Krylov–Schur

This commit is contained in:
Patrick Oare
2025-07-31 16:40:24 -04:00
parent 41f344bbd3
commit 33b80c4e8e
3 changed files with 1330 additions and 0 deletions

View File

@@ -0,0 +1,419 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/Arnoldi.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Chulwoo Jung <chulwoo@bnl.gov>
Author: Christoph Lehner <clehner@bnl.gov>
Author: Patrick Oare <poare@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_ARNOLDI_H
#define GRID_ARNOLDI_H
NAMESPACE_BEGIN(Grid);
/**
 * Options for which Ritz values to keep in implicit restart.
 */
enum RitzFilter {
  EvalNormSmall, // Keep evals with smallest norm
  EvalNormLarge, // Keep evals with largest norm
  EvalReSmall,   // Keep evals with smallest real part
  EvalReLarge    // Keep evals with largest real part
};

/**
 * Strict-weak-ordering comparator on complex Ritz values, selected by a RitzFilter.
 * Returns true when z1 is "preferred" (sorts before) z2 under the chosen filter.
 */
struct ComplexComparator
{
  RitzFilter f; // which ordering to apply

  ComplexComparator (RitzFilter _f) : f(_f) {}

  bool operator()(const std::complex<double> &z1, const std::complex<double> &z2) const {
    switch (f) {
    case EvalNormSmall:
      return std::abs(z1) < std::abs(z2);
    case EvalNormLarge:
      return std::abs(z1) > std::abs(z2);
    case EvalReSmall:
      return std::abs(std::real(z1)) < std::abs(std::real(z2));
    case EvalReLarge:
      return std::abs(std::real(z1)) > std::abs(std::real(z2));
    default:
      assert(0);
      // FIX: without this return, control could fall off the end of a non-void
      // function (undefined behavior) when NDEBUG disables the assert.
      return false;
    }
  }
};
/**
* Implementation of the Arnoldi algorithm.
*/
template<class Field>
class Arnoldi {
private:
std::string cname = std::string("Arnoldi");
int MaxIter; // Max iterations
RealD Tolerance;
RealD ssq;
RealD rtol;
int Nm; // Number of basis vectors to track (equals MaxIter if no restart)
int Nk; // Number of basis vectors to keep every restart (equals -1 if no restart)
int Nstop; // Stop after converging Nstop eigenvectors.
LinearOperatorBase<Field> &Linop;
GridBase *Grid;
RealD approxLambdaMax;
RealD beta_k;
Field f;
std::vector<Field> basis; // orthonormal Arnoldi basis
Eigen::MatrixXcd Hess; // Hessenberg matrix of size Nbasis (after construction)
Eigen::MatrixXcd Qt; // Transpose of basis rotation which projects out high modes.
Eigen::VectorXcd evals; // evals of Hess
Eigen::MatrixXcd littleEvecs; // Nm x Nm evecs matrix
std::vector<Field> evecs; // Vector of evec fields
RitzFilter ritzFilter; // how to sort evals
public:
Arnoldi(LinearOperatorBase<Field> &_Linop, GridBase *_Grid, RealD _Tolerance, RitzFilter filter = EvalReSmall)
: Linop(_Linop), Grid(_Grid), Tolerance(_Tolerance), ritzFilter(filter), f(_Grid), MaxIter(-1), Nm(-1), Nk(-1),
Nstop (-1), evals (0), evecs (), ssq (0.0), rtol (0.0), beta_k (0.0), approxLambdaMax (0.0)
{
f = Zero();
};
/**
* Runs the Arnoldi loop with(out) implicit restarting. For each iteration:
* - Runs an Arnoldi step.
* - Computes the eigensystem of the Hessenberg matrix.
* - Performs implicit restarting.
*/
void operator()(const Field& v0, int _maxIter, int _Nm, int _Nk, int _Nstop, bool doubleOrthog = false) {
MaxIter = _maxIter;
Nm = _Nm; Nk = _Nk;
Nstop = _Nstop;
ssq = norm2(v0);
RealD approxLambdaMax = approxMaxEval(v0);
rtol = Tolerance * approxLambdaMax;
ComplexComparator compareComplex (ritzFilter);
std::cout << GridLogMessage << "Comparing Ritz values with: " << ritzFilter << std::endl;
int start = 1;
Field startVec = v0;
littleEvecs = Eigen::MatrixXcd::Zero(Nm, Nm);
for (int i = 0; i < MaxIter; i++) {
std::cout << GridLogMessage << "Restart Iteration " << i << std::endl;
// Perform Arnoldi steps to compute Krylov basis and Rayleigh quotient (Hess)
arnoldiIteration(startVec, Nm, start, doubleOrthog);
startVec = f;
// compute eigensystem and sort evals
// compute_eigensystem();
compute_eigensystem(Hess);
std::cout << GridLogMessage << "Eigenvalues after Arnoldi step: " << std::endl << evals << std::endl;
std::sort(evals.begin(), evals.end(), compareComplex);
std::cout << GridLogMessage << "Ritz values after sorting (first Nk preserved): " << std::endl << evals << std::endl;
// SU(N)::tepidConfiguration
// Implicit restart to de-weight unwanted eigenvalues
implicitRestart(_Nm, _Nk); // probably can delete _Nm and _Nk from function args
start = Nk;
// check convergence and return if needed.
int Nconv = converged();
std::cout << GridLogMessage << "Number of evecs converged: " << Nconv << std::endl;
if (Nconv >= Nstop || i == MaxIter - 1) {
std::cout << GridLogMessage << "Converged with " << Nconv << " / " << Nstop << " eigenvectors on iteration "
<< i << "." << std::endl;
basisRotate(evecs, Qt, 0, Nk, 0, Nk, Nm);
std::cout << GridLogMessage << "Eigenvalues [first " << Nconv << " converged]: " << std::endl << evals << std::endl;
return;
}
}
}
/**
* Approximates the maximum eigenvalue of Linop.Op to normalize the residual and test for convergence.
*
* Parameters
* ----------
* Field& v0
* Source field to start with. Must have non-zero norm.
* int MAX_ITER (default = 50)
* Maximum number of iterations for power approximation.
*
* Returns
* -------
* RealD lamApprox
* Approximation of largest eigenvalue.
*/
RealD approxMaxEval(const Field& v0, int MAX_ITER = 50) {
assert (norm2(v0) > 1e-8); // must have relatively large source norm to start
RealD lamApprox = 0.0;
RealD denom = 1.0; RealD num = 1.0;
Field v0cp (Grid); Field tmp (Grid);
v0cp = v0;
denom = std::sqrt(norm2(v0cp));
for (int i = 0; i < MAX_ITER; i++) {
Linop.Op(v0cp, tmp); // CAREFUL: do not do Op(tmp, tmp)
v0cp = tmp;
num = std::sqrt(norm2(v0cp)); // num = |A^{n+1} v0|
lamApprox = num / denom; // lam = |A^{n+1} v0| / |A^n v0|
std::cout << GridLogDebug << "Approx for max eval: " << lamApprox << std::endl;
denom = num; // denom = |A^{n} v0|
}
return lamApprox;
}
/**
* Constructs the Arnoldi basis for the Krylov space K_n(D, src). (TODO make private)
*
* Parameters
* ----------
* v0 : Field&
* Source to generate Krylov basis.
* Nm : int
* Final size of the basis desired. If the basis becomes complete before a basis of size Nm is constructed
* (determined by relative tolerance Tolerance), stops iteration there.
* doubleOrthog : bool (default = false)
* Whether to double orthogonalize the basis (for numerical cancellations) or not.
* start : int (default = 0)
* If non-zero, assumes part of the Arnoldi basis has already been constructed.
*/
void arnoldiIteration(const Field& v0, int Nm, int start = 1, bool doubleOrthog = false)
{
ComplexD coeff;
Field w (Grid); // A acting on last Krylov vector.
if (start == 1) { // initialize everything that we need.
RealD v0Norm = 1 / std::sqrt(ssq);
basis.push_back(v0Norm * v0); // normalized source
Hess = Eigen::MatrixXcd::Zero(Nm, Nm);
f = Zero();
} else {
assert( start == basis.size() ); // should be starting at the end of basis (start = Nk)
Eigen::MatrixXcd HessCp = Hess;
Hess = Eigen::MatrixXcd::Zero(Nm, Nm);
Hess(Eigen::seqN(0, Nk), Eigen::seqN(0, Nk)) = HessCp;
}
// Construct next Arnoldi vector by normalizing w_i = Dv_i - \sum_j v_j h_{ji}
for (int i = start - 1; i < Nm; i++) {
Linop.Op(basis.back(), w);
for (int j = 0; j < basis.size(); j++) {
coeff = innerProduct(basis[j], w); // coeff = h_{ij}. Note that since {vi} is ONB it's OK to subtract it off after.
Hess(j, i) = coeff;
w -= coeff * basis[j];
}
if (doubleOrthog) {
// TODO implement
}
// add w_i to the pile
if (i < Nm - 1) {
coeff = std::sqrt(norm2(w));
Hess(i+1, i) = coeff;
basis.push_back(
(1.0/coeff) * w
);
}
// after iterations, update f and beta_k = ||f||
f = w; // make sure f is not normalized
beta_k = std::sqrt(norm2(f)); // beta_k = ||f_k|| determines convergence.
}
std::cout << GridLogMessage << "|f|^2 after Arnoldi step = " << norm2(f) << std::endl;
std::cout << GridLogDebug << "Computed Hessenberg matrix = " << std::endl << Hess << std::endl;
return;
}
/**
* Approximates the eigensystem of the linear operator by computing the eigensystem of
* the Hessenberg matrix. Assumes that the Hessenberg matrix has already been constructed (by
* calling the operator() function).
*
* TODO implement in parent class eventually.
*
* Parameters
* ----------
* Eigen::MatrixXcd& S
* Schur matrix (upper triangular) similar to original Rayleigh quotient.
*/
void compute_eigensystem(Eigen::MatrixXcd& S)
{
std::cout << GridLogMessage << "Computing eigenvalues." << std::endl;
evecs.clear();
Eigen::ComplexEigenSolver<Eigen::MatrixXcd> es;
es.compute(S);
evals = es.eigenvalues();
littleEvecs = es.eigenvectors();
// Convert evecs to lattice fields
for (int k = 0; k < evals.size(); k++) {
Eigen::VectorXcd vec = littleEvecs.col(k);
Field tmp (basis[0].Grid());
tmp = Zero();
for (int j = 0; j < basis.size(); j++) {
tmp = tmp + vec[j] * basis[j];
}
evecs.push_back(tmp);
}
std::cout << GridLogMessage << "Eigenvalues: " << std::endl << evals << std::endl;
}
/**
* Verifies the factorization DV = V^\dag H + f e^\dag with the last-computed
* V, H, f.
*/
// RealD verifyFactorization() {
// int k = basis.size(); // number of basis vectors, also the size of H.
// std::vector<Field> factorized (k, Zero());
// Field tmp (FGrid); tmp = Zero();
// for (int i = 0; i < basis.size(); i++) {
// Linop.Op(basis[i], tmp);
// }
// // basisRotate(basis, Q, 0, Nk, 0, Nk, Nm);
// // Linop.Op(, )
// }
/* Getters */
Eigen::MatrixXcd getHessenbergMat() { return Hess; }
Field getF() { return f; }
std::vector<Field> getBasis() { return basis; }
Eigen::VectorXcd getEvals() { return evals; }
std::vector<Field> getEvecs() { return evecs; }
/**
* Implements implicit restarting for Arnoldi. Assumes eigenvalues are sorted.
*
* Parameters
* ----------
* int _Nm
* Size of basis to keep (Hessenberg is MxM).
* int Nk
* Number of basis vectors to keep at each restart.
*/
void implicitRestart(int _Nm, int _Nk) {
assert ( _Nk <= _Nm );
Nm = _Nm; Nk = _Nk;
int Np = Nm - Nk; // keep Nk smallest (or largest, depends on sort function) evecs
std::cout << GridLogMessage << "Computing QR Factorizations." << std::endl;
Eigen::MatrixXcd Q = Eigen::MatrixXcd::Identity(Nm, Nm);
Eigen::MatrixXcd Qi (Nm, Nm);
Eigen::MatrixXcd R (Nm, Nm);
for (int i = Nk; i < Nm; i++) { // keep the first Nk eigenvalues and iterate through the last Np. Should loop Np times
// Useful debugging output
std::cout << GridLogDebug << "Computing QR factorization for i = " << i << std::endl;
std::cout << GridLogDebug << "Eval shift = " << evals[i] << std::endl;
std::cout << GridLogDebug << "Hess before rotation: " << Hess << std::endl;
// QR factorize
Eigen::HouseholderQR<Eigen::MatrixXcd> QR (Hess - evals[i] * Eigen::MatrixXcd::Identity(Nm, Nm));
Qi = QR.householderQ();
Q = Q * Qi;
Hess = Qi.adjoint() * Hess * Qi;
std::cout << GridLogDebug << "Qt up to i = " << Q.transpose() << std::endl;
}
std::cout << GridLogDebug << "Hess after all rotations: " << std::endl << Hess << std::endl;
// form Arnoldi vector f: f is normal to the basis vectors and its norm \beta is used to determine the Ritz estimate.
std::complex<double> beta = Hess(Nk, Nk-1);
std::complex<double> sigma = Q(Nm-1, Nk-1);
f = basis[Nk] * beta + f * sigma;
RealD betak = std::sqrt(norm2(f));
std::cout << GridLogMessage << "|f|^2 after implicit restart = " << norm2(f) << std::endl;
// Rotate basis by Qt
Qt = Q.transpose();
basisRotate(basis, Qt, 0, Nk + 1, 0, Nm, Nm);
// rotate
basisRotate(evecs, Qt, 0, Nk + 1, 0, Nm, Nm);
// Truncate the basis and restart
basis = std::vector<Field> (basis.begin(), basis.begin() + Nk);
// evecs = std::vector<Field> (evecs.begin(), evecs.begin() + Nk);
Hess = Hess(Eigen::seqN(0, Nk), Eigen::seqN(0, Nk));
std::cout << "evecs size: " << evecs.size() << std::endl;
}
/**
* Computes the number of Arnoldi eigenvectors that have converged. An eigenvector s is considered converged
* for a tolerance epsilon if
* r(s) := |\beta e_m^T s| < epsilon
* where beta is the norm of f_{m+1}.
*
* Parameters
* ----------
*
* Returns
* -------
* int : Number of converged eigenvectors.
*/
int converged() {
int Nconv = 0;
for (int k = 0; k < evecs.size(); k++) {
RealD emTs = std::abs(littleEvecs(Nm - 1, k)); // e_m^T s
RealD ritzEstimate = beta_k * emTs;
// TODO should be ritzEstimate < Tolerance * lambda_max
std::cout << GridLogMessage << "Ritz estimate for evec " << k << " = " << ritzEstimate << std::endl;
if (ritzEstimate < rtol) {
Nconv++;
}
}
return Nconv;
}
};
NAMESPACE_END(Grid);
#endif

View File

@@ -0,0 +1,277 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/ConjugateGradientTimeslice.h
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_CONJUGATE_GRADIENT_TIMESLICE_H
#define GRID_CONJUGATE_GRADIENT_TIMESLICE_H
NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////////////////////
// Base classes for iterative processes based on operators
// single input vec, single output vec.
/////////////////////////////////////////////////////////////
/**
 * Simple modification of conjugate gradient that outputs the residual as a function
 * of time, in order to study the large wavelength behavior of the solver.
 *
 * NOTE(review): the per-timeslice source norm (ssqt) is computed in the preamble but
 * not yet consumed in the iteration loop — the per-timeslice residual reporting is
 * presumably still to be wired in; confirm intent.
 */
template <class Field>
class ConjugateGradientTimeslice : public OperatorFunction<Field> {
public:
  using OperatorFunction<Field>::operator();

  bool ErrorOnNoConverge;       // throw an assert when the CG fails to converge. Defaults true.
  RealD Tolerance;              // relative tolerance: converged when |r|^2 <= Tolerance^2 * |src|^2
  Integer MaxIterations;        // iteration cap
  Integer IterationsToComplete; // Number of iterations the CG took to finish. Filled in upon completion
  RealD TrueResidual;           // |b - A psi| / |b| measured at exit

  /**
   * @param tol             relative residual tolerance
   * @param maxit           maximum number of CG iterations
   * @param err_on_no_conv  assert on failure to converge (default true)
   */
  ConjugateGradientTimeslice(RealD tol, Integer maxit, bool err_on_no_conv = true)
    // Initializer list follows member declaration order (avoids -Wreorder).
    : ErrorOnNoConverge(err_on_no_conv),
      Tolerance(tol),
      MaxIterations(maxit)
  {}

  // Per-iteration hook: override to record (k, alpha, beta).
  virtual void LogIteration(int k, RealD a, RealD b) {
    // std::cout << "ConjugageGradient::LogIteration() "<<std::endl;
  }

  // Entry hook, called once at the start of operator().
  virtual void LogBegin(void) {
    std::cout << "ConjugageGradient::LogBegin() "<<std::endl;
  }

  /**
   * Solves A psi = src for psi with plain (Hermitian-operator) CG.
   *
   * @param Linop  Hermitian linear operator A (HermOp / HermOpAndNorm are used)
   * @param src    right-hand side b
   * @param psi    in: initial guess; out: solution
   */
  void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
    this->LogBegin();
    GRID_TRACE("ConjugateGradientTimeslice");
    GridStopWatch PreambleTimer;
    GridStopWatch ConstructTimer;
    GridStopWatch NormTimer;
    GridStopWatch AssignTimer;
    PreambleTimer.Start();
    psi.Checkerboard() = src.Checkerboard();
    conformable(psi, src);

    RealD cp, c, a, d, b, ssq, qq;

    ConstructTimer.Start();
    Field p  (src.Grid());
    Field mmp(src.Grid());
    Field r  (src.Grid());
    ConstructTimer.Stop();

    // Initial residual computation & set up
    NormTimer.Start();
    ssq = norm2(src);             // Norm of source vector ||b||^2
    // FIX: ssqtx was used without ever being declared (did not compile).
    auto ssqtx = localNorm2(src); // |b(x, t)|^2 as a lattice field
    std::vector<RealD> ssqt;      // Norm of source per time slice, ssq(t) = \sum_x |b(x, t)|^2
    sliceSum(ssqtx, ssqt, Tdir);  // TODO(review): confirm Tdir is in scope here and the vector element type matches sliceSum
    RealD guess = norm2(psi);     // Norm of initial guess ||psi||^2
    NormTimer.Stop();
    assert(std::isnan(guess) == 0);

    AssignTimer.Start();
    if ( guess == 0.0 ) {
      r = src;
      p = r;
      a = ssq;
    } else {
      Linop.HermOpAndNorm(psi, mmp, d, b);
      r = src - mmp; // Initial residual r0 = b - A guess
      p = r;         // initial conj vector p0 = r0
      a = norm2(p);
    }
    cp = a;
    AssignTimer.Stop();

    // Handle trivial case of zero src
    if (ssq == 0.){
      psi = Zero();
      IterationsToComplete = 1;
      TrueResidual = 0.;
      return;
    }

    std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: guess " << guess << std::endl;
    std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: src " << ssq << std::endl;
    std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: mp " << d << std::endl;
    std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: mmp " << b << std::endl;
    std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: cp,r " << cp << std::endl;
    std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: p " << a << std::endl;

    RealD rsq = Tolerance * Tolerance * ssq; // stopping threshold on |r|^2

    // Check if guess is really REALLY good :)
    if (cp <= rsq) {
      TrueResidual = std::sqrt(a/ssq);
      std::cout << GridLogMessage << "ConjugateGradient guess is converged already " << std::endl;
      IterationsToComplete = 0;
      return;
    }

    std::cout << GridLogIterative << std::setprecision(8)
              << "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
    PreambleTimer.Stop();

    GridStopWatch LinalgTimer;
    GridStopWatch InnerTimer;
    GridStopWatch AxpyNormTimer;
    GridStopWatch LinearCombTimer;
    GridStopWatch MatrixTimer;
    GridStopWatch SolverTimer;
    RealD usecs = -usecond();
    SolverTimer.Start();
    int k;
    for (k = 1; k <= MaxIterations; k++) {
      GridStopWatch IterationTimer;
      IterationTimer.Start();
      c = cp;

      MatrixTimer.Start();
      Linop.HermOp(p, mmp); // Computes mmp = Ap
      MatrixTimer.Stop();

      LinalgTimer.Start();
      InnerTimer.Start();
      ComplexD dc = innerProduct(p,mmp); // p^\dagger A p
      InnerTimer.Stop();
      d = dc.real();
      a = c / d; // alpha_k

      AxpyNormTimer.Start();
      // axpy_norm computes a*x + y in place (see Lattice_reduction.h): here it updates
      // r_k --> r_{k+1} = r_k - alpha_k A p_k and returns ||r_{k+1}||^2.
      cp = axpy_norm(r, -a, mmp, r);
      AxpyNormTimer.Stop();
      b = cp / c; // beta_k

      // Fused update: psi += alpha * p; p = beta * p + r
      LinearCombTimer.Start();
      {
        autoView( psi_v , psi, AcceleratorWrite);
        autoView( p_v   , p,   AcceleratorWrite);
        autoView( r_v   , r,   AcceleratorWrite);
        accelerator_for(ss,p_v.size(), Field::vector_object::Nsimd(),{
          coalescedWrite(psi_v[ss], a * p_v(ss) + psi_v(ss));
          coalescedWrite(p_v[ss] , b * p_v(ss) + r_v (ss));
        });
      }
      LinearCombTimer.Stop();
      LinalgTimer.Stop();
      LogIteration(k,a,b);
      IterationTimer.Stop();
      if ( (k % 500) == 0 ) {
        std::cout << GridLogMessage << "ConjugateGradient: Iteration " << k
                  << " residual " << sqrt(cp/ssq) << " target " << Tolerance << std::endl;
      } else {
        std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
                  << " residual " << sqrt(cp/ssq) << " target " << Tolerance << " took " << IterationTimer.Elapsed() << std::endl;
      }

      // Stopping condition
      if (cp <= rsq) {
        usecs +=usecond();
        SolverTimer.Stop();
        Linop.HermOpAndNorm(psi, mmp, d, qq);
        p = mmp - src; // residual of the returned solution
        GridBase *grid = src.Grid();
        RealD DwfFlops = (1452. )*grid->gSites()*4*k
                       + (8+4+8+4+4)*12*grid->gSites()*k; // CG linear algebra
        RealD srcnorm = std::sqrt(norm2(src));
        RealD resnorm = std::sqrt(norm2(p));
        RealD true_residual = resnorm / srcnorm;
        std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k
                  << "\tComputed residual " << std::sqrt(cp / ssq)
                  << "\tTrue residual " << true_residual
                  << "\tTarget " << Tolerance << std::endl;
        std::cout << GridLogMessage << "\tSolver Elapsed " << SolverTimer.Elapsed() <<std::endl;
        std::cout << GridLogPerformance << "Time breakdown "<<std::endl;
        std::cout << GridLogPerformance << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl;
        std::cout << GridLogPerformance << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl;
        std::cout << GridLogPerformance << "\t\tInner      " << InnerTimer.Elapsed() <<std::endl;
        std::cout << GridLogPerformance << "\t\tAxpyNorm   " << AxpyNormTimer.Elapsed() <<std::endl;
        std::cout << GridLogPerformance << "\t\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
        std::cout << GridLogDebug << "\tMobius flop rate " << DwfFlops/ usecs<< " Gflops " <<std::endl;

        if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);

        IterationsToComplete = k;
        TrueResidual = true_residual;
        return;
      }
    }

    // Failed to converge within MaxIterations.
    std::cout << GridLogMessage << "ConjugateGradient did NOT converge "<<k<<" / "<< MaxIterations
              <<" residual "<< std::sqrt(cp / ssq)<< std::endl;
    SolverTimer.Stop();
    std::cout << GridLogMessage << "\tPreamble   " << PreambleTimer.Elapsed() <<std::endl;
    std::cout << GridLogMessage << "\tConstruct  " << ConstructTimer.Elapsed() <<std::endl;
    std::cout << GridLogMessage << "\tNorm       " << NormTimer.Elapsed() <<std::endl;
    std::cout << GridLogMessage << "\tAssign     " << AssignTimer.Elapsed() <<std::endl;
    std::cout << GridLogMessage << "\tSolver     " << SolverTimer.Elapsed() <<std::endl;
    std::cout << GridLogMessage << "Solver breakdown "<<std::endl;
    std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl;
    std::cout << GridLogMessage<< "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl;
    std::cout << GridLogPerformance << "\t\tInner      " << InnerTimer.Elapsed() <<std::endl;
    std::cout << GridLogPerformance << "\t\tAxpyNorm   " << AxpyNormTimer.Elapsed() <<std::endl;
    std::cout << GridLogPerformance << "\t\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;

    if (ErrorOnNoConverge) assert(0);
    IterationsToComplete = k;
  }
};
NAMESPACE_END(Grid);
#endif

View File

@@ -0,0 +1,634 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/KrylovSchur.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Chulwoo Jung <chulwoo@bnl.gov>
Author: Christoph Lehner <clehner@bnl.gov>
Author: Patrick Oare <poare@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_KRYLOVSCHUR_H
#define GRID_KRYLOVSCHUR_H
NAMESPACE_BEGIN(Grid);
/**
* Computes a complex Schur decomposition of a complex matrix A using Eigen's matrix library. The Schur decomposition,
* A = Q^\dag S Q
* factorizes A into a unitary matrix Q and an upper triangular matrix S. The eigenvalues of A lie on the diagonal of the upper triangular matrix S.
* The Schur decomposition is not unique: in particular, any ordering of the eigenvalues of A can be used as the diagonal of the matrix S.
* This class supports eigenvalue reordering by swapping two adjacent eigenvalues with a unitary transformation.
*/
class ComplexSchurDecomposition {
private:
typedef Eigen::MatrixXcd CMat;
CMat A; // Matrix to decompose, A = Q^\dag S Q
CMat Q; // Unitary matrix Q
CMat S; // Upper triangular matrix S
// Placeholders for Givens rotation
CMat Givens; // Givens rotation
ComplexD s; // off-diagonal element
ComplexD lam1; // First eval for swap
ComplexD lam2; // Second eval for swap
ComplexD phi; // phase of s
RealD r; // norm of s and lam2 - lam1
int Nm; // size of matrix problem
ComplexComparator cCompare; // function to sort the Schur matrix.
public:
/**
* If the input matrix _A is in Hessenberg form (upper triangular + first subdiagonal non-zero), then the Schur
* decomposition is easier to compute.
*/
ComplexSchurDecomposition(CMat _A, bool isHess, RitzFilter ritzFilter = EvalReSmall) : A(_A), Nm (_A.rows()), cCompare (ritzFilter)
{
Eigen::ComplexSchur<CMat> schur (Nm);
if (isHess) {
schur.computeFromHessenberg(_A, CMat::Identity(Nm, Nm), true);
} else {
schur.compute(_A, true);
}
S = schur.matrixT();
Q = schur.matrixU().adjoint(); // Eigen computes A = Q S Q^\dag, we want A = Q^\dag S Q
}
// Getters
int getNm() { return Nm; } // size of matrix problem
CMat getMatrixA() { return A; } // matrix for decomposition
CMat getMatrixQ() { return Q; } // unitary matrix Q
CMat getMatrixS() { return S; } // Schur matrix (upper triangular) S
CMat getRitz() { return S.diagonal(); }
/**
* Checks the Schur decomposition A = Q^\dag S Q holds for the computed matrices. Returns if the relative
* Frobenius norm || A - Q^\dag S Q || / || A || is less than rtol.
*/
bool checkDecomposition(RealD rtol = 1e-8) {
RealD Anorm = A.norm();
if (Anorm < rtol) {
std::cout << GridLogMessage << "Zero matrix" << std::endl;
return true;
}
std::cout << GridLogDebug << "S = " << std::endl << S << std::endl;
std::cout << GridLogDebug << "Q = " << std::endl << Q << std::endl;
CMat A2 = Q.adjoint() * S * Q;
std::cout << GridLogDebug << "Q^dag S Q = " << std::endl << A2 << std::endl;
RealD dA = (A - A2).norm() / Anorm;
return (dA < rtol);
}
/**
* Swaps the components on the diagonal of the Schur matrix at index i with index i + 1.
* Updates the orthogonal matrix Q accordingly.
*/
void swapEvals(int i) {
assert(0 <= i && i <= Nm - 1); // can only swap blocks with upper left index between 0 and Nm - 1
// get parameters for rotation
s = S(i, i+1);
lam1 = S(i, i);
lam2 = S(i+1, i+1);
phi = s / std::abs(s);
r = std::sqrt(std::pow(std::abs(s), 2) + std::pow(std::abs(lam2 - lam1), 2));
// compute Givens rotation corresponding to these parameters
Givens = CMat::Identity(Nm, Nm);
Givens(i, i) = std::abs(s) / r;
Givens(i+1, i+1) = Givens(i, i);
Givens(i, i+1) = (phi / r) * std::conj(lam2 - lam1);
Givens(i+1, i) = -std::conj(Givens(i, i+1));
// rotate Schur matrix and unitary change of basis matrix Q
S = Givens * S * Givens.adjoint();
Q = Givens * Q;
return;
}
/**
* Reorders a Schur matrix &Schur to have the Ritz values that we would like to keep for
* restart as the first Nk elements on the diagonal.
*
* This algorithm is implemented as Nk iterations of a a reverse bubble sort with comparator compare.
* TODO: pass in compare function as an argument, default to compare with <.
*/
// void schurReorder(int Nk, std::function compare) {
void schurReorder(int Nk) {
for (int i = 0; i < Nk; i++) {
for (int k = 0; k <= Nm - 2; k++) {
int idx = Nm - 2 - k;
// TODO use RitzFilter enum here
// if (std::abs(S(idx, idx)) < std::abs(S(idx+1, idx+1))) { // sort by largest modulus of eigenvalue
// if (std::real(S(idx, idx)) > std::real(S(idx+1, idx+1))) { // sort by smallest real eigenvalue
if ( cCompare(S(idx+1, idx+1), S(idx, idx)) ) { // sort by largest modulus of eigenvalue
swapEvals(idx);
}
}
}
return;
}
void schurReorderBlock() {
// TODO method stub
return;
}
};
/**
* Implementation of the Krylov-Schur algorithm.
*/
template<class Field>
class KrylovSchur {
private:
std::string cname = std::string("KrylovSchur");
int MaxIter; // Max iterations
RealD Tolerance;
RealD ssq;
RealD rtol;
int Nm; // Number of basis vectors to track (equals MaxIter if no restart)
int Nk; // Number of basis vectors to keep every restart (equals -1 if no restart)
int Nstop; // Stop after converging Nstop eigenvectors.
LinearOperatorBase<Field> &Linop;
GridBase *Grid;
RealD approxLambdaMax;
RealD beta_k;
Field u; // Residual vector perpendicular to Krylov space (u_{k+1} in notes)
Eigen::VectorXcd b; // b vector in Schur decomposition (e_{k+1} in Arnoldi).
std::vector<Field> basis; // orthonormal Krylov basis
Eigen::MatrixXcd Rayleigh; // Rayleigh quotient of size Nbasis (after construction)
Eigen::MatrixXcd Qt; // Transpose of basis rotation which projects out high modes.
Eigen::VectorXcd evals; // evals of Rayleigh quotient
Eigen::MatrixXcd littleEvecs; // Nm x Nm evecs matrix
std::vector<Field> evecs; // Vector of evec fields
RitzFilter ritzFilter; // how to sort evals
public:
/**
 * @param _Linop      operator whose eigenpairs are sought
 * @param _Grid       grid on which the fields live
 * @param _Tolerance  relative tolerance for the Ritz estimate
 * @param filter      which Ritz values to keep on restart (default: smallest real part)
 */
KrylovSchur(LinearOperatorBase<Field> &_Linop, GridBase *_Grid, RealD _Tolerance, RitzFilter filter = EvalReSmall)
  // Initializer list follows member declaration order: members are always initialized
  // in declaration order, so listing them that way avoids -Wreorder and documents the
  // actual initialization sequence.
  : MaxIter(-1), Tolerance(_Tolerance), ssq(0.0), rtol(0.0), Nm(-1), Nk(-1), Nstop(-1),
    Linop(_Linop), Grid(_Grid), approxLambdaMax(0.0), beta_k(0.0), u(_Grid),
    evals(0), evecs(), ritzFilter(filter)
{
  u = Zero();
}
/* Getters */
Eigen::MatrixXcd getRayleighQuotient() { return Rayleigh; }  // Rayleigh quotient (Schur form after restart)
Field getU() { return u; }                                   // residual vector orthogonal to the Krylov basis
std::vector<Field> getBasis() { return basis; }              // orthonormal Krylov basis (returned by copy)
Eigen::VectorXcd getEvals() { return evals; }                // eigenvalues of the Rayleigh quotient
std::vector<Field> getEvecs() { return evecs; }              // approximate eigenvectors as fields (returned by copy)
/**
 * Runs the Krylov-Schur loop.
 * - Runs an Arnoldi step to generate the Rayleigh quotient and Krylov basis.
 * - Schur decomposes the Rayleigh quotient.
 * - Reorders the Schur form so wanted Ritz values come first (like MATLAB's ordschur).
 * - Truncates the Krylov-Schur expansion and restarts.
 *
 * @param v0            starting vector; must have non-zero norm
 * @param _maxIter      maximum number of restart iterations
 * @param _Nm           total basis size per factorization
 * @param _Nk           number of basis vectors retained at each restart (Nk <= Nm)
 * @param _Nstop        stop once this many eigenvectors have converged
 * @param doubleOrthog  request a second orthogonalization pass in the Arnoldi step
 */
void operator()(const Field& v0, int _maxIter, int _Nm, int _Nk, int _Nstop, bool doubleOrthog = false) {
  MaxIter = _maxIter;
  Nm = _Nm; Nk = _Nk;
  Nstop = _Nstop;
  ssq = norm2(v0);
  // FIX: assign the member; the original declared a local RealD of the same name,
  // shadowing the member and leaving it at 0.
  approxLambdaMax = approxMaxEval(v0);
  rtol = Tolerance * approxLambdaMax;
  b = Eigen::VectorXcd::Zero(Nm); // start as e_{k+1}
  b(Nm-1) = 1.0;
  int start = 0;
  Field startVec = v0;
  littleEvecs = Eigen::MatrixXcd::Zero(Nm, Nm);
  for (int i = 0; i < MaxIter; i++) {
    std::cout << GridLogMessage << "Restart Iteration " << i << std::endl;
    // Perform Arnoldi steps to compute Krylov basis and Rayleigh quotient
    arnoldiIteration(startVec, Nm, start, doubleOrthog);
    startVec = u;
    start = Nk;
    // Perform a Schur decomposition on Rayleigh
    ComplexSchurDecomposition schur (Rayleigh, false, ritzFilter);
    std::cout << GridLogDebug << "Schur decomp holds? " << schur.checkDecomposition() << std::endl;
    // Rearrange Schur matrix so wanted evals are on top left (like MATLAB's ordschur)
    std::cout << GridLogMessage << "Reordering Schur eigenvalues" << std::endl;
    schur.schurReorder(Nk);
    Eigen::MatrixXcd Q = schur.getMatrixQ();
    Qt = Q.adjoint(); // TODO should Q be real?
    Eigen::MatrixXcd S = schur.getMatrixS();
    std::cout << GridLogMessage << "*** ROTATING TO SCHUR BASIS *** " << std::endl;
    // Rotate Krylov basis and b vector, and redefine the Rayleigh quotient.
    Rayleigh = schur.getMatrixS();
    b = Q * b; // b^\dag = b^\dag * Q^\dag <==> b = Q*b
    std::vector<Field> basis2;
    constructUR(basis2, basis, Qt, Nm);
    basis = basis2;
    std::cout << GridLogMessage << "*** TRUNCATING FOR RESTART *** " << std::endl;
    Rayleigh = Rayleigh(Eigen::seqN(0, Nk), Eigen::seqN(0, Nk)); // Eigen >= 3.4 slicing
    basis = std::vector<Field> (basis.begin(), basis.begin() + Nk);
    Eigen::VectorXcd btmp = b.head(Nk); // careful about how you slice Eigen::VectorXcd!
    b = btmp;
    checkKSDecomposition();
    // Compute eigensystem of Rayleigh. Note the eigenvectors correspond to the sorted eigenvalues.
    std::cout << GridLogDebug << "S: " << std::endl << S << std::endl;
    computeEigensystem(Rayleigh);
    std::cout << GridLogMessage << "Eigenvalues (first Nk sorted): " << std::endl << evals << std::endl;
    // check convergence and return if needed.
    int Nconv = converged();
    std::cout << GridLogMessage << "Number of evecs converged: " << Nconv << std::endl;
    if (Nconv >= Nstop || i == MaxIter - 1) {
      std::cout << GridLogMessage << "Converged with " << Nconv << " / " << Nstop << " eigenvectors on iteration "
                << i << "." << std::endl;
      basisRotate(evecs, Qt, 0, Nk, 0, Nk, Nm);
      std::cout << GridLogMessage << "Eigenvalues: " << evals << std::endl;
      return;
    }
  }
}
/**
 * Constructs the Arnoldi basis for the Krylov space K_n(D, src). (TODO make private)
 *
 * On exit: basis holds Nm orthonormal vectors, Rayleigh holds the (Hessenberg)
 * projection, u holds the normalized residual and beta_k = ||f_k|| its norm,
 * with b(Nm-1) = beta_k.
 *
 * Parameters
 * ----------
 * v0 : const Field&
 *     Source to generate Krylov basis.
 * Nm : int
 *     Final size of the basis desired. If the basis becomes complete before a basis of size Nm is constructed
 *     (determined by relative tolerance Tolerance), stops iteration there.
 * start : int (default = 0)
 *     If non-zero, assumes part of the Arnoldi basis has already been constructed
 *     (restart: basis has `start` vectors, Rayleigh's leading block and b are valid).
 * doubleOrthog : bool (default = false)
 *     Whether to double orthogonalize the basis (for numerical cancellations) or not.
 */
void arnoldiIteration(const Field& v0, int Nm, int start = 0, bool doubleOrthog = false)
{
  ComplexD coeff;
  Field w (Grid); // A acting on last Krylov vector.
  if (start == 0) { // initialize everything that we need.
    RealD v0Norm = 1 / std::sqrt(ssq);
    basis.push_back(v0Norm * v0); // normalized source
    Rayleigh = Eigen::MatrixXcd::Zero(Nm, Nm);
    u = Zero();
  } else {
    assert( start == (int)basis.size() ); // should be starting at the end of basis (start = Nk)
    std::cout << GridLogMessage << "Resetting Rayleigh and b" << std::endl;
    // Embed the retained Nk x Nk Schur block into a fresh Nm x Nm Rayleigh quotient.
    Eigen::MatrixXcd RayleighCp = Rayleigh;
    Rayleigh = Eigen::MatrixXcd::Zero(Nm, Nm);
    Rayleigh(Eigen::seqN(0, Nk), Eigen::seqN(0, Nk)) = RayleighCp;
    // append b^\dag to Rayleigh, add u to basis
    Rayleigh(Nk, Eigen::seqN(0, Nk)) = b.adjoint();
    basis.push_back(u);
    b = Eigen::VectorXcd::Zero(Nm);
  }
  // Construct next Arnoldi vector by normalizing w_i = Dv_i - \sum_j v_j h_{ji}
  for (int i = start; i < Nm; i++) {
    Linop.Op(basis.back(), w);
    // Modified Gram-Schmidt: project out each existing basis vector in turn.
    for (int j = 0; j < (int)basis.size(); j++) {
      coeff = innerProduct(basis[j], w); // coeff = h_{ij}. Note that since {vi} is ONB it's OK to subtract it off after.
      Rayleigh(j, i) = coeff;
      w -= coeff * basis[j];
    }
    if (doubleOrthog) {
      // TODO implement
    }
    // add w_i to the pile
    if (i < Nm - 1) {
      coeff = std::sqrt(norm2(w));
      Rayleigh(i+1, i) = coeff;
      basis.push_back(
        (1.0/coeff) * w
      );
    }
  }
  // BUGFIX: this block previously ran on EVERY loop iteration despite its
  // "after iterations" intent, paying an extra global norm2 reduction per
  // Arnoldi step. Only the final residual w matters, so do it once here.
  // Guarded so the degenerate start >= Nm case (empty loop, w undefined)
  // leaves u/beta_k untouched, exactly as before.
  if (start < Nm) {
    u = w; // make sure u is normalized
    beta_k = std::sqrt(norm2(u)); // beta_k = ||f_k|| determines convergence.
    u = (1/beta_k) * u;
  }
  b(Nm - 1) = beta_k;
  // std::cout << GridLogMessage << "|f|^2 after Arnoldi step = " << norm2(f) << std::endl;
  std::cout << GridLogMessage << "beta_k = |u| (before norm) after Arnoldi step = " << beta_k << std::endl;
  std::cout << GridLogDebug << "Computed Rayleigh quotient = " << std::endl << Rayleigh << std::endl;
  return;
}
/**
 * Approximates the eigensystem of the linear operator by computing the eigensystem of
 * the Rayleigh quotient. Assumes that the Rayleigh quotient has already been constructed (by
 * calling the operator() function).
 *
 * Fills: evals (diagonal of S), littleEvecs (small eigenvectors of S), and evecs
 * (Ritz vectors, each a linear combination of the Krylov basis).
 *
 * TODO implement in parent class eventually.
 *
 * Parameters
 * ----------
 * Eigen::MatrixXcd& S
 *     Schur matrix (upper triangular) similar to original Rayleigh quotient.
 */
void computeEigensystem(Eigen::MatrixXcd& S)
{
  std::cout << GridLogMessage << "Computing eigenvalues." << std::endl;
  evecs.clear();
  // Eigenvalues of an upper-triangular matrix are its diagonal entries.
  evals = S.diagonal();
  // TODO: is there a faster way to get the eigenvectors of a triangular matrix?
  // Rayleigh.triangularView<Eigen::Upper> tri;
  Eigen::ComplexEigenSolver<Eigen::MatrixXcd> es;
  // es.compute(Rayleigh);
  es.compute(S);
  // NOTE(review): Eigen does not guarantee any ordering of ComplexEigenSolver's
  // eigenpairs, while evals above follow the (reordered) Schur diagonal — so
  // evals[k] may not correspond to littleEvecs.col(k)/evecs[k]. Confirm the
  // pairing (e.g. against es.eigenvalues()) before relying on it downstream.
  littleEvecs = es.eigenvectors();
  std::cout << GridLogDebug << "Little evecs: " << littleEvecs << std::endl;
  // Convert evecs to lattice fields
  for (int k = 0; k < evals.size(); k++) {
    Eigen::VectorXcd vec = littleEvecs.col(k);
    Field tmp (basis[0].Grid());
    tmp = Zero();
    // Ritz vector: y_k = sum_j basis[j] * vec[j]
    for (int j = 0; j < basis.size(); j++) {
      tmp = tmp + vec[j] * basis[j];
    }
    evecs.push_back(tmp);
  }
}
/**
 * Approximates the maximum eigenvalue of Linop.Op via power iteration, used to
 * normalize the residual and test for convergence.
 *
 * TODO implement in parent class eventually
 *
 * Parameters
 * ----------
 * Field& v0
 *     Source field to start with. Must have non-zero norm.
 * int MAX_ITER (default = 50)
 *     Maximum number of iterations for power approximation.
 *
 * Returns
 * -------
 * RealD lamApprox
 *     Approximation of largest eigenvalue.
 */
RealD approxMaxEval(const Field& v0, int MAX_ITER = 50) {
  assert (norm2(v0) > 1e-8); // must have relatively large source norm to start
  Field vk (Grid);  // current power iterate A^n v0
  Field Avk (Grid); // scratch for the operator application
  vk = v0;
  RealD lamApprox = 0.0;
  RealD prevNorm = std::sqrt(norm2(vk)); // |A^n v0|
  for (int it = 0; it < MAX_ITER; it++) {
    Linop.Op(vk, Avk); // distinct in/out fields: Op(x, x) is not safe
    vk = Avk;
    RealD curNorm = std::sqrt(norm2(vk));   // |A^{n+1} v0|
    lamApprox = curNorm / prevNorm;         // Rayleigh-free ratio estimate
    std::cout << GridLogDebug << "Approx for max eval: " << lamApprox << std::endl;
    prevNorm = curNorm;
  }
  return lamApprox;
}
/**
 * Computes the number of Krylov-Schur eigenvectors that have converged. An eigenvector s is considered converged
 * for a tolerance epsilon if
 *     r(s) := |\beta e_m^T s| < epsilon
 * where beta is the norm of f_{m+1}.
 *
 * TODO implement in parent class eventually
 *
 * Returns
 * -------
 * int : Number of converged eigenvectors.
 */
int converged() {
  int Nconv = 0;
  std::cout << GridLogDebug << "b: " << b << std::endl;
  // (removed two unused Field locals that each allocated a full lattice field)
  for (int k = 0; k < (int)evecs.size(); k++) {
    // Eigen's dot conjugates its first argument, so this is b^\dagger s.
    Eigen::VectorXcd evec_k = littleEvecs.col(k);
    RealD ritzEstimate = std::abs(b.dot(evec_k));
    std::cout << GridLogMessage << "Ritz estimate for evec " << k << " = " << ritzEstimate << std::endl;
    if (ritzEstimate < rtol) {
      Nconv++;
    }
  }
  // Check that Ritz estimate is explicitly || D (Uy) - lambda (Uy) ||
  // checkRitzEstimate();
  return Nconv;
}
/**
 * Checks the Krylov-Schur decomposition DU = UR + f b^\dag with the last-computed
 * U, R, f, and b. Logs the relative deviation per column and returns whether the
 * summed deviation is below tol.
 */
bool checkKSDecomposition(RealD tol = 1e-8) {
  std::cout << GridLogMessage << "*** CHECKING KRYLOV-SCHUR DECOMPOSITION *** " << std::endl;
  int k = basis.size(); // number of basis vectors, also the size of Rayleigh.
  // Form UR column by column. constructUR clears its output, so start from an
  // empty vector rather than deep-copying the whole lattice basis (previous
  // code also computed Rayleigh.adjoint() and never used it).
  std::vector<Field> rotated;
  constructUR(rotated, basis, Rayleigh, k);
  // For each i, form D u(i) and subtract off (UR + u b^\dag)(i)
  RealD delta = 0.0; RealD deltaSum = 0;
  Field tmp (Grid); tmp = Zero();
  for (int i = 0; i < k; i++) {
    Linop.Op(basis[i], tmp); // tmp = D u(i)
    // column i of the residual D U - U R - u b^\dag (b enters conjugated: f b^\dag)
    delta = norm2(tmp - rotated[i] - u * std::conj(b(i)));
    delta = delta / norm2(tmp); // relative tolerance
    deltaSum += delta;
    std::cout << GridLogDebug << "Iteration " << i << std::endl;
    std::cout << GridLogDebug << "Du = " << norm2(tmp) << std::endl;
    std::cout << GridLogDebug << "rotated = " << norm2(rotated[i]) << std::endl;
    std::cout << GridLogDebug << "b[i] = " << b(i) << std::endl;
    std::cout << GridLogMessage << "Deviation in decomp, column " << i << ": " << delta << std::endl;
  }
  std::cout << GridLogMessage << "Squared sum of relative deviations in decomposition: " << deltaSum << std::endl;
  return deltaSum < tol;
}
/**
 * Checks the Ritz vector s was rotated correctly by explicitly recomputing the
 * eigenvectors of the rotated Rayleigh quotient and rebuilding each full Ritz
 * vector from the basis, comparing against the stored evecs.
 */
bool checkRitzRotation(RealD tol = 1e-8) {
  std::cout << GridLogMessage << "*** CHECKING RITZ VECTOR ROTATION *** " << std::endl;
  // Recompute the small eigenvectors directly from the current Rayleigh quotient.
  Eigen::ComplexEigenSolver<Eigen::MatrixXcd> solver (Rayleigh);
  Eigen::MatrixXcd recomputed = solver.eigenvectors();
  RealD smallDev = (recomputed - littleEvecs).norm() / littleEvecs.norm();
  std::cout << GridLogMessage << "|littleEvecs2 - littleEvecs| / |littleEvecs| = " << smallDev << std::endl;
  std::cout << GridLogMessage << "Forming full eigenvectors" << std::endl;
  RealD deviationTotal = 0;
  Field candidate (Grid); // reused across eigenvectors
  for (int k = 0; k < evals.size(); k++) {
    Eigen::VectorXcd coeffs = littleEvecs.col(k);
    candidate = Zero();
    // Rebuild y_k = sum_j coeffs[j] * basis[j] and compare to the stored Ritz vector.
    for (int j = 0; j < basis.size(); j++) {
      candidate = candidate + coeffs[j] * basis[j];
    }
    RealD deviation = norm2(candidate - evecs[k]) / norm2(evecs[k]);
    std::cout << GridLogMessage << "Deviation in evec " << k << ": " << deviation << std::endl;
    deviationTotal += deviation;
  }
  return deviationTotal < tol;
}
/**
 * Checks the Ritz estimate R(s) is indeed the deviation of a Ritz eigenvector from being a true eigenvector,
 * by explicitly forming || D s - lambda s || for each stored Ritz pair.
 *
 * Parameters
 * ----------
 * tol : RealD (default = 1e-8)
 *     Threshold used to flag which residuals count as converged in the log output
 *     (this parameter was previously unused).
 */
void checkRitzEstimate(RealD tol = 1e-8) {
  std::cout << GridLogMessage << "*** CHECKING RITZ ESTIMATE *** " << std::endl;
  Field tmp (Grid);
  for (int k = 0; k < (int)evecs.size(); k++) {
    tmp = Zero();
    Linop.Op(evecs[k], tmp); // D evec
    // Explicit residual for Ritz pair (evals[k], evecs[k]).
    RealD ritz = std::sqrt(norm2(tmp - evals[k] * evecs[k]));
    // Route through the Grid logger (was a bare std::cout, inconsistent with
    // every other print in this class) and annotate against tol.
    std::cout << GridLogMessage << "Ritz estimate " << k << " = " << ritz
              << (ritz < tol ? " (below tol)" : "") << std::endl;
  }
  return;
}
/**
* Given a vector of fields U (equivalently, a LxN matrix, where L is the number of degrees of
* freedom on the lattice field) and an NxN matrix R, forms the product UR.
*
* Note that I believe this is equivalent to basisRotate(U, R.adjoint(), 0, N, 0, N, N), but I'm
* not 100% sure (this will be slower and unoptimized though).
*/
void constructUR(std::vector<Field>& UR, std::vector<Field> &U, Eigen::MatrixXcd& R, int N) {
Field tmp (Grid);
UR.clear();
for (int i = 0; i < N; i++) {
tmp = Zero();
for (int j = 0; j < N; j++) {
tmp = tmp + U[j] * R(j, i);
}
UR.push_back(tmp);
}
return;
}
/**
* Same as constructUR but for the product order RU.
*/
void constructRU(std::vector<Field>& RU, std::vector<Field> &U, Eigen::MatrixXcd& R, int N) {
Field tmp (Grid);
RU.clear();
for (int i = 0; i < N; i++) {
tmp = Zero();
for (int j = 0; j < N; j++) {
tmp = tmp + R(i, j) * U[j];
}
RU.push_back(tmp);
}
return;
}
};
NAMESPACE_END(Grid);
#endif