Adding Claude related files

New CLAUDE controllable visualiser
New file for animation in MD time direction
2026-04-26 05:26:01 +01:00 · 2026-04-21 10:41:18 -04:00 · 2026-04-10 11:23:25 -04:00 · 2026-04-02 13:55:38 -04:00
31 changed files with 3245 additions and 7158 deletions
@@ -75,7 +75,6 @@ NAMESPACE_CHECK(BiCGSTAB);
 #include <Grid/algorithms/iterative/FlexibleCommunicationAvoidingGeneralisedMinimalResidual.h>
 #include <Grid/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h>
 #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
 #include <Grid/algorithms/iterative/SimpleLanczos.h>
 #include <Grid/algorithms/iterative/PowerMethod.h>
 #include <Grid/algorithms/iterative/AdefGeneric.h>
 #include <Grid/algorithms/iterative/AdefMrhs.h>
@@ -84,9 +83,4 @@ NAMESPACE_CHECK(PowerMethod);
 NAMESPACE_CHECK(multigrid);
 #include <Grid/algorithms/FFT.h>
 #include <Grid/algorithms/iterative/KrylovSchur.h>
 #include <Grid/algorithms/iterative/Arnoldi.h>
 #include <Grid/algorithms/iterative/LanczosBidiagonalization.h>
 #include <Grid/algorithms/iterative/RestartedLanczosBidiagonalization.h>
 #endif
@@ -1,433 +0,0 @@
 /*************************************************************************************
 Grid physics library, www.github.com/paboyle/Grid 
 Source file: ./lib/algorithms/iterative/Arnoldi.h
 Copyright (C) 2015
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 Author: paboyle <paboyle@ph.ed.ac.uk>
 Author: Patrick Oare <poare@bnl.gov>
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License along
 with this program; if not, write to the Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 See the full license in the file "LICENSE" in the top level distribution directory
 *************************************************************************************/
 /*  END LEGAL */
 #ifndef GRID_ARNOLDI_H
 #define GRID_ARNOLDI_H
 NAMESPACE_BEGIN(Grid); 
 //Moved to KrylovSchur
 #if 0
 /**
 * Options for which Ritz values to keep in implicit restart.
 */
 enum RitzFilter {
  EvalNormSmall,           // Keep evals with smallest norm
  EvalNormLarge,           // Keep evals with largest norm
  EvalReSmall,             // Nominally "smallest real part"; ComplexComparator currently orders by |Im z| (see below)
  EvalReLarge              // Nominally "largest real part";  ComplexComparator currently orders by |Im z| (see below)
 };
 /**
 * Comparison functor selected by a RitzFilter, suitable for std::sort on complex Ritz values.
 *
 * operator()(z1, z2) returns true when z1 should be ordered before z2:
 *   EvalNormSmall : |z1| < |z2|
 *   EvalNormLarge : |z1| > |z2|
 *   EvalReSmall   : |Im z1| < |Im z2|
 *   EvalReLarge   : |Im z1| > |Im z2|
 *
 * NOTE(review): the EvalRe* branches compare imaginary parts, not real parts. This is the
 * "terrible hack" of the original code and is retained on purpose; confirm before relying on it.
 */
 struct ComplexComparator
 {
  RitzFilter f;
  ComplexComparator (RitzFilter _f) : f(_f) {}
  bool operator()(std::complex<double> z1, std::complex<double> z2) const {
    // These must be evaluated BEFORE the switch. Statements placed between `switch (...) {`
    // and the first case label are never executed, which left the operands uninitialised.
    const double absIm1 = std::abs(std::imag(z1));
    const double absIm2 = std::abs(std::imag(z2));
    switch (f) {
      case EvalNormSmall:
        return std::abs(z1) < std::abs(z2);
      case EvalNormLarge:
        return std::abs(z1) > std::abs(z2);
      case EvalReSmall:
        return absIm1 < absIm2;
      case EvalReLarge:
        return absIm1 > absIm2;
      default:
        assert(0);      // unknown filter value
        return false;   // keeps NDEBUG builds from running off the end of a non-void function
    }
  }
 };
 // (stray merge-conflict markers removed)
 #endif
 /**
 * Implementation of the Arnoldi algorithm.
 */
 template<class Field> 
 class Arnoldi {
  private:
    std::string cname = std::string("Arnoldi");
    int MaxIter;   // Max iterations
    RealD Tolerance;
    RealD ssq;
    RealD rtol;
    int Nm;           // Number of basis vectors to track (equals MaxIter if no restart)
    int Nk;           // Number of basis vectors to keep every restart (equals -1 if no restart)
    int Nstop;       // Stop after converging Nstop eigenvectors.
    LinearOperatorBase<Field> &Linop;
    GridBase *Grid;
    RealD approxLambdaMax;
    RealD beta_k;
    Field f;
    std::vector<Field> basis;               // orthonormal Arnoldi basis
    Eigen::MatrixXcd Hess;                  // Hessenberg matrix of size Nbasis (after construction)
    Eigen::MatrixXcd Qt;                    // Transpose of basis rotation which projects out high modes.
    Eigen::VectorXcd evals;                 // evals of Hess
    Eigen::MatrixXcd littleEvecs;           // Nm x Nm evecs matrix
    std::vector<Field> evecs;               // Vector of evec fields
    RitzFilter ritzFilter;                        // how to sort evals
  public:       
    // Builds an idle solver: stores the operator, grid, tolerance and Ritz filter, marks all
    // iteration parameters as unset (-1) and zeroes the residual field f.
    // The initialisers are listed in member-declaration order, which is the order they run in.
    Arnoldi(LinearOperatorBase<Field> &_Linop, GridBase *_Grid, RealD _Tolerance, RitzFilter filter = EvalReSmall)
      : MaxIter(-1),
        Tolerance(_Tolerance),
        ssq(0.0),
        rtol(0.0),
        Nm(-1),
        Nk(-1),
        Nstop(-1),
        Linop(_Linop),
        Grid(_Grid),
        approxLambdaMax(0.0),
        beta_k(0.0),
        f(_Grid),
        evals(0),
        evecs(),
        ritzFilter(filter)
    {
      f = Zero();
    }
    /**
     * Runs the implicitly restarted Arnoldi loop. For each of at most _maxIter restarts:
     *   - extends the Arnoldi factorisation to _Nm basis vectors (arnoldiIteration),
     *   - computes the eigensystem of the Hessenberg matrix and sorts the Ritz values with ritzFilter,
     *   - performs an implicit restart that keeps _Nk basis vectors,
     *   - counts converged Ritz vectors and returns once at least _Nstop have converged
     *     (or on the last permitted restart).
     *
     * Parameters
     * ----------
     * v0           : starting vector; approxMaxEval asserts that it has non-zero norm.
     * _maxIter     : maximum number of restart iterations.
     * _Nm          : size of the Krylov basis / Hessenberg matrix.
     * _Nk          : number of basis vectors kept at each restart.
     * _Nstop       : number of converged eigenvectors after which to stop.
     * doubleOrthog : forwarded to arnoldiIteration.
     *
     * NOTE(review): only `evals` is reordered by the sort below; littleEvecs / evecs keep the
     * eigensolver's column order -- confirm this is intended.
     */
    void operator()(const Field& v0, int _maxIter, int _Nm, int _Nk, int _Nstop, bool doubleOrthog = false) {
      MaxIter = _maxIter;
      Nm = _Nm; Nk = _Nk;
      Nstop = _Nstop;
      ssq = norm2(v0);
      // Assign the member: the previous code declared a local of the same name here, which
      // shadowed the member and left it at its constructor value.
      approxLambdaMax = approxMaxEval(v0);
      rtol = Tolerance * approxLambdaMax;     // convergence threshold relative to the operator scale
      ComplexComparator compareComplex (ritzFilter);
      std::cout << GridLogMessage << "Comparing Ritz values with: " << ritzFilter << std::endl;
      int start = 1;
      Field startVec = v0;
      littleEvecs = Eigen::MatrixXcd::Zero(Nm, Nm);
      for (int i = 0; i < MaxIter; i++) {
        std::cout << GridLogMessage << "Restart Iteration " << i << std::endl;
        // Perform Arnoldi steps to compute Krylov basis and Rayleigh quotient (Hess)
        arnoldiIteration(startVec, Nm, start, doubleOrthog);
        startVec = f;
        // compute eigensystem and sort evals
        compute_eigensystem(Hess);
        std::cout << GridLogMessage << "Eigenvalues after Arnoldi step: " << std::endl << evals << std::endl;
        std::sort(evals.begin(), evals.end(), compareComplex);
        std::cout << GridLogMessage << "Ritz values after sorting (first Nk preserved): " << std::endl << evals << std::endl;
        // Implicit restart to de-weight unwanted eigenvalues
        implicitRestart(_Nm, _Nk);      // probably can delete _Nm and _Nk from function args
        start = Nk;
        // check convergence and return if needed.
        int Nconv = converged();
        std::cout << GridLogMessage << "Number of evecs converged: " << Nconv << std::endl;
        if (Nconv >= Nstop || i == MaxIter - 1) {
          std::cout << GridLogMessage << "Converged with " << Nconv << " / " << Nstop << " eigenvectors on iteration " 
                        << i << "." << std::endl;
          basisRotate(evecs, Qt, 0, Nk, 0, Nk, Nm);
          std::cout << GridLogMessage << "Eigenvalues [first " << Nconv << " converged]: " << std::endl << evals << std::endl;
          return;
        }
      }      
    }
    /**
     * Estimates the largest-magnitude eigenvalue of Linop.Op by repeated application of the
     * operator. The estimate is used to scale the convergence tolerance.
     *
     * Parameters
     * ----------
     * v0       : source field; asserted to satisfy norm2(v0) > 1e-8.
     * MAX_ITER : number of operator applications (default 50).
     *
     * Returns
     * -------
     * The last ratio |A^{n+1} v0| / |A^n v0|, or 0.0 if MAX_ITER <= 0.
     */
    RealD approxMaxEval(const Field& v0, int MAX_ITER = 50) {
      assert (norm2(v0) > 1e-8);                        // must have relatively large source norm to start
      Field iterate (Grid);
      Field applied (Grid);
      iterate = v0;
      RealD prevNorm = std::sqrt(norm2(iterate));       // |A^n v0|, starting with n = 0
      RealD ratio = 0.0;
      int step = 0;
      while (step < MAX_ITER) {
        Linop.Op(iterate, applied);                     // input and output must be distinct fields
        iterate = applied;
        const RealD curNorm = std::sqrt(norm2(iterate));  // |A^{n+1} v0|
        ratio = curNorm / prevNorm;
        std::cout << GridLogDebug << "Approx for max eval: " << ratio << std::endl;
        prevNorm = curNorm;
        ++step;
      }
      return ratio;
    }
    /**
     * Constructs the Arnoldi basis for the Krylov space K_n(D, src). (TODO make private)
     * 
     * Parameters
     * ----------
     * v0 : Field&
     *  Source to generate Krylov basis. 
     * Nm : int
     *  Final size of the basis desired. If the basis becomes complete before a basis of size Nm is constructed 
     *  (determined by relative tolerance Tolerance), stops iteration there. 
     * doubleOrthog : bool (default = false)
     *  Whether to double orthogonalize the basis (for numerical cancellations) or not. 
     * start        : int (default = 0)
     *  If non-zero, assumes part of the Arnoldi basis has already been constructed. 
     */
    void arnoldiIteration(const Field& v0, int Nm, int start = 1, bool doubleOrthog = false)
    {
      ComplexD coeff;
      Field w (Grid);           // A acting on last Krylov vector. 
      if (start == 1) {       // initialize everything that we need.
        RealD v0Norm = 1 / std::sqrt(ssq);
        basis.push_back(v0Norm * v0);                // normalized source
        Hess = Eigen::MatrixXcd::Zero(Nm, Nm);
        f = Zero();
      } else {
        assert( start == basis.size() );      // should be starting at the end of basis (start = Nk)
        Eigen::MatrixXcd HessCp = Hess;
        Hess = Eigen::MatrixXcd::Zero(Nm, Nm);
        Hess(Eigen::seqN(0, Nk), Eigen::seqN(0, Nk)) = HessCp;
      }
      // Construct next Arnoldi vector by normalizing w_i = Dv_i - \sum_j v_j h_{ji}
      for (int i = start - 1; i < Nm; i++) {
        Linop.Op(basis.back(), w);
        for (int j = 0; j < basis.size(); j++) {
          coeff = innerProduct(basis[j], w);       // coeff = h_{ij}. Note that since {vi} is ONB it's OK to subtract it off after. 
          Hess(j, i) = coeff;
          w -= coeff * basis[j];
        }
        if (doubleOrthog) {
          // TODO implement
        }
        // add w_i to the pile
        if (i < Nm - 1) {
          coeff = std::sqrt(norm2(w));
          Hess(i+1, i) = coeff;
          basis.push_back(
            (1.0/coeff) * w
          );
        }
        // after iterations, update f and beta_k = ||f||
        f = w;                                // make sure f is not normalized
        beta_k = std::sqrt(norm2(f));         // beta_k = ||f_k|| determines convergence.
      }
      std::cout << GridLogMessage << "|f|^2 after Arnoldi step = " << norm2(f) << std::endl;
      std::cout << GridLogDebug << "Computed Hessenberg matrix = " << std::endl << Hess << std::endl;
      return;
    }
    /**
     * Diagonalises the small matrix S and lifts its eigenvectors to lattice fields.
     *
     * Fills evals and littleEvecs from an Eigen::ComplexEigenSolver of S, then rebuilds evecs so
     * that evecs[k] = sum_j littleEvecs(j, k) * basis[j]. Assumes the basis has already been
     * constructed (basis[0] is used to obtain the grid).
     *
     * TODO implement in parent class eventually.
     *
     * Parameters
     * ----------
     * Eigen::MatrixXcd& S
     *  Matrix whose eigensystem is computed (operator() passes the Hessenberg matrix).
     */
    void compute_eigensystem(Eigen::MatrixXcd& S)
    {
      std::cout << GridLogMessage << "Computing eigenvalues." << std::endl;
      evecs.clear();
      Eigen::ComplexEigenSolver<Eigen::MatrixXcd> solver;
      solver.compute(S);
      evals       = solver.eigenvalues();
      littleEvecs = solver.eigenvectors();
      // Lift every small eigenvector to a lattice field as a linear combination of the basis
      for (int col = 0; col < evals.size(); col++) {
        Eigen::VectorXcd weights = littleEvecs.col(col);
        Field ritzVec (basis[0].Grid());
        ritzVec = Zero();
        for (int b = 0; b < basis.size(); b++) {
          ritzVec = ritzVec + weights[b] * basis[b];
        }
        evecs.push_back(ritzVec);
      }
      std::cout << GridLogMessage << "Eigenvalues: " << std::endl << evals << std::endl;
    }
    /**
     * Verifies the Arnoldi factorization D V = V H + f e_m^\dag with the last-computed
     * V, H, f. (Not implemented: the draft below is commented out.)
     */
    // RealD verifyFactorization() {
    //   int k = basis.size();         // number of basis vectors, also the size of H.
    //   std::vector<Field> factorized (k, Zero());
    //   Field tmp (FGrid); tmp = Zero();
    //   for (int i = 0; i < basis.size(); i++) {
    //     Linop.Op(basis[i], tmp);
    //   }
    //   // basisRotate(basis, Q, 0, Nk, 0, Nk, Nm);
    //   // Linop.Op(, )
    // }
    /* Getters */
    Eigen::MatrixXcd    getHessenbergMat()  { return Hess; }
    Field               getF()              { return f; }
    std::vector<Field>  getBasis()          { return basis; }
    Eigen::VectorXcd    getEvals()          { return evals; }
    std::vector<Field>  getEvecs()          { return evecs; }
    /**
     * Implements implicit restarting for Arnoldi. Assumes eigenvalues are sorted so that the
     * Ritz values to discard are evals[_Nk], ..., evals[_Nm - 1]; each is applied as a shift in
     * one QR step on the Hessenberg matrix.
     *
     * On return: basis is rotated and truncated to _Nk vectors, Hess is its leading _Nk x _Nk
     * block, Qt holds the transpose of the accumulated rotation, evecs[0.._Nk] are rotated by Qt,
     * and f is the residual of the length-_Nk factorisation.
     *
     * Parameters
     * ----------
     * int _Nm
     *  Size of the current basis (Hessenberg is _Nm x _Nm).
     * int _Nk
     *  Number of basis vectors to keep at each restart. Requires 1 <= _Nk < _Nm.
     */
    void implicitRestart(int _Nm, int _Nk) {
      // Hess(Nk, Nk-1), Q(Nm-1, Nk-1) and basis[Nk] are all read below, so Nk == Nm (previously
      // allowed) or Nk < 1 would index out of range.
      assert ( _Nk >= 1 && _Nk < _Nm );
      Nm = _Nm; Nk = _Nk;
      std::cout << GridLogMessage << "Computing QR Factorizations." << std::endl;
      Eigen::MatrixXcd Q = Eigen::MatrixXcd::Identity(Nm, Nm);
      Eigen::MatrixXcd Qi (Nm, Nm);
      for (int i = Nk; i < Nm; i++) {        // keep the first Nk eigenvalues and shift with the last Nm - Nk
        // Useful debugging output
        std::cout << GridLogDebug << "Computing QR factorization for i = " << i << std::endl;
        std::cout << GridLogDebug << "Eval shift = " << evals[i] << std::endl;
        std::cout << GridLogDebug << "Hess before rotation: " << Hess << std::endl;
        // QR factorize 
        Eigen::HouseholderQR<Eigen::MatrixXcd> QR (Hess - evals[i] * Eigen::MatrixXcd::Identity(Nm, Nm));
        Qi = QR.householderQ();
        Q = Q * Qi;
        Hess = Qi.adjoint() * Hess * Qi;
        std::cout << GridLogDebug << "Qt up to i = " << Q.transpose() << std::endl;
      }
      std::cout << GridLogDebug << "Hess after all rotations: " << std::endl << Hess << std::endl; 
      std::complex<double> beta = Hess(Nk, Nk-1);
      std::complex<double> sigma = Q(Nm-1, Nk-1);
      // Rotate basis by Qt: basis[j] <- sum_k Qt(j,k) basis[k] for j = 0..Nk.
      Qt = Q.transpose();
      basisRotate(basis, Qt, 0, Nk + 1, 0, Nm, Nm);
      // form Arnoldi vector f: f is normal to the basis vectors and its norm \beta is used to determine the Ritz estimate. 
      // The update f+ = v+_{Nk+1} * beta + f * sigma needs the ROTATED vector v+_{Nk+1}, so it
      // must come after the basis rotation (it was previously formed from the old basis[Nk]).
      f = basis[Nk] * beta + f * sigma;
      std::cout << GridLogMessage << "|f|^2 after implicit restart = " << norm2(f) << std::endl;
      // rotate
      basisRotate(evecs, Qt, 0, Nk + 1, 0, Nm, Nm);
      // Truncate the basis and restart
      basis = std::vector<Field> (basis.begin(), basis.begin() + Nk);
      // Keep the leading Nk x Nk block. Assigning a sub-view of Hess back to Hess resizes the
      // destination before the source is read (aliasing); conservativeResize preserves the block.
      Hess.conservativeResize(Nk, Nk);
      std::cout << "evecs size: " << evecs.size() << std::endl;
    }
    /**
     * Computes the number of Arnoldi eigenvectors that have converged. An eigenvector s is considered converged 
     * for a tolerance epsilon if 
     *    r(s) := |\beta e_m^T s| < epsilon
     * where beta is the norm of f_{m+1}.
     * 
     * Parameters
     * ----------
     * 
     * Returns
     * -------
     * int : Number of converged eigenvectors.
     */
    int converged() {
      int Nconv = 0;
      for (int k = 0; k < evecs.size(); k++) {
        RealD emTs = std::abs(littleEvecs(Nm - 1, k));           // e_m^T s
        RealD ritzEstimate = beta_k * emTs;
        // TODO should be ritzEstimate < Tolerance * lambda_max
        std::cout << GridLogMessage << "Ritz estimate for evec " << k << " = " << ritzEstimate << std::endl;
        if (ritzEstimate < rtol) {
          Nconv++;
        }
      }
      return Nconv;
    }
 };
 NAMESPACE_END(Grid);
 #endif
@@ -1,277 +0,0 @@
 /*************************************************************************************
 Grid physics library, www.github.com/paboyle/Grid
 Source file: ./lib/algorithms/iterative/ConjugateGradientTimeslice.h
 Copyright (C) 2015
 Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 Author: paboyle <paboyle@ph.ed.ac.uk>
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License along
 with this program; if not, write to the Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 See the full license in the file "LICENSE" in the top level distribution
 directory
 *************************************************************************************/
 			   /*  END LEGAL */
 #ifndef GRID_CONJUGATE_GRADIENT_TIMESLICE_H
 #define GRID_CONJUGATE_GRADIENT_TIMESLICE_H
 NAMESPACE_BEGIN(Grid);
 /////////////////////////////////////////////////////////////
 // Base classes for iterative processes based on operators
 // single input vec, single output vec.
 /////////////////////////////////////////////////////////////
 /**
 * Simple modification of conjugate gradient that outputs the residual as a function 
 * of time, in order to study the large wavelength behavior of the solver. 
 */
 template <class Field>
 class ConjugateGradientTimeslice : public OperatorFunction<Field> {
 public:
  using OperatorFunction<Field>::operator();
  bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge.
                           // Defaults true.
  RealD Tolerance;
  Integer MaxIterations;
  Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
  RealD TrueResidual;
  // Stores the stopping tolerance, the iteration cap and the fail-on-no-convergence flag.
  // Initialisers are listed in member-declaration order (the order in which they actually run),
  // and the two result fields start from defined values instead of being left uninitialised.
  ConjugateGradientTimeslice(RealD tol, Integer maxit, bool err_on_no_conv = true)
    : ErrorOnNoConverge(err_on_no_conv),
      Tolerance(tol),
      MaxIterations(maxit),
      IterationsToComplete(0),
      TrueResidual(0.0)
  {}
  // Per-iteration logging hook: operator() calls it once per CG iteration as LogIteration(k,a,b).
  // The default implementation does nothing; being virtual, a subclass may override it.
  virtual void LogIteration(int k,RealD a,RealD b){
    //    std::cout << "ConjugageGradient::LogIteration() "<<std::endl;
  };
  // Hook invoked at the very start of operator(); the default prints a fixed banner to std::cout.
  virtual void LogBegin(void){
    std::cout << "ConjugageGradient::LogBegin() "<<std::endl;
  };
    /**
     * Conjugate-gradient solve of Linop.HermOp(psi) = src.
     *
     * psi is the initial guess on entry and the solution on exit. If norm2(psi) == 0 the initial
     * residual is taken to be src without applying the operator. The iteration stops when the
     * recursive residual satisfies |r|^2 <= Tolerance^2 * |src|^2, or after MaxIterations steps.
     *
     * On convergence, IterationsToComplete and TrueResidual (= |A psi - src| / |src|) are set.
     * On failure, IterationsToComplete is set, TrueResidual is left untouched, and the routine
     * asserts if ErrorOnNoConverge is true.
     *
     * The per-timeslice source norm is computed during setup but is not yet used afterwards.
     */
    void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
      this->LogBegin();
      GRID_TRACE("ConjugateGradientTimeslice");
      GridStopWatch PreambleTimer;
      GridStopWatch ConstructTimer;
      GridStopWatch NormTimer;
      GridStopWatch AssignTimer;
      PreambleTimer.Start();
      psi.Checkerboard() = src.Checkerboard();
      conformable(psi, src);
      // d and b are only assigned by HermOpAndNorm on the non-zero-guess path but are always
      // logged below; start them at zero so the log never reads an indeterminate value.
      RealD cp, c, a, d = 0.0, b = 0.0, ssq, qq;
      //RealD b_pred;
      // Was doing copies
      ConstructTimer.Start();
      Field p  (src.Grid());
      Field mmp(src.Grid());
      Field r  (src.Grid());
      ConstructTimer.Stop();
      // Initial residual computation & set up
      NormTimer.Start();
      ssq = norm2(src);                 // Norm of source vector ||b||^2
      // ssqtx was previously assigned without ever being declared; declare it here.
      auto ssqtx = localNorm2(src);     // Norm |b(x, t)|^2 as a field
      // Norm of source not summed over time slices, ssq(t) = \sum_x |b(x, t)|^2.
      // Element type follows the field returned by localNorm2 rather than assuming RealD.
      // NOTE(review): assumes sliceSum fills a vector of the lattice's scalar_object -- confirm.
      std::vector<typename decltype(ssqtx)::scalar_object> ssqt;
      sliceSum(ssqtx, ssqt, Tdir);      // TODO make sure Tdir is globally defined
      RealD guess = norm2(psi);         // Norm of initial guess ||psi||^2
      NormTimer.Stop();
      assert(std::isnan(guess) == 0);
      AssignTimer.Start();
      if ( guess == 0.0 ) {
        r = src;
        p = r;
        a = ssq;
      } else { 
        Linop.HermOpAndNorm(psi, mmp, d, b);
        r = src - mmp;      // Initial residual r0 = b - A guess
        p = r;              // initial conj vector p0 = r0
        a = norm2(p);
      }
      cp = a;
      AssignTimer.Stop();
      // Handle trivial case of zero src
      if (ssq == 0.){
        psi = Zero();
        IterationsToComplete = 1;
        TrueResidual = 0.;
        return;
      }
      std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: guess " << guess << std::endl;
      std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient:   src " << ssq << std::endl;
      std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient:    mp " << d << std::endl;
      std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient:   mmp " << b << std::endl;
      std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient:  cp,r " << cp << std::endl;
      std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient:     p " << a << std::endl;
      RealD rsq = Tolerance * Tolerance * ssq;
      // Check if guess is really REALLY good :)
      if (cp <= rsq) {
        TrueResidual = std::sqrt(a/ssq);
        std::cout << GridLogMessage << "ConjugateGradient guess is converged already " << std::endl;
        IterationsToComplete = 0;	
        return;
      }
      std::cout << GridLogIterative << std::setprecision(8)
                << "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
      PreambleTimer.Stop();
      GridStopWatch LinalgTimer;
      GridStopWatch InnerTimer;
      GridStopWatch AxpyNormTimer;
      GridStopWatch LinearCombTimer;
      GridStopWatch MatrixTimer;
      GridStopWatch SolverTimer;
      RealD usecs = -usecond();
      SolverTimer.Start();
      int k;
      for (k = 1; k <= MaxIterations; k++) {
        GridStopWatch IterationTimer;
        IterationTimer.Start();
        c = cp;
        MatrixTimer.Start();
        Linop.HermOp(p, mmp);         // Computes mmp = Ap
        MatrixTimer.Stop();
        LinalgTimer.Start();
        InnerTimer.Start();
        ComplexD dc  = innerProduct(p,mmp);         // p^\dagger A p
        InnerTimer.Stop();
        d = dc.real();
        a = c / d;
        AxpyNormTimer.Start();
        // Residual update r_{k+1} = r_k - \alpha_k A p_k; the call's result is stored as the
        // new squared residual norm cp = ||r_{k+1}||^2.
        cp = axpy_norm(r, -a, mmp, r);
        AxpyNormTimer.Stop();
        b = cp / c;
        LinearCombTimer.Start();
        {
          autoView( psi_v , psi, AcceleratorWrite);
          autoView( p_v   , p,   AcceleratorWrite);
          autoView( r_v   , r,   AcceleratorWrite);
          accelerator_for(ss,p_v.size(), Field::vector_object::Nsimd(),{
              coalescedWrite(psi_v[ss], a      *  p_v(ss) + psi_v(ss));
              coalescedWrite(p_v[ss]  , b      *  p_v(ss) + r_v  (ss));
          });
        }
        LinearCombTimer.Stop();
        LinalgTimer.Stop();
        LogIteration(k,a,b);
        IterationTimer.Stop();
        if ( (k % 500) == 0 ) {
          std::cout << GridLogMessage << "ConjugateGradient: Iteration " << k
                  << " residual " << sqrt(cp/ssq) << " target " << Tolerance << std::endl;
        } else { 
          std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
                  << " residual " << sqrt(cp/ssq) << " target " << Tolerance << " took " << IterationTimer.Elapsed() << std::endl;
        }
        // Stopping condition
        if (cp <= rsq) {
          usecs +=usecond();
          SolverTimer.Stop();
          // Recompute the residual from scratch to report the true (non-recursive) value
          Linop.HermOpAndNorm(psi, mmp, d, qq);
          p = mmp - src;
          GridBase *grid = src.Grid();
          RealD DwfFlops = (1452. )*grid->gSites()*4*k
     	               + (8+4+8+4+4)*12*grid->gSites()*k; // CG linear algebra
          RealD srcnorm = std::sqrt(norm2(src));
          RealD resnorm = std::sqrt(norm2(p));
          RealD true_residual = resnorm / srcnorm;
          std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k 
            << "\tComputed residual " << std::sqrt(cp / ssq)
            << "\tTrue residual " << true_residual
            << "\tTarget " << Tolerance << std::endl;
          //	std::cout << GridLogMessage << "\tPreamble   " << PreambleTimer.Elapsed() <<std::endl;
          std::cout << GridLogMessage << "\tSolver Elapsed    " << SolverTimer.Elapsed() <<std::endl;
          std::cout << GridLogPerformance << "Time breakdown "<<std::endl;
          std::cout << GridLogPerformance << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl;
          std::cout << GridLogPerformance << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl;
          std::cout << GridLogPerformance << "\t\tInner      " << InnerTimer.Elapsed() <<std::endl;
          std::cout << GridLogPerformance << "\t\tAxpyNorm   " << AxpyNormTimer.Elapsed() <<std::endl;
          std::cout << GridLogPerformance << "\t\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
          // NOTE(review): flops per microsecond is Mflop/s; the "Gflops" label looks off -- confirm.
          std::cout << GridLogDebug << "\tMobius flop rate " << DwfFlops/ usecs<< " Gflops " <<std::endl;
          if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
          IterationsToComplete = k;	
          TrueResidual = true_residual;
          return;
        }
      }
      // Failed. Calculate true residual before giving up                                                         
      // Linop.HermOpAndNorm(psi, mmp, d, qq);
      //    p = mmp - src;
      //TrueResidual = sqrt(norm2(p)/ssq);
      //    TrueResidual = 1;
      std::cout << GridLogMessage << "ConjugateGradient did NOT converge "<<k<<" / "<< MaxIterations
      	      <<" residual "<< std::sqrt(cp / ssq)<< std::endl;
      SolverTimer.Stop();
      std::cout << GridLogMessage << "\tPreamble   " << PreambleTimer.Elapsed() <<std::endl;
      std::cout << GridLogMessage << "\tConstruct  " << ConstructTimer.Elapsed() <<std::endl;
      std::cout << GridLogMessage << "\tNorm       " << NormTimer.Elapsed() <<std::endl;
      std::cout << GridLogMessage << "\tAssign     " << AssignTimer.Elapsed() <<std::endl;
      std::cout << GridLogMessage << "\tSolver     " << SolverTimer.Elapsed() <<std::endl;
      std::cout << GridLogMessage << "Solver breakdown "<<std::endl;
      std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl;
      std::cout << GridLogMessage<< "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl;
      std::cout << GridLogPerformance << "\t\tInner      " << InnerTimer.Elapsed() <<std::endl;
      std::cout << GridLogPerformance << "\t\tAxpyNorm   " << AxpyNormTimer.Elapsed() <<std::endl;
      std::cout << GridLogPerformance << "\t\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
      if (ErrorOnNoConverge) assert(0);
      IterationsToComplete = k;
    }
 };
 NAMESPACE_END(Grid);
 #endif
@@ -53,18 +53,6 @@ enum IRLdiagonalisation {
  IRLdiagonaliseWithEigen
 };
 // Eigenvalue sort-mode selector; ImplicitlyRestartedLanczos stores one in its EigSort member,
 // which its constructors set to IRLeigsortMax.
 // NOTE(review): the enumerator meanings below are inferred from their names -- confirm.
 enum IRLeigsort { 
  IRLeigsortMax,     // presumably: order by largest eigenvalue
  IRLeigsortSqMin    // presumably: order by smallest squared eigenvalue
 };
 #if 0
 // Strict "less than" on squared values: true when a*a < b*b.
 // Declared inline because this appears to be a header: a non-inline free function defined
 // there would produce multiple-definition link errors once the surrounding #if 0 is enabled.
 inline bool square_comp(RealD a, RealD b){
 	return a*a < b*b;
 }
 #endif
 template<class Field> class ImplicitlyRestartedLanczosHermOpTester  : public ImplicitlyRestartedLanczosTester<Field>
 {
 public:
@@ -131,9 +119,8 @@ class ImplicitlyRestartedLanczos {
  /////////////////////////
  // Constructor
  /////////////////////////
 public:
  IRLeigsort EigSort;
 public:       
  //////////////////////////////////////////////////////////////////
  // PAB:
@@ -167,7 +154,6 @@ class ImplicitlyRestartedLanczos {
    Nstop(_Nstop)  ,      Nk(_Nk),      Nm(_Nm),
    eresid(_eresid),      betastp(_betastp),
    MaxIter(_MaxIter)  ,      MinRestart(_MinRestart),
    EigSort(IRLeigsortMax), 
    orth_period(_orth_period), diagonalisation(_diagonalisation)  { };
    ImplicitlyRestartedLanczos(LinearFunction<Field> & PolyOp,
@@ -184,7 +170,6 @@ class ImplicitlyRestartedLanczos {
    Nstop(_Nstop)  ,      Nk(_Nk),      Nm(_Nm),
    eresid(_eresid),      betastp(_betastp),
    MaxIter(_MaxIter)  ,      MinRestart(_MinRestart),
    EigSort(IRLeigsortMax), 
    orth_period(_orth_period), diagonalisation(_diagonalisation)  { };
  ////////////////////////////////
@@ -331,12 +316,8 @@ until convergence
      // sorting
      //////////////////////////////////
      eval2_copy = eval2;
 //      if (EigSort==IRLeigsortMax)
 //      std::partial_sort(eval2.begin(),eval2.begin()+Nm,eval2.end(),square_comp);
 //      else
      std::partial_sort(eval2.begin(),eval2.begin()+Nm,eval2.end(),std::greater<RealD>());
      std::cout<<GridLogIRL <<" evals sorted "<<std::endl;
 //      eval2_copy = eval2;
      const int chunk=8;
      for(int io=0; io<k2;io+=chunk){
 	std::cout<<GridLogIRL << "eval "<< std::setw(3) << io ;
@@ -352,7 +333,6 @@ until convergence
      //////////////////////////////////
      Qt = Eigen::MatrixXd::Identity(Nm,Nm);
      for(int ip=k2; ip<Nm; ++ip){ 
 //        std::cout<<GridLogIRL <<"QR decompose "<<eval2[ip]<<std::endl;
 	QR_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm);
      }
      std::cout<<GridLogIRL <<"QR decomposed "<<std::endl;
@@ -395,8 +375,7 @@ until convergence
 	//  power of two search pattern;  not every evalue in eval2 is assessed.
 	int allconv =1;
-//	for(int jj = 1; jj<=Nstop; jj*=2){
+	for(int jj = 1; jj<=Nstop; jj*=2){
 	for(int jj = 1; jj<=Nstop; jj++){
 	  int j = Nstop-jj;
 	  RealD e = eval2_copy[j]; // Discard the evalue
 	  basisRotateJ(B,evec,Qt,j,0,Nk,Nm);	    
@@ -1,276 +0,0 @@
 /*************************************************************************************
 Grid physics library, www.github.com/paboyle/Grid
 Source file: ./Grid/algorithms/iterative/LanczosBidiagonalization.h
 Copyright (C) 2015
 Author: Chulwoo Jung <chulwoo@bnl.gov>
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License along
 with this program; if not, write to the Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 See the full license in the file "LICENSE" in the top level distribution directory
 *************************************************************************************/
 /*  END LEGAL */
 #ifndef GRID_LANCZOS_BIDIAGONALIZATION_H
 #define GRID_LANCZOS_BIDIAGONALIZATION_H
 NAMESPACE_BEGIN(Grid);
 /**
 * Lanczos Bidiagonalization (Golub-Kahan)
 *
 * For a linear operator A with adjoint A^dag, constructs the bidiagonal
 * decomposition:
 *
 *   A  V_m = U_m B_m
 *   A^dag U_m = V_m B_m^T + beta_{m+1} v_{m+1} e_m^T
 *
 * where:
 *   V_m = [v_1, ..., v_m]  right Lanczos vectors (orthonormal)
 *   U_m = [u_1, ..., u_m]  left  Lanczos vectors (orthonormal)
 *   B_m is upper bidiagonal with diag(alpha_1,...,alpha_m) and
 *       superdiag(beta_2,...,beta_m)
 *
 * The singular values of A are approximated by those of B_m.
 * The singular values of B_m are the square roots of the eigenvalues of
 * the symmetric tridiagonal matrix B_m^T B_m.
 *
 * Usage:
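 *   LanczosBidiagonalization<Field> lb(Linop, grid, tol);   // tol is a constructor argument
 *   lb.run(src, Nmax);        // optional third argument is the bool 'reorth' flag, not a tolerance
 *   lb.computeSVD();          // fills the singular values / vectors of B
 *   // Access results via getters.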
 */
 template <class Field>
 class LanczosBidiagonalization {
  public: 
  LinearOperatorBase<Field> &Linop;
  GridBase *Grid;
  int Nm;           // number of Lanczos steps taken
  RealD Tolerance;  // convergence threshold on beta_{k+1} / alpha_k
  std::vector<Field>  V;       // right Lanczos vectors v_1 ... v_m
  std::vector<Field>  U;       // left  Lanczos vectors u_1 ... u_m
  std::vector<RealD>  alpha;   // diagonal of bidiagonal matrix
  std::vector<RealD>  beta;    // super-diagonal (beta[k] couples u_k and v_{k+1})
  // SVD of the bidiagonal matrix (filled after computeSVD())
  Eigen::VectorXd  singularValues;
  Eigen::MatrixXd  leftSVecs;   // columns are left  singular vectors of B
  Eigen::MatrixXd  rightSVecs;  // columns are right singular vectors of B
 public:
  LanczosBidiagonalization(LinearOperatorBase<Field> &_Linop, GridBase *_Grid,
                           RealD _tol = 1.0e-8)
    : Linop(_Linop), Grid(_Grid), Tolerance(_tol), Nm(0)
  {}
  /**
   * Run the Golub-Kahan Lanczos bidiagonalization.
   *
   * Parameters
   * ----------
   * src  : starting vector (need not be normalised)
   * Nmax : maximum number of Lanczos steps
   * reorth : if true, full reorthogonalisation of both V and U bases
   */
  void run(const Field &src, int Nmax, bool reorth = true)
  {
    assert(norm2(src) > 0.0);
    V.clear(); U.clear();
    alpha.clear(); beta.clear();
    Nm = 0;
    Field p(Grid), r(Grid);
    // --- initialise: v_1 = src / ||src|| ---
    Field v(Grid);
    v = src;
    RealD nrm = std::sqrt(norm2(v));
    v = (1.0 / nrm) * v;
    V.push_back(v);
    for (int k = 0; k < Nmax; ++k) {
      // p = A v_k
      Linop.Op(V[k], p);
      // p = p - beta_k * u_{k-1}   (remove previous left vector)
      if (k > 0) {
        p = p - beta[k-1] * U[k-1];
      }
      // alpha_k = ||p||
      RealD ak = std::sqrt(norm2(p));
      if (ak < 1.0e-14) {
        std::cout << GridLogMessage
                  << "LanczosBidiagonalization: lucky breakdown at step "
                  << k << " (alpha = " << ak << ")" << std::endl;
        break;
      }
      alpha.push_back(ak);
      // u_k = p / alpha_k
      Field u(Grid);
      u = (1.0 / ak) * p;
      // full reorthogonalisation of u against previous U
      if (reorth) {
        for (int j = 0; j < (int)U.size(); ++j) {
          ComplexD ip = innerProduct(U[j], u);
          u = u - ip * U[j];
        }
        RealD unrm = std::sqrt(norm2(u));
        if (unrm > 1.0e-14) u = (1.0 / unrm) * u;
      }
      U.push_back(u);
      // r = A^dag u_k - alpha_k * v_k
      Linop.AdjOp(U[k], r);
      r = r - ak * V[k];
      // full reorthogonalisation of r against previous V
      if (reorth) {
        for (int j = 0; j < (int)V.size(); ++j) {
          ComplexD ip = innerProduct(V[j], r);
          r = r - ip * V[j];
        }
      }
      // beta_{k+1} = ||r||
      RealD bk = std::sqrt(norm2(r));
      beta.push_back(bk);
      Nm = k + 1;
      std::cout << GridLogMessage
                << "LanczosBidiagonalization step " << k
                << "  alpha = " << ak
                << "  beta  = " << bk << std::endl;
      // convergence: residual beta / alpha small enough
      if (bk / ak < Tolerance) {
        std::cout << GridLogMessage
                  << "LanczosBidiagonalization converged at step " << k
                  << "  (beta/alpha = " << bk / ak << ")" << std::endl;
        break;
      }
      if (k == Nmax - 1) break;   // no v_{k+2} needed after last step
      // v_{k+1} = r / beta_{k+1}
      Field vnext(Grid);
      vnext = (1.0 / bk) * r;
      V.push_back(vnext);
    }
  }
  /**
   * Compute the SVD of the bidiagonal matrix B using Eigen.
   * Singular values are stored in descending order.
   */
  void computeSVD()
  {
    int m = Nm;
    Eigen::MatrixXd B = Eigen::MatrixXd::Zero(m, m);
    for (int k = 0; k < m; ++k) {
      B(k, k) = alpha[k];
      if (k + 1 < m && k < (int)beta.size())
        B(k, k+1) = beta[k];
    }
    Eigen::JacobiSVD<Eigen::MatrixXd> svd(B,
        Eigen::ComputeThinU | Eigen::ComputeThinV);
    singularValues = svd.singularValues();   // already sorted descending
    leftSVecs      = svd.matrixU();
    rightSVecs     = svd.matrixV();
    std::cout << GridLogMessage
              << "LanczosBidiagonalization: singular values of B_" << m
              << std::endl;
    for (int k = 0; k < m; ++k)
      std::cout << GridLogMessage << "  sigma[" << k << "] = "
                << singularValues(k) << std::endl;
  }
  /**
   * Return the k-th approximate left singular vector of A in the full
   * lattice space.  computeSVD() must have been called first.
   */
  Field leftSingularVector(int k)
  {
    assert(k < (int)leftSVecs.cols());
    Field svec(Grid);
    svec = Zero();
    for (int j = 0; j < Nm; ++j)
      svec = svec + leftSVecs(j, k) * U[j];
    return svec;
  }
  /**
   * Return the k-th approximate right singular vector of A in the full
   * lattice space.  computeSVD() must have been called first.
   */
  Field rightSingularVector(int k)
  {
    assert(k < (int)rightSVecs.cols());
    Field svec(Grid);
    svec = Zero();
    for (int j = 0; j < Nm; ++j)
      svec = svec + rightSVecs(j, k) * V[j];
    return svec;
  }
  /**
   * Verify the bidiagonalization: returns max residual
   *   max_k || A v_k - alpha_k u_k - beta_k u_{k-1} ||
   */
  RealD verify()
  {
    Field tmp(Grid);
    RealD maxres = 0.0;
    for (int k = 0; k < Nm; ++k) {
      Linop.Op(V[k], tmp);
      tmp = tmp - alpha[k] * U[k];
      if (k > 0 && k-1 < (int)beta.size())
        tmp = tmp - beta[k-1] * U[k-1];
      RealD res = std::sqrt(norm2(tmp));
      if (res > maxres) maxres = res;
      std::cout << GridLogMessage
                << "LanczosBidiagonalization verify step " << k
                << "  ||A v_k - alpha_k u_k - beta_{k-1} u_{k-1}|| = "
                << res << std::endl;
    }
    return maxres;
  }
  /* Getters */
  int                       getNm()           const { return Nm; }
  const std::vector<Field>& getV()            const { return V; }
  const std::vector<Field>& getU()            const { return U; }
  const std::vector<RealD>& getAlpha()        const { return alpha; }
  const std::vector<RealD>& getBeta()         const { return beta; }
  Eigen::VectorXd           getSingularValues() const { return singularValues; }
 };
 NAMESPACE_END(Grid);
 #endif
@@ -60,29 +60,18 @@ public:
  void Level(int lv) { level=lv; };
-  PrecGeneralisedConjugateResidualNonHermitian(RealD tol,Integer maxit,LinearOperatorBase<Field> &_Linop,LinearFunction<Field> &Prec,int _mmax, int _nstep) : 
+  PrecGeneralisedConjugateResidualNonHermitian(RealD tol,Integer maxit,LinearOperatorBase<Field> &_Linop,LinearFunction<Field> &Prec,int _mmax,int _nstep) : 
    Tolerance(tol), 
    MaxIterations(maxit),
    Linop(_Linop),
    Preconditioner(Prec),
    mmax(_mmax),
-    nstep(_nstep)         // what is nstep vs mmax? one is the number of inner iterations
+    nstep(_nstep)
  { 
    level=1;
    verbose=1;
  };
  // virtual method stubs for updating GCR polynomial
  virtual void LogBegin(void){
    std::cout << "GCR::LogBegin() "<<std::endl;
  };
  virtual void LogIteration(int k, ComplexD a, std::vector<ComplexD> betas){
    std::cout << "GCR::LogIteration() "<<std::endl;
  };
  virtual void LogComplete(std::vector<ComplexD>& alphas, std::vector<std::vector<ComplexD>>& betas) {
    std::cout << "GCR::LogComplete() "<<std::endl;
  };
  void operator() (const Field &src, Field &psi){
    //    psi=Zero();
@@ -107,6 +96,7 @@ public:
      GCRLogLevel <<"PGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<" target "<<rsq <<std::endl;
      if(cp<rsq) {
 	SolverTimer.Stop();
 	Linop.Op(psi,r);
@@ -145,9 +135,9 @@ public:
    ////////////////////////////////
    // history for flexible orthog
    ////////////////////////////////
-    std::vector<Field> q(mmax,grid);        // q = Ap
+    std::vector<Field> q(mmax,grid);
-    std::vector<Field> p(mmax,grid);        // store mmax conjugate momenta
+    std::vector<Field> p(mmax,grid);
-    std::vector<RealD> qq(mmax);            // qq = (Ap)^2 = <p|A^\dagger A |p> (denom of \alpha)
+    std::vector<RealD> qq(mmax);
    GCRLogLevel<< "PGCR nStep("<<nstep<<")"<<std::endl;
@@ -165,9 +155,7 @@ public:
    LinalgTimer.Start();
    r=src-Az;
    LinalgTimer.Stop();
-    GCRLogLevel<< "PGCR true residual r = src - A psi   "<< norm2(r) <<std::endl;
+    GCRLogLevel<< "PGCR true residual r = src - A psi   "<<norm2(r) <<std::endl;
    this->LogBegin();       // initialize polynomial GCR if needed (TODO think about placement of this)
    /////////////////////
    // p = Prec(r)
@@ -191,44 +179,31 @@ public:
    q[0]= Az;
    qq[0]= zAAz;
    std::cout << "||init p - src||: " << norm2(p[0] - src) << std::endl;   // for debugging
    cp =norm2(r);
    LinalgTimer.Stop();
    std::vector<ComplexD> all_alphas;
    std::vector<std::vector<ComplexD>> all_betas;
    for(int k=0;k<nstep;k++){
      steps++;
      int kp     = k+1;
-      int peri_k = k %mmax;     // only store mmax vectors; just roll around if needed
+      int peri_k = k %mmax;
      int peri_kp= kp%mmax;
      // std::cout << "peri_kp = " << peri_kp << std::endl;
      LinalgTimer.Start();
      rq= innerProduct(q[peri_k],r); // what if rAr not real?
-      a = rq/qq[peri_k];              // compute alpha_j
+      a = rq/qq[peri_k];
-      all_alphas.push_back(a);
+      axpy(psi,a,p[peri_k],psi);         
-      axpy(psi,a,p[peri_k],psi);      // update psi --> psi + \alpha p
+      cp = axpy_norm(r,-a,q[peri_k],r);
      cp = axpy_norm(r,-a,q[peri_k],r);       // update r --> r - \alpha D p. Note q = Dp
      LinalgTimer.Stop();
-      // LogIterationA(k + 1, a);
+      GCRLogLevel<< "PGCR step["<<steps<<"]  resid " << cp << " target " <<rsq<<std::endl; 
-      GCRLogLevel<< "GCR step["<<steps<<"]  resid " << cp << " target " <<rsq<<std::endl; 
+      if((k==nstep-1)||(cp<rsq)){
-
+	return cp;
-      // moving this to end of loop so that it doesn't exit beforehand
+      }
      // TODO if I want to uncomment this, I have to split the LogIteration again and put LogIterationA() beforehand
      // if((k==nstep-1)||(cp<rsq)){
      //   return cp;
      // }
      PrecTimer.Start();
@@ -246,205 +221,22 @@ public:
      q[peri_kp]=Az;
      p[peri_kp]=z;
      // Field Dsrc (grid);
      // Linop.Op(src, Dsrc);
      // std::cout << "||q[peri_kp] - D(src)||: " << norm2(q[peri_kp] - Dsrc) << std::endl;   // for debugging
          // // delete after testing
          // std::cout << "Testing Dsq on one for GCR: " << std::endl;
          // Field myField (grid);
          // myField = 1.0;
          // Field out1 (grid); Field out2 (grid);
          // Linop.HermOp(myField, out1);
          // Linop.Op(myField, out2);
          // std::cout << "Dsq.Hermop(ones) has norm " << norm2(out1) << std::endl;
          // std::cout << "Dsq.Op(ones) has norm " << norm2(out2) << std::endl;
      // basically northog = k+1 if mmax is large
      int northog = ((kp)>(mmax-1))?(mmax-1):(kp);  // if more than mmax done, we orthog all mmax history.
      // std::cout << "northog: " << northog << std::endl;
      std::vector<ComplexD> betas (northog);
      // std::cout << "peri_kp: " << peri_kp << std::endl;
      // we iterate backwards counting down from the current k+1 index (peri_kp) because we 
      for(int back=0;back<northog;back++){
 	int peri_back=(k-back)%mmax;   	  GRID_ASSERT((k-back)>=0);
-        // b=-real(innerProduct(q[peri_back],Az))/qq[peri_back];
+	b=-real(innerProduct(q[peri_back],Az))/qq[peri_back];
        b=-(innerProduct(q[peri_back],Az))/qq[peri_back];     // TODO try complex beta
 	p[peri_kp]=p[peri_kp]+b*p[peri_back];
 	q[peri_kp]=q[peri_kp]+b*q[peri_back];
        // LogIterationB(peri_back, b);
        // betas[back] = b;    // may need to change the indexing if I ever do it with restarts
        // std::cout << "[DEBUG] pushing beta for back = " << back << ", peri_back = " << peri_back << std::endl;
        betas[peri_back] = b;    // may need to change the indexing if I ever do it with restarts
      }
      qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm
      LinalgTimer.Stop();
      // log iteration and update GCR polynomial if necessary.
      all_betas.push_back(betas);
      LogIteration(k + 1, a, betas);
      // finish if necessary
      if((k==nstep-1)||(cp<rsq)){
        std::cout << "All alphas: " << std::endl << all_alphas << std::endl;
        std::cout << "All betas: " << std::endl << all_betas << std::endl;
        LogComplete(all_alphas, all_betas);
        std::cout << "Exiting GCR." << std::endl;
        return cp;
      }
    }
    GRID_ASSERT(0); // never reached
    return cp;
  }
 };
 // Serialisable record of a GCR polynomial run.  PGCRPolynomial (below) holds a
 // reference to one of these and appends to it while a solve is in progress.
 class PolynomialFile: Serializable {
  public:
    // data   : one snapshot of the solution-polynomial coefficients per
    //          LogIteration() call
    // betas  : per-iteration beta vectors, appended by LogComplete()
    // alphas : per-iteration alpha values, appended by LogComplete()
    GRID_SERIALIZABLE_CLASS_MEMBERS(PolynomialFile, 
      std::vector<std::vector<std::complex<double>>>, data,
      std::vector<std::vector<std::complex<double>>>, betas,
      std::vector<std::complex<double>>,              alphas
    );
 };
 // Optionally record the GCR polynomial. [PO]: TODO
 template <class Field>
 class PGCRPolynomial : public PrecGeneralisedConjugateResidualNonHermitian<Field> {
 public:
  std::vector<ComplexD> ak;                    // alpha passed to each LogIteration() call
  std::vector<std::vector<ComplexD>> bk;       // betas vector passed to each LogIteration() call
  // std::vector<ComplexD> poly_p;
  std::vector<std::vector<ComplexD>> poly_p;   // poly_p[j] = coefficients (in powers of A) of search direction p_j
  std::vector<ComplexD> poly_Ap;        // polynomial in Ap_j (only store it for last p)
  std::vector<ComplexD> poly_r;                // coefficients of the current residual polynomial
  std::vector<ComplexD> polynomial;            // coefficients of the accumulated solution polynomial
  PolynomialFile& PF;                          // external sink; snapshots are appended, never cleared here
 public:
  PGCRPolynomial(RealD tol, Integer maxit,LinearOperatorBase<Field> &_Linop, LinearFunction<Field> &Prec, int _mmax, int _nstep, PolynomialFile& _PF)
    : PrecGeneralisedConjugateResidualNonHermitian<Field>(tol, maxit, _Linop, Prec, _mmax, _nstep), PF(_PF)
  {};
  // Applies the recorded polynomial in A = Linop to src:
  //   psi = sum_n polynomial[n] * A^n src
  // If no coefficients have been recorded yet the polynomial is identically
  // zero, so psi is set to zero rather than reading polynomial[0] out of range.
  void PolyOp(const Field &src, Field &psi)
  {
    if (polynomial.empty()) {
      psi=Zero();
      psi.Checkerboard()=src.Checkerboard();
      return;
    }
    Field tmp(src.Grid());
    Field AtoN(src.Grid());
    AtoN = src;
    psi=AtoN*polynomial[0];
    for(int n=1;n<(int)polynomial.size();n++){
      tmp = AtoN;
      this->Linop.Op(tmp,AtoN);               // iterate A^n
      psi = psi + polynomial[n]*AtoN;       // psi += poly_n A^n src
    }
  }
  // Replays the recorded (unpreconditioned) GCR recurrence on src using the
  // stored ak / bk coefficients, accumulating the solution in x.
  // [PO TODO] debug this
  void PGCRsequence(const Field &src, Field &x)
  {
    Field Ap(src.Grid());
    Field r(src.Grid());
    // Field p(src.Grid());
    // p=src;
    std::vector<Field> p;
    p.push_back(src);
    r=src;
    x=Zero();
    x.Checkerboard()=src.Checkerboard();
    for(int k=0;k<(int)ak.size();k++){
      x = x + ak[k]*p[k];
      this->Linop.Op(p[k], Ap);
      r = r - ak[k] * Ap;
      // p[k] = r;
      p.push_back(r);
      // bk[k] is the betas vector logged with ak[k]; entry i multiplies p[i]
      // when forming p[k+1].  The previous 'bk[i, k+1]' used the comma
      // operator and therefore indexed bk[k+1] (a whole vector, and past the
      // end on the last step), and 'i < k' dropped the p[k] term.
      // NOTE(review): assumes the mmax history never wrapped, so beta index i
      // is the absolute direction index -- confirm against the solver.
      int nb = 0;
      if (k < (int)bk.size()) {
        nb = (int)bk[k].size();
        if (nb > k+1) nb = k+1;           // only p[0..k] exist besides p[k+1] itself
      }
      for (int i = 0; i < nb; i++) {
        p[k+1] = p[k+1] + bk[k][i] * p[i];
      }
      // p = r + bk[k] * p;
    }
  }
  // Convenience entry point: zero the guess, then run the inherited solver.
  void Solve(const Field &src, Field &psi)
  {
    psi=Zero();
    this->operator()(src, psi);
  }
  // Resets all per-solve polynomial state and seeds p_0 = r_0 = 1 (degree 0).
  virtual void LogBegin(void)
  {
    std::cout << "PGCR::LogBegin() "<<std::endl;
    ak.resize(0);
    bk.resize(0);
    polynomial.resize(0);
    // These three were previously only appended to, so a second solve kept
    // the first solve's entries and LogIteration() indexed stale poly_p[k-1].
    poly_Ap.resize(0);
    poly_p.resize(0);
    poly_r.resize(0);
    poly_Ap.push_back(0.0);     // start with (0.0); during first iteration should change to (0.0, 1.0)
    std::vector<ComplexD> p0_tmp;
    p0_tmp.push_back(1.0);
    poly_p.push_back(p0_tmp);
    poly_r.push_back(1.0);
  };
  // Updates vector psi and r and initializes vector p[k+1]
  // k is 1-based (the solver passes its loop index + 1); a is alpha for this
  // step and betas holds the orthogonalisation coefficients for p_k.
  virtual void LogIteration(int k, ComplexD a, std::vector<ComplexD> betas){
    std::cout << "PGCR::LogIteration(k = " << k << ")" << std::endl;
    ak.push_back(a);
    bk.push_back(betas);
    // update Ap by pushing p[k] to the right
    poly_Ap.push_back(0.0);   // need to pad the end with an element
    poly_Ap[0] = 0.0;         // technically this should be unnecessary, as the first component is never set
    for(int i = 0; i < k; i++){
      poly_Ap[i+1]=poly_p[k-1][i];        // A\vec{p} = (0, \vec{p}) bc A shifts components of p to the right
    }
    // update psi_{k+1} --> psi_k + a_k p_k
    polynomial.push_back(0.0);
    for(int i = 0; i < k; i++) {
      polynomial[i] += a * poly_p[k-1][i];
    }
    {
      // snapshot the solution polynomial after this step into the output file
      std::vector<std::complex<double>> poly_stdcmplx(polynomial.begin(), polynomial.end());
      PF.data.push_back(poly_stdcmplx);
    }
    //  r_{k+1} --> r_k - a_k A p_k
    //  p_{k+1} --> r_k + \sum_{i=0}^k \beta_{ik} p_i, input betas = (\beta_{ik})_i
    poly_r.push_back(0.0);        // should be of size k+1 if we start with k = 1
    std::vector<ComplexD> p_next (k + 1, ComplexD(0.0));     // p_{k+1} = same size as r_{k+1}
    for(int i = 0; i < k + 1; i++){
      poly_r[i] = poly_r[i] - a * poly_Ap[i];     // update r_{k+1} --> r_k - \alpha_k A p_k
      p_next[i] = poly_r[i];                 // init new vector as r_{k+1}
    }
    // p_{k+1} --> p_{k+1} + \sum_i \beta_{ij} p_i
    int nbeta = betas.size();
    std::cout << "Betas: " << betas << std::endl;
    for (int j = 0; j < nbeta; j++) {
      for (int i = 0; i < j+1; i++) {
        p_next[i] += betas[j] * poly_p[j][i];
      }
    }
    poly_p.push_back(p_next);                 // add p_{k+1} to the list of p's
  }
  virtual void LogComplete(std::vector<ComplexD>& alphas, std::vector<std::vector<ComplexD>>& betas) {
    /** Logs all alphas and betas to complete the iterations. */
    std::cout << "PGCR::LogComplete() "<<std::endl;
    for (int i = 0; i < (int)alphas.size(); i++) {
      PF.alphas.push_back(std::complex<double>(alphas[i].real(), alphas[i].imag()));
      // Keep PF.betas the same length as PF.alphas even if the caller logged
      // fewer beta vectors than alphas, instead of reading betas[i] out of range.
      std::vector<std::complex<double>> beta_stdcmplx;
      if (i < (int)betas.size()) {
        beta_stdcmplx.assign(betas[i].begin(), betas[i].end());
      }
      PF.betas.push_back(beta_stdcmplx);
    }
  };
 };
 NAMESPACE_END(Grid);
 #endif
@@ -1,753 +0,0 @@
 /*************************************************************************************
 Grid physics library, www.github.com/paboyle/Grid
 Source file: ./Grid/algorithms/iterative/RestartedLanczosBidiagonalization.h
 Copyright (C) 2015
 Author: Chulwoo Jung <chulwoo@bnl.gov>
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License along
 with this program; if not, write to the Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 See the full license in the file "LICENSE" in the top level distribution directory
 *************************************************************************************/
 /*  END LEGAL */
 #ifndef GRID_RESTARTED_LANCZOS_BIDIAGONALIZATION_H
 #define GRID_RESTARTED_LANCZOS_BIDIAGONALIZATION_H
 NAMESPACE_BEGIN(Grid);
 /**
 * Implicitly Restarted Lanczos Bidiagonalization (IRLBA)
 *
 * Computes the p largest (or p smallest) singular triplets of a linear
 * operator A using the Golub-Kahan-Lanczos bidiagonalization with implicit
 * restart via thick-restart / QR shifts.
 *
 * Algorithm (Baglama & Reichel, SIAM J. Sci. Comput. 27(1):19-42, 2005):
 *
 *   Outer loop:
 *     1. Extend the p-step (or seed) bidiagonalization to k steps:
 *           A  V_k = U_k B_k
 *           A^dag U_k = V_k B_k^T + beta_{k+1} v_{k+1} e_k^T
 *     2. Compute SVD:  B_k = X Sigma Y^T
 *     3. Check convergence of the p desired singular values via
 *           |beta_{k+1} * x_{k,i}|  <  tol * sigma_i
 *        where x_{k,i} is the last component of the i-th left singular vector
 *        of B_k (the residual lives in the A^dag direction in this convention).
 *     4. Apply k-p implicit QR shifts to implicitly compress the basis
 *        to p steps (Sorensen-Lehoucq thick restart):
 *           B_p^+ = X_p^T B_k Y_p   (upper bidiagonal, p x p)
 *        and update the lattice vectors:
 *           V_p^+ = V_k Y_p
 *           U_p^+ = U_k X_p
 *        The new residual coupling is
 *           beta_p^+ v_{p+1}^+ = beta_{k+1} v_{k+1} * (e_k^T Y_p)_p
 *                               + B_k(p,p+1) * (orthogonal tail from QR)
 *     5. Go to step 1.
 *
 * Template parameter
 * ------------------
 *   Field : lattice field type (must support Grid algebra operations)
 *
 * Usage
 * -----
 *   RestartedLanczosBidiagonalization<Field> irlba(Linop, grid, p, k, tol, maxIter);
 *   irlba.run(src);
 *   // Results available via getters.
 */
 template <class Field>
 class RestartedLanczosBidiagonalization {
 public:
  LinearOperatorBase<Field> &Linop;
  GridBase *Grid;
  int    Nk;       // number of desired singular triplets
  int    Nm;       // Lanczos basis size (Nm > Nk)
  RealD  Tolerance;
  int    MaxIter;
  bool   largest; // if true, target largest singular values; otherwise smallest
  // Converged singular triplets (filled after run())
  std::vector<RealD>  singularValues;   // sigma_0 >= sigma_1 >= ...
  std::vector<Field>  leftVectors;      // approximate left singular vectors
  std::vector<Field>  rightVectors;     // approximate right singular vectors
 private:
  // Working bases (size up to Nm+1)
  std::vector<Field>  V;    // right Lanczos vectors
  std::vector<Field>  U;    // left  Lanczos vectors
  std::vector<RealD>  alpha;
  std::vector<RealD>  beta;
  // After a thick restart, the column at index restart_col of U^dag A V
  // has extra non-zero entries (rows 0..restart_col-2) beyond what the
  // upper bidiagonal captures.  fvec[j] = <U[j] | A V[restart_col]> for
  // j = 0..restart_col-1.  (fvec[restart_col-1] == beta[restart_col-1].)
  // restart_col == -1 means no restart has occurred yet (pure bidiagonal).
  std::vector<RealD>  fvec;
  int                 restart_col;
 public:
  /**
   * Bind the operator and store the restart parameters; no work is done
   * until run() is called.
   *
   * _Linop   : operator whose singular triplets are sought (held by reference)
   * _Grid    : grid on which all lattice vectors are allocated
   * _Nk      : number of desired singular triplets
   * _Nm      : Lanczos basis size; must satisfy _Nm > _Nk (asserted)
   * _tol     : relative residual tolerance used by the convergence test
   * _maxIt   : maximum number of outer (restart) iterations
   * _largest : true targets the largest singular values, false the smallest
   */
  RestartedLanczosBidiagonalization(LinearOperatorBase<Field> &_Linop,
                                    GridBase *_Grid,
                                    int _Nk, int _Nm,
                                    RealD _tol   = 1.0e-8,
                                    int   _maxIt = 300,
                                    bool  _largest = true)
    : Linop(_Linop), Grid(_Grid),
      Nk(_Nk), Nm(_Nm),
      Tolerance(_tol), MaxIter(_maxIt),
      largest(_largest),
      // -1 is the "no restart yet" sentinel; previously this member held an
      // indeterminate value until the first run().
      restart_col(-1)
  {
    assert(Nm > Nk);
  }
  /**
   * Run IRLBA starting from src.
   * On exit, singularValues, leftVectors, rightVectors are filled with
   * the Nk converged singular triplets.
   *
   * All previous results and working bases are cleared first.  There are
   * three ways out of this routine:
   *   - all Nk targeted values pass the residual test (normal convergence);
   *   - the restart coupling betaRestart drops below 1e-14 (lucky breakdown);
   *   - MaxIter outer iterations elapse, in which case the current best
   *     approximations are extracted instead.
   * NOTE(review): the outputs are presumably filled by extractTriplets(),
   * which is defined outside this view -- confirm.
   */
  void run(const Field &src)
  {
    // src must be non-zero: it is normalised below to seed V.
    assert(norm2(src) > 0.0);
    singularValues.clear();
    leftVectors.clear();
    rightVectors.clear();
    // Allocate working bases
    V.clear(); U.clear();
    alpha.clear(); beta.clear();
    fvec.clear(); restart_col = -1;
    V.reserve(Nm + 1);
    U.reserve(Nm);
    // Seed: v_0 = src / ||src||
    Field vtmp(Grid);
    vtmp = src;
    RealD nrm = std::sqrt(norm2(vtmp));
    vtmp = (1.0 / nrm) * vtmp;
    V.push_back(vtmp);
    int pStart = 0;  // current basis size at start of extension
    RealD betaRestart = 0.0; // coupling from previous restart
    for (int iter = 0; iter < MaxIter; ++iter) {
      // ----------------------------------------------------------------
      // Step 1: extend from pStart steps to Nm steps
      // ----------------------------------------------------------------
      extendBasis(pStart, Nm, betaRestart);
 //      verify();
      // ----------------------------------------------------------------
      // Step 2: SVD of the Nm x Nm B matrix.
      // iter=0 (pStart==0): B is exactly bidiagonal — use buildBidiagonal.
      // iter>0 (pStart==Nk): after a thick restart, column restart_col of
      // U^dag A V has extra off-diagonal entries captured by fvec; use
      // buildFullB so the Ritz values and restart vectors are computed from
      // the exact projected matrix A V = U B_full.
      // ----------------------------------------------------------------
      Eigen::MatrixXd B = (pStart == 0) ? buildBidiagonal(Nm) : buildFullB(Nm);
      Eigen::JacobiSVD<Eigen::MatrixXd> svd(B,
          Eigen::ComputeThinU | Eigen::ComputeThinV);
      Eigen::VectorXd sigma = svd.singularValues();  // descending
      Eigen::MatrixXd X     = svd.matrixU();          // Nm x Nm left SVecs of B
      Eigen::MatrixXd Y     = svd.matrixV();          // Nm x Nm right SVecs of B
      // If targeting smallest, reorder so desired ones come first
      Eigen::VectorXi order = sortOrder(sigma);
      // ----------------------------------------------------------------
      // Step 3: check convergence of the Nk desired singular values
      // ----------------------------------------------------------------
      // NOTE(review): beta.back() assumes extendBasis() left beta non-empty
      // and that B above is Nm x Nm, i.e. the extension reached Nm steps --
      // confirm against extendBasis().
      RealD betaK = beta.back();  // beta_{k+1}
      // In our convention A V = U B (exact), the residual is in the A^dag
      // direction: A^dag u_j - sigma_j v_j = betaK * X[Nm-1,j] * V[Nm].
      // Convergence criterion: |betaK * X[Nm-1, idx]| < tol * sigma_idx.
      // nconv counts only the leading run of converged targets: the scan
      // stops at the first target that fails the test.
      int nconv = 0;
      for (int i = 0; i < Nk; ++i) {
        int idx = order(i);
        RealD res = std::abs(betaK * X(Nm - 1, idx));
        // the 1e-14 floor keeps the threshold positive when sigma is ~0
        RealD thr = Tolerance * std::max(sigma(idx), 1.0e-14);
        std::cout << GridLogMessage
                  << "IRLBA iter " << iter
                  << "  sigma[" << i << "] = " << sigma(idx)
                  << "  res = " << res
                  << "  thr = " << thr << std::endl;
        if (res < thr) ++nconv;
        else break;  // residuals not strictly ordered but break is conservative
      }
      if (nconv >= Nk) {
        std::cout << GridLogMessage
                  << "IRLBA converged: " << nconv << " singular values after "
                  << iter + 1 << " iterations." << std::endl;
        // Collect converged triplets
        extractTriplets(Nm, sigma, X, Y, order, Nk);
        return;
      }
      // ----------------------------------------------------------------
      // Step 4: implicit restart — compress to Nk steps
      // ----------------------------------------------------------------
      // betaRestart is passed in here and tested immediately afterwards, so
      // it is presumably an output of implicitRestart() -- TODO confirm.
      implicitRestart(Nm, Nk, sigma, X, Y, order, betaK, betaRestart);
 //      verify();
      // Lucky breakdown: exact invariant subspace found; convergence is exact.
      // B_p^+ = diag(alpha[0..Nk-1]); extract directly from restart basis.
      if (betaRestart < 1.0e-14) {
        std::cout << GridLogMessage
                  << "IRLBA: lucky breakdown after restart (betaRestart = 0)."
                  << " Extracting " << Nk << " exact Ritz triplets." << std::endl;
        // Re-run SVD on the p-step diagonal B^+ to get sorted Ritz triplets.
        Eigen::MatrixXd Bp = buildBidiagonal(Nk);
        Eigen::JacobiSVD<Eigen::MatrixXd> svdp(Bp,
            Eigen::ComputeThinU | Eigen::ComputeThinV);
        Eigen::VectorXi ordp = sortOrder(svdp.singularValues());
        extractTriplets(Nk, svdp.singularValues(), svdp.matrixU(),
                        svdp.matrixV(), ordp, Nk);
        return;
      }
      // the next extension starts from the compressed Nk-step basis
      pStart = Nk;
    }
    std::cout << GridLogMessage
              << "IRLBA: did not converge in " << MaxIter
              << " iterations. Returning best approximations." << std::endl;
    // Return best available approximations
    // The size is taken from alpha.size() rather than Nm, and nout is capped
    // by it, so fewer than Nk triplets may be returned on this path.
    Eigen::MatrixXd B = buildFullB((int)alpha.size());
    Eigen::JacobiSVD<Eigen::MatrixXd> svd(B,
        Eigen::ComputeThinU | Eigen::ComputeThinV);
    Eigen::VectorXd sigma = svd.singularValues();
    Eigen::MatrixXd X     = svd.matrixU();
    Eigen::MatrixXd Y     = svd.matrixV();
    Eigen::VectorXi order = sortOrder(sigma);
    int nout = std::min(Nk, (int)alpha.size());
    extractTriplets((int)alpha.size(), sigma, X, Y, order, nout);
  }
  /* Getters */
  // Number of singular triplets currently stored (not the requested Nk).
  int getNk() const
  {
    return static_cast<int>(singularValues.size());
  }
  // Read-only view of the stored singular values.
  const std::vector<RealD>& getSingularValues() const
  {
    return singularValues;
  }
  // Read-only view of the stored approximate left singular vectors.
  const std::vector<Field>& getLeftVectors() const
  {
    return leftVectors;
  }
  // Read-only view of the stored approximate right singular vectors.
  const std::vector<Field>& getRightVectors() const
  {
    return rightVectors;
  }
  /**
   * Print B_k and U^dag A V to verify the bidiagonalization relation
   *   A V_m = U_m B_m   (exact in our GK convention)
   * On the first call (pStart=0), max|B - U^dag A V| should be ~machine epsilon.
   * After a restart and extension, the column p of U^dag A V deviates from B
   * by O(betaK): this is expected because the thick restart breaks the Krylov
   * structure at column p, introducing off-diagonal terms proportional to betaK.
   * These terms vanish as betaK -> 0 (convergence), so the algorithm is correct.
   */
  void verify()
  {
    int m  = (int)alpha.size();
    int nU = (int)U.size();
    int nV = (int)V.size();
    if (m == 0) { std::cout << GridLogMessage << "IRLBA verify: empty basis" << std::endl; return; }
    // Build reference matrix Bref (nU x nV):
    //   Columns 0..m-1 : buildFullB(m)  (bidiagonal + fvec column at restart_col)
    //   Column  m      : residual column, two cases:
    //     (a) restart_col == m (right after implicitRestart, before extendBasis):
    //         V[m] = sgn*V_old[Nm], so <U[i]|A|V[m]> = fvec[i] for all i
    //     (b) otherwise (pure GK or after extendBasis):
    //         only entry (m-1, m) = beta[m-1]  (GK recurrence residual)
    Eigen::MatrixXd Bref = Eigen::MatrixXd::Zero(nU, nV);
    {
      Eigen::MatrixXd Bfull = buildFullB(m);
      int cols = std::min(m, nV);
      Bref.block(0, 0, m, cols) = Bfull.block(0, 0, m, cols);
    }
    if (nV > m && m > 0) {
      if (restart_col == m && (int)fvec.size() == m) {
        // Case (a): right after implicitRestart
        for (int i = 0; i < m; ++i) Bref(i, m) = fvec[i];
      } else if ((int)beta.size() >= m) {
        // Case (b): standard GK residual column
        Bref(m - 1, m) = beta[m - 1];
      }
    }
    // Compute M[i,j] = <U[i] | A | V[j]>
    Eigen::MatrixXd M = Eigen::MatrixXd::Zero(nU, nV);
    Field Avj(Grid);
    for (int j = 0; j < nV; ++j) {
      Linop.Op(V[j], Avj);
      for (int i = 0; i < nU; ++i) {
        ComplexD ip = innerProduct(U[i], Avj);
        M(i, j) = ip.real();
      }
    }
    // Print Bref
    std::cout << GridLogMessage
              << "IRLBA verify: Bref (" << nU << "x" << nV << "):" << std::endl;
    for (int i = 0; i < nU; ++i) {
      std::cout << GridLogMessage << "  row " << i << ": ";
      for (int j = 0; j < nV; ++j) std::cout << Bref(i,j) << " ";
      std::cout << std::endl;
    }
    // Print U^dag A V
    std::cout << GridLogMessage
              << "IRLBA verify: U^dag A V (" << nU << "x" << nV << "):" << std::endl;
    for (int i = 0; i < nU; ++i) {
      std::cout << GridLogMessage << "  row " << i << ": ";
      for (int j = 0; j < nV; ++j) std::cout << M(i,j) << " ";
      std::cout << std::endl;
    }
    // Max deviation over the full nU x nV matrix
    RealD maxdev = (Bref - M).cwiseAbs().maxCoeff();
    std::cout << GridLogMessage
              << "IRLBA verify: max|Bref - U^dag A V| = " << maxdev << std::endl;
    // Beta
    std::cout << GridLogMessage << "IRLBA verify: beta[0.." << (int)beta.size()-1 << "] = ";
    for (auto b : beta) std::cout << b << " ";
    std::cout << std::endl;
  }
 private:
  // ------------------------------------------------------------------
  // Build the m x m upper-bidiagonal matrix from alpha[0..m-1], beta[0..m-2]
  // ------------------------------------------------------------------
  Eigen::MatrixXd buildBidiagonal(int m) const
  {
    // Diagonal from alpha, first super-diagonal from beta (as far as beta
    // has entries); every other element stays zero.
    Eigen::MatrixXd bidiag = Eigen::MatrixXd::Zero(m, m);
    const int nBeta = (int)beta.size();
    for (int row = 0; row < m; ++row) {
      bidiag(row, row) = alpha[row];
      const int next = row + 1;
      if (next < m && row < nBeta) {
        bidiag(row, next) = beta[row];
      }
    }
    return bidiag;
  }
  // ------------------------------------------------------------------
  // Build the full m x m B matrix, including the non-bidiagonal column
  // at restart_col that arises after a thick restart.
  //
  // After restart, A V[restart_col] has projections onto all U[0..restart_col-1]
  // (not just U[restart_col-1]).  These are stored in fvec[0..restart_col-1]
  // and make column restart_col of U^dag A V non-bidiagonal.
  // ------------------------------------------------------------------
  Eigen::MatrixXd buildFullB(int m) const
  {
    // Start from the plain bidiagonal matrix, then overwrite the part of
    // column restart_col that lies above the diagonal with the stored
    // restart couplings fvec[0..restart_col-1].  Nothing is overwritten when
    // restart_col is outside [0, m) or fvec is empty.
    Eigen::MatrixXd B = buildBidiagonal(m);
    if (restart_col >= 0 && restart_col < m) {
      const int nRows = std::min(restart_col, (int)fvec.size());
      for (int j = 0; j < nRows; ++j) {
        B(j, restart_col) = fvec[j];
        // Row, column and value are separated by blanks so the column index
        // cannot run into the value in the log.
        std::cout << GridLogDebug << "buildFullB: B  " << j << " " << restart_col
                  << " " << B(j, restart_col) << std::endl;
      }
    }
    return B;
  }
  // ------------------------------------------------------------------
  // Return a permutation vector that puts the desired Nk singular values
  // first (largest first if largest==true, smallest first otherwise).
  // Eigen's JacobiSVD already returns sigma in descending order, so for
  // largest we just return 0,1,...,m-1; for smallest we reverse.
  // ------------------------------------------------------------------
  Eigen::VectorXi sortOrder(const Eigen::VectorXd &sigma) const
  {
    // Identity permutation when `largest` is set, reversed otherwise.
    // Only the length of sigma is used.
    const int count = (int)sigma.size();
    Eigen::VectorXi perm(count);
    for (int pos = 0; pos < count; ++pos) {
      perm(pos) = largest ? pos : (count - 1 - pos);
    }
    return perm;
  }
  // ------------------------------------------------------------------
  // Extend the Lanczos bidiagonalization from pStart to kEnd steps.
  // On first call pStart==0 (V[0] already set).
  // On restart calls V[0..pStart], U[0..pStart-1], alpha[0..pStart-1],
  // beta[0..pStart-1] are already set; betaRestart is the coupling
  // beta_{pStart} that drives the first new U step.
  // ------------------------------------------------------------------
  void extendBasis(int pStart, int kEnd, RealD betaRestart)
  {
    // Parameters:
    //   pStart      first step to (re)compute; state below pStart is kept
    //   kEnd        one past the last step to compute
    //   betaRestart not read anywhere in this body; the coupling used on the
    //               first step is the stored beta[pStart-1]
    // On return (no breakdown) alpha, beta and U hold kEnd entries and V
    // holds kEnd+1 entries.
    // Truncate containers to pStart (Lattice has no default constructor)
    if ((int)V.size() > pStart + 1) V.erase(V.begin() + pStart + 1, V.end());
    if ((int)U.size() > pStart)     U.erase(U.begin() + pStart,     U.end());
    alpha.resize(pStart);
    beta.resize(pStart);
    Field p(Grid), r(Grid);
    for (int k = pStart; k < kEnd; ++k) {
      // p = A v_k
      Linop.Op(V[k], p);
      // Remove previous left vector coupling
      if (k > 0) {
        p = p - beta[k - 1] * U[k - 1];
      }
      // On the first step after a restart, beta[pStart-1] was already set;
      // but V[pStart] was already constructed including the beta correction,
      // so no extra subtraction needed here beyond the standard recurrence.
      // Reorthogonalize p against U, then alpha_k = ||p||, u_k = p/alpha_k
      reorthogonalize(p, U);
      RealD ak = std::sqrt(norm2(p));
      if (ak < 1.0e-14) {
        // Breakdown in the left vector: record alpha, pad U/V with zero
        // fields and a zero beta so all containers stay the same length,
        // then stop extending.
        std::cout << GridLogMessage
                  << "IRLBA extendBasis: lucky breakdown at step " << k
                  << " (alpha = " << ak << ")" << std::endl;
        alpha.push_back(ak);
        Field zero(Grid); zero = Zero();
        U.push_back(zero);
        beta.push_back(0.0);
        V.push_back(zero);
        break;
      }
      alpha.push_back(ak);
      Field u(Grid);
      u = (1.0 / ak) * p;
      U.push_back(u);
      // r = A^dag u_k - alpha_k v_k, reorthogonalize, then beta_{k+1} = ||r||
      Linop.AdjOp(U[k], r);
      r = r - ak * V[k];
      reorthogonalize(r, V);
      RealD bk = std::sqrt(norm2(r));
      beta.push_back(bk);
      std::cout << GridLogMessage
                << "IRLBA extend step " << k
                << "  alpha = " << ak
                << "  beta  = " << bk << std::endl;
      // Always push v_{k+1} (needed as residual direction for restart)
      if (bk < 1.0e-14) {
        // Breakdown in the right vector: beta was already pushed above, so
        // only a zero V entry is appended before stopping.
        std::cout << GridLogMessage
                  << "IRLBA extendBasis: lucky breakdown (beta = 0) at step "
                  << k << std::endl;
        Field zero(Grid); zero = Zero();
        V.push_back(zero);
        break;
      }
      Field vnext(Grid);
      vnext = (1.0 / bk) * r;
      V.push_back(vnext);
      // The loop condition would end the loop here as well; this break is
      // an explicit marker of the last step.
      if (k == kEnd - 1) break;  // v_{k+1} pushed above; stop here
    }
  }
 public:
  // ------------------------------------------------------------------
  // Block reorthogonalization helpers.
  // Declared public because CUDA extended lambdas cannot live inside
  // private/protected member functions.
  //
  // batchInnerProducts: computes c[j] = <basis[j], vec> for all j
  //   in a single GPU pass (one accelerator_barrier instead of n).
  //   Queues n pairs of (per-site kernel, reduceKernel) to computeStream
  //   without intermediate CPU syncs, then syncs once at the end.
  //
  // batchUpdate: computes vec -= sum_j c[j]*basis[j] in one GPU kernel.
  //
  // reorthogonalize: two-pass Classical Gram-Schmidt (CGS2) using the
  //   two helpers above.  Each pass costs 2 GPU syncs (1 IP + 1 update)
  //   instead of 2n syncs per pass in the old sequential MGS.
  // ------------------------------------------------------------------
  // Compute c[j] = <basis[j], vec> for every j.
  //   vec   : field the basis vectors are projected onto
  //   basis : vectors whose inner products with vec are wanted
  //   c     : output, resized to basis.size()
  // The per-site products for all j are written into one device buffer,
  // copied to the host, summed there, and finally summed across ranks with
  // GlobalSumVector.
  void batchInnerProducts(const Field &vec,
                          const std::vector<Field> &basis,
                          std::vector<ComplexD> &c)
  {
    int n = (int)basis.size();
    c.resize(n);
    if (n == 0) return;
    typedef typename Field::vector_object         vobj;
    typedef decltype(innerProduct(vobj(), vobj())) inner_t;
    typedef decltype(basis[0].View(AcceleratorRead)) View;
    GridBase *grid = vec.Grid();
    uint64_t oSites = grid->oSites();
    uint64_t nsimd  = grid->Nsimd();
    // all_ip[j * oSites + ss] = per-site inner product of basis[j] and vec at site ss.
    // Layout: n contiguous blocks of oSites each.
    deviceVector<inner_t> all_ip((uint64_t)n * oSites);
    inner_t *all_ip_p = &all_ip[0];
    // Open one read view per basis vector and mirror the view objects into
    // device memory so the kernels can index them by j.
    hostVector<View>   h_basis_v(n);
    deviceVector<View> d_basis_v(n);
    for (int j = 0; j < n; ++j) {
      h_basis_v[j] = basis[j].View(AcceleratorRead);
      acceleratorPut(d_basis_v[j], h_basis_v[j]);
    }
    View *basis_vp = &d_basis_v[0];
    // Queue n per-site kernels to the accelerator stream — no intermediate barriers.
    // The non-blocking launch macro is required for that: the blocking
    // accelerator_for variant waits after every launch, which would give n
    // syncs instead of the single one below.
    autoView(vec_v, vec, AcceleratorRead);
    for (int j = 0; j < n; ++j) {
      // Plain local copies so the kernel lambda captures values, not the
      // loop variable or the outer uint64_t by name.
      int      jj      = j;
      uint64_t oSites_ = oSites;
      accelerator_forNB(ss, oSites, nsimd, {
        auto x = coalescedRead(basis_vp[jj][ss]);
        auto y = coalescedRead(vec_v[ss]);
        coalescedWrite(all_ip_p[jj * oSites_ + ss], innerProduct(x, y));
      });
    }
    // ONE sync after all n kernels
    accelerator_barrier();
    // Copy all per-site results to host
    hostVector<inner_t> all_ip_h((uint64_t)n * oSites);
    acceleratorCopyFromDevice(all_ip_p, &all_ip_h[0], (uint64_t)n * oSites * sizeof(inner_t));
    // Reduce on host: sum over oSites, then collapse SIMD lanes via Reduce(TensorRemove(...))
    // TensorRemove strips the iSinglet tensor wrapper to expose the SIMD scalar type.
    // Reduce sums all nsimd lanes and returns a plain scalar (RealD or ComplexD).
    std::vector<ComplexD> raw(n);
    for (int j = 0; j < n; ++j) {
      inner_t sum = Zero();
      for (uint64_t ss = 0; ss < oSites; ++ss)
        sum += all_ip_h[(uint64_t)j * oSites + ss];
      raw[j] = ComplexD(Reduce(TensorRemove(sum)));
    }
    // Sum the node-local results over all ranks
    grid->GlobalSumVector(&raw[0], n);
    for (int j = 0; j < n; ++j) c[j] = raw[j];
    // Views were opened by hand above, so they are closed by hand here
    for (int j = 0; j < n; ++j) h_basis_v[j].ViewClose();
  }
  // Subtract a linear combination of basis vectors from vec in place:
  //   vec -= sum_k c[k] * basis[k]
  //   vec   : field updated in place
  //   basis : vectors to subtract
  //   c     : one complex coefficient per basis vector; c[0..basis.size()-1]
  //           is read, so c must be at least that long
  void batchUpdate(Field &vec,
                   const std::vector<Field> &basis,
                   const std::vector<ComplexD> &c)
  {
    int n = (int)basis.size();
    if (n == 0) return;
    typedef typename Field::vector_object vobj;
    typedef decltype(basis[0].View(AcceleratorRead)) View;
    GridBase *grid = vec.Grid();
    uint64_t oSites = grid->oSites();
    uint64_t nsimd  = grid->Nsimd();
    // Split complex coefficients into real/imag double arrays on device.
    // Using doubles avoids potential ComplexD-device-code compatibility issues.
    hostVector<double>   h_re(n), h_im(n);
    deviceVector<double> d_re(n), d_im(n);
    for (int k = 0; k < n; ++k) {
      h_re[k] = c[k].real();
      h_im[k] = c[k].imag();
    }
    acceleratorCopyToDevice(&h_re[0], &d_re[0], n * sizeof(double));
    acceleratorCopyToDevice(&h_im[0], &d_im[0], n * sizeof(double));
    // Raw pointers are what the kernel lambda below captures
    double *re_p = &d_re[0];
    double *im_p = &d_im[0];
    // Basis views
    // One read view per basis vector, with the view objects mirrored into
    // device memory so the kernel can index them by k.
    hostVector<View>   h_basis_v(n);
    deviceVector<View> d_basis_v(n);
    for (int k = 0; k < n; ++k) {
      h_basis_v[k] = basis[k].View(AcceleratorRead);
      acceleratorPut(d_basis_v[k], h_basis_v[k]);
    }
    View *basis_vp = &d_basis_v[0];
    // Single kernel: vec[ss] -= sum_k (re[k] + i*im[k]) * basis[k][ss]
    autoView(vec_v, vec, AcceleratorWrite);
    accelerator_for(ss, oSites, nsimd, {
      auto v = coalescedRead(vec_v[ss]);
      for (int k = 0; k < n; ++k) {
        auto b = coalescedRead(basis_vp[k][ss]);
        // (re + i*im) * b, with the imaginary part applied through timesI
        v = v - re_p[k] * b - timesI(im_p[k] * b);
      }
      coalescedWrite(vec_v[ss], v);
    });
    // Views were opened by hand above, so they are closed by hand here
    for (int k = 0; k < n; ++k) h_basis_v[k].ViewClose();
  }
  // ------------------------------------------------------------------
  // Full reorthogonalization using two-pass Classical Gram-Schmidt (CGS2).
  // Each pass calls batchInnerProducts (1 GPU sync) + batchUpdate (1 sync),
  // replacing the old 2n GPU syncs per pass from sequential MGS.
  // ------------------------------------------------------------------
  void reorthogonalize(Field &vec, const std::vector<Field> &basis)
  {
    // Two identical projection passes: compute all coefficients against the
    // current vec, then subtract the combination.  An empty basis is a no-op.
    if (!basis.empty()) {
      std::vector<ComplexD> coeffs;
      int passesLeft = 2;
      while (passesLeft-- > 0) {
        batchInnerProducts(vec, basis, coeffs);
        batchUpdate(vec, basis, coeffs);
      }
    }
  }
  // ------------------------------------------------------------------
  // Restart: given the k-step bidiagonalization and the SVD of B_k
  // (B_k = X Sigma Y^T), compress to p steps by a thick restart.
  //
  // The p singular triplets selected by order(0..p-1) are kept: the new
  // bases are the corresponding combinations of the old V and U, so the
  // compressed B is diagonal apart from the residual coupling to V[p].
  // (Despite the function name, no implicit QR shifts are applied.)
  //
  // After this call:
  //   V[0..p],  U[0..p-1],  alpha[0..p-1],  beta[0..p-1]  are updated.
  //   betaRestart  ← new beta_p coupling for the next extension.
  // ------------------------------------------------------------------
  // Compress the k-step factorization to p steps (thick restart).
  //   k           number of steps currently held (V[k] must exist)
  //   p           number of steps kept; must be >= 1 because order(p-1) is read
  //   sigma,X,Y   singular values / left / right singular vectors of B_k
  //   order       order(i) selects the i-th kept triplet
  //   betaK       residual coupling of the k-step factorization
  //   betaRestart output: magnitude of the new coupling to V[p]
  // Side effects: replaces V, U, alpha, beta and sets fvec, restart_col.
  void implicitRestart(int k, int p,
                       const Eigen::VectorXd &sigma,
                       const Eigen::MatrixXd &X,
                       const Eigen::MatrixXd &Y,
                       const Eigen::VectorXi &order,
                       RealD betaK,
                       RealD &betaRestart)
  {
    // Thick restart (Baglama & Reichel, Sec. 2.2):
    //
    // Given B_k = X Sigma Y^T, define the new p-step basis by:
    //   V^+_i = V_k * y_{order(i)}      (right sing. vec. of B_k)
    //   U^+_i = U_k * x_{order(i)}      (left  sing. vec. of B_k)
    //
    // Then A V^+_i = A V_k y_{order(i)} = U_k B_k y_{order(i)}
    //             = sigma_{order(i)} U_k x_{order(i)} = sigma_{order(i)} U^+_i
    //
    // So B_p^+ = diag(sigma_{order(0)}, ..., sigma_{order(p-1)}) — DIAGONAL,
    // all internal betas are zero.
    //
    // The residual coupling comes from A^dag U_k = V_k B_k^T + betaK V[k] e_{k-1}^T:
    //   A^dag U^+_j - sigma_{order(j)} V^+_j = betaK * X(k-1, order(j)) * V[k]
    // (since B_k^T x_j = sigma_j y_j).
    //
    // Therefore: betaRestart = |betaK * X(k-1, order(p-1))|
    //            V[p] = sign(X(k-1, order(p-1))) * V[k]

    // ---- Build new lattice vectors ----
    std::vector<Field> Vnew, Unew;
    Vnew.reserve(p + 1);
    Unew.reserve(p);
    for (int i = 0; i < p; ++i) {
      int idx = order(i);
      Field vi(Grid); vi = Zero();
      for (int j = 0; j < k; ++j)
        vi = vi + Y(j, idx) * V[j];
      Vnew.push_back(vi);
    }
    for (int i = 0; i < p; ++i) {
      int idx = order(i);
      Field ui(Grid); ui = Zero();
      for (int j = 0; j < k; ++j)
        ui = ui + X(j, idx) * U[j];
      Unew.push_back(ui);
    }

    // ---- Residual couplings ----
    // The last kept triplet (j = p-1) defines betaRestart and the sign of V[p].
    // All p couplings are stored in fvec (multiplied by that sign) so that
    // buildFullB can reconstruct column p of U^dag A V later.
    RealD coeff = betaK * X(k - 1, order(p - 1));
    betaRestart  = std::abs(coeff);
    RealD sgn = (coeff >= 0.0) ? 1.0 : -1.0;
    fvec.resize(p);
    for (int j = 0; j < p; ++j)
      fvec[j] = betaK * X(k - 1, order(j)) * sgn;
    // fvec[p-1] == betaRestart by construction
    restart_col = p;

    // New V[p]: signed copy of the old residual direction V[k], or a zero
    // field (with betaRestart forced to exactly 0) when the coupling is
    // below 1e-14.
    Field vp(Grid);
    if (betaRestart > 1.0e-14) {
      vp = sgn * V[k];
    } else {
      betaRestart = 0.0;
      vp = Zero();
    }
    Vnew.push_back(vp);  // V[p]

    // ---- New alpha, beta ----
    // B_p^+ is diagonal: alpha^+_i = sigma_{order(i)}; internal betas are
    // zero (the vector is zero-filled on construction), only the last entry
    // carries the restart coupling.
    std::vector<RealD> alpha_new(p), beta_new(p, 0.0);
    for (int i = 0; i < p; ++i) alpha_new[i] = sigma(order(i));
    beta_new[p - 1] = betaRestart;

    // ---- Commit new state ----
    // The new bases are exchanged with the old ones instead of being
    // copy-assigned: the old fields are released when Vnew/Unew go out of
    // scope and no lattice field is duplicated.
    V.swap(Vnew);
    U.swap(Unew);
    alpha = alpha_new;
    beta  = beta_new;
    std::cout << GridLogMessage
              << "IRLBA restart: compressed to " << p << " steps,"
              << "  new beta_p = " << betaRestart << std::endl;
  }
  // ------------------------------------------------------------------
  // Extract the desired singular triplets into the public output vectors.
  // ------------------------------------------------------------------
  void extractTriplets(int m,
                       const Eigen::VectorXd &sigma,
                       const Eigen::MatrixXd &X,
                       const Eigen::MatrixXd &Y,
                       const Eigen::VectorXi &order,
                       int nout)
  {
    // Fill singularValues / leftVectors / rightVectors with the first nout
    // triplets selected by `order`.  Each output vector is a combination of
    // at most m basis vectors (fewer if the basis holds fewer).
    singularValues.resize(nout);
    leftVectors.clear();
    rightVectors.clear();
    leftVectors.reserve(nout);
    rightVectors.reserve(nout);

    const int nLeft  = (m < (int)U.size()) ? m : (int)U.size();
    const int nRight = (m < (int)V.size()) ? m : (int)V.size();

    for (int t = 0; t < nout; ++t) {
      const int col = order(t);
      singularValues[t] = sigma(col);

      // Left vector: U * X(:, col)
      Field left(Grid);
      left = Zero();
      for (int j = 0; j < nLeft; ++j) {
        left = left + X(j, col) * U[j];
      }
      leftVectors.push_back(left);

      // Right vector: V * Y(:, col)
      Field right(Grid);
      right = Zero();
      for (int j = 0; j < nRight; ++j) {
        right = right + Y(j, col) * V[j];
      }
      rightVectors.push_back(right);
    }
  }
 };
 NAMESPACE_END(Grid);
 #endif
@@ -1,931 +0,0 @@
    /*************************************************************************************
    Grid physics library, www.github.com/paboyle/Grid 
    Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
    Copyright (C) 2015
 Author: Chulwoo Jung <chulwoo@bnl.gov>
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
 #ifndef GRID_LANC_H
 #define GRID_LANC_H
 #include <string.h>		//memset
 #ifdef USE_LAPACK
 #ifdef USE_MKL
 #include<mkl_lapack.h>
 #else
 void LAPACK_dstegr (char *jobz, char *range, int *n, double *d, double *e,
 		    double *vl, double *vu, int *il, int *iu, double *abstol,
 		    int *m, double *w, double *z, int *ldz, int *isuppz,
 		    double *work, int *lwork, int *iwork, int *liwork,
 		    int *info);
 //#include <lapacke/lapacke.h>
 #endif
 #endif
 //#include <Grid/algorithms/densematrix/DenseMatrix.h>
 // eliminate temporary vector in calc()
 #define MEM_SAVE
 namespace Grid
 {
  // Static helpers for finding eigenvalues of a tridiagonal matrix by
  // interval bisection.  Only bisec() is compiled; get_eig2() is disabled.
  struct Bisection
  {
 #if 0
    // Disabled (preprocessed out).  As written it refers to names that are
    // not declared in this function (BETHA, A, H, mag, swap), so it would
    // not compile if re-enabled without changes.
    static void get_eig2 (int row_num, std::vector < RealD > &ALPHA,
 			  std::vector < RealD > &BETA,
 			  std::vector < RealD > &eig)
    {
      int i, j;
        std::vector < RealD > evec1 (row_num + 3);
        std::vector < RealD > evec2 (row_num + 3);
      RealD eps2;
        ALPHA[1] = 0.;
        BETHA[1] = 0.;
      for (i = 0; i < row_num - 1; i++)
 	{
 	  ALPHA[i + 1] = A[i * (row_num + 1)].real ();
 	  BETHA[i + 2] = A[i * (row_num + 1) + 1].real ();
 	}
      ALPHA[row_num] = A[(row_num - 1) * (row_num + 1)].real ();
        bisec (ALPHA, BETHA, row_num, 1, row_num, 1e-10, 1e-10, evec1, eps2);
        bisec (ALPHA, BETHA, row_num, 1, row_num, 1e-16, 1e-16, evec2, eps2);
      // Do we really need to sort here?
      int begin = 1;
      int end = row_num;
      int swapped = 1;
      while (swapped)
 	{
 	  swapped = 0;
 	  for (i = begin; i < end; i++)
 	    {
 	      if (mag (evec2[i]) > mag (evec2[i + 1]))
 		{
 		  swap (evec2 + i, evec2 + i + 1);
 		  swapped = 1;
 		}
 	    }
 	  end--;
 	  for (i = end - 1; i >= begin; i--)
 	    {
 	      if (mag (evec2[i]) > mag (evec2[i + 1]))
 		{
 		  swap (evec2 + i, evec2 + i + 1);
 		  swapped = 1;
 		}
 	    }
 	  begin++;
 	}
      for (i = 0; i < row_num; i++)
 	{
 	  for (j = 0; j < row_num; j++)
 	    {
 	      if (i == j)
 		H[i * row_num + j] = evec2[i + 1];
 	      else
 		H[i * row_num + j] = 0.;
 	    }
 	}
    }
 #endif
    // Bisection search for the values x[m1..m2] of the n x n tridiagonal
    // matrix with diagonal c[1..n] and off-diagonal b[2..n].  All arrays use
    // 1-based indexing; b[1] is overwritten with 0.
    //   c, b   : matrix entries (b is modified: b[1] = 0)
    //   n      : matrix dimension
    //   m1, m2 : inclusive index range of values to compute
    //   eps1   : absolute term of the stopping test; when <= 0 it is replaced
    //            by a relfeh-scaled bound (local copy only)
    //   relfeh : relative term of the stopping test
    //   x      : output, x[k] is written for every k in [m1, m2]
    //   eps2   : output, set to 0.5*eps1 + 7*(relfeh * bound)
    // Side effect: prints one line per computed value with printf.
    // NOTE(review): the counting loop looks like a Sturm-sequence count, which
    // would make x[k] the k-th smallest eigenvalue — confirm against callers.
    static void bisec (std::vector < RealD > &c,
 		       std::vector < RealD > &b,
 		       int n,
 		       int m1,
 		       int m2,
 		       RealD eps1,
 		       RealD relfeh, std::vector < RealD > &x, RealD & eps2)
    {
      // wu[i]: best lower bound found so far for value i
      std::vector < RealD > wu (n + 2);
      RealD h, q, x1, xu, x0, xmin, xmax;
      int i, a, k;
      b[1] = 0.0;
      // Enclosing interval [xmin, xmax] from row sums of |off-diagonal|
      xmin = c[n] - fabs (b[n]);
      xmax = c[n] + fabs (b[n]);
      for (i = 1; i < n; i++)
 	{
 	  h = fabs (b[i]) + fabs (b[i + 1]);
 	  if (c[i] + h > xmax)
 	    xmax = c[i] + h;
 	  if (c[i] - h < xmin)
 	    xmin = c[i] - h;
 	}
      // Upper bound is doubled before use
      xmax *= 2.;
      eps2 = relfeh * ((xmin + xmax) > 0.0 ? xmax : -xmin);
      if (eps1 <= 0.0)
 	eps1 = eps2;
      eps2 = 0.5 * eps1 + 7.0 * (eps2);
      x0 = xmax;
      // Every requested value starts with the full interval
      for (i = m1; i <= m2; i++)
 	{
 	  x[i] = xmax;
 	  wu[i] = xmin;
 	}
      // Work from the highest index down so bounds found for value k can be
      // reused for lower indices.
      for (k = m2; k >= m1; k--)
 	{
 	  xu = xmin;
 	  i = k;
 	  // Scan i = k..m1 for the first wu[i] above xmin; setting i = m1 - 1
 	  // forces the loop to exit after that hit.
 	  do
 	    {
 	      if (xu < wu[i])
 		{
 		  xu = wu[i];
 		  i = m1 - 1;
 		}
 	      i--;
 	    }
 	  while (i >= m1);
 	  if (x0 > x[k])
 	    x0 = x[k];
 	  // Halve [xu, x0] until it is narrower than the tolerance
 	  while ((x0 - xu) > 2 * relfeh * (fabs (xu) + fabs (x0)) + eps1)
 	    {
 	      x1 = (xu + x0) / 2;
 	      // a = number of negative terms in the recurrence q_i at shift x1
 	      a = 0;
 	      q = 1.0;
 	      for (i = 1; i <= n; i++)
 		{
 		  q =
 		    c[i] - x1 -
 		    ((q != 0.0) ? b[i] * b[i] / q : fabs (b[i]) / relfeh);
 		  if (q < 0)
 		    a++;
 		}
 //      printf("x1=%0.14e a=%d\n",x1,a);
 	      if (a < k)
 		{
 		  // x1 is a lower bound for value k; also record it as a
 		  // bound for the neighbouring indices it separates.
 		  if (a < m1)
 		    {
 		      xu = x1;
 		      wu[m1] = x1;
 		    }
 		  else
 		    {
 		      xu = x1;
 		      wu[a + 1] = x1;
 		      if (x[a] > x1)
 			x[a] = x1;
 		    }
 		}
 	      else
 		x0 = x1;
 	    }
 	  printf ("x0=%0.14e xu=%0.14e k=%d\n", x0, xu, k);
 	  // Midpoint of the final interval is the result
 	  x[k] = (x0 + xu) / 2;
 	}
    }
  };
 /////////////////////////////////////////////////////////////
 // Implicitly restarted lanczos
 /////////////////////////////////////////////////////////////
  template < class Field > class SimpleLanczos
  {
    const RealD small = 1.0e-16;
  public:
    int lock;
    int get;
    int Niter;
    int converged;
    int Nstop;			// Number of evecs checked for convergence
    int Nk;			// Number of converged sought
    int Np;			// Np -- Number of spare vecs in kryloc space
    int Nm;			// Nm -- total number of vectors
    RealD OrthoTime;
    RealD eresid;
 //    SortEigen < Field > _sort;
    LinearFunction < Field > &_Linop;
 //    OperatorFunction < Field > &_poly;
    /////////////////////////
    // Constructor
    /////////////////////////
    // Intentionally empty hook; performs no work.
    void init ()
    {
    }
 //    void Abort (int ff, std::vector < RealD > &evals, DenseVector < Denstd::vector  < RealD > >&evecs);
    // Store the operator and the run parameters.  Np (spare vectors) is
    // derived as Nm - Nk and must be positive.  Every data member is given a
    // defined value, listed in declaration order.
    SimpleLanczos (LinearFunction < Field > &Linop,	// operator applied in every Lanczos step
 //		   OperatorFunction < Field > &poly,	// polynomial (disabled)
 		   int _Nstop,	// number of eigenvalues checked for convergence
 		   int _Nk,	// number of eigenvalues sought
 		   int _Nm,	// total number of vectors (Nm - Nk of them are spare)
 		   RealD _eresid,	// tolerance the relative eigenvalue change is compared with in calc()
 		   int _Niter):	// max iterations
      lock (0), get (0), Niter (_Niter), converged (0),
      Nstop (_Nstop), Nk (_Nk), Np (_Nm - _Nk), Nm (_Nm),
      OrthoTime (0.), eresid (_eresid),
 //     _poly (poly),
      _Linop (Linop)
    {
      assert (Np > 0);
    }
    /////////////////////////
    // Sanity checked this routine (step) against Saad.
    /////////////////////////
    void RitzMatrix (std::vector < Field > &evec, int k)
    {
      if (1)
 	return;
      GridBase *grid = evec[0].Grid();
      Field w (grid);
      std::cout << GridLogMessage << "RitzMatrix " << std::endl;
      for (int i = 0; i < k; i++)
 	{
 	  _Linop(evec[i], w);
 //      _poly(_Linop,evec[i],w);
 	  std::cout << GridLogMessage << "[" << i << "] ";
 	  for (int j = 0; j < k; j++)
 	    {
 	      ComplexD in = innerProduct (evec[j], w);
 	      if (fabs ((double) i - j) > 1)
 		{
 		  if (abs (in) > 1.0e-9)
 		    {
 		      std::cout << GridLogMessage << "oops" << std::endl;
 		      abort ();
 		    }
 		  else
 		    std::cout << GridLogMessage << " 0 ";
 		}
 	      else
 		{
 		  std::cout << GridLogMessage << " " << in << " ";
 		}
 	    }
 	  std::cout << GridLogMessage << std::endl;
 	}
    }
    void step (std::vector < RealD > &lmd,
 	       std::vector < RealD > &lme,
 	       Field & last, Field & current, Field & next, uint64_t k)
    {
      if (lmd.size () <= k)
 	lmd.resize (k + Nm);
      if (lme.size () <= k)
 	lme.resize (k + Nm);
 //      _poly(_Linop,current,next );   // 3. wk:=Avk−βkv_{k−1}
      _Linop(current, next);	// 3. wk:=Avk−βkv_{k−1}
      if (k > 0)
 	{
 	  next -= lme[k - 1] * last;
 	}
 //      std::cout<<GridLogMessage << "<last|next>" << innerProduct(last,next) <<std::endl;
      ComplexD zalph = innerProduct (current, next);	// 4. αk:=(wk,vk)
      RealD alph = real (zalph);
      next = next - alph * current;	// 5. wk:=wk−αkvk
 //      std::cout<<GridLogMessage << "<current|next>" << innerProduct(current,next) <<std::endl;
      RealD beta = normalise (next);	// 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
      // 7. vk+1 := wk/βk+1
 //       norm=beta;
      int interval = Nm / 100 + 1;
      if ((k % interval) == 0)
 	std::
 	  cout << GridLogMessage << k << " : alpha = " << zalph << " beta " <<
 	  beta << std::endl;
      const RealD tiny = 1.0e-20;
      if (beta < tiny)
 	{
 	  std::cout << GridLogMessage << " beta is tiny " << beta << std::
 	    endl;
 	}
      lmd[k] = alph;
      lme[k] = beta;
    }
    // One shifted sweep of plane rotations over the tridiagonal matrix
    // stored as diagonal lmd and off-diagonal lme, updated in place.
    //   lmd, lme   : diagonal / off-diagonal entries (modified)
    //   Nk         : number of entries of each Qt row pair that are rotated
    //   Nm         : row stride of Qt (element (row k, col i) is Qt[i + Nm*k])
    //   Qt         : accumulates the applied rotations (modified)
    //   Dsh        : shift subtracted from lmd[kmin-1] when forming the
    //                first rotation
    //   kmin, kmax : 1-based window; rotations act on index pairs (k, k+1)
    //                for k = kmin-1 .. kmax-2
    // lme[k+1] is read for k up to kmax-3 only (guarded by k != kmax - 2).
    void qr_decomp (std::vector < RealD > &lmd,
 		    std::vector  < RealD > &lme,
 		    int Nk,
 		    int Nm,
 		    std::vector  < RealD > &Qt, RealD Dsh, int kmin, int kmax)
    {
      int k = kmin - 1;
      // x carries the element pushed outside the tridiagonal band by the
      // previous rotation.
      RealD x;
      // First rotation is built from the shifted leading entry
      RealD Fden = 1.0 / hypot (lmd[k] - Dsh, lme[k]);
      RealD c = (lmd[k] - Dsh) * Fden;
      RealD s = -lme[k] * Fden;
      RealD tmpa1 = lmd[k];
      RealD tmpa2 = lmd[k + 1];
      RealD tmpb = lme[k];
      lmd[k] = c * c * tmpa1 + s * s * tmpa2 - 2.0 * c * s * tmpb;
      lmd[k + 1] = s * s * tmpa1 + c * c * tmpa2 + 2.0 * c * s * tmpb;
      lme[k] = c * s * (tmpa1 - tmpa2) + (c * c - s * s) * tmpb;
      x = -s * lme[k + 1];
      lme[k + 1] = c * lme[k + 1];
      // Apply the same rotation to rows k and k+1 of Qt
      for (int i = 0; i < Nk; ++i)
 	{
 	  RealD Qtmp1 = Qt[i + Nm * k];
 	  RealD Qtmp2 = Qt[i + Nm * (k + 1)];
 	  Qt[i + Nm * k] = c * Qtmp1 - s * Qtmp2;
 	  Qt[i + Nm * (k + 1)] = s * Qtmp1 + c * Qtmp2;
 	}
      // Givens transformations
      // Each following rotation is chosen from (lme[k-1], x), i.e. to remove
      // the out-of-band element x created by the previous step.
      for (int k = kmin; k < kmax - 1; ++k)
 	{
 	  RealD Fden = 1.0 / hypot (x, lme[k - 1]);
 	  RealD c = lme[k - 1] * Fden;
 	  RealD s = -x * Fden;
 	  RealD tmpa1 = lmd[k];
 	  RealD tmpa2 = lmd[k + 1];
 	  RealD tmpb = lme[k];
 	  lmd[k] = c * c * tmpa1 + s * s * tmpa2 - 2.0 * c * s * tmpb;
 	  lmd[k + 1] = s * s * tmpa1 + c * c * tmpa2 + 2.0 * c * s * tmpb;
 	  lme[k] = c * s * (tmpa1 - tmpa2) + (c * c - s * s) * tmpb;
 	  lme[k - 1] = c * lme[k - 1] - s * x;
 	  // The last rotation of the window creates no new out-of-band element
 	  if (k != kmax - 2)
 	    {
 	      x = -s * lme[k + 1];
 	      lme[k + 1] = c * lme[k + 1];
 	    }
 	  for (int i = 0; i < Nk; ++i)
 	    {
 	      RealD Qtmp1 = Qt[i + Nm * k];
 	      RealD Qtmp2 = Qt[i + Nm * (k + 1)];
 	      Qt[i + Nm * k] = c * Qtmp1 - s * Qtmp2;
 	      Qt[i + Nm * (k + 1)] = s * Qtmp1 + c * Qtmp2;
 	    }
 	}
    }
 #if 0
 #ifdef USE_LAPACK
 #ifdef USE_MKL
 #define LAPACK_INT MKL_INT
 #else
 #define LAPACK_INT long long
 #endif
    // Currently compiled out by the enclosing #if 0.
    // Computes eigenvalues of the tridiagonal matrix (diagonal lmd,
    // off-diagonal lme) with LAPACK dstegr, each processor handling the
    // index slice [il, iu], then sums the slices over all processors.
    //   N1   : matrix size used for the LAPACK call
    //   N2   : not used in this body
    //   grid : supplies processor count / rank and the global sum
    // NOTE(review): the results end up in the local array evals_tmp and are
    // not copied to any output parameter in the code shown here.
    void diagonalize_lapack (std::vector  < RealD > &lmd, std::vector  < RealD > &lme, int N1,	// all
 			     int N2,	// get
 			     GridBase * grid)
    {
      const int size = Nm;
      LAPACK_INT NN = N1;
      double evals_tmp[NN];
      double DD[NN];
      double EE[NN];
      // Copy diagonal into DD (and evals_tmp) and sub-diagonal into EE
      for (int i = 0; i < NN; i++)
 	for (int j = i - 1; j <= i + 1; j++)
 	  if (j < NN && j >= 0)
 	    {
 	      if (i == j)
 		DD[i] = lmd[i];
 	      if (i == j)
 		evals_tmp[i] = lmd[i];
 	      if (j == (i - 1))
 		EE[j] = lme[j];
 	    }
      LAPACK_INT evals_found;
      // Workspace size: max(18*NN, 1 + 4*NN + NN*NN)
      LAPACK_INT lwork =
 	((18 * NN) >
 	 (1 + 4 * NN + NN * NN) ? (18 * NN) : (1 + 4 * NN + NN * NN));
      LAPACK_INT liwork = 3 + NN * 10;
      LAPACK_INT iwork[liwork];
      double work[lwork];
      LAPACK_INT isuppz[2 * NN];
      char jobz = 'N';		// calculate evals only
      char range = 'I';		// calculate il-th to iu-th evals
      //    char range = 'A'; // calculate all evals
      char uplo = 'U';		// refer to upper half of original matrix
      char compz = 'I';		// Compute eigenvectors of tridiagonal matrix
      int ifail[NN];
      LAPACK_INT info;
 //  int total = QMP_get_number_of_nodes();
 //  int node = QMP_get_node_number();
 //  GridBase *grid = evec[0]._grid;
      // Split the 1-based index range evenly over the processors
      int total = grid->_Nprocessors;
      int node = grid->_processor;
      int interval = (NN / total) + 1;
      double vl = 0.0, vu = 0.0;
      LAPACK_INT il = interval * node + 1, iu = interval * (node + 1);
      if (iu > NN)
 	iu = NN;
      double tol = 0.0;
      if (1)
 	{
 	  memset (evals_tmp, 0, sizeof (double) * NN);
 	  if (il <= NN)
 	    {
 	      printf ("total=%d node=%d il=%d iu=%d\n", total, node, il, iu);
 #ifdef USE_MKL
 	      dstegr (&jobz, &range, &NN,
 #else
 	      LAPACK_dstegr (&jobz, &range, &NN,
 #endif
 			     (double *) DD, (double *) EE, &vl, &vu, &il, &iu,	// these four are ignored if second parameter is 'A'
 			     &tol,	// tolerance
 			     &evals_found, evals_tmp, (double *) NULL, &NN,
 			     isuppz, work, &lwork, iwork, &liwork, &info);
 	      // Move this processor's values from the front of evals_tmp to
 	      // their global positions, zeroing the vacated slots.
 	      for (int i = iu - 1; i >= il - 1; i--)
 		{
 		  printf ("node=%d evals_found=%d evals_tmp[%d] = %g\n", node,
 			  evals_found, i - (il - 1), evals_tmp[i - (il - 1)]);
 		  evals_tmp[i] = evals_tmp[i - (il - 1)];
 		  if (il > 1)
 		    evals_tmp[i - (il - 1)] = 0.;
 		}
 	    }
 	  {
 	    // Combine the per-processor slices
 	    grid->GlobalSumVector (evals_tmp, NN);
 	  }
 	}
 // cheating a bit. It is better to sort instead of just reversing it, but the document of the routine says evals are sorted in increasing order. qr gives evals in decreasing order.
    }
 #undef LAPACK_INT
 #endif
    // Currently compiled out by the enclosing #if 0.
    // With USE_LAPACK defined this forwards to diagonalize_lapack (passing
    // N2 and N1 in that order); without it the body is empty.
    void diagonalize (std::vector  < RealD > &lmd,
 		      std::vector  < RealD > &lme,
 		      int N2, int N1, GridBase * grid)
    {
 #ifdef USE_LAPACK
      const int check_lapack = 0;	// just use lapack if 0, check against lapack if 1
      if (!check_lapack)
 	return diagonalize_lapack (lmd, lme, N2, N1, grid);
 //      diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid);
 #endif
    }
 #endif
    static RealD normalise (Field & v)
    {
      // Scale v to unit length in place and hand back the length it had.
      // No guard against a zero-length input.
      const RealD length = sqrt (norm2 (v));
      v = v * (1.0 / length);
      return length;
    }
    void orthogonalize (Field & w, std::vector < Field > &evec, int k)
    {
      // Project evec[0..k-1] out of w one vector at a time, normalise w, and
      // add the elapsed wall time (seconds) to OrthoTime.
      typedef typename Field::scalar_type MyComplex;
      const double started = usecond () / 1e6;
      MyComplex ip;

      // Disabled: re-orthonormalisation of the basis itself
      if (0)
 	{
 	  for (int col = 0; col < k; ++col)
 	    {
 	      normalise (evec[col]);
 	      for (int prev = 0; prev < col; prev++)
 		{
 		  ip = innerProduct (evec[prev], evec[col]);	// assumes the basis vectors are normalised
 		  evec[col] = evec[col] - ip * evec[prev];
 		}
 	    }
 	}

      for (int col = 0; col < k; ++col)
 	{
 	  ip = innerProduct (evec[col], w);	// assumes the basis vectors are normalised
 	  w = w - ip * evec[col];
 	}
      normalise (w);

      OrthoTime += usecond () / 1e6 - started;
    }
    void setUnit_Qt (int Nm, std::vector < RealD > &Qt)
    {
      for (int i = 0; i < Qt.size (); ++i)
 	Qt[i] = 0.0;
      for (int k = 0; k < Nm; ++k)
 	Qt[k + k * Nm] = 1.0;
    }
    // Driver: repeatedly extends the recurrence by Nm calls to step(), then
    // uses Bisection::bisec on the accumulated lmd/lme coefficients to get
    // values at both ends of the index range, and stops once those values
    // agree with the previous pass to a relative tolerance of eresid.
    //
    //   eval  - only its size is printed here; the copy into it is commented out
    //   src   - starting vector (a normalised copy is used)
    //   Nconv - set to 0 on entry and not updated afterwards
    //
    // NOTE(review): lmd and lme are created with Nm entries but are passed to
    // step() with an index `iter` that keeps growing across passes, and are
    // read up to `iter` below; presumably step() resizes them - confirm.
    void calc (std::vector < RealD > &eval, const Field & src, int &Nconv)
    {
      GridBase *grid = src.Grid();
 //      assert(grid == src._grid);
      std::
 	cout << GridLogMessage << " -- Nk = " << Nk << " Np = " << Np << std::
 	endl;
      std::cout << GridLogMessage << " -- Nm = " << Nm << std::endl;
      std::cout << GridLogMessage << " -- size of eval   = " << eval.
 	size () << std::endl;
 //      assert(c.size() && Nm == eval.size());
      std::vector < RealD > lme (Nm);
      std::vector < RealD > lmd (Nm);
      Field current (grid);
      Field last (grid);
      Field next (grid);
      Nconv = 0;
      RealD beta_k;		// NOTE(review): never used in this function
      // Initial vector: a copy of src, scaled to unit norm below.
      current = src;
      std::cout << GridLogMessage << "norm2(src)= " << norm2 (src) << std::
 	endl;
      normalise (current);
      std::
 	cout << GridLogMessage << "norm2(evec[0])= " << norm2 (current) <<
 	std::endl;
      // Reset the orthogonalisation timer and start the wall clock (seconds).
      OrthoTime = 0.;
      double t0 = usecond () / 1e6;
      RealD norm;		// NOTE(review): never used in this function
      uint64_t iter = 0;	// total number of step() calls so far
      bool initted = false;	// false until one pass has filled low/high
      // Values found in the previous pass, used for the convergence check.
      std::vector < RealD > low (Nstop * 10);
      std::vector < RealD > high (Nstop * 10);
      RealD cont = 0.;		// set to 1 when any value moved by more than eresid
      while (1) {
 	  cont = 0.;
 	  std::vector < RealD > lme2 (Nm);
 	  std::vector < RealD > lmd2 (Nm);
 	  // Nm further steps; `iter` carries on from the previous pass.
 	  for (uint64_t k = 0; k < Nm; ++k, iter++) {
 	      step (lmd, lme, last, current, next, iter);
 	      last = current;
 	      current = next;
 	    }
 	  double t1 = usecond () / 1e6;
 	  std::cout << GridLogMessage << "IRL::Initial steps: " << t1 -
 	    t0 << "seconds" << std::endl;
 	  t0 = t1;
 	  std::
 	    cout << GridLogMessage << "IRL::Initial steps:OrthoTime " <<
 	    OrthoTime << "seconds" << std::endl;
 	  // Copy the coefficients with an index offset (lmd by one, lme by two);
 	  // presumably the layout Bisection::bisec expects - confirm.
 	  lmd2.resize (iter + 2);
 	  lme2.resize (iter + 2);
 	  for (uint64_t k = 0; k < iter; ++k) {
 	      lmd2[k + 1] = lmd[k];
 	      lme2[k + 2] = lme[k];
 	    }
 	  t1 = usecond () / 1e6;
 	  std::cout << GridLogMessage << "IRL:: copy: " << t1 -
 	    t0 << "seconds" << std::endl;
 	  t0 = t1;
 	  {
 	    // Each rank handles its own window of `interval` indices.
 	    int total = grid->_Nprocessors;
 	    int node = grid->_processor;
 	    int interval = (Nstop / total) + 1;
 	    // Window counted down from index iter (upper end of the range).
 	    int iu = (iter + 1) - (interval * node + 1);
 	    int il = (iter + 1) - (interval * (node + 1));
 	    std::vector < RealD > eval2 (iter + 3);
 	    RealD eps2;
 	    Bisection::bisec (lmd2, lme2, iter, il, iu, 1e-16, 1e-10, eval2,
 			      eps2);
 //        diagonalize(eval2,lme2,iter,Nk,grid);
 	    RealD diff = 0.;
 	    // Compare with the previous pass; high[] is indexed by distance from iu.
 	    for (int i = il; i <= iu; i++) {
 		if (initted)
 		  diff =
 		    fabs (eval2[i] - high[iu-i]) / (fabs (eval2[i]) +
 						      fabs (high[iu-i]));
 		if (initted && (diff > eresid))
 		  cont = 1.;
 		if (initted)
 		  printf ("eval[%d]=%0.14e %0.14e, %0.14e\n", i, eval2[i],
 			  high[iu-i], diff);
 		high[iu-i] = eval2[i];
 	      }
 	    // Same check for the window counted up from index 1 (lower end).
 	    // NOTE(review): low[] is indexed by i itself, which reaches
 	    // interval * (node + 1); confirm this stays below Nstop * 10.
 	    il = (interval * node + 1);
 	    iu = (interval * (node + 1));
 	    Bisection::bisec (lmd2, lme2, iter, il, iu, 1e-16, 1e-10, eval2,
 			      eps2);
 	    for (int i = il; i <= iu; i++) {
 		if (initted)
 		  diff =
 		    fabs (eval2[i] - low[i]) / (fabs (eval2[i]) +
 						fabs (low[i]));
 		if (initted && (diff > eresid))
 		  cont = 1.;
 		if (initted)
 		  printf ("eval[%d]=%0.14e %0.14e, %0.14e\n", i, eval2[i],
 			  low[i], diff);
 		low[i] = eval2[i];
 	      }
 	    t1 = usecond () / 1e6;
 	    std::cout << GridLogMessage << "IRL:: diagonalize: " << t1 -
 	      t0 << "seconds" << std::endl;
 	    t0 = t1;
 	  }
 	  // NOTE(review): this loop is empty, so `eval` is never filled in.
 	  for (uint64_t k = 0; k < Nk; ++k) {
 //          eval[k] = eval2[k];
 	    }
 	  // From the second pass on: combine the flags of all ranks and stop
 	  // when no rank saw a change larger than eresid.
 	  if (initted)
 	    {
 	      grid->GlobalSumVector (&cont, 1);
 	      if (cont < 1.) return;
 	    }
 	  initted = true;
 	}
    }
 #if 0
 /**
   Fill Q so that its first column is y (Q.e_1 = y); the remaining columns
   are built from y so as to be orthogonal to it.  The original note states
   that Q is unitary; since the first column is copied unscaled, that
   presumably requires y to have unit norm - confirm.

   Returns tau = sqrt(sum_j |y_j|^2).  For a one-element y this is |y_0|
   (previously an uninitialised value was returned); for an empty y, Q is
   only zero-filled and 0 is returned.
 **/
    template < class T >
      static T orthQ (DenseMatrix < T > &Q, std::vector < T > y)
    {
      int N = y.size ();	//Matrix Size
      Fill (Q, 0.0);
      if (N == 0)
 	{
 	  return T (0);		// nothing to build from; avoids reading y[0]
 	}
      for (int i = 0; i < N; i++)
 	{
 	  Q[i][0] = y[i];
 	}
      T sig = conj (y[0]) * y[0];	// running sum of |y_j|^2
      T tau0 = abs (sqrt (sig));	// norm of y[0..j-1]
      T tau = tau0;		// well defined even when the loop below is skipped
      for (int j = 1; j < N; j++)
 	{
 	  sig += conj (y[j]) * y[j];
 	  tau = abs (sqrt (sig));	// norm of y[0..j]
 	  if (abs (tau0) > 0.0)
 	    {
 	      T gam = conj ((y[j] / tau) / tau0);
 	      for (int k = 0; k <= j - 1; k++)
 		{
 		  Q[k][j] = -gam * y[k];
 		}
 	      Q[j][j] = tau0 / tau;
 	    }
 	  else
 	    {
 	      // Leading part of y is zero: use a unit entry instead.
 	      Q[j - 1][j] = 1.0;
 	    }
 	  tau0 = tau;
 	}
      return tau;
    }
 /**
 	Builds Q from y with orthQ and then applies SL to it.
 	Per the original note the result satisfies Q.e_k = y with Q unitary
 	(SL presumably moves the y column away from position 1 - confirm).
 	Returns the value produced by orthQ.
 **/
    template < class T >
      static T orthU (DenseMatrix < T > &Q, std::vector < T > y)
    {
      const T scale = orthQ (Q, y);
      SL (Q);
      return scale;
    }
 /**
 	Wind up with a matrix with the first con rows untouched,
 	say con = 2.
 	Q is such that Qdag H Q has {x, x, val, 0, 0, 0, 0, ...} as 1st column
 	and the matrix is upper Hessenberg,
 	and with f and Q appropriately modified with Q it is the Arnoldi factorization.

 	Outline of what the body does:
 	  1. take the trailing block of H starting at row/column con, get its
 	     eigenpairs from Wilkinson, and pick the one closest to val;
 	  2. build QQ from that eigenvector with orthQ and apply it as
 	     QQ^dag AH QQ, copying QQ and the result back into Q and H;
 	  3. for j = M-1 down to con+3, build U from (conjugated) row j of H
 	     with orthU and apply it to columns of H and Q and to rows of H.

 	NOTE(review): dfg and herm are not used in the body.
 **/
    template < class T > static void Lock (DenseMatrix < T > &H,	///Hessenberg matrix (updated in place)
 					   DenseMatrix < T > &Q,	///Lock transform (reset to unity, then filled)
 					   T val,	///value to be locked
 					   int con,	///number already locked
 					   RealD small, int dfg, bool herm)
    {
      //ForceTridiagonal(H);
      int M = H.dim;
      DenseVector < T > vec;
      Resize (vec, M - con);
      // Working copy of the part of H that is not yet locked.
      DenseMatrix < T > AH;
      Resize (AH, M - con, M - con);
      AH = GetSubMtx (H, con, M, con, M);
      DenseMatrix < T > QQ;
      Resize (QQ, M - con, M - con);
      Unity (Q);
      Unity (QQ);
      DenseVector < T > evals;
      Resize (evals, M - con);
      DenseMatrix < T > evecs;
      Resize (evecs, M - con, M - con);
      Wilkinson < T > (AH, evals, evecs, small);
      // Find the index k whose evals[k] is closest to val.
      int k = 0;
      RealD cold = abs (val - evals[k]);
      for (int i = 1; i < M - con; i++)
 	{
 	  RealD cnew = abs (val - evals[i]);
 	  if (cnew < cold)
 	    {
 	      k = i;
 	      cold = cnew;
 	    }
 	}
      vec = evecs[k];
      ComplexD tau;		// NOTE(review): assigned from orthU (returns T) below, never read
      orthQ (QQ, vec);
      //orthQM(QQ,AH,vec);
      // Similarity transform of the working block: AH <- QQ^dag AH QQ.
      AH = Hermitian (QQ) * AH;
      AH = AH * QQ;
      // Write the transform and the transformed block back into Q and H.
      for (int i = con; i < M; i++)
 	{
 	  for (int j = con; j < M; j++)
 	    {
 	      Q[i][j] = QQ[i - con][j - con];
 	      H[i][j] = AH[i - con][j - con];
 	    }
 	}
      // Work upwards from the last row.
      for (int j = M - 1; j > con + 2; j--)
 	{
 	  DenseMatrix < T > U;
 	  Resize (U, j - 1 - con, j - 1 - con);
 	  DenseVector < T > z;
 	  Resize (z, j - 1 - con);
 	  // NOTE(review): evaluated before z is filled, and nm is never used.
 	  T nm = norm (z);
 	  // z = conjugate of H(j, con+1 .. j-1); note the call syntax H (j, k)
 	  // here versus H[a][b] everywhere else.
 	  for (int k = con + 0; k < j - 1; k++)
 	    {
 	      z[k - con] = conj (H (j, k + 1));
 	    }
 	  normalise (z);
 	  // Skip this row when all but the last entry of z are negligible.
 	  RealD tmp = 0;
 	  for (int i = 0; i < z.size () - 1; i++)
 	    {
 	      tmp = tmp + abs (z[i]);
 	    }
 	  if (tmp < small / ((RealD) z.size () - 1.0))
 	    {
 	      continue;
 	    }
 	  tau = orthU (U, z);
 	  // Hb = (H restricted to columns con+1 .. j-1) * U, stored transposed.
 	  DenseMatrix < T > Hb;
 	  Resize (Hb, j - 1 - con, M);
 	  for (int a = 0; a < M; a++)
 	    {
 	      for (int b = 0; b < j - 1 - con; b++)
 		{
 		  T sum = 0;
 		  for (int c = 0; c < j - 1 - con; c++)
 		    {
 		      sum += H[a][con + 1 + c] * U[c][b];
 		    }		//sum += H(a,con+1+c)*U(c,b);}
 		  Hb[b][a] = sum;
 		}
 	    }
 	  // Copy the product back into columns con+1 .. j-1 of H.
 	  for (int k = con + 1; k < j; k++)
 	    {
 	      for (int l = 0; l < M; l++)
 		{
 		  H[l][k] = Hb[k - 1 - con][l];
 		}
 	    }			//H(Hb[k-1-con][l] , l,k);}}
 	  // Same column update for Q: Q <- Q * U on columns con+1 .. j-1.
 	  DenseMatrix < T > Qb;
 	  Resize (Qb, M, M);
 	  for (int a = 0; a < M; a++)
 	    {
 	      for (int b = 0; b < j - 1 - con; b++)
 		{
 		  T sum = 0;
 		  for (int c = 0; c < j - 1 - con; c++)
 		    {
 		      sum += Q[a][con + 1 + c] * U[c][b];
 		    }		//sum += Q(a,con+1+c)*U(c,b);}
 		  Qb[b][a] = sum;
 		}
 	    }
 	  for (int k = con + 1; k < j; k++)
 	    {
 	      for (int l = 0; l < M; l++)
 		{
 		  Q[l][k] = Qb[k - 1 - con][l];
 		}
 	    }			//Q(Qb[k-1-con][l] , l,k);}}
 	  // Row update: rows con+1 .. j-1 of H <- U^dag * (those rows of H).
 	  DenseMatrix < T > Hc;
 	  Resize (Hc, M, M);
 	  for (int a = 0; a < j - 1 - con; a++)
 	    {
 	      for (int b = 0; b < M; b++)
 		{
 		  T sum = 0;
 		  for (int c = 0; c < j - 1 - con; c++)
 		    {
 		      sum += conj (U[c][a]) * H[con + 1 + c][b];
 		    }		//sum += conj( U(c,a) )*H(con+1+c,b);}
 		  Hc[b][a] = sum;
 		}
 	    }
 	  for (int k = 0; k < M; k++)
 	    {
 	      for (int l = con + 1; l < j; l++)
 		{
 		  H[l][k] = Hc[k][l - 1 - con];
 		}
 	    }			//H(Hc[k][l-1-con] , l,k);}}
 	}
    }
 #endif
  };
 }
 #endif
@@ -260,8 +260,7 @@ class GridLimeReader : public BinaryIO {
              << " / field= " << n2ck << " / rdiff= " << GRID_FIELD_NORM_CALC(FieldNormMetaData_,n2ck) << std::endl;
 	  GRID_FIELD_NORM_CHECK(FieldNormMetaData_,n2ck);
 	}
-//	assert(scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb)==1);
+	assert(scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb)==1);
 	scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb);
 	// find out if next field is a GridFieldNorm
 	return;
@@ -122,7 +122,7 @@ public:
    field.checksum = std::stoul(header["CHECKSUM"],0,16);
    field.ensemble_id      = header["ENSEMBLE_ID"];
    field.ensemble_label   = header["ENSEMBLE_LABEL"];
-//    field.sequence_number  = std::stol(header["SEQUENCE_NUMBER"]);
+    field.sequence_number  = std::stol(header["SEQUENCE_NUMBER"]);
    field.creator          = header["CREATOR"];
    field.creator_hardware = header["CREATOR_HARDWARE"];
    field.creation_date    = header["CREATION_DATE"];
@@ -28,11 +28,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 /*  END LEGAL */
 #pragma once 
 #ifndef MIN
 #define MIN(x,y) ((x)>(y)?(y):(x))
 #endif
 // Introduce a class to gain deterministic bit reproducible reduction.
 // make static; perhaps just a namespace is required.
 NAMESPACE_BEGIN(Grid);
@@ -877,7 +877,7 @@ int main (int argc, char ** argv)
  int do_su4=0;
  int do_memory=1;
  int do_comms =1;
-  int do_blas  =0;
+  int do_blas  =1;
  int do_dslash=1;
  int sel=4;
@@ -174,7 +174,7 @@ esac
 ############### fermions
 # The user's choice must land in ac_FERMION_INSTANTIATIONS: that is the
 # variable the BUILD_FERMION_INSTANTIATIONS conditional tests.
 AC_ARG_ENABLE([fermion-instantiations],
     [AS_HELP_STRING([--enable-fermion-instantiations=yes|no],[enable fermion instantiations])],
-     [ac_FERMION_INSTANTIATIONS=${enable_fermion_instantiations}], [ac_FERMION_INSTANTIATIONS=yes])
+     [ac_FERMION_INSTANTIATIONS=${enable_fermion_instantiations}], [ac_FERMION_INSTANTIATIONS=yes])
 AM_CONDITIONAL(BUILD_FERMION_INSTANTIATIONS, [ test "${ac_FERMION_INSTANTIATIONS}X" == "yesX" ])
@@ -1,430 +0,0 @@
 /*************************************************************************************
    Grid physics library, www.github.com/paboyle/Grid 
    Source file: ./tests/Test_padded_cell.cc
    Copyright (C) 2023
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
 // copied here from Test_general_coarse_pvdagm.cc
 #include <cstdlib>
 #include <Grid/Grid.h>
 #include <Grid/lattice/PaddedCell.h>
 #include <Grid/stencil/GeneralLocalStencil.h>
 #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
 #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h>
 #include <Grid/algorithms/iterative/BiCGSTAB.h>
 using namespace std;
 using namespace Grid;
 // Hermitize a DWF operator by squaring it
 // Hermitian wrapper around a matrix M: HermOp applies M and then Mdag,
 // and both Op and AdjOp forward to HermOp.  The raw M and Mdag remain
 // reachable through _Op and _AdjOp.  The remaining base-class entry
 // points are not implemented and assert.
 template<class Matrix,class Field>
 class SquaredLinearOperator : public LinearOperatorBase<Field> {
  public:
  Matrix &_Mat;   // wrapped matrix, held by reference
  SquaredLinearOperator(Matrix &Mat)
    : _Mat(Mat)
  {
  }
  // Not implemented.
  void OpDiag (const Field &in, Field &out) { assert(0); }
  void OpDir  (const Field &in, Field &out,int dir,int disp) { assert(0); }
  void OpDirAll  (const Field &in, std::vector<Field> &out) { assert(0); }
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) { assert(0); }
  // Raw action of the wrapped matrix and of its adjoint.
  void _Op     (const Field &in, Field &out) { _Mat.M(in, out); }
  void _AdjOp  (const Field &in, Field &out) { _Mat.Mdag(in, out); }
  // out = Mdag M in
  void HermOp(const Field &in, Field &out)
  {
    Field MPsi(in.Grid());
    _Op(in, MPsi);
    _AdjOp(MPsi, out);
  }
  // Op and AdjOp are both overloaded as the Hermitian product.
  void Op     (const Field &in, Field &out) { HermOp(in, out); }
  void AdjOp  (const Field &in, Field &out) { HermOp(in, out); }
 };
 // Linear operator PV^dag M built from two matrices held by reference.
 //   Op     : out = PV^dag M in
 //   AdjOp  : out = M^dag PV in
 //   HermOp : AdjOp applied to the result of Op
 // Each call logs a one-line trace to std::cout.  The remaining base-class
 // entry points are not implemented and assert.
 template<class Matrix,class Field>
 class PVdagMLinearOperator : public LinearOperatorBase<Field> {
  Matrix &_Mat;
  Matrix &_PV;
 public:
  PVdagMLinearOperator(Matrix &Mat,Matrix &PV)
    : _Mat(Mat), _PV(PV)
  {
  }
  // Not implemented.
  void OpDiag (const Field &in, Field &out) { assert(0); }
  void OpDir  (const Field &in, Field &out,int dir,int disp) { assert(0); }
  void OpDirAll  (const Field &in, std::vector<Field> &out) { assert(0); }
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) { assert(0); }
  void Op     (const Field &in, Field &out)
  {
    std::cout << "Op: PVdag M "<<std::endl;
    Field MPsi(in.Grid());
    _Mat.M(in, MPsi);
    _PV.Mdag(MPsi, out);
  }
  void AdjOp     (const Field &in, Field &out)
  {
    std::cout << "AdjOp: Mdag PV "<<std::endl;
    Field PVPsi(in.Grid());
    _PV.M(in, PVPsi);
    _Mat.Mdag(PVPsi, out);
  }
  void HermOp(const Field &in, Field &out)
  {
    std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
    Field OpPsi(in.Grid());
    Op(in, OpPsi);
    AdjOp(OpPsi, out);
  }
 };
 // Linear operator PV^dag M + shift, with a real shift.
 //   Op     : out = PV^dag M in + shift * in
 //   AdjOp  : out = M^dag PV in + shift * in
 //   HermOp : AdjOp applied to the result of Op
 // Each call logs a one-line trace to std::cout.  The remaining base-class
 // entry points are not implemented and assert.
 template<class Matrix,class Field>
 class ShiftedPVdagMLinearOperator : public LinearOperatorBase<Field> {
  Matrix &_Mat;
  Matrix &_PV;
  RealD shift;
 public:
  // Initialisers are listed in declaration order (_Mat, _PV, shift).
  ShiftedPVdagMLinearOperator(RealD _shift,Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV),shift(_shift){};
  void OpDiag (const Field &in, Field &out) {    assert(0);  }
  void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  }
  void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  };
  void Op     (const Field &in, Field &out){
    std::cout << "Op: PVdag M "<<std::endl;
    Field tmp(in.Grid());
    _Mat.M(in,tmp);
    _PV.Mdag(tmp,out);
    out = out + shift * in;
  }
  void AdjOp     (const Field &in, Field &out){
    std::cout << "AdjOp: Mdag PV "<<std::endl;
    Field tmp(in.Grid());
    // PV acts on the input first, then Mdag on that result.  The previous
    // argument order fed the uninitialised tmp into PV and threw away the
    // Mdag result.
    _PV.M(in,tmp);
    _Mat.Mdag(tmp,out);
    out = out + shift * in;
  }
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  }
  void HermOp(const Field &in, Field &out){
    std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
    Field tmp(in.Grid());
    Op(in,tmp);
    AdjOp(tmp,out);
  }
 };
 // Linear operator PV^dag M + shift, with a complex shift that can be
 // changed after construction via resetShift.
 //   Op     : out = PV^dag M in + shift * in
 //   AdjOp  : out = M^dag PV in + conj(shift) * in
 //   HermOp : AdjOp applied to the result of Op
 // Each call logs a one-line trace to std::cout.  The remaining base-class
 // entry points are not implemented and assert.
 template<class Matrix, class Field>
 class ShiftedComplexPVdagMLinearOperator : public LinearOperatorBase<Field> {
  Matrix &_Mat;
  Matrix &_PV;
  ComplexD shift;
 public:
  // Initialisers are listed in declaration order (_Mat, _PV, shift).
  ShiftedComplexPVdagMLinearOperator(ComplexD _shift,Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV),shift(_shift){};
  void OpDiag (const Field &in, Field &out) {    assert(0);  }
  void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  }
  void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  };
  void Op     (const Field &in, Field &out){
    std::cout << "Op: PVdag M "<<std::endl;
    Field tmp(in.Grid());
    _Mat.M(in,tmp);
    _PV.Mdag(tmp,out);
    out = out + shift * in;
  }
  void AdjOp     (const Field &in, Field &out){
    std::cout << "AdjOp: Mdag PV "<<std::endl;
    Field tmp(in.Grid());
    // PV acts on the input first, then Mdag on that result.  The previous
    // argument order fed the uninitialised tmp into PV and threw away the
    // Mdag result.
    _PV.M(in,tmp);
    _Mat.Mdag(tmp,out);
    // The adjoint of (A + s) is (A^dag + conj(s)).
    out = out + conj(shift) * in;
  }
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  }
  void HermOp(const Field &in, Field &out){
    std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
    Field tmp(in.Grid());
    Op(in,tmp);
    AdjOp(tmp,out);
  }
  // Replace the shift used by subsequent Op / AdjOp / HermOp calls.
  void resetShift(ComplexD newShift) {
    shift = newShift;
  }
 };
 // Two-level preconditioner: pre-smooth on the fine grid, correct with a
 // solve in the coarse subspace of the aggregates, then post-smooth.
 // All collaborators are held by reference.  Each stage is timed with
 // usecond() and reported in milliseconds through GridLogMessage.
 template<class Fobj,class CComplex,int nbasis>
 class MGPreconditioner : public LinearFunction< Lattice<Fobj> > {
 public:
  using LinearFunction<Lattice<Fobj> >::operator();
  using Aggregates     = Aggregation<Fobj,CComplex,nbasis>;
  using FineField      = typename Aggregation<Fobj,CComplex,nbasis>::FineField;
  using CoarseVector   = typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector;
  using CoarseMatrix   = typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix;
  using FineOperator   = LinearOperatorBase<FineField>;
  using FineSmoother   = LinearFunction    <FineField>;
  using CoarseOperator = LinearOperatorBase<CoarseVector>;
  using CoarseSolver   = LinearFunction    <CoarseVector>;
  Aggregates     & _Aggregates;
  FineOperator   & _FineOperator;
  FineSmoother   & _PreSmoother;
  FineSmoother   & _PostSmoother;
  CoarseOperator & _CoarseOperator;
  CoarseSolver   & _CoarseSolve;
  int    level;
  // Set the level number stored in this object.
  void Level(int lv) { level = lv; }
  MGPreconditioner(Aggregates &Agg,
 		   FineOperator &Fine,
 		   FineSmoother &PreSmoother,
 		   FineSmoother &PostSmoother,
 		   CoarseOperator &CoarseOperator_,
 		   CoarseSolver &CoarseSolve_)
    : _Aggregates(Agg),
      _FineOperator(Fine),
      _PreSmoother(PreSmoother),
      _PostSmoother(PostSmoother),
      _CoarseOperator(CoarseOperator_),
      _CoarseSolve(CoarseSolve_),
      level(1)
  {
  }
  // Apply the preconditioner: `in` is the right-hand side, `out` receives
  // the approximate solution (it is zeroed first).
  virtual void operator()(const FineField &in, FineField & out)
  {
    GridBase *CoarseGrid = _Aggregates.CoarseGrid;
    CoarseVector coarseRhs(CoarseGrid);
    CoarseVector coarseSol(CoarseGrid);
    FineField work(in.Grid());        // residual, then promoted coarse correction
    FineField smoothed(in.Grid());    // post-smoother output
    std::cout<<GridLogMessage << "Calling PreSmoother " <<std::endl;
    double elapsed;
    // Stage 1: pre-smooth starting from zero.
    out = Zero();
    elapsed = -usecond();
    _PreSmoother(in,out);
    elapsed += usecond();
    std::cout<<GridLogMessage << "PreSmoother took "<< elapsed/1000.0<< "ms" <<std::endl;
    // Residual after pre-smoothing: work = in - Op(out).
    _FineOperator.Op(out,work);
    sub(work, in, work);
    // Stage 2: restrict the residual to the coarse space.
    elapsed = -usecond();
    _Aggregates.ProjectToSubspace(coarseRhs,work);
    elapsed += usecond();
    std::cout<<GridLogMessage << "Project to coarse took "<< elapsed/1000.0<< "ms" <<std::endl;
    // Stage 3: coarse solve starting from zero.
    elapsed = -usecond();
    coarseSol = Zero();
    _CoarseSolve(coarseRhs,coarseSol);
    elapsed += usecond();
    std::cout<<GridLogMessage << "Coarse solve took "<< elapsed/1000.0<< "ms" <<std::endl;
    // Stage 4: prolong the coarse solution and add it to the fine solution.
    elapsed = -usecond();
    _Aggregates.PromoteFromSubspace(coarseSol,work);
    add(out,out,work);
    elapsed += usecond();
    std::cout<<GridLogMessage << "Promote to this level took "<< elapsed/1000.0<< "ms" <<std::endl;
    // Residual after the coarse correction.
    _FineOperator.Op(out,work);
    sub(work, in, work);
    // Stage 5: post-smooth on that residual and add the result.
    elapsed = -usecond();
    smoothed = Zero();
    _PostSmoother(work,smoothed);
    elapsed += usecond();
    std::cout<<GridLogMessage << "PostSmoother took "<< elapsed/1000.0<< "ms" <<std::endl;
    add(out,out,smoothed);
    std::cout<<GridLogMessage << "Done " <<std::endl;
  }
 };
 // Test driver: runs Arnoldi and implicitly restarted Lanczos on the
 // Hermitian operator Mdag M of a domain-wall Dirac operator, starting from
 // the same random source, and prints both sets of eigenvalues.
 //
 // The gauge configuration is read from "ckpoint_lat.4000" in the current
 // working directory (previously an absolute path inside one user's home
 // directory, which could not work on any other machine).
 int main (int argc, char ** argv)
 {
  Grid_init(&argc,&argv);
  const int Ls=16;
  // The lattice size is fixed here rather than taken from GridDefaultLatt().
  std::vector<int> lat_size {16, 16, 16, 32};
  std::cout << "Lattice size: " << lat_size << std::endl;
  GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(lat_size,
 							 GridDefaultSimd(Nd,vComplex::Nsimd()),
 							 GridDefaultMpi());
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
  // Coarsened grid: every extent of the fine lattice halved.
  Coordinate clatt = lat_size;
  for(int d=0;d<clatt.size();d++){
    clatt[d] = clatt[d]/2;
  }
  GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
  GridCartesian *Coarse5d =  SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
  std::vector<int> seeds4({1,2,3,4});
  std::vector<int> seeds5({5,6,7,8});
  std::vector<int> cseeds({5,6,7,8});
  // Only RNG5 is drawn from below; RNG4 and CRNG are still constructed and seeded.
  GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5);
  GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4);
  GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
  LatticeFermion    src(FGrid); random(RNG5,src);
  LatticeGaugeField Umu(UGrid);
  FieldMetaData header;
  // Relative to the working directory, so the test is not tied to one machine.
  std::string file("ckpoint_lat.4000");
  NerscIO::readConfiguration(Umu,header,file);
  RealD mass=0.01;
  RealD M5=1.8;
  DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
  DomainWallFermionD Dpv(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,1.0,M5);
  const int nbasis = 3;            // size of approximate basis for low-mode space
  typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator;
  typedef LittleDiracOperator::CoarseVector CoarseVector;
  NextToNearestStencilGeometry5D geom(Coarse5d);
  std::cout<<GridLogMessage<<std::endl;
  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
  std::cout<<GridLogMessage<<std::endl;
  typedef PVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> PVdagM_t;
  typedef ShiftedPVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> ShiftedPVdagM_t;
  typedef ShiftedComplexPVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> ShiftedComplexPVdagM_t;
  PVdagM_t PVdagM(Ddwf, Dpv);
  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);
  SquaredLinearOperator<DomainWallFermionD, LatticeFermionD> Dsq (Ddwf);
  NonHermitianLinearOperator<DomainWallFermionD, LatticeFermionD> DLinOp (Ddwf);
  // Krylov parameters shared by both solvers.
  // NOTE(review): an earlier comment reported odd behaviour for Nm = 6 and Nm = 3.
  int Nm = 10;
  int Nk = 6;
  int maxIter = 3;
  int Nstop = 6;
  Coordinate origin ({0,0,0,0});
  auto tmpSrc = peekSite(src, origin);
  std::cout << "[DEBUG] Source at origin = " <<  tmpSrc << std::endl;
  // Lanczos gets its own copy of the source.
  LatticeFermion src2 = src;
  // Arnoldi on the Hermitian operator, for comparison to Lanczos.
  Arnoldi Arn (Dsq, FGrid, 1e-8, EvalNormLarge);
  Arn(src, maxIter, Nm, Nk, Nstop);
  std::cout << "Hessenberg mat for symmetric N = " << Nm << std::endl;
  std::cout << Arn.getHessenbergMat() << std::endl;
  // Implicitly restarted Lanczos on the same operator.
  int Nconv;
  PlainHermOp DsqHermOp (Dsq);
  std::vector<RealD> levals (Nm+1); std::vector<LatticeFermionD> levecs (Nm, src);
  ImplicitlyRestartedLanczos<LatticeFermionD> Lanc (DsqHermOp, DsqHermOp, Nstop, Nk, Nm, 1e-8, maxIter);
  std::cout << GridLogMessage << "Calculating with Lanczos" << std::endl;
  std::cout << "running Lanczos now" << std::endl;
  Lanc.calc(levals, levecs, src2, Nconv);
  std::cout<<GridLogMessage << "*******************************************" << std::endl;
  std::cout<<GridLogMessage << "***************** RESULTS *****************" << std::endl;
  std::cout<<GridLogMessage << "*******************************************" << std::endl;
  std::cout << GridLogMessage << "Arnoldi eigenvalues: " << std::endl << Arn.getEvals() << std::endl;
  std::cout << GridLogMessage << "Lanczos eigenvalues: " << std::endl << levals << std::endl;
  std::cout<<GridLogMessage<<std::endl;
  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
  std::cout<<GridLogMessage<<std::endl;
  std::cout<<GridLogMessage << "Done "<< std::endl;
  Grid_finalize();
  return 0;
 }
@@ -1,405 +0,0 @@
 /*************************************************************************************
    Grid physics library, www.github.com/paboyle/Grid 
    Source file: ./tests/Test_padded_cell.cc
    Copyright (C) 2023
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
 // Tests code written to read off the Krylov coefficients
 #include <Grid/Grid.h>
 #include <Grid/lattice/PaddedCell.h>
 #include <Grid/stencil/GeneralLocalStencil.h>
 #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
 #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h>
 #include <Grid/algorithms/iterative/BiCGSTAB.h>
 #include <Grid/algorithms/iterative/ConjugateGradient.h>
 using namespace std;
 using namespace Grid;
 // Hermitian wrapper around a matrix M: HermOp applies M followed by Mdag (i.e. Mdag M),
 // and both Op and AdjOp forward to HermOp.
 template<class Matrix,class Field>
 class SquaredLinearOperator : public LinearOperatorBase<Field> {
 public:
   Matrix &_Mat;   // wrapped matrix, held by reference
   SquaredLinearOperator(Matrix &Mat) : _Mat(Mat) {}
   // Stencil-style entry points are not provided by this wrapper.
   void OpDiag   (const Field &in, Field &out)                   { assert(0); }
   void OpDir    (const Field &in, Field &out,int dir,int disp)  { assert(0); }
   void OpDirAll (const Field &in, std::vector<Field> &out)      { assert(0); }
   // The squared operator is its own adjoint, so Op and AdjOp are the same map.
   void Op    (const Field &in, Field &out) { HermOp(in, out); }
   void AdjOp (const Field &in, Field &out) { HermOp(in, out); }
   // Direct access to the un-squared matrix and its adjoint.
   void _Op    (const Field &in, Field &out) { _Mat.M(in, out); }
   void _AdjOp (const Field &in, Field &out) { _Mat.Mdag(in, out); }
   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) { assert(0); }
   // out = Mdag M in, using a scratch field on the same grid as the input.
   void HermOp(const Field &in, Field &out) {
     Field MofIn(in.Grid());
     _Op(in, MofIn);
     _AdjOp(MofIn, out);
   }
 };
 /**
 * Computes the coefficients in the Krylov expansion for 1/D ~ \sum_{i=0}^N c_i D^i. 
 * 
 * Parameters
 * ----------
 * std::vector<double> &coeffs
 *    Polynomial coeffients to return, with indexing order (c_0, c_1, c_2, ..., c_n). 
 * LinearOperatorBase<FineField> &DiracOp
 *    Dirac operator D. 
 * FineField src
 *    Source field b. 
 * FineField psiStar
 *    Output approximation for D^{-1} b coming from a Krylov method. 
 * int N
 *    Dimension of the polynomial approximation (Krylov space K_{N-1} = {b, Db, D^2 b, ..., D^{N-1} b}).
 */
 void poly_coeffs(std::vector<ComplexD> &coeffs, LinearOperatorBase<LatticeFermion> &DiracOp, LatticeFermion src,
                  LatticeFermion psiStar, GridCartesian* FGrid, int N, bool use_herm = false)
 {
  // stdBasis = {b, Db, D^2 b, ..., D^N b}, kryBasis = {k0, k1, ..., kN}
  std::vector<LatticeFermion> kryBasis;
  Eigen::VectorXcd psiStarCoeffs (N);
  // Normalize by 1 / ||src||; does not change the polynomial coefficients
  double srcNorm   = 1 / std::sqrt(norm2(src));
  kryBasis.push_back(srcNorm * src);                // normalized source
  psiStar          = srcNorm * psiStar;
  psiStarCoeffs(0) = innerProduct(kryBasis[0], psiStar);
  // orthonormalize canonical Krylov basis {b, Db, D^2 b, ..., D^{N-1} b} <--> {k_i} and compute components <k_i | psi*>
  LatticeFermion tmp (FGrid);
  for (int i = 0; i < N - 1; i++) {               // construct ONB for {b, Db, ..., D^{i+1} b}
    if (use_herm) {
      DiracOp.HermOp(kryBasis.back(), tmp);         // tmp \in span{(D^\dag D)^{i+1} b} \oplus span{(D^\dag D)^i b, ..., D^\dag D b, b}
    } else {
      DiracOp.Op(kryBasis.back(), tmp);             // tmp \in span{D^{i+1} b} \oplus span{D^i b, ..., Db, b}
    }
    for (int j = 0; j < i+1; j++) {               // orthogonalize tmp with previous basis vectors
      ComplexD coeff = innerProduct(kryBasis[j], tmp);      // <k_j | tmp>
      tmp -= coeff * kryBasis[j];                           // subtract off |k_j><k_j | tmp>; now tmp is perp to |k_j>
    }
    double tmpNorm = 1 / std::sqrt(norm2(tmp));
    kryBasis.push_back(
      tmpNorm * tmp
    );                                                      // normalize |k_i> and add to kryBasis
    psiStarCoeffs(i+1) = innerProduct(kryBasis[i+1], psiStar);  // compute < k_i | psi* >
  }
  // To verify the basis is ONB
  // for (int i = 0; i < N; i++) {
  //   for (int j = 0; j < N; j++) {
  //     std::cout << "<ki|kj> for (i,j) = (" << i << ", " << j << ") = "  << innerProduct(kryBasis[i], kryBasis[j]) << std::endl;
  //   }
  // }
  // Compute change of basis matrix
  LatticeFermion tmp2 (FGrid);
  Eigen::MatrixXcd M = Eigen::MatrixXcd::Zero(N, N);
  tmp = kryBasis[0];       // current Krylov vector; starts with tmp = src (normalized)
  for (int i = 0; i < N; i++) {
    for (int j = 0; j < i + 1; j++) {    // fill column with components of kryVec. Only need j <= i to get orthonormal components
      M(j, i) = innerProduct(kryBasis[j], tmp);
    }    
    if (use_herm) {     // tmp --> D^\dag D(tmp)
      DiracOp.HermOp(tmp, tmp2);
      tmp = tmp2;
    } else {      // tmp --> D(tmp). Note that DiracOp.Op(tmp, tmp) will cause a bug
      DiracOp.Op(tmp, tmp2);
      tmp = tmp2;
    }
  }
  // Compute M^{-1} @ psiStarCoeffs and copy to coeffs
  Eigen::VectorXcd res (N);
  res = M.inverse() * psiStarCoeffs;
  for (int i = 0; i < N; i++) {
    coeffs[i] = res(i);
  }
 }
 // out file for poly coefficients (should it be complex?)
 // class PolynomialFile: Serializable {
 // public:
 //   GRID_SERIALIZABLE_CLASS_MEMBERS(OutputFile, std::vector< Real >, data);
 // };
 // Evaluates the polynomial p(x) = \sum_i coeffs[i] x^i, with coeffs ordered (c_0, c_1, ..., c_n).
 // Uses Horner's rule: one multiply-add per coefficient, and no reliance on complex pow(x, 0),
 // which need not return 1 when x == 0. An empty coefficient list evaluates to 0.
 std::complex<double> poly_approx(std::complex<double> x, std::vector<std::complex<double>> coeffs) {
   std::complex<double> px (0.0, 0.0);
   for (auto c = coeffs.rbegin(); c != coeffs.rend(); ++c) {   // highest power first
     px = px * x + *c;
   }
   return px;
 }
 /**
 * Returns the approximation psi = \sum_i c_i D^i b resulting from a Krylov solver.
 *
 * Parameters
 * ----------
 * LatticeFermion &psi
 *    Approximation field, returned psi = \sum_i c_i D^i b. Its grid is used for the scratch fields.
 * LatticeFermion src
 *    Source b used to generate the Krylov space K_n(D, b).
 * LinearOperatorBase<LatticeFermion> &Linop
 *    Operator D, applied through Linop.Op.
 * std::vector<ComplexD> coeffs
 *    Polynomial coefficients (c_0, c_1, ...). An empty list leaves psi = 0.
 */
 void krylovApprox(LatticeFermion &psi, LatticeFermion src, LinearOperatorBase<LatticeFermion> &Linop, std::vector<ComplexD> coeffs) {
   psi = Zero();
   LatticeFermion power (psi.Grid());    // holds D^i b
   LatticeFermion next  (psi.Grid());    // scratch: Op must not be applied in place
   power = src;
   const int nTerms = static_cast<int>(coeffs.size());
   for (int i = 0; i < nTerms; i++) {
     psi = psi + coeffs[i] * power;
     if (i + 1 < nTerms) {               // the power after the last term is never used
       Linop.Op(power, next);
       power = next;
     }
   }
 }
 // Test driver. Solves with the polynomial-tracking CG and GCR solvers, reconstructs the Krylov
 // polynomial coefficients independently with poly_coeffs(), prints both sets, and prints
 // ||psi - sum_i c_i D^i b||^2 as a check. The first part uses the squared operator Dsq, the
 // second part the non-Hermitian operator DLinOp built on Ddwf.
 int main (int argc, char ** argv)
 {
   Grid_init(&argc, &argv);
   const int Ls = 8;
   std::vector<int> lat_size {16, 16, 16, 32};
   std::cout << "Lattice size: " << lat_size << std::endl;
   GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(lat_size,
                                                          GridDefaultSimd(Nd,vComplex::Nsimd()),
                                                          GridDefaultMpi());
   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
   //////////////////////////////////////////////////////////////////////
   // You can manage seeds however you like.
   // Recommend SeedUniqueString.
   //////////////////////////////////////////////////////////////////////
   // std::vector<int> seeds4({1, 2, 3, 4});
   // GridParallelRNG RNG4(UGrid);
   // RNG4.SeedFixedIntegers(seeds4);
   // std::vector<int> seeds5({1, 2, 3, 4, 5});
   // GridParallelRNG RNG5(FGrid);
   // RNG5.SeedFixedIntegers(seeds5);
   // std::string outStrStem = "/Users/patrickoare/Dropbox (MIT)/research/multigrid/grid_out/";
   LatticeGaugeField Umu(UGrid);
   FieldMetaData header;
   // NOTE(review): hard-coded absolute path to the gauge configuration; only valid on the author's machine.
   std::string file("/Users/patrickoare/libraries/PETSc-Grid/ckpoint_lat.4000");
   NerscIO::readConfiguration(Umu, header, file);
   RealD mass=0.01;
   RealD M5=1.8;
   // RealD M5=1.0;
   // load in Dirac operators that we'll use; square it to Hermitize
   // Dsq just needs to be a Hermitian operator so we can use CG on it
   DomainWallFermionD Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5);
   SquaredLinearOperator<DomainWallFermionD, LatticeFermionD> Dsq (Ddwf);
   NonHermitianLinearOperator<DomainWallFermionD, LatticeFermionD> DLinOp (Ddwf);
   LatticeFermion src (FGrid); src = 1.0;                              // Source to use
   LatticeFermion psiCG (FGrid); psiCG = Zero();                       // Field to solve with for CG
   LatticeFermion psiGCR (FGrid); psiGCR = Zero();                     // Field to solve with for GCR
   std::cout << GridLogMessage << "*******************************************" << std::endl;
   std::cout << GridLogMessage << "********** TESTING CG POLY COEFFS *********" << std::endl;
   std::cout << GridLogMessage << "*******************************************" << std::endl << std::endl;
   double tol = 1.0e-8;
   int N = 5;           // max iterations (size of Krylov basis)
   // GCR variables
   int outer_iters = 1;                  // num restarts for GCR
   TrivialPrecon<LatticeFermionD> prec;  // trivial preconditioner
   ConjugateGradientPolynomial<LatticeFermion> CGP(tol, N, false);
   CGP(Dsq, src, psiCG);
   // Compute Krylov coeffs directly and compare
   std::vector<ComplexD> cg_coeffs (N);
   poly_coeffs(cg_coeffs, Dsq, src, psiCG, FGrid, N, true);
   PolynomialFile PF;
   // Use GCR solver, also get poly coeffs
   std::vector<ComplexD> gcr_sym_coeffs (N);     // Can try N --> N + 3 to test to see if the last 3 comps are 0
   PGCRPolynomial<LatticeFermionD> GCRPolySym(tol, outer_iters, Dsq, prec, N+1, N, PF);    // mmax sets the memory, note the last beta doesn't really matter for updating the polynomial
   GCRPolySym(src, psiGCR);
   // poly_coeffs(gcr_sym_coeffs, Dsq, src, psi, FGrid, N, true);
   poly_coeffs(gcr_sym_coeffs, Dsq, src, psiGCR, FGrid, N, true);
   std::cout << GridLogMessage << std::endl << "******** CG POLYNOMIAL COEFFICIENTS *******" << std::endl;
   std::cout << GridLogMessage << CGP.polynomial << std::endl << std::endl;
   std::cout << GridLogMessage << "****** DIRECT POLYNOMIAL COEFFICIENTS *****" << std::endl;
   std::cout << GridLogMessage << cg_coeffs << std::endl << std::endl;
   // TODO: try GCR with a Hermitian operator (Dsq)
   std::cout << GridLogMessage << "****** GCR COEFFICIENTS *****" << std::endl;
   std::cout << GridLogMessage << GCRPolySym.polynomial << std::endl << std::endl;
   std::cout << GridLogMessage << "****** DIRECT GCR COEFFICIENTS *****" << std::endl;
   std::cout << GridLogMessage << gcr_sym_coeffs << std::endl << std::endl;
   // test how good the decomposition is
   std::cout << "Testing fidelity of decomposition by computing ||psi* - sum_i c_i D^i b||^2!" << std::endl;
   LatticeFermion psiPrime (FGrid);
   // for CG
   krylovApprox(psiPrime, src, Dsq, cg_coeffs);
   std::cout << "CG with Dsq, ||psi - psiPrime||^2 = " << norm2(psiCG - psiPrime) << std::endl;
   // for GCR with alpha / beta computation
   krylovApprox(psiPrime, src, Dsq, GCRPolySym.polynomial);
   std::cout << "GCR with Dsq, ||psi - psiPrime||^2 = " << norm2(psiGCR - psiPrime) << std::endl;
   // for GCR with coefficients reconstructed directly by poly_coeffs
   krylovApprox(psiPrime, src, Dsq, gcr_sym_coeffs);
   std::cout << "GCR direct with Dsq, ||psi - psiPrime||^2 = " << norm2(psiGCR - psiPrime) << std::endl;
   // std::vector<double> real_cg_diff (N);
   // for (int i = 0; i < N; i++) { real_cg_diff[i] = std::abs(cg_coeffs[i].real() - CGP.polynomial[i]); }
   // std::cout << GridLogMessage << "************* COEFF DIFFERENCE ************" << std::endl;
   // std::cout << GridLogMessage << real_cg_diff << std::endl << std::endl;
   // GCR polynomial reconstruction with Ddwf!
   std::cout << GridLogMessage << "*******************************************" << std::endl;
   std::cout << GridLogMessage << "********* TESTING GCR POLY COEFFS *********" << std::endl;
   std::cout << GridLogMessage << "*******************************************" << std::endl << std::endl;
   // re-init variables
   src = 1.0;
   src = (1 / std::sqrt(norm2(src))) * src;
   psiGCR = Zero(); psiPrime = Zero();
   // test GCR poly
   PGCRPolynomial<LatticeFermionD> GCRPoly(tol, outer_iters, DLinOp, prec, N+1, N, PF);    // mmax sets the memory, note the last beta doesn't really matter for updating the polynomial
   GCRPoly(src, psiGCR);
   // Compute Krylov coeffs directly and compare
   // N = 1;    // compare the N > 1 decomposition with the psi* resulting from N = 1
   std::vector<ComplexD> gcr_coeffs (N);   // note N --> N + k should just give k coeffs that are 0; this works as intended
   poly_coeffs(gcr_coeffs, DLinOp, src, psiGCR, FGrid, N, false);
   std::cout << GridLogMessage << "******* GCR POLYNOMIAL COEFFICIENTS *******" << std::endl;
   std::cout << GridLogMessage << GCRPoly.polynomial << std::endl << std::endl;
   std::cout << GridLogMessage << "****** DIRECT POLYNOMIAL COEFFICIENTS *****" << std::endl;
   std::cout << GridLogMessage << gcr_coeffs << std::endl << std::endl;
   // test how good the decomposition is
   std::cout << "Testing fidelity of decomposition by computing ||psi* - sum_i c_i D^i b||^2!" << std::endl;
   // for GCR with alpha / beta computation; this section runs on DLinOp (Ddwf), not Dsq
   krylovApprox(psiPrime, src, DLinOp, GCRPoly.polynomial);
   std::cout << "GCR with Ddwf, ||psi - psiPrime||^2 = " << norm2(psiGCR - psiPrime) << std::endl;
   // for GCR with coefficients reconstructed directly by poly_coeffs
   krylovApprox(psiPrime, src, DLinOp, gcr_coeffs);
   std::cout << "GCR direct with Ddwf, ||psi - psiPrime||^2 = " << norm2(psiGCR - psiPrime) << std::endl;
   // TESTS TO DO THE N = 2 CASE DIRECTLY
   /*
   std::vector<std::complex<double>> alphas {
     std::complex(0.244300601, 0.00013007545), 
     std::complex(0.285370971, -0.000160704481)
   };
   std::complex<double> beta00 (-0.184661284, -6.52153945e-05);
   LatticeFermion psi2 (FGrid);
   LatticeFermion Dsrc (FGrid);
   DLinOp.Op(src, Dsrc);
   std::complex<double> c1 = alphas[0] + alphas[1] * (1. + beta00);
   std::complex<double> c2 = -alphas[0] * alphas[1];
   psi2 = c1 * src + c2 * Dsrc;
   std::cout << "||b|| = " << norm2(src) << std::endl;
   std::cout << "||Db|| = " << norm2(Dsrc) << std::endl;
   // fail; so far this is giving something different than what's being computed in krylovApprox (idk how?)
   std::cout << "c1 and c2 are: " << c1 << " and " << c2 << std::endl;
   std::cout << "GCRPoly polynomial coeffs are (should equal c1 and c2): " << GCRPoly.polynomial << std::endl;
   std::cout << "||GCRpsi - psi2||_2^2 = " << norm2(psiGCR - psi2) << std::endl;
   // pass
   LatticeFermion src2 (FGrid);
   src2 = 1.0;
   src2 = (1 / std::sqrt(norm2(src2))) * src2;
   std::cout << "||ones - src|| (to verify that src is the same throughout, should be 0) = " << norm2(src2 - src) << std::endl;
   // pass
   krylovApprox(psiPrime, src, DLinOp, GCRPoly.polynomial);
   std::cout << "GCR with Dsq, ||psi2 - psiPrime||^2 = " << norm2(psi2 - psiPrime) << std::endl;
   std::vector<ComplexD> psi2_coeffs (N);   // note N --> N + k should just give k coeffs that are 0; this works as intended
   poly_coeffs(psi2_coeffs, DLinOp, src, psi2, FGrid, N, false);
   krylovApprox(psiPrime, src, DLinOp, psi2_coeffs);
   std::cout << "GCR direct with Dsq, ||psi - psiPrime||^2 = " << norm2(psi2 - psiPrime) << std::endl;
   */
   // std::complex z (10.0, 0.0);     // z = 10
   // std::cout << GridLogMessage << "************* GCR POLY(z = 10) *************" << std::endl;
   // std::cout << GridLogMessage << poly_approx(z, GCRPoly.polynomial) << std::endl;
   // std::cout << GridLogMessage << "************ DIRECT POLY(z = 10) ***********" << std::endl;
   // std::cout << GridLogMessage << poly_approx(z, gcr_coeffs) << std::endl;
   // std::vector<std::complex<double>> gcr_diff (N);
   // for (int i = 0; i < N; i++) { gcr_diff[i] = gcr_coeffs[i] - GCRPoly.polynomial[i]; }
   // std::cout << GridLogMessage << "*********** GCR COEFF DIFFERENCE **********" << std::endl;
   // std::cout << GridLogMessage << gcr_diff << std::endl << std::endl;
   std::cout<<GridLogMessage<<std::endl;
   std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
   std::cout<<GridLogMessage<<std::endl;
   std::cout<<GridLogMessage << "Done "<< std::endl;
   Grid_finalize();
   return 0;
 }
@@ -1,380 +0,0 @@
 /*************************************************************************************
    Grid physics library, www.github.com/paboyle/Grid 
    Source file: ./tests/Test_padded_cell.cc
    Copyright (C) 2023
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
 // copied here from Test_general_coarse_pvdagm.cc
 #include <cstdlib>
 #include <Grid/Grid.h>
 #include <Grid/lattice/PaddedCell.h>
 #include <Grid/stencil/GeneralLocalStencil.h>
 #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
 #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h>
 #include <Grid/algorithms/iterative/BiCGSTAB.h>
 using namespace std;
 using namespace Grid;
 namespace Grid {
 struct LanczosParameters: Serializable {
  GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
 		  		RealD, mass , 
 		  		RealD, mstep , 
 				Integer, Nstop,
                                Integer, Nk,
                                Integer, Np,
                                Integer, ReadEvec,
                                Integer, maxIter,
 	  			RealD, resid,
 	  			RealD, ChebyLow,
 	  			RealD, ChebyHigh,
 	  			Integer, ChebyOrder)
  LanczosParameters() {
    ////////////////////////////// Default values
      mass = 0;
    /////////////////////////////////
  }
  template <class ReaderClass >
  LanczosParameters(Reader<ReaderClass> & TheReader){
    initialize(TheReader);
  }
  template < class ReaderClass > 
  void initialize(Reader<ReaderClass> &TheReader){
 //    std::cout << GridLogMessage << "Reading HMC\n";
    read(TheReader, "HMC", *this);
  }
  void print_parameters() const {
 //    std::cout << GridLogMessage << "[HMC parameters] Trajectories            : " << Trajectories << "\n";
 //    std::cout << GridLogMessage << "[HMC parameters] Start trajectory        : " << StartTrajectory << "\n";
 //    std::cout << GridLogMessage << "[HMC parameters] Metropolis test (on/off): " << std::boolalpha << MetropolisTest << "\n";
 //    std::cout << GridLogMessage << "[HMC parameters] Thermalization trajs    : " << NoMetropolisUntil << "\n";
 //    std::cout << GridLogMessage << "[HMC parameters] Starting type           : " << StartingType << "\n";
 //    MD.print_parameters();
  }
 };
 }
 template <class T> void writeFile(T& in, std::string const fname){
 #if 1
  // Ref: https://github.com/paboyle/Grid/blob/feature/scidac-wp1/tests/debug/Test_general_coarse_hdcg_phys48.cc#L111
  std::cout << Grid::GridLogMessage << "Writes to: " << fname << std::endl;
  Grid::emptyUserRecord record;
  Grid::ScidacWriter WR(in.Grid()->IsBoss());
  WR.open(fname);
  WR.writeScidacFieldRecord(in,record,0);
  WR.close();
 #endif
  // What is the appropriate way to throw error?
 }
 // Operator and field types used throughout this driver.
 using WilsonOp     = WilsonFermionD;
 using FermionField = typename WilsonFermionD::FermionField;
 // Linear operator whose Op() applies the inverse of the wrapped matrix M by solving the
 // normal equations (Mdag M) out = Mdag in with conjugate gradient.
 //   _Mat : wrapped matrix, held by reference
 //   _stp : value passed as the first argument of ConjugateGradient (presumably the stopping
 //          tolerance -- TODO confirm)
 template<class Matrix,class Field>
 class InvertNonHermitianLinearOperator : public LinearOperatorBase<Field> {
   Matrix &_Mat;
   RealD _stp;
 public:
   InvertNonHermitianLinearOperator(Matrix &Mat,RealD stp=1e-8): _Mat(Mat),_stp(stp){};
   // Support for coarsening to a multigrid: not provided by this wrapper
   void OpDiag (const Field &in, Field &out) {
 //    _Mat.Mdiag(in,out);
 //    out = out + shift*in;
     assert(0);
   }
   void OpDir  (const Field &in, Field &out,int dir,int disp) {
 //    _Mat.Mdir(in,out,dir,disp);
     assert(0);
   }
   void OpDirAll  (const Field &in, std::vector<Field> &out){
 //    _Mat.MdirAll(in,out);
     assert(0);
   };
   // out = (Mdag M)^{-1} Mdag in
   void Op     (const Field &in, Field &out){
     Field tmp(in.Grid());
 //    _Mat.M(in,out);
 //  RealD mass=-shift;
 //  WilsonCloverFermionD Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t);
 //  NonHermitianLinearOperator<Matrix,Field> HermOp(_Mat);
 //  BiCGSTAB<Field> CG(_stp,10000);
     _Mat.Mdag(in,tmp);
     // Start the solve from a well-defined guess instead of whatever the caller left in `out`.
     // Done after tmp is formed so that the call stays correct if `in` and `out` alias.
     // NOTE(review): assumes the CG solver uses the incoming solution field as its initial guess.
     out = Zero();
     MdagMLinearOperator<Matrix,Field> HermOp(_Mat);
     ConjugateGradient<Field> CG(_stp,10000);
     CG(HermOp,tmp,out);
 //    out = out + shift * in;
   }
   // NOTE(review): applies Mdag, which is not the adjoint of the inverse applied by Op();
   // left unchanged because callers may rely on it -- confirm intent.
   void AdjOp     (const Field &in, Field &out){
     _Mat.Mdag(in,out);
 //    out = out + shift * in;
   }
   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
     assert(0);
   }
   void HermOp(const Field &in, Field &out){
     assert(0);
   }
 };
 // Exercises ComplexSchurDecomposition on the Hessenberg matrix produced by one Arnoldi
 // iteration: checks the decomposition, swaps diagonal entry 3 with its neighbour, walks the
 // last of the leading Nk diagonal entries to the front by adjacent swaps, and finally calls
 // schurReorder(Nk). The matrix S and the decomposition check are printed after each step.
 template<class Field>
 void testSchurFromHess(Arnoldi<Field>& Arn, Field& src, int Nlarge, int Nm, int Nk) {
   const char *banner = "*******************************************";
   std::cout<<GridLogMessage<<banner<<std::endl;
   std::cout << GridLogMessage << "Testing Schur reordering, Nm = " << Nm << ", Nk = " << Nk << std::endl;
   std::cout<<GridLogMessage<<banner<<std::endl;
   std::cout << GridLogMessage << "Running Arnoldi for 1 iteration to get a Hessenberg." << std::endl;
   Arn(src, 1, Nlarge, Nm, Nlarge);
   Eigen::MatrixXcd Hess = Arn.getHessenbergMat();
   std::cout << GridLogMessage << "Hessenberg for use: " << std::endl << Hess << std::endl;
   ComplexSchurDecomposition schur (Hess, true);
   // Prints whether the decomposition is still consistent after a modification.
   auto reportStillHolds = [&schur]() {
     std::cout << "Schur decomp still holds? " << schur.checkDecomposition() << std::endl;
   };
   bool isDecomposed = schur.checkDecomposition();
   std::cout << "Schur decomp holds? " << isDecomposed << std::endl;
   std::cout << GridLogMessage << "S = " << std::endl << schur.getMatrixS() << std::endl;
   // Single adjacent swap
   std::cout << GridLogMessage << "Swapping S(3, 3) with S(4, 4)" << std::endl;
   schur.swapEvals(3);
   std::cout << GridLogMessage << "S after swap = " << std::endl << schur.getMatrixS() << std::endl;
   reportStillHolds();
   // Now move last diagonal element all the way to the front.
   std::cout << GridLogMessage << "Moving last eval to front. S at start = " << std::endl << schur.getMatrixS() << std::endl;
   for (int swapIdx = Nk - 2; swapIdx >= 0; --swapIdx) {
     schur.swapEvals(swapIdx);
     std::cout << GridLogMessage << "S after swap of index " << swapIdx << " = " << std::endl << schur.getMatrixS() << std::endl;
     reportStillHolds();
   }
   // Full reorder
   std::cout << GridLogMessage << "Testing Schur reorder" << std::endl;
   schur.schurReorder(Nk);
   std::cout << GridLogMessage << "S after reorder = " << std::endl << schur.getMatrixS() << std::endl;
   reportStillHolds();
 }
 // Driver: reads a gauge configuration ("config") and run parameters ("LanParams.xml"), runs
 // Krylov-Schur on the Wilson operator, and writes the site-local density of each returned
 // eigenvector plus the sum of the eigenvectors to SciDAC files in the working directory.
 int main (int argc, char ** argv)
 {
   Grid_init(&argc,&argv);
   const int Ls=16;
 //   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
 //  std::vector<int> lat_size {32, 32, 32, 32};
 //  std::cout << "Lattice size: " << lat_size << std::endl;
   GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
                                                          GridDefaultSimd(Nd,vComplex::Nsimd()),
                                                          GridDefaultMpi());
   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
 //  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
 //  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
   // The Wilson operator is four dimensional, so the "fermion" grids alias the gauge grids
   GridCartesian         * FGrid   = UGrid;
   GridRedBlackCartesian * FrbGrid = UrbGrid;
   // Construct a coarsened grid
   // poare TODO: replace this with the following line?
   Coordinate clatt = GridDefaultLatt();
 //   Coordinate clatt = GridDefaultLatt();              // [PO] initial line before I edited it
   for(int d=0;d<clatt.size();d++){
     std::cout << GridLogMessage<< clatt[d] <<std::endl;
     clatt[d] = clatt[d]/2;
     //    clatt[d] = clatt[d]/4;
   }
   GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
   GridCartesian *Coarse5d =  SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
   std::vector<int> seeds4({1,2,3,4});
   std::vector<int> seeds5({5,6,7,8});
   std::vector<int> cseeds({5,6,7,8});
   GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5);
   GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4);
   GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
   LatticeFermion result(FGrid); result=Zero();
   LatticeFermion    ref(FGrid); ref=Zero();
   LatticeFermion    tmp(FGrid);
   LatticeFermion    err(FGrid);
   LatticeGaugeField Umu(UGrid);
   FieldMetaData header;
   std::string file("config");
 //  std::string file("Users/patrickoare/libraries/PETSc-Grid/ckpoint_lat.4000");
   NerscIO::readConfiguration(Umu,header,file);
   // Read the run parameters, echo them, and write them back out for the record
   LanczosParameters LanParams;
   {
     XmlReader  HMCrd("LanParams.xml");
     read(HMCrd,"LanczosParameters",LanParams);
   }
   std::cout << GridLogMessage<< LanParams <<std::endl;
   {
     XmlWriter HMCwr("LanParams.xml.out");
     write(HMCwr,"LanczosParameters",LanParams);
   }
   RealD mass=0.01;
   RealD M5=1.8;
   // PowerMethod<LatticeFermion> PM; PM(PVdagM, src);
   // Initial values only: all of these are overwritten from LanParams below
   int Nm = 50;
   int Nk = 12;
   int Np = 38;
   // int Nk = Nm+1;     // if just running once
   int maxIter = 10000;
   int Nstop = 10;
   RealD resid = 1.0e-5;
   std::vector<Complex> boundary = {1,1,1,-1};
   WilsonOp::ImplParams Params(boundary);
 //  DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 //  DomainWallFermionD Dpv(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,1.0,M5);
   mass=LanParams.mass;
   std::cout << GridLogIRL<< "mass "<<mass<<std::endl;
   WilsonOp WilsonOperator(Umu,*UGrid,*UrbGrid,mass,Params);
   // const int nbasis = 20;            // size of approximate basis for low-mode space
   const int nbasis = 3;            // size of approximate basis for low-mode space
   const int cb = 0 ;
   LatticeFermion prom(FGrid);
   typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator;
   typedef LittleDiracOperator::CoarseVector CoarseVector;
   NextToNearestStencilGeometry5D geom(Coarse5d);
   std::cout<<GridLogMessage<<std::endl;
   std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
   std::cout<<GridLogMessage<<std::endl;
 //  typedef PVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> PVdagM_t;
 //  typedef ShiftedPVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> ShiftedPVdagM_t;
 //  typedef ShiftedComplexPVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> ShiftedComplexPVdagM_t;
 //  PVdagM_t PVdagM(Ddwf, Dpv);
 //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);
 //  SquaredLinearOperator<DomainWallFermionD, LatticeFermionD> Dsq (Ddwf);
 //  NonHermitianLinearOperator<DomainWallFermionD, LatticeFermionD> DLinOp (Ddwf);
   NonHermitianLinearOperator<WilsonOp,FermionField> Dwilson(WilsonOperator); /// <-----
   InvertNonHermitianLinearOperator<WilsonOp,FermionField> Iwilson(WilsonOperator); /// <-----
   MdagMLinearOperator<WilsonOp,FermionField> HermOp(WilsonOperator); /// <-----
   Gamma5HermitianLinearOperator <WilsonOp,LatticeFermion> HermOp2(WilsonOperator); /// <----
   // PowerMethod<LatticeFermion> PM; PM(PVdagM, src);
   resid=LanParams.resid;
   Nstop=LanParams.Nstop;
   Nk=LanParams.Nk;
   Np=LanParams.Np;
   maxIter=LanParams.maxIter;
   Nm = Nk + Np;
   int Nu=16;
   std::vector<LatticeFermion> src(Nu,FGrid);
   for(int i=0;i<Nu;i++) random(RNG5,src[i]);
   // Optionally replace the random start vector by one read from disk
   if(LanParams.ReadEvec) {
     std::string evecs_file="evec_in";
     std::cout << GridLogIRL<< "Reading evecs from "<<evecs_file<<std::endl;
     emptyUserRecord record;
     Grid::ScidacReader RD;
     RD.open(evecs_file);
     RD.readScidacFieldRecord(src[0],record);
     RD.close();
   }
   Coordinate origin ({0,0,0,0});
   auto tmpSrc = peekSite(src[0], origin);
   std::cout << "[DEBUG] Source at origin = " <<  tmpSrc << std::endl;
   LatticeFermion src2 = src[0];
   // Run KrylovSchur and Arnoldi on a Hermitian matrix
   std::cout << GridLogMessage << "Running Krylov Schur" << std::endl;
   // KrylovSchur KrySchur (Dsq, FGrid, 1e-8, EvalNormLarge);
 //  KrylovSchur KrySchur (Dsq, FGrid, 1e-8,EvalImNormSmall);
 //  KrySchur(src, maxIter, Nm, Nk, Nstop);
 //  KrylovSchur KrySchur (HermOp2, UGrid, resid,EvalNormSmall);
 //  Hacked, really EvalImagSmall
 #if 1
   RealD shift=1.5;
   KrylovSchur KrySchur (Dwilson, UGrid, resid,EvalImNormSmall);
   KrySchur(src[0], maxIter, Nm, Nk, Nstop,&shift);
 #else
   KrylovSchur KrySchur (Iwilson, UGrid, resid,EvalImNormSmall);
   KrySchur(src[0], maxIter, Nm, Nk, Nstop);
 #endif
   std::cout << GridLogMessage << "evec.size= " << KrySchur.evecs.size()<< std::endl;
   // Never index past the eigenvectors actually returned: the solver may hand back fewer than Nstop.
   const int Nfound = static_cast<int>(KrySchur.evecs.size());
   const int Nwrite = (Nstop < Nfound) ? Nstop : Nfound;
   if (Nfound < Nstop) {
     std::cout << GridLogMessage << "Only " << Nfound << " of the requested " << Nstop
               << " eigenvectors are available; writing those." << std::endl;
   }
   if (Nfound > 0) {
     // Sum of the eigenvectors, accumulated in src[0]
     src[0]=KrySchur.evecs[0];
     for (int i=1;i<Nwrite;i++) src[0]+=KrySchur.evecs[i];
     // Site-local density of each eigenvector
     for (int i=0;i<Nwrite;i++)
     {
       std::string evfile ("./evec_"+std::to_string(mass)+"_"+std::to_string(i));
       auto evdensity = localInnerProduct(KrySchur.evecs[i],KrySchur.evecs[i] );
       writeFile(evdensity,evfile);
     }
     {
       std::string evfile ("./evec_"+std::to_string(mass)+"_sum");
 //        auto evdensity = localInnerProduct(evec[i],evec[i] );
       writeFile(src[0],evfile);
     }
   }
   /*
   std::cout << GridLogMessage << "Running Arnoldi" << std::endl;
   // Arnoldi Arn (Dsq, FGrid, 1e-8);
   Arnoldi Arn (DLinOp, FGrid, 1e-8);
   testSchurFromHess<LatticeFermion>(Arn, src, 10, 6, 4);
   Arnoldi Arn2 (DLinOp, FGrid, 1e-8);
   testSchurFromHess<LatticeFermion>(Arn2, src, 16, 12, 8);
   */
   std::cout<<GridLogMessage<<std::endl;
   std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
   std::cout<<GridLogMessage<<std::endl;
   std::cout<<GridLogMessage << "Done "<< std::endl;
   Grid_finalize();
   return 0;
 }
@@ -1,498 +0,0 @@
 /*************************************************************************************
    Runs the Krylov-Schur algorithm on a (pre-conditioned) domain-wall fermion operator 
    to determine part of its spectrum. 
    Usage : 
      $ ./Example_spec_kryschur <Nm> <Nk> <maxiter> <Nstop> <inFile> <outDir> <?rf>
      Nm = Maximum size of approximation subspace.
      Nk = Size of truncation subspace
      maxiter = Maximum number of iterations.
      Nstop   = Stop when Nstop eigenvalues have converged. 
      inFile  = Gauge configuration to read in.
      outDir  = Directory to write output to.
      rf      = (Optional) RitzFilter to sort with. Takes in any string in 
                  {EvalNormSmall, EvalNormLarge, EvalReSmall, EvalReLarge, EvalImSmall, EvalImLarge}
    Output:
      ${outDir}/evals.txt  = Contains all eigenvalues. Each line is formatted as `$idx $eval $ritz`, where:
                              - $idx is the index of the eigenvalue.
                              - $eval is the eigenvalue, formatted as "(re,im)".
                              - $ritz is the Ritz estimate of the eigenvalue (deviation from being a true eigenvalue)
      ${outDir}/evec${idx} = Eigenvector $idx written out in SCIDAC format (if LIME is enabled).
    Grid physics library, www.github.com/paboyle/Grid 
    Source file: ./tests/Test_padded_cell.cc
    Copyright (C) 2023
    Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    Author: Patrick Oare <poare@bnl.edu>
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
 #include <cstdlib>
 #include <Grid/Grid.h>
 #include <Grid/lattice/PaddedCell.h>
 #include <Grid/stencil/GeneralLocalStencil.h>
 #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
 #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h>
 #include <Grid/algorithms/iterative/BiCGSTAB.h>
 #include <Grid/parallelIO/IldgIOtypes.h>
 #include <Grid/parallelIO/IldgIO.h>
 using namespace std;
 using namespace Grid;
 namespace Grid {
 // Serialisable bundle of run parameters (mass, mass step, Nstop/Nk/Np, residual and
 // Chebyshev settings). The member list is declared through Grid's serialisation macro.
 // NOTE(review): the reader pulls these from a node named "HMC" -- confirm that this
 // is the intended node name for the input files used with this program.
 struct LanczosParameters : Serializable {
  GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
                                  RealD,   mass,
                                  RealD,   mstep,
                                  Integer, Nstop,
                                  Integer, Nk,
                                  Integer, Np,
                                  Integer, ReadEvec,
                                  RealD,   resid,
                                  RealD,   ChebyLow,
                                  RealD,   ChebyHigh,
                                  Integer, ChebyOrder)
  // Default construction performs no work; the members are not assigned here.
  LanczosParameters() {
  }
  // Construct and populate directly from a reader.
  template <class ReaderClass>
  LanczosParameters(Reader<ReaderClass> &TheReader) {
    read(TheReader, "HMC", *this);
  }
  // (Re)populate every member from the "HMC" node of the reader.
  template <class ReaderClass>
  void initialize(Reader<ReaderClass> &TheReader) {
    read(TheReader, "HMC", *this);
  }
  // Intentionally prints nothing at present.
  void print_parameters() const {
  }
 };
 }
 // Writes the field `in` to the file `fname` as one SciDAC field record.
 // When HAVE_LIME is not defined the body is compiled out and the call does nothing.
 template <class T> void writeFile(T& in, std::string const fname){
 #ifdef HAVE_LIME
  // Ref: https://github.com/paboyle/Grid/blob/feature/scidac-wp1/tests/debug/Test_general_coarse_hdcg_phys48.cc#L111
  std::cout << Grid::GridLogMessage << "Writes to: " << fname << std::endl;
  Grid::emptyUserRecord userRecord;
  Grid::ScidacWriter scidacOut(in.Grid()->IsBoss());
  scidacOut.open(fname);
  scidacOut.writeScidacFieldRecord(in, userRecord, 0); // Lexico
  scidacOut.close();
 #endif
 }
 /**
 * Writes the eigenvalues of a Krylov Schur object to a directory.
 *
 * Parameters
 * ----------
 * KrylovSchur<Field> KS
 *    Solver to read from (taken by value); getNk(), getEvals() and
 *    getRitzEstimates() are queried.
 * std::string outDir
 *    Directory to write to. ${outDir}/evals.txt is created or truncated.
 *
 * Each line of evals.txt is `$idx $eval $ritz`; the last line has no trailing
 * newline. Eigenvectors are deliberately NOT written by this version (storage cost).
 * If evals.txt cannot be opened a message is logged and nothing is written.
 */
 template <class Field>
 void writeEigensystem(KrylovSchur<Field> KS, std::string outDir) {
  int Nk = KS.getNk();
  std::cout << GridLogMessage << "Writing output to directory: " << outDir << std::endl;
  // Write evals
  std::string evalPath = outDir + "/evals.txt";
  std::ofstream fEval;
  fEval.open(evalPath);
  if (!fEval.is_open()) {
    // Typically outDir does not exist or is not writable; report it rather than
    // silently streaming into a dead ofstream.
    std::cout << GridLogMessage << "ERROR: could not open " << evalPath << " for writing" << std::endl;
  } else {
    Eigen::VectorXcd evals = KS.getEvals();
    std::vector<RealD> ritz  = KS.getRitzEstimates();
    for (int i = 0; i < Nk; i++) {
      // write eigenvalues and Ritz estimates
      fEval << i << " " << evals(i) << " " << ritz[i];
      if (i < Nk - 1) { fEval << "\n"; }
    }
    fEval.close();
  }
  // Write evecs (TODO: very heavy on storage costs! Don't write them all out)
  // std::vector<Field> evecs = KS.getEvecs();
  // for (int i = 0; i < Nk; i++) {
  //   std::string fName = outDir + "/evec" + std::to_string(i);
  //   writeFile(evecs[i], fName);     // using method from Grid/HMC/ComputeWilsonFlow.cc
  // }
 }
 // Hermitian wrapper around a matrix: Op, AdjOp and HermOp all apply Mdag * M.
 // The un-squared M and Mdag remain reachable through _Op / _AdjOp.
 template<class Matrix,class Field>
 class SquaredLinearOperator : public LinearOperatorBase<Field> {
 public:
  Matrix &_Mat;
  SquaredLinearOperator(Matrix &Mat) : _Mat(Mat) {}
  // Directional / diagonal pieces are not provided by this wrapper.
  void OpDiag   (const Field &in, Field &out)                  { assert(0); }
  void OpDir    (const Field &in, Field &out,int dir,int disp) { assert(0); }
  void OpDirAll (const Field &in, std::vector<Field> &out)     { assert(0); }
  // The operator is its own adjoint, so both entry points forward to HermOp.
  void Op    (const Field &in, Field &out) { HermOp(in, out); }
  void AdjOp (const Field &in, Field &out) { HermOp(in, out); }
  // Plain (non-squared) application of the wrapped matrix.
  void _Op    (const Field &in, Field &out) { _Mat.M(in, out); }
  // Plain (non-squared) application of the wrapped matrix's adjoint.
  void _AdjOp (const Field &in, Field &out) { _Mat.Mdag(in, out); }
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) { assert(0); }
  // out = Mdag * M * in
  void HermOp(const Field &in, Field &out) {
    Field MatIn(in.Grid());
    _Mat.M(in, MatIn);
    _Mat.Mdag(MatIn, out);
  }
 };
 // Linear operator PV^dag * M built from two matrices of the same type.
 // Every application traces its name to std::cout.
 template<class Matrix,class Field>
 class PVdagMLinearOperator : public LinearOperatorBase<Field> {
  Matrix &_Mat;
  Matrix &_PV;
 public:
  PVdagMLinearOperator(Matrix &Mat,Matrix &PV) : _Mat(Mat), _PV(PV) {}
  // Directional / diagonal pieces are not provided.
  void OpDiag   (const Field &in, Field &out)                  { assert(0); }
  void OpDir    (const Field &in, Field &out,int dir,int disp) { assert(0); }
  void OpDirAll (const Field &in, std::vector<Field> &out)     { assert(0); }
  // out = PV^dag * M * in
  void Op(const Field &in, Field &out) {
    std::cout << "Op: PVdag M "<<std::endl;
    Field MatIn(in.Grid());
    _Mat.M(in, MatIn);
    _PV.Mdag(MatIn, out);
  }
  // out = M^dag * PV * in
  void AdjOp(const Field &in, Field &out) {
    std::cout << "AdjOp: Mdag PV "<<std::endl;
    Field PvIn(in.Grid());
    _PV.M(in, PvIn);
    _Mat.Mdag(PvIn, out);
  }
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) { assert(0); }
  // out = AdjOp(Op(in)); goes through Op/AdjOp so their traces are printed too.
  void HermOp(const Field &in, Field &out) {
    std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
    Field OpIn(in.Grid());
    Op(in, OpIn);
    AdjOp(OpIn, out);
  }
 };
 // Linear operator (PV^dag * M + shift) with a real shift.
 //   Op     : out = PV^dag M in + shift * in
 //   AdjOp  : out = M^dag PV in + shift * in   (the shift is real, so it is its own conjugate)
 //   HermOp : out = AdjOp(Op(in))
 // Every application traces its name to std::cout.
 template<class Matrix,class Field>
 class ShiftedPVdagMLinearOperator : public LinearOperatorBase<Field> {
  Matrix &_Mat;
  Matrix &_PV;
  RealD shift;
 public:
  // Initialiser list follows the member declaration order.
  ShiftedPVdagMLinearOperator(RealD _shift,Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV),shift(_shift){};
  // Directional / diagonal pieces are not provided.
  void OpDiag (const Field &in, Field &out) {    assert(0);  }
  void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  }
  void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  };
  void Op     (const Field &in, Field &out){
    std::cout << "Op: PVdag M "<<std::endl;
    Field tmp(in.Grid());
    _Mat.M(in,tmp);
    _PV.Mdag(tmp,out);
    out = out + shift * in;
  }
  void AdjOp     (const Field &in, Field &out){
    std::cout << "AdjOp: Mdag PV "<<std::endl;
    Field tmp(in.Grid());
    // Adjoint reverses the factor order: apply PV to the input first, then M^dag.
    _PV.M(in,tmp);
    _Mat.Mdag(tmp,out);
    out = out + shift * in;
  }
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  }
  void HermOp(const Field &in, Field &out){
    std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
    Field tmp(in.Grid());
    Op(in,tmp);
    AdjOp(tmp,out);
  }
 };
 // Linear operator (PV^dag * M + shift) with a complex shift that can be changed
 // after construction via resetShift().
 //   Op     : out = PV^dag M in + shift * in
 //   AdjOp  : out = M^dag PV in + conj(shift) * in
 //   HermOp : out = AdjOp(Op(in))
 // Every application traces its name to std::cout.
 template<class Matrix, class Field>
 class ShiftedComplexPVdagMLinearOperator : public LinearOperatorBase<Field> {
  Matrix &_Mat;
  Matrix &_PV;
  ComplexD shift;
 public:
  // Initialiser list follows the member declaration order.
  ShiftedComplexPVdagMLinearOperator(ComplexD _shift,Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV),shift(_shift){};
  // Directional / diagonal pieces are not provided.
  void OpDiag (const Field &in, Field &out) {    assert(0);  }
  void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  }
  void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  };
  void Op     (const Field &in, Field &out){
    std::cout << "Op: PVdag M "<<std::endl;
    Field tmp(in.Grid());
    _Mat.M(in,tmp);
    _PV.Mdag(tmp,out);
    out = out + shift * in;
  }
  void AdjOp     (const Field &in, Field &out){
    std::cout << "AdjOp: Mdag PV "<<std::endl;
    Field tmp(in.Grid());
    // Adjoint reverses the factor order: apply PV to the input first, then M^dag.
    _PV.M(in,tmp);
    _Mat.Mdag(tmp,out);
    // The adjoint of (shift * identity) is conj(shift) * identity.
    ComplexD shiftConj(shift.real(), -shift.imag());
    out = out + shiftConj * in;
  }
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  }
  void HermOp(const Field &in, Field &out){
    std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
    Field tmp(in.Grid());
    Op(in,tmp);
    AdjOp(tmp,out);
  }
  // Replace the shift used by subsequent Op/AdjOp/HermOp calls.
  void resetShift(ComplexD newShift) {
    shift = newShift;
  }
 };
 // NOTE: everything between "#if 0" and "#endif" is compiled out; it is kept for reference.
 // MGPreconditioner applies one two-level cycle to `in`, writing the result to `out`:
 //   pre-smooth -> residual -> project to coarse grid -> coarse solve ->
 //   promote and add -> residual -> post-smooth and add.
 // Each stage is timed with usecond() and the elapsed time is logged in ms.
 #if 0
 template<class Fobj,class CComplex,int nbasis>
 class MGPreconditioner : public LinearFunction< Lattice<Fobj> > {
 public:
  using LinearFunction<Lattice<Fobj> >::operator();
  typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
  typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField    FineField;
  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
  typedef LinearOperatorBase<FineField>                            FineOperator;
  typedef LinearFunction    <FineField>                            FineSmoother;
  typedef LinearOperatorBase<CoarseVector>                         CoarseOperator;
  typedef LinearFunction    <CoarseVector>                         CoarseSolver;
  // All collaborators are held by reference; the caller keeps ownership.
  Aggregates     & _Aggregates;
  FineOperator   & _FineOperator;
  FineSmoother   & _PreSmoother;
  FineSmoother   & _PostSmoother;
  CoarseOperator & _CoarseOperator;
  CoarseSolver   & _CoarseSolve;
  // Stored level tag (initialised to 1); it is not read inside operator() below.
  int    level;  void Level(int lv) {level = lv; };
  MGPreconditioner(Aggregates &Agg,
 		   FineOperator &Fine,
 		   FineSmoother &PreSmoother,
 		   FineSmoother &PostSmoother,
 		   CoarseOperator &CoarseOperator_,
 		   CoarseSolver &CoarseSolve_)
    : _Aggregates(Agg),
      _FineOperator(Fine),
      _PreSmoother(PreSmoother),
      _PostSmoother(PostSmoother),
      _CoarseOperator(CoarseOperator_),
      _CoarseSolve(CoarseSolve_),
      level(1)  {  }
  // Apply the preconditioner: `in` is the fine-grid source, `out` receives the result.
  virtual void operator()(const FineField &in, FineField & out) 
  {
    GridBase *CoarseGrid = _Aggregates.CoarseGrid;
    //    auto CoarseGrid = _CoarseOperator.Grid();
    CoarseVector Csrc(CoarseGrid);
    CoarseVector Csol(CoarseGrid);
    FineField vec1(in.Grid());
    FineField vec2(in.Grid());
    std::cout<<GridLogMessage << "Calling PreSmoother " <<std::endl;
    //    std::cout<<GridLogMessage << "Calling PreSmoother input residual "<<norm2(in) <<std::endl;
    double t;
    // Fine Smoother
    //    out = in;
    // The smoother starts from a zero guess.
    out = Zero();
    t=-usecond();
    _PreSmoother(in,out);
    t+=usecond();
    std::cout<<GridLogMessage << "PreSmoother took "<< t/1000.0<< "ms" <<std::endl;
    // Update the residual
    // presumably sub(r,a,b) computes r = a - b, i.e. vec1 = in - Op(out) -- TODO confirm
    _FineOperator.Op(out,vec1);  sub(vec1, in ,vec1);   
    //    std::cout<<GridLogMessage <<"Residual-1 now " <<norm2(vec1)<<std::endl;
    // Fine to Coarse 
    t=-usecond();
    _Aggregates.ProjectToSubspace  (Csrc,vec1);
    t+=usecond();
    std::cout<<GridLogMessage << "Project to coarse took "<< t/1000.0<< "ms" <<std::endl;
    // Coarse correction
    t=-usecond();
    Csol = Zero();
    _CoarseSolve(Csrc,Csol);
    //Csol=Zero();
    t+=usecond();
    std::cout<<GridLogMessage << "Coarse solve took "<< t/1000.0<< "ms" <<std::endl;
    // Coarse to Fine
    t=-usecond();  
    //    _CoarseOperator.PromoteFromSubspace(_Aggregates,Csol,vec1);
    // vec1 is reused here to hold the promoted coarse solution, which is added to out.
    _Aggregates.PromoteFromSubspace(Csol,vec1); 
    add(out,out,vec1);
    t+=usecond();
    std::cout<<GridLogMessage << "Promote to this level took "<< t/1000.0<< "ms" <<std::endl;
    // Residual
    _FineOperator.Op(out,vec1);  sub(vec1 ,in , vec1);  
    //    std::cout<<GridLogMessage <<"Residual-2 now " <<norm2(vec1)<<std::endl;
    // Fine Smoother
    t=-usecond();
    //    vec2=vec1;
    // Post-smooth on the new residual from a zero guess, then add the correction to out.
    vec2=Zero();
    _PostSmoother(vec1,vec2);
    t+=usecond();
    std::cout<<GridLogMessage << "PostSmoother took "<< t/1000.0<< "ms" <<std::endl;
    add( out,out,vec2);
    std::cout<<GridLogMessage << "Done " <<std::endl;
  }
 };
 #endif
 int main (int argc, char ** argv)
 {
  Grid_init(&argc,&argv);
  // Usage : $ ./Example_spec_kryschur <Nm> <Nk> <maaxiter> <Nstop> <inFile> <outDir>
  std::string NmStr      = argv[1];
  std::string NkStr      = argv[2];
  std::string maxIterStr = argv[3];
  std::string NstopStr   = argv[4];
  std::string file       = argv[5];
  std::string outDir     = argv[6];
  RitzFilter RF;
  if (argc == 8) {
    std::string rf       = argv[7];
    RF = selectRitzFilter(rf);
  } else {
    RF = EvalReSmall;
  }
  std::cout << "Sorting eigenvalues using " << rfToString(RF) << std::endl;
  //const int Ls=16;
  const int Ls = 8;
 //   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
  //std::vector<int> lat_size {16, 16, 16, 32};
  std::vector<int> lat_size {8, 8, 8, 8};
  std::cout << "Lattice size: " << lat_size << std::endl;
  GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(lat_size, 
 								          GridDefaultSimd(Nd,vComplex::Nsimd()),
 								          GridDefaultMpi());
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
  std::vector<int> seeds4({1,2,3,4});
  std::vector<int> seeds5({5,6,7,8});
  GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5);
  GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4);
  LatticeFermion    src(FGrid); random(RNG5,src);
  LatticeGaugeField Umu(UGrid);
  FieldMetaData header;
  NerscIO::readConfiguration(Umu,header,file);
  // RealD mass=0.01;
  RealD mass=0.001;
  RealD M5=1.8;
  DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
  DomainWallFermionD Dpv(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,1.0,M5);
  std::cout<<GridLogMessage<<std::endl;
  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
  std::cout<<GridLogMessage<<std::endl;
  typedef PVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> PVdagM_t;
  typedef ShiftedPVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> ShiftedPVdagM_t;
  typedef ShiftedComplexPVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> ShiftedComplexPVdagM_t;
  PVdagM_t PVdagM(Ddwf, Dpv);
  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);
  SquaredLinearOperator<DomainWallFermionD, LatticeFermionD> Dsq (Ddwf);
  NonHermitianLinearOperator<DomainWallFermionD, LatticeFermionD> DLinOp (Ddwf);
  int Nm = std::stoi(NmStr);
  int Nk = std::stoi(NkStr);
  int maxIter = std::stoi(maxIterStr);
  int Nstop = std::stoi(NstopStr);
  std::cout << GridLogMessage << "Runnning Krylov Schur. Nm = " << Nm << ", Nk = " << Nk << ", maxIter = " << maxIter 
                  << ", Nstop = " << Nstop << std::endl;
  KrylovSchur KrySchur (PVdagM, FGrid, 1e-8, RF);      // use preconditioned PV^\dag D_{dwf}
  // KrylovSchur KrySchur (DLinOp, FGrid, 1e-8, RF);         // use D_{dwf}
  KrySchur(src, maxIter, Nm, Nk, Nstop);
  std::cout<<GridLogMessage << "*******************************************" << std::endl;
  std::cout<<GridLogMessage << "***************** RESULTS *****************" << std::endl;
  std::cout<<GridLogMessage << "*******************************************" << std::endl;
  std::cout << GridLogMessage << "Krylov Schur eigenvalues: " << std::endl << KrySchur.getEvals() << std::endl;
  writeEigensystem(KrySchur, outDir);
  std::cout<<GridLogMessage<<std::endl;
  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
  std::cout<<GridLogMessage<<std::endl;
  std::cout<<GridLogMessage << "Done "<< std::endl;
  Grid_finalize();
  return 0;
 }
@@ -1,383 +0,0 @@
 /*************************************************************************************
    Script for studying the Wilson eigenvectors resulting from the Krylov-Schur process. 
    Usage : 
      $ ./Example_spec_kryschur <Nm> <Nk> <maxiter> <Nstop> <inFile> <outDir> <?rf>
      Nm = Maximum size of approximation subspace.
      Nk = Size of truncation subspace
      maxiter = Maximum number of iterations.
      Nstop   = Stop when Nstop eigenvalues have converged. 
      inFile  = Gauge configuration to read in.
      outDir  = Directory to write output to.
      rf      = (Optional) RitzFilter to sort with. Takes in any string in 
                  {EvalNormSmall, EvalNormLarge, EvalReSmall, EvalReLarge, EvalImSmall, EvalImLarge}
    Output:
      ${outDir}/evals.txt  = Contains all eigenvalues. Each line is formatted as `$idx $eval $ritz`, where:
                              - $idx is the index of the eigenvalue.
                              - $eval is the eigenvalue, formatted as "(re,im)".
                              - $ritz is the Ritz estimate of the eigenvalue (deviation from being a true eigenvalue)
      ${outDir}/evec${idx} = Eigenvector $idx written out in SCIDAC format (if LIME is enabled).
    Grid physics library, www.github.com/paboyle/Grid 
    Source file: ./tests/Test_padded_cell.cc
    Copyright (C) 2023
    Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    Author: Patrick Oare <poare@bnl.edu>
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
 #include <cstdlib>
 #include <Grid/Grid.h>
 #include <Grid/lattice/PaddedCell.h>
 #include <Grid/stencil/GeneralLocalStencil.h>
 #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
 #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h>
 #include <Grid/algorithms/iterative/BiCGSTAB.h>
 #include <Grid/parallelIO/IldgIOtypes.h>
 #include <Grid/parallelIO/IldgIO.h>
 using namespace std;
 using namespace Grid;
 // Writes the field `in` to the file `fname` as one SciDAC field record.
 // When HAVE_LIME is not defined the body is compiled out and the call does nothing.
 template <class T> void writeFile(T& in, std::string const fname){
  #ifdef HAVE_LIME
    // Ref: https://github.com/paboyle/Grid/blob/feature/scidac-wp1/tests/debug/Test_general_coarse_hdcg_phys48.cc#L111
    std::cout << Grid::GridLogMessage << "Writes to: " << fname << std::endl;
    Grid::emptyUserRecord userRecord;
    Grid::ScidacWriter scidacOut(in.Grid()->IsBoss());
    scidacOut.open(fname);
    scidacOut.writeScidacFieldRecord(in, userRecord, 0); // Lexico
    scidacOut.close();
  #endif
 }
 // Fills the field `out` from one SciDAC field record stored in the file `fname`.
 // When HAVE_LIME is not defined the body is compiled out and `out` is left untouched.
 template <class T> void readFile(T& out, std::string const fname){
  #ifdef HAVE_LIME
    // Ref: https://github.com/paboyle/Grid/blob/feature/scidac-wp1/tests/debug/Test_general_coarse_hdcg_phys48.cc#L111
    std::cout << Grid::GridLogMessage << "Reads at: " << fname << std::endl;
    Grid::emptyUserRecord userRecord;
    // The reader is default-constructed (no IsBoss() argument, unlike the writer).
    Grid::ScidacReader scidacIn;
    scidacIn.open(fname);
    scidacIn.readScidacFieldRecord(out, userRecord);
    scidacIn.close();
  #endif
 }
 /**
 * Writes the eigensystem of a Krylov Schur object to a directory. 
 * 
 * Parameters
 * ----------
 * KrylovSchur<Field> KS
 *    Solver to read from (taken by value); getNk(), getEvals(),
 *    getRitzEstimates() and getEvecs() are queried.
 * std::string outDir
 *    Directory to write to. 
 *
 * Output
 * ------
 * ${outDir}/evals.txt   one line `$idx $eval $ritz` per eigenvalue (no trailing newline).
 * ${outDir}/evec${idx}  one file per eigenvector, written through writeFile().
 *
 * If evals.txt cannot be opened a message is logged and the eigenvalue table is
 * skipped; the eigenvector files are still attempted.
 */
 template <class Field>
 void writeEigensystem(KrylovSchur<Field> KS, std::string outDir) {
  int Nk = KS.getNk();
  std::cout << GridLogMessage << "Writing output to directory: " << outDir << std::endl;
  // Write evals
  std::string evalPath = outDir + "/evals.txt";
  std::ofstream fEval;
  fEval.open(evalPath);
  if (!fEval.is_open()) {
    // Typically outDir does not exist or is not writable; report it rather than
    // silently streaming into a dead ofstream.
    std::cout << GridLogMessage << "ERROR: could not open " << evalPath << " for writing" << std::endl;
  } else {
    Eigen::VectorXcd evals = KS.getEvals();
    std::vector<RealD> ritz  = KS.getRitzEstimates();
    for (int i = 0; i < Nk; i++) {
      // write eigenvalues and Ritz estimates
      fEval << i << " " << evals(i) << " " << ritz[i];
      if (i < Nk - 1) { fEval << "\n"; }
    }
    fEval.close();
  }
  // Write evecs
  std::vector<Field> evecs = KS.getEvecs();
  // Write at most Nk vectors, and never index past what the solver returned.
  int Nevecs = Nk;
  if (static_cast<int>(evecs.size()) < Nevecs) {
    Nevecs = static_cast<int>(evecs.size());
  }
  for (int i = 0; i < Nevecs; i++) {
    std::string fName = outDir + "/evec" + std::to_string(i);
    writeFile(evecs[i], fName);     // using method from Grid/HMC/ComputeWilsonFlow.cc
  }
 }
 // Hermitian wrapper around a matrix: Op, AdjOp and HermOp all apply Mdag * M.
 // The un-squared M and Mdag remain reachable through _Op / _AdjOp.
 template<class Matrix,class Field>
 class SquaredLinearOperator : public LinearOperatorBase<Field> {
 public:
  Matrix &_Mat;
  SquaredLinearOperator(Matrix &Mat) : _Mat(Mat) {}
  // Directional / diagonal pieces are not provided by this wrapper.
  void OpDiag   (const Field &in, Field &out)                  { assert(0); }
  void OpDir    (const Field &in, Field &out,int dir,int disp) { assert(0); }
  void OpDirAll (const Field &in, std::vector<Field> &out)     { assert(0); }
  // The operator is its own adjoint, so both entry points forward to HermOp.
  void Op    (const Field &in, Field &out) { HermOp(in, out); }
  void AdjOp (const Field &in, Field &out) { HermOp(in, out); }
  // Plain (non-squared) application of the wrapped matrix.
  void _Op    (const Field &in, Field &out) { _Mat.M(in, out); }
  // Plain (non-squared) application of the wrapped matrix's adjoint.
  void _AdjOp (const Field &in, Field &out) { _Mat.Mdag(in, out); }
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) { assert(0); }
  // out = Mdag * M * in
  void HermOp(const Field &in, Field &out) {
    Field MatIn(in.Grid());
    _Mat.M(in, MatIn);
    _Mat.Mdag(MatIn, out);
  }
 };
 // Linear operator PV^dag * M built from two matrices of the same type.
 // Every application traces its name to std::cout.
 template<class Matrix,class Field>
 class PVdagMLinearOperator : public LinearOperatorBase<Field> {
  Matrix &_Mat;
  Matrix &_PV;
 public:
  PVdagMLinearOperator(Matrix &Mat,Matrix &PV) : _Mat(Mat), _PV(PV) {}
  // Directional / diagonal pieces are not provided.
  void OpDiag   (const Field &in, Field &out)                  { assert(0); }
  void OpDir    (const Field &in, Field &out,int dir,int disp) { assert(0); }
  void OpDirAll (const Field &in, std::vector<Field> &out)     { assert(0); }
  // out = PV^dag * M * in
  void Op(const Field &in, Field &out) {
    std::cout << "Op: PVdag M "<<std::endl;
    Field MatIn(in.Grid());
    _Mat.M(in, MatIn);
    _PV.Mdag(MatIn, out);
  }
  // out = M^dag * PV * in
  void AdjOp(const Field &in, Field &out) {
    std::cout << "AdjOp: Mdag PV "<<std::endl;
    Field PvIn(in.Grid());
    _PV.M(in, PvIn);
    _Mat.Mdag(PvIn, out);
  }
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) { assert(0); }
  // out = AdjOp(Op(in)); goes through Op/AdjOp so their traces are printed too.
  void HermOp(const Field &in, Field &out) {
    std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
    Field OpIn(in.Grid());
    Op(in, OpIn);
    AdjOp(OpIn, out);
  }
 };
 // Linear operator (PV^dag * M + shift) with a real shift.
 //   Op     : out = PV^dag M in + shift * in
 //   AdjOp  : out = M^dag PV in + shift * in   (the shift is real, so it is its own conjugate)
 //   HermOp : out = AdjOp(Op(in))
 // Every application traces its name to std::cout.
 template<class Matrix,class Field>
 class ShiftedPVdagMLinearOperator : public LinearOperatorBase<Field> {
  Matrix &_Mat;
  Matrix &_PV;
  RealD shift;
 public:
  // Initialiser list follows the member declaration order.
  ShiftedPVdagMLinearOperator(RealD _shift,Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV),shift(_shift){};
  // Directional / diagonal pieces are not provided.
  void OpDiag (const Field &in, Field &out) {    assert(0);  }
  void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  }
  void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  };
  void Op     (const Field &in, Field &out){
    std::cout << "Op: PVdag M "<<std::endl;
    Field tmp(in.Grid());
    _Mat.M(in,tmp);
    _PV.Mdag(tmp,out);
    out = out + shift * in;
  }
  void AdjOp     (const Field &in, Field &out){
    std::cout << "AdjOp: Mdag PV "<<std::endl;
    Field tmp(in.Grid());
    // Adjoint reverses the factor order: apply PV to the input first, then M^dag.
    _PV.M(in,tmp);
    _Mat.Mdag(tmp,out);
    out = out + shift * in;
  }
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  }
  void HermOp(const Field &in, Field &out){
    std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
    Field tmp(in.Grid());
    Op(in,tmp);
    AdjOp(tmp,out);
  }
 };
 // Linear operator (PV^dag * M + shift) with a complex shift that can be changed
 // after construction via resetShift().
 //   Op     : out = PV^dag M in + shift * in
 //   AdjOp  : out = M^dag PV in + conj(shift) * in
 //   HermOp : out = AdjOp(Op(in))
 // Every application traces its name to std::cout.
 template<class Matrix, class Field>
 class ShiftedComplexPVdagMLinearOperator : public LinearOperatorBase<Field> {
  Matrix &_Mat;
  Matrix &_PV;
  ComplexD shift;
 public:
  // Initialiser list follows the member declaration order.
  ShiftedComplexPVdagMLinearOperator(ComplexD _shift,Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV),shift(_shift){};
  // Directional / diagonal pieces are not provided.
  void OpDiag (const Field &in, Field &out) {    assert(0);  }
  void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  }
  void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  };
  void Op     (const Field &in, Field &out){
    std::cout << "Op: PVdag M "<<std::endl;
    Field tmp(in.Grid());
    _Mat.M(in,tmp);
    _PV.Mdag(tmp,out);
    out = out + shift * in;
  }
  void AdjOp     (const Field &in, Field &out){
    std::cout << "AdjOp: Mdag PV "<<std::endl;
    Field tmp(in.Grid());
    // Adjoint reverses the factor order: apply PV to the input first, then M^dag.
    _PV.M(in,tmp);
    _Mat.Mdag(tmp,out);
    // The adjoint of (shift * identity) is conj(shift) * identity.
    ComplexD shiftConj(shift.real(), -shift.imag());
    out = out + shiftConj * in;
  }
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  }
  void HermOp(const Field &in, Field &out){
    std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
    Field tmp(in.Grid());
    Op(in,tmp);
    AdjOp(tmp,out);
  }
  // Replace the shift used by subsequent Op/AdjOp/HermOp calls.
  void resetShift(ComplexD newShift) {
    shift = newShift;
  }
 };
 int main (int argc, char ** argv)
 {
  Grid_init(&argc,&argv);
  // Usage : $ ./Example_wilson_evecs ${inFile}
  std::string file       = argv[1];
  const int Ls=16;
 //   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
  //std::vector<int> lat_size {16, 16, 16, 32};
  std::vector<int> lat_size {32, 32, 32, 32};
  std::cout << "Lattice size: " << lat_size << std::endl;
  GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(lat_size, 
 								          GridDefaultSimd(Nd,vComplex::Nsimd()),
 								          GridDefaultMpi());
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  // GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
  // GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
  GridCartesian * FGrid = UGrid;
  GridRedBlackCartesian * FrbGrid = UrbGrid;
  std::vector<int> seeds4({1,2,3,4});
  GridParallelRNG RNG4(UGrid);
  RNG4.SeedFixedIntegers(seeds4);
  LatticeFermion    src(FGrid); random(RNG4, src);
  LatticeGaugeField Umu(UGrid);
  FieldMetaData header;
  NerscIO::readConfiguration(Umu, header, file);
  std::cout << GridLogMessage << "Loaded configuration" << std::endl;
  // RealD mass = 0.01;
  RealD M5 = 1.8;
  // Wilson mass
  RealD mass = -1.6;
  std::cout << GridLogMessage << "masses specified" << std::endl;
  std::vector<Complex> boundary = {1,1,1,-1};
  WilsonFermionD::ImplParams Params(boundary);
  // DomainWallFermionD Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5);
  // NonHermitianLinearOperator<DomainWallFermionD, LatticeFermionD> DLinOp (Ddwf);
  // WilsonFermionD Dwilson(Umu, *FGrid, *FrbGrid, mass);
  WilsonFermionD Dwilson(Umu, *UGrid, *UrbGrid, mass, Params);
  NonHermitianLinearOperator<WilsonFermionD, LatticeFermionD> DLinOp (Dwilson);
  std::cout << GridLogMessage << "Dirac operator defined" << std::endl;
  std::string eigenPath = "/home/poare/lqcd/multigrid/spectra/32cube-rho0.124-tau4/U_smr_3.000000/Nm72_Nk24_8111835.aurora-pbs-0001.hostmgmt.cm.aurora.alcf.anl.gov/";
  std::cout << GridLogMessage << "Loading eigenvalues" << std::endl;
  std::ifstream evalFile(eigenPath + "evals.txt");
  std::string str;
  std::vector<ComplexD> evals;
  while (std::getline(evalFile, str)) {
      std::cout << GridLogMessage << "Reading line: " << str << std::endl;
      int i1 = str.find("(") + 1;
      int i2 = str.find(",") + 1;
      int i3 = str.find(")");
      std::cout << "i1,i2,i3 = " << i1 << "," << i2 << "," << i3 << std::endl;
      std::string reStr = str.substr(i1, i2 - i1);
      std::string imStr = str.substr(i2, i3 - i2);
      std::cout << GridLogMessage << "Parsed re = " << reStr << " and im = " << imStr << std::endl;
      // ComplexD z (std::stof(reStr), std::stof(imStr));
      ComplexD z (std::stod(reStr), std::stod(imStr));
      evals.push_back(z);
  }
  std::cout << GridLogMessage << "Eigenvalues: " << evals << std::endl;
  int Nevecs = 24;
  std::vector<LatticeFermion> evecs;
  LatticeFermion evec (FGrid);
  for (int i = 0; i < Nevecs; i++) {
    std::string evecPath = eigenPath + "evec" + std::to_string(i);
    readFile(evec, evecPath);
    evecs.push_back(evec);
  }
  std::cout << GridLogMessage << "Evecs loaded" << std::endl;
  // Compute < evec | D - \lambda | evec >
  std::cout << GridLogMessage << "Testing eigenvectors" << std::endl;
  LatticeFermion Devec (FGrid);
  ComplexD ritz;
  for (int i = 0; i < Nevecs; i++) {
    Devec = Zero();
    DLinOp.Op(evecs[i], Devec);
    ritz = std::sqrt(norm2(Devec - evals[i] * evecs[i]));
    std::cout << GridLogMessage << "i = " << i << ", || (D - lambda) |vi> || = " << ritz << std::endl;
  }
  // Eigen::MatrixXcd Dw_evecs;
  // Dw_evecs = Eigen::MatrixXcd::Zero(Nevecs, Nevecs);
  // for (int i = 0; i < Nevecs; i++) {
  //   Linop.Op(evecs[i], Devec);
  //   for (int j = 0; j < Nevecs; j++) {
  //   }
  // }
  std::cout<<GridLogMessage<<std::endl;
  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
  std::cout<<GridLogMessage<<std::endl;
  std::cout<<GridLogMessage << "Done "<< std::endl;
  Grid_finalize();
  return 0;
 }
@@ -1,374 +0,0 @@
 /*************************************************************************************
    Runs the Krylov-Schur algorithm on a Wilson fermion operator to determine part of its spectrum. 
    TODO rename this file: really is running the topology change jobs on Aurora. 
    Usage : 
      $ ./Example_spec_kryschur <Nm> <Nk> <maxiter> <Nstop> <inFile> <outDir> <?rf>
      Nm = Maximum size of approximation subspace.
      Nk = Size of truncation subspace
      maxiter = Maximum number of iterations.
      Nstop   = Stop when Nstop eigenvalues have converged. 
      inFile  = Gauge configuration to read in.
      outDir  = Directory to write output to.
      rf      = (Optional) RitzFilter to sort with. Takes in any string in 
                  {EvalNormSmall, EvalNormLarge, EvalReSmall, EvalReLarge, EvalImSmall, EvalImLarge}
    Output:
      ${outDir}/evals.txt  = Contains all eigenvalues. Each line is formatted as `$idx $eval $ritz`, where:
                              - $idx is the index of the eigenvalue.
                              - $eval is the eigenvalue, formatted as "(re,im)".
                              - $ritz is the Ritz estimate of the eigenvalue (deviation from being a true eigenvalue)
      ${outDir}/evec${idx} = Eigenvector $idx written out in SCIDAC format (if LIME is enabled).
    Grid physics library, www.github.com/paboyle/Grid 
    Source file: ./tests/Test_padded_cell.cc
    Copyright (C) 2023
    Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    Author: Patrick Oare <poare@bnl.edu>
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
 #include <cstdlib>
 #include <Grid/Grid.h>
 #include <Grid/lattice/PaddedCell.h>
 #include <Grid/stencil/GeneralLocalStencil.h>
 #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
 #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h>
 #include <Grid/algorithms/iterative/BiCGSTAB.h>
 #include <Grid/parallelIO/IldgIOtypes.h>
 #include <Grid/parallelIO/IldgIO.h>
 using namespace std;
 using namespace Grid;
 // Writes the field `in` to the file `fname` through Grid's ScidacWriter.
 // The whole body is compiled out (the call does nothing) unless HAVE_LIME is defined.
 template <class T>
 void writeFile(T& in, std::string const fname)
 {
 #ifdef HAVE_LIME
   // Pattern borrowed from:
   // https://github.com/paboyle/Grid/blob/feature/scidac-wp1/tests/debug/Test_general_coarse_hdcg_phys48.cc#L111
   std::cout << Grid::GridLogMessage << "Writes to: " << fname << std::endl;
   Grid::emptyUserRecord noUserData;
   Grid::ScidacWriter scidacOut(in.Grid()->IsBoss());
   scidacOut.open(fname);
   scidacOut.writeScidacFieldRecord(in, noUserData, 0); // Lexico
   scidacOut.close();
 #endif
 }
 /**
 * Dumps the eigensystem held by a Krylov Schur object into a directory.
 *
 * The first getNk() eigenvalues are written to `<outDir>/evals.txt`, one per line
 * as `<index> <eigenvalue> <Ritz estimate>` (no newline after the final line), and
 * the first getNk() eigenvectors are written to `<outDir>/evec<index>` via writeFile().
 *
 * Parameters
 * ----------
 * KrylovSchur<Field> KS
 *    Solver whose eigenvalues, Ritz estimates and eigenvectors are written out.
 * std::string outDir
 *    Directory to write to.
 */
 template <class Field>
 void writeEigensystem(KrylovSchur<Field> KS, std::string outDir) {
   const int nKeep = KS.getNk();
   std::cout << GridLogMessage << "Writing output to directory: " << outDir << std::endl;

   // Eigenvalues together with their Ritz estimates.
   std::ofstream evalStream;
   evalStream.open(outDir + "/evals.txt");
   Eigen::VectorXcd lambda = KS.getEvals();
   std::vector<RealD> ritzEst = KS.getRitzEstimates();
   for (int idx = 0; idx < nKeep; ++idx) {
     // Separator goes between lines only, so the file does not end in a newline.
     if (idx > 0) { evalStream << "\n"; }
     evalStream << idx << " " << lambda(idx) << " " << ritzEst[idx];
   }
   evalStream.close();

   // Eigenvectors, one file per vector (same helper as Grid/HMC/ComputeWilsonFlow.cc).
   std::vector<Field> vecs = KS.getEvecs();
   for (int idx = 0; idx < nKeep; ++idx) {
     writeFile(vecs[idx], outDir + "/evec" + std::to_string(idx));
   }
 }
 // Hermitian wrapper around a matrix M: Op, AdjOp and HermOp all apply Mdag M,
 // while _Op / _AdjOp expose the bare M and Mdag actions.
 template<class Matrix,class Field>
 class SquaredLinearOperator : public LinearOperatorBase<Field> {
 public:
   Matrix &_Mat;

   SquaredLinearOperator(Matrix &Mat) : _Mat(Mat) {}

   // Entry points this wrapper does not implement.
   void OpDiag (const Field &in, Field &out)                  { assert(0); }
   void OpDir  (const Field &in, Field &out,int dir,int disp) { assert(0); }
   void OpDirAll  (const Field &in, std::vector<Field> &out)  { assert(0); }
   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) { assert(0); }

   // Bare action of the wrapped matrix: out = M in.
   void _Op     (const Field &in, Field &out) {
     _Mat.M(in, out);
   }

   // Bare adjoint action of the wrapped matrix: out = Mdag in.
   void _AdjOp     (const Field &in, Field &out) {
     _Mat.Mdag(in, out);
   }

   // out = Mdag M in, using one scratch field on the grid of `in`.
   void HermOp(const Field &in, Field &out) {
     Field scratch(in.Grid());
     _Op(in, scratch);
     _AdjOp(scratch, out);
   }

   // Both the operator and its adjoint are the Hermitian product.
   void Op     (const Field &in, Field &out) {
     HermOp(in, out);
   }
   void AdjOp     (const Field &in, Field &out) {
     HermOp(in, out);
   }
 };
 // Linear operator PVdag M assembled from two matrices of the same type:
 //   Op     : out = PVdag M in
 //   AdjOp  : out = Mdag PV in
 //   HermOp : out = AdjOp(Op(in))
 // Each call logs which operation it performs to std::cout.
 template<class Matrix,class Field>
 class PVdagMLinearOperator : public LinearOperatorBase<Field> {
   Matrix &_Mat;
   Matrix &_PV;
 public:
   PVdagMLinearOperator(Matrix &Mat,Matrix &PV) : _Mat(Mat), _PV(PV) {}

   // Entry points this operator does not implement.
   void OpDiag (const Field &in, Field &out)                  { assert(0); }
   void OpDir  (const Field &in, Field &out,int dir,int disp) { assert(0); }
   void OpDirAll  (const Field &in, std::vector<Field> &out)  { assert(0); }
   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) { assert(0); }

   void Op     (const Field &in, Field &out) {
     std::cout << "Op: PVdag M "<<std::endl;
     Field afterM(in.Grid());
     _Mat.M(in, afterM);
     _PV.Mdag(afterM, out);
   }

   void AdjOp     (const Field &in, Field &out) {
     std::cout << "AdjOp: Mdag PV "<<std::endl;
     Field afterPV(in.Grid());
     _PV.M(in, afterPV);
     _Mat.Mdag(afterPV, out);
   }

   void HermOp(const Field &in, Field &out) {
     std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
     // Composed from Op and AdjOp, so their log lines are printed as well.
     Field opResult(in.Grid());
     Op(in, opResult);
     AdjOp(opResult, out);
   }
 };
 // Linear operator PVdag M + shift, with a real shift:
 //   Op     : out = PVdag M in + shift * in
 //   AdjOp  : out = Mdag PV in + shift * in
 //   HermOp : out = AdjOp(Op(in))
 // Each call logs which operation it performs to std::cout.
 template<class Matrix,class Field>
 class ShiftedPVdagMLinearOperator : public LinearOperatorBase<Field> {
   Matrix &_Mat;
   Matrix &_PV;
   RealD shift;
 public:
   // Initialisers are listed in declaration order (the order in which they actually run).
   ShiftedPVdagMLinearOperator(RealD _shift,Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV),shift(_shift){};

   // Entry points this operator does not implement.
   void OpDiag (const Field &in, Field &out) {    assert(0);  }
   void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  }
   void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  };

   void Op     (const Field &in, Field &out){
     std::cout << "Op: PVdag M "<<std::endl;
     Field tmp(in.Grid());
     _Mat.M(in,tmp);
     _PV.Mdag(tmp,out);
     out = out + shift * in;
   }

   void AdjOp     (const Field &in, Field &out){
     std::cout << "AdjOp: Mdag PV "<<std::endl;
     Field tmp(in.Grid());
     // Apply PV first, then Mdag. The previous code fed the uninitialised tmp
     // into _PV.M and discarded the Mdag result, so out never depended on Mdag PV in.
     _PV.M(in,tmp);
     _Mat.Mdag(tmp,out);
     out = out + shift * in;   // shift is real, so it is its own conjugate
   }

   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  }

   void HermOp(const Field &in, Field &out){
     std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
     // Composed from Op and AdjOp, so the shift enters both factors.
     Field tmp(in.Grid());
     Op(in,tmp);
     AdjOp(tmp,out);
   }
 };
 // Linear operator PVdag M + shift, with a complex shift that can be changed later:
 //   Op     : out = PVdag M in + shift * in
 //   AdjOp  : out = Mdag PV in + conj(shift) * in
 //   HermOp : out = AdjOp(Op(in))
 // Each call logs which operation it performs to std::cout.
 template<class Matrix, class Field>
 class ShiftedComplexPVdagMLinearOperator : public LinearOperatorBase<Field> {
   Matrix &_Mat;
   Matrix &_PV;
   ComplexD shift;
 public:
   // Initialisers are listed in declaration order (the order in which they actually run).
   ShiftedComplexPVdagMLinearOperator(ComplexD _shift,Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV),shift(_shift){};

   // Entry points this operator does not implement.
   void OpDiag (const Field &in, Field &out) {    assert(0);  }
   void OpDir  (const Field &in, Field &out,int dir,int disp) {    assert(0);  }
   void OpDirAll  (const Field &in, std::vector<Field> &out){    assert(0);  };

   void Op     (const Field &in, Field &out){
     std::cout << "Op: PVdag M "<<std::endl;
     Field tmp(in.Grid());
     _Mat.M(in,tmp);
     _PV.Mdag(tmp,out);
     out = out + shift * in;
   }

   void AdjOp     (const Field &in, Field &out){
     std::cout << "AdjOp: Mdag PV "<<std::endl;
     Field tmp(in.Grid());
     // Apply PV first, then Mdag. The previous code fed the uninitialised tmp
     // into _PV.M and discarded the Mdag result, so out never depended on Mdag PV in.
     _PV.M(in,tmp);
     _Mat.Mdag(tmp,out);
     // The adjoint of (A + s) is (Adag + conj(s)); using s itself here would make
     // HermOp non-Hermitian whenever the shift has an imaginary part.
     const ComplexD shiftConj(shift.real(), -shift.imag());
     out = out + shiftConj * in;
   }

   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  }

   void HermOp(const Field &in, Field &out){
     std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
     // Composed from Op and AdjOp, so the shift enters both factors.
     Field tmp(in.Grid());
     Op(in,tmp);
     AdjOp(tmp,out);
   }

   // Replaces the shift used by subsequent Op / AdjOp / HermOp calls.
   void resetShift(ComplexD newShift) {
     shift = newShift;
   }
 };
 int main (int argc, char ** argv)
 {
  Grid_init(&argc,&argv);
  // Usage : $ ./Example_spec_kryschur <Nm> <Nk> <maaxiter> <Nstop> <inFile> <outDir>
  std::string NmStr      = argv[1];
  std::string NkStr      = argv[2];
  std::string maxIterStr = argv[3];
  std::string NstopStr   = argv[4];
  std::string file       = argv[5];
  std::string outDir     = argv[6];
  // RitzFilter RF;
  // if (argc == 8) {
  //   std::string rf       = argv[7];
  //   RF = selectRitzFilter(rf);
  // } else {
  //   RF = EvalReSmall;
  // }
  // RitzFilter RF;
  std::string rf       = argv[7];
  RitzFilter RF        = selectRitzFilter(rf);
  std::cout << "Sorting eigenvalues using " << rfToString(RF) << std::endl;
  const int Ls=16;
 //   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
  //std::vector<int> lat_size {16, 16, 16, 32};
  std::vector<int> lat_size {32, 32, 32, 32};
  std::cout << "Lattice size: " << lat_size << std::endl;
  GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(lat_size, 
 								          GridDefaultSimd(Nd,vComplex::Nsimd()),
 								          GridDefaultMpi());
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  // GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
  // GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
  GridCartesian * FGrid = UGrid;
  GridRedBlackCartesian * FrbGrid = UrbGrid;
  std::vector<int> seeds4({1,2,3,4});
  GridParallelRNG RNG4(UGrid);
  RNG4.SeedFixedIntegers(seeds4);
  LatticeFermion    src(FGrid); random(RNG4, src);
  LatticeGaugeField Umu(UGrid);
  FieldMetaData header;
  NerscIO::readConfiguration(Umu, header, file);
  std::cout << GridLogMessage << "Loaded configuration" << std::endl;
  // RealD mass = 0.01;
  RealD M5 = 1.8;
  // Wilson mass
  RealD mass = -1.6;
  std::cout << GridLogMessage << "masses specified" << std::endl;
  std::vector<Complex> boundary = {1,1,1,-1};
  WilsonFermionD::ImplParams Params(boundary);
  // DomainWallFermionD Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5);
  // NonHermitianLinearOperator<DomainWallFermionD, LatticeFermionD> DLinOp (Ddwf);
  // WilsonFermionD Dwilson(Umu, *FGrid, *FrbGrid, mass);
  WilsonFermionD Dwilson(Umu, *UGrid, *UrbGrid, mass, Params);
  NonHermitianLinearOperator<WilsonFermionD, LatticeFermionD> DLinOp (Dwilson);
  std::cout << GridLogMessage << "Dirac operator defined" << std::endl;
  // Define PV^dag D (if we want)
  // DomainWallFermionD Dpv(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, 1.0, M5);
  // typedef PVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> PVdagM_t;
  // PVdagM_t PVdagM(Ddwf, Dpv);
  std::cout<<GridLogMessage<<std::endl;
  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
  std::cout<<GridLogMessage<<std::endl;
  // SquaredLinearOperator<WilsonFermionD, LatticeFermionD> Dsq (DWilson);
  // NonHermitianLinearOperator<WilsonFermionD, LatticeFermionD> DLinOp (DWilson);
  int Nm = std::stoi(NmStr);
  int Nk = std::stoi(NkStr);
  int maxIter = std::stoi(maxIterStr);
  int Nstop = std::stoi(NstopStr);
  std::cout << GridLogMessage << "Runnning Krylov Schur. Nm = " << Nm << ", Nk = " << Nk << ", maxIter = " << maxIter 
                  << ", Nstop = " << Nstop << std::endl;
  // KrylovSchur KrySchur (PVdagM, FGrid, 1e-8, RF);         // use PV^\dag M
  KrylovSchur KrySchur (DLinOp, FGrid, 1e-8, RF);         // use Ddwf
  KrySchur(src, maxIter, Nm, Nk, Nstop);
  std::cout << GridLogMessage << "Checking eigensystem." << std::endl;
  KrySchur.checkRitzEstimate();
  std::cout<<GridLogMessage << "*******************************************" << std::endl;
  std::cout<<GridLogMessage << "***************** RESULTS *****************" << std::endl;
  std::cout<<GridLogMessage << "*******************************************" << std::endl;
  std::cout << GridLogMessage << "Krylov Schur eigenvalues: " << std::endl << KrySchur.getEvals() << std::endl;
  writeEigensystem(KrySchur, outDir);
  std::cout<<GridLogMessage<<std::endl;
  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
  std::cout<<GridLogMessage<<std::endl;
  std::cout<<GridLogMessage << "Done "<< std::endl;
  Grid_finalize();
  return 0;
 }
@@ -1,18 +1,14 @@
 <?xml version="1.0"?>
 <grid>
  <LanczosParameters>
-    <mass>0</mass>
+    <mass>0.00107</mass>
    <mstep>-0.025</mstep>
    <M5>1.8</M5>
    <Ls>48</Ls>
-    <Nstop>5</Nstop>
+    <Nstop>10</Nstop>
-    <Nk>5</Nk>
+    <Nk>15</Nk>
-    <Np>5</Np>
+    <Np>85</Np>
-    <ReadEvec>0</ReadEvec>
+    <ChebyLow>0.003</ChebyLow>
-    <maxIter>10000</maxIter>
+    <ChebyHigh>60</ChebyHigh>
-    <resid>1e-10</resid>
+    <ChebyOrder>201</ChebyOrder>
    <ChebyLow>1</ChebyLow>
    <ChebyHigh>100</ChebyHigh>
    <ChebyOrder>51</ChebyOrder>
  </LanczosParameters>
 </grid>
@@ -32,13 +32,9 @@ directory
 using namespace std;
 using namespace Grid;
-#if 0
+//typedef WilsonFermionD FermionOp;
 typedef DomainWallFermionD FermionOp;
 typedef typename DomainWallFermionD::FermionField FermionField;
 #else
 typedef MobiusFermionD FermionOp;
 typedef typename MobiusFermionD::FermionField FermionField;
 #endif
 template <class T> void writeFile(T& in, std::string const fname){  
 #ifdef HAVE_LIME
@@ -183,14 +179,12 @@ int main(int argc, char** argv) {
  Np=LanParams.Np;
  int Nm = Nk + Np;
  int MaxIt = 100;
  RealD resid = 1.0e-4;
  int MaxIt = 10000;
  RealD resid = 1.0e-5;
  RealD mob_b=1.5;
 //while ( mass > - 5.0){
-//  FermionOp Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+  FermionOp Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
  FermionOp Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.);
  MdagMLinearOperator<FermionOp,FermionField> HermOp(Ddwf); /// <-----
 //  Gamma5HermitianLinearOperator <FermionOp,LatticeFermion> HermOp2(WilsonOperator); /// <-----
  Gamma5R5HermitianLinearOperator<FermionOp, LatticeFermion> G5R5Herm(Ddwf);
@@ -113,9 +113,6 @@ struct LanczosParameters: Serializable {
  GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
 		  		RealD, mass , 
 				RealD, resid,
 				Integer, Nstop,
                                Integer, Nk,
                                Integer, Np,
 	  			RealD, ChebyLow,
 	  			RealD, ChebyHigh,
 	  			Integer, ChebyOrder)
@@ -207,6 +204,7 @@ int main(int argc, char** argv) {
  int Nstop = 5;
  int Nk = 10;
  int Np = 90;
  int Nm = Nk + Np;
  int MaxIt = 10000;
  RealD resid = 1.0e-5;
@@ -228,14 +226,10 @@ int main(int argc, char** argv) {
    XmlWriter HMCwr("LanParams.xml.out");
    write(HMCwr,"LanczosParameters",LanParams);
  }
-  Nstop=LanParams.Nstop;
+
  Nk=LanParams.Nk;
  Np=LanParams.Np;
  mass=LanParams.mass;
  resid=LanParams.resid;
  int Nm = Nk + Np;
 while ( mass > - 5.0){
  FermionOp WilsonOperator(Umu,*FGrid,*FrbGrid,2.+mass);
@@ -1,377 +0,0 @@
 /*************************************************************************************
    Grid physics library, www.github.com/paboyle/Grid 
    Source file: ./tests/lanczos/Test_wilson_bilanczos.cc
    Copyright (C) 2025
 Author: Chulwoo Jung <chulwoo@bnl.gov>
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
 #include <cstdlib>
 #include <Grid/Grid.h>
 #include <Grid/lattice/PaddedCell.h>
 #include <Grid/stencil/GeneralLocalStencil.h>
 #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
 #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h>
 #include <Grid/algorithms/iterative/BiCGSTAB.h>
 using namespace std;
 using namespace Grid;
 namespace Grid {

 // Serializable bundle of the run parameters this test reads from and writes to XML
 // under the "LanczosParameters" node.
 struct LanczosParameters: Serializable {
   GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
                                   RealD, mass ,
                                   RealD, mstep ,
                                   Integer, Nstop,
                                   Integer, Nk,
                                   Integer, Np,
                                   Integer, ReadEvec,
                                   Integer, maxIter,
                                   RealD, resid,
                                   RealD, ChebyLow,
                                   RealD, ChebyHigh,
                                   Integer, ChebyOrder)

   // Default construction only sets mass; every other member is expected to be
   // filled in by a subsequent read().
   LanczosParameters() {
     ////////////////////////////// Default values
       mass = 0;
     /////////////////////////////////
   }

   // Constructs the parameters directly from a reader (see initialize()).
   template <class ReaderClass >
   LanczosParameters(Reader<ReaderClass> & TheReader){
     initialize(TheReader);
   }

   // Fills this object from the "LanczosParameters" node of the reader.
   // This used to read the "HMC" node, a leftover from the HMC parameter struct this
   // was copied from; the program reads and writes this struct as "LanczosParameters".
   template < class ReaderClass >
   void initialize(Reader<ReaderClass> &TheReader){
     read(TheReader, "LanczosParameters", *this);
   }

   // Currently prints nothing: the body is a disabled copy of the HMC parameter dump.
   void print_parameters() const {
 //    std::cout << GridLogMessage << "[HMC parameters] Trajectories            : " << Trajectories << "\n";
 //    std::cout << GridLogMessage << "[HMC parameters] Start trajectory        : " << StartTrajectory << "\n";
 //    std::cout << GridLogMessage << "[HMC parameters] Metropolis test (on/off): " << std::boolalpha << MetropolisTest << "\n";
 //    std::cout << GridLogMessage << "[HMC parameters] Thermalization trajs    : " << NoMetropolisUntil << "\n";
 //    std::cout << GridLogMessage << "[HMC parameters] Starting type           : " << StartingType << "\n";
 //    MD.print_parameters();
   }

 };

 }
 template <class T> void writeFile(T& in, std::string const fname){
 #if 1
  // Ref: https://github.com/paboyle/Grid/blob/feature/scidac-wp1/tests/debug/Test_general_coarse_hdcg_phys48.cc#L111
  std::cout << Grid::GridLogMessage << "Writes to: " << fname << std::endl;
  Grid::emptyUserRecord record;
  Grid::ScidacWriter WR(in.Grid()->IsBoss());
  WR.open(fname);
  WR.writeScidacFieldRecord(in,record,0);
  WR.close();
 #endif
  // What is the appropriate way to throw error?
 }
 // Shorthand for the Wilson operator and its fermion field type used throughout this test.
 using WilsonOp     = WilsonFermionD;
 using FermionField = typename WilsonFermionD::FermionField;
 // Presents the inverse of a matrix M as a linear operator: Op() solves M out = in by
 // running conjugate gradient on the normal equations Mdag M out = Mdag in.
 template<class Matrix,class Field>
 class InvertNonHermitianLinearOperator : public LinearOperatorBase<Field> {
   Matrix &_Mat;
   RealD _stp;   // passed as the first ConjugateGradient constructor argument in Op()
 public:
   InvertNonHermitianLinearOperator(Matrix &Mat,RealD stp=1e-8): _Mat(Mat),_stp(stp){};

   // Support for coarsening to a multigrid is not implemented for the inverse.
   void OpDiag (const Field &in, Field &out) {
 //    _Mat.Mdiag(in,out);
 //    out = out + shift*in;
     assert(0);
   }
   void OpDir  (const Field &in, Field &out,int dir,int disp) {
 //    _Mat.Mdir(in,out,dir,disp);
     assert(0);
   }
   void OpDirAll  (const Field &in, std::vector<Field> &out){
 //    _Mat.MdirAll(in,out);
     assert(0);
   };

   // out = M^{-1} in, obtained as (Mdag M)^{-1} (Mdag in) with at most 10000 CG iterations.
   void Op     (const Field &in, Field &out){
     Field tmp(in.Grid());
     _Mat.Mdag(in,tmp);
     MdagMLinearOperator<Matrix,Field> HermOp(_Mat);
     ConjugateGradient<Field> CG(_stp,10000);
     // CG treats its solution argument as the starting guess, and callers may hand in a
     // freshly constructed (uninitialised) field. Start from zero so the result depends
     // only on `in`.
     out = Zero();
     CG(HermOp,tmp,out);
   }

   // NOTE(review): this applies Mdag, which is not the adjoint of M^{-1}
   // (that would be M (Mdag M)^{-1}). Left as-is; confirm whether any caller uses AdjOp.
   void AdjOp     (const Field &in, Field &out){
     _Mat.Mdag(in,out);
 //    out = out + shift * in;
   }

   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
     assert(0);
   }
   void HermOp(const Field &in, Field &out){
     assert(0);
   }
 };
 // Exercises ComplexSchurDecomposition on the Hessenberg matrix produced by a single
 // Arnoldi iteration: one fixed swap at index 3, then repeated swaps at indices
 // Nk-2 down to 0, then schurReorder(Nk). The decomposition check is printed after each step.
 template<class Field>
 void testSchurFromHess(Arnoldi<Field>& Arn, Field& src, int Nlarge, int Nm, int Nk) {
   const char *banner = "*******************************************";
   std::cout<<GridLogMessage<<banner<<std::endl;
   std::cout << GridLogMessage << "Testing Schur reordering, Nm = " << Nm << ", Nk = " << Nk << std::endl;
   std::cout<<GridLogMessage<<banner<<std::endl;

   // Build the Hessenberg matrix to decompose.
   std::cout << GridLogMessage << "Running Arnoldi for 1 iteration to get a Hessenberg." << std::endl;
   Arn(src, 1, Nlarge, Nm, Nlarge);
   Eigen::MatrixXcd hessenberg = Arn.getHessenbergMat();
   std::cout << GridLogMessage << "Hessenberg for use: " << std::endl << hessenberg << std::endl;

   ComplexSchurDecomposition schur (hessenberg, true);

   // Shared report line used after every mutation of the decomposition.
   auto reportStillHolds = [&schur]() {
     std::cout << "Schur decomp still holds? " << schur.checkDecomposition() << std::endl;
   };

   bool holdsInitially = schur.checkDecomposition();
   std::cout << "Schur decomp holds? " << holdsInitially << std::endl;

   // Single swap at a fixed position.
   std::cout << GridLogMessage << "S = " << std::endl << schur.getMatrixS() << std::endl;
   std::cout << GridLogMessage << "Swapping S(3, 3) with S(4, 4)" << std::endl;
   schur.swapEvals(3);
   std::cout << GridLogMessage << "S after swap = " << std::endl << schur.getMatrixS() << std::endl;
   reportStillHolds();

   // Now move last diagonal element all the way to the front.
   std::cout << GridLogMessage << "Moving last eval to front. S at start = " << std::endl << schur.getMatrixS() << std::endl;
   for (int swapIdx = Nk - 2; swapIdx >= 0; --swapIdx) {
     schur.swapEvals(swapIdx);
     std::cout << GridLogMessage << "S after swap of index " << swapIdx << " = " << std::endl << schur.getMatrixS() << std::endl;
     reportStillHolds();
   }

   // Full reorder.
   std::cout << GridLogMessage << "Testing Schur reorder" << std::endl;
   schur.schurReorder(Nk);
   std::cout << GridLogMessage << "S after reorder = " << std::endl << schur.getMatrixS() << std::endl;
   reportStillHolds();
 }
 // Entry point of the Wilson bi-Lanczos test. It reads the gauge configuration from the
 // file "config" and the run parameters from "LanParams.xml", builds a Wilson operator,
 // and, in the currently enabled preprocessor branch, runs LanczosBidiagonalization
 // followed by RestartedLanczosBidiagonalization on a random source. Returns 0.
 int main (int argc, char ** argv)
 {
  Grid_init(&argc,&argv);
  // Ls is only referenced by the commented-out five-dimensional grid setup below.
  const int Ls=16;
 //   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
 //  std::vector<int> lat_size {32, 32, 32, 32};
 //  std::cout << "Lattice size: " << lat_size << std::endl;
  GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), 
 								          GridDefaultSimd(Nd,vComplex::Nsimd()),
 								          GridDefaultMpi());
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
 //  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
 //  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
  // The fermion grids alias the four-dimensional gauge grids.
  GridCartesian         * FGrid   = UGrid;
  GridRedBlackCartesian * FrbGrid = UrbGrid;
  // Construct a coarsened grid
  // poare TODO: replace this with the following line?
  Coordinate clatt = GridDefaultLatt();
 //   Coordinate clatt = GridDefaultLatt();              // [PO] initial line before I edited it
  // Print each fine-lattice extent, then halve it to get the coarse extent.
  for(int d=0;d<clatt.size();d++){
  std::cout << GridLogMessage<< clatt[d] <<std::endl;
    clatt[d] = clatt[d]/2;
    //    clatt[d] = clatt[d]/4;
  }
  GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
  GridCartesian *Coarse5d =  SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
  // Fixed seeds for all three generators; seeds5 and cseeds hold the same values.
  std::vector<int> seeds4({1,2,3,4});
  std::vector<int> seeds5({5,6,7,8});
  std::vector<int> cseeds({5,6,7,8});
  GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5);
  GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4);
  GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
  LatticeFermion result(FGrid); result=Zero();
  LatticeFermion    ref(FGrid); ref=Zero();
  LatticeFermion    tmp(FGrid);
  LatticeFermion    err(FGrid);
  LatticeGaugeField Umu(UGrid);
  FieldMetaData header;
  // The configuration file name is fixed; it is looked up relative to the working directory.
  std::string file("config");
 //  std::string file("Users/patrickoare/libraries/PETSc-Grid/ckpoint_lat.4000");
  NerscIO::readConfiguration(Umu,header,file);
  // Read the run parameters, log them, and write them back out to LanParams.xml.out.
  LanczosParameters LanParams;
  {
    XmlReader  HMCrd("LanParams.xml");
    read(HMCrd,"LanczosParameters",LanParams);
  }
  std::cout << GridLogMessage<< LanParams <<std::endl;
  {
    XmlWriter HMCwr("LanParams.xml.out");
    write(HMCwr,"LanczosParameters",LanParams);
  }
  // Initial values only: mass, resid, Nstop, Nk, Np, maxIter and Nm are all
  // overwritten from LanParams further down.
  RealD mass=0.01;
  RealD M5=1.8;
  // PowerMethod<LatticeFermion> PM; PM(PVdagM, src);
  int Nm = 50;
  int Nk = 12; 
  int Np = 38; 
  // int Nk = Nm+1;     // if just running once
  int maxIter = 10000;
  int Nstop = 10;
  RealD resid = 1.0e-5;
  std::vector<Complex> boundary = {1,1,1,-1};
  WilsonOp::ImplParams Params(boundary);
 //  DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 //  DomainWallFermionD Dpv(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,1.0,M5);
  mass=LanParams.mass;
  std::cout << GridLogIRL<< "mass "<<mass<<std::endl;
  WilsonOp WilsonOperator(Umu,*UGrid,*UrbGrid,mass,Params);
  // const int nbasis = 20;            // size of approximate basis for low-mode space
  const int nbasis = 3;            // size of approximate basis for low-mode space
  const int cb = 0 ;
  LatticeFermion prom(FGrid);
  typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator;
  typedef LittleDiracOperator::CoarseVector CoarseVector;
  NextToNearestStencilGeometry5D geom(Coarse5d);
  std::cout<<GridLogMessage<<std::endl;
  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
  std::cout<<GridLogMessage<<std::endl;
 //  typedef PVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> PVdagM_t;
 //  typedef ShiftedPVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> ShiftedPVdagM_t;
 //  typedef ShiftedComplexPVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> ShiftedComplexPVdagM_t;
 //  PVdagM_t PVdagM(Ddwf, Dpv);
 //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);
 //  SquaredLinearOperator<DomainWallFermionD, LatticeFermionD> Dsq (Ddwf);
 //  NonHermitianLinearOperator<DomainWallFermionD, LatticeFermionD> DLinOp (Ddwf);
  // Three wrappers around the same Wilson operator; only Dwilson is used by the
  // enabled branch below.
  NonHermitianLinearOperator<WilsonOp,FermionField> Dwilson(WilsonOperator); /// <-----
 //  InvertNonHermitianLinearOperator<WilsonOp,FermionField> Iwilson(WilsonOperator); /// <-----
  MdagMLinearOperator<WilsonOp,FermionField> HermOp(WilsonOperator); /// <-----
  Gamma5HermitianLinearOperator <WilsonOp,LatticeFermion> HermOp2(WilsonOperator); /// <----
  // PowerMethod<LatticeFermion> PM; PM(PVdagM, src);
  // Take the solver settings from the XML parameters; Nm is derived as Nk + Np.
  resid=LanParams.resid;
  Nstop=LanParams.Nstop;
  Nk=LanParams.Nk;
  Np=LanParams.Np;
  maxIter=LanParams.maxIter;
  Nm = Nk + Np;
  // Nu random fields are generated, but only src[0] is used by the enabled code below.
  int Nu=16;
  std::vector<LatticeFermion> src(Nu,FGrid); 
  for(int i=0;i<Nu;i++) random(RNG5,src[i]);
  // Optionally replace the random start vector src[0] with a field read from "evec_in".
  if(LanParams.ReadEvec) {
    std::string evecs_file="evec_in";
    std::cout << GridLogIRL<< "Reading evecs from "<<evecs_file<<std::endl;
    emptyUserRecord record;
    Grid::ScidacReader RD;
    RD.open(evecs_file);
    RD.readScidacFieldRecord(src[0],record);
    RD.close();
  }
  Coordinate origin ({0,0,0,0});
  auto tmpSrc = peekSite(src[0], origin);
  std::cout << "[DEBUG] Source at origin = " <<  tmpSrc << std::endl;
  LatticeFermion src2 = src[0];
  // Run KrylovSchur and Arnoldi on a Hermitian matrix
  std::cout << GridLogMessage << "Running Krylov Schur" << std::endl;
  // The outer "#if 0" disables both Krylov-Schur variants; the "#else" branch
  // (Lanczos bidiagonalization, plain then restarted) is what actually runs.
 #if 0
 #if 1
    RealD shift=1.5;
    KrylovSchur KrySchur (Dwilson, UGrid, resid,EvalImNormSmall);
    KrySchur(src[0], maxIter, Nm, Nk, Nstop,&shift);
 #else
    KrylovSchur KrySchur (Iwilson, UGrid, resid,EvalImNormSmall);
    KrySchur(src[0], maxIter, Nm, Nk, Nstop);
 #endif
    std::cout << GridLogMessage << "evec.size= " << KrySchur.evecs.size()<< std::endl;
 #else
  LanczosBidiagonalization<FermionField> LB(Dwilson, UGrid);
  LB.run(src[0], Nm, resid);
  RestartedLanczosBidiagonalization<FermionField> IRLBA(Dwilson, UGrid, Nstop, Nm, resid, maxIter,false);
  IRLBA.run(src[0]);
 #endif
  // Disabled: writing eigenvector densities and their sum; it depends on the KrySchur
  // object that only exists in the disabled branch above.
 #if 0
  src[0]=KrySchur.evecs[0];
  for (int i=1;i<Nstop;i++) src[0]+=KrySchur.evecs[i];
  for (int i=0;i<Nstop;i++) 
  {
 	std::string evfile ("./evec_"+std::to_string(mass)+"_"+std::to_string(i));
        auto evdensity = localInnerProduct(KrySchur.evecs[i],KrySchur.evecs[i] );
        writeFile(evdensity,evfile);
  }
  {
        std::string evfile ("./evec_"+std::to_string(mass)+"_sum");
 //        auto evdensity = localInnerProduct(evec[i],evec[i] );
        writeFile(src[0],evfile);
  }
 #endif
  /*
  std::cout << GridLogMessage << "Running Arnoldi" << std::endl;
  // Arnoldi Arn (Dsq, FGrid, 1e-8);
  Arnoldi Arn (DLinOp, FGrid, 1e-8);
  testSchurFromHess<LatticeFermion>(Arn, src, 10, 6, 4);
  Arnoldi Arn2 (DLinOp, FGrid, 1e-8);
  testSchurFromHess<LatticeFermion>(Arn2, src, 16, 12, 8);
  */
  std::cout<<GridLogMessage<<std::endl;
  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
  std::cout<<GridLogMessage<<std::endl;
  std::cout<<GridLogMessage << "Done "<< std::endl;
  Grid_finalize();
  return 0;
 }
@@ -6,7 +6,7 @@ Source file: ./tests/Test_dwf_lanczos.cc
 Copyright (C) 2015
-Author: Chulwoo Jung <chulwoo@bnl.gov>
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -27,9 +27,6 @@ directory
 *************************************************************************************/
 /*  END LEGAL */
 #include <Grid/Grid.h>
 #include <Grid/parallelIO/IldgIOtypes.h>
 #include <Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h>
 using namespace std;
 using namespace Grid;
@@ -41,111 +38,18 @@ typedef typename WilsonFermionD::FermionField FermionField;
 RealD AllZero(RealD x) { return 0.; }
 template <class T> void writeFile(T& in, std::string const fname){
 #if 1
  // Ref: https://github.com/paboyle/Grid/blob/feature/scidac-wp1/tests/debug/Test_general_coarse_hdcg_phys48.cc#L111
  std::cout << Grid::GridLogMessage << "Writes to: " << fname << std::endl;
  Grid::emptyUserRecord record;
  Grid::ScidacWriter WR(in.Grid()->IsBoss());
  WR.open(fname);
  WR.writeScidacFieldRecord(in,record,0);
  WR.close();
 #endif
  // What is the appropriate way to throw error?
 }
 namespace Grid {
 // Run parameters for the Lanczos test driver, declared through Grid's
 // serialisation macro so they can be passed to read()/write() and streamed.
 // The fields are: mass, mstep, Nstop, Nk, Np, ReadEvec, resid, ChebyLow,
 // ChebyHigh, ChebyOrder.
 // NOTE(review): this struct looks adapted from an HMC parameter struct; the
 // commented-out lines below are left-overs from that origin.
 struct LanczosParameters: Serializable {
  GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
 		  		RealD, mass , 
 		  		RealD, mstep , 
 				Integer, Nstop,
                                 Integer, Nk,
                                 Integer, Np,
                                 Integer, ReadEvec,
 	  			RealD, resid,
 	  			RealD, ChebyLow,
 	  			RealD, ChebyHigh,
 	  			Integer, ChebyOrder)
 //                                  Integer, StartTrajectory,
 //                                  Integer, Trajectories, /* @brief Number of sweeps in this run */
 //                                  bool, MetropolisTest,
 //                                  Integer, NoMetropolisUntil,
 //                                  std::string, StartingType,
 //                                  Integer, SW,
 //				  RealD, Kappa,
 //                                  IntegratorParameters, MD)
  // Default constructor: assigns mass = 0 and nothing else.
  // NOTE(review): no other member is assigned here; unless the macro supplies
  // initialisers, they hold indeterminate values until a read() fills them.
  LanczosParameters() {
    ////////////////////////////// Default values
      mass = 0;
 //    MetropolisTest    = true;
 //    NoMetropolisUntil = 10;
 //    StartTrajectory   = 0;
 //    SW                = 2;
 //    Trajectories      = 10;
 //    StartingType      = "HotStart";
    /////////////////////////////////
  }
  // Construct directly from a reader; forwards to initialize().
  template <class ReaderClass >
  LanczosParameters(Reader<ReaderClass> & TheReader){
    initialize(TheReader);
  }
  // Populate this object by calling read() with the tag "HMC".
  // NOTE(review): main() in this file reads the same struct under the tag
  // "LanczosParameters" instead; confirm which tag the input files use
  // before relying on this path.
  template < class ReaderClass > 
  void initialize(Reader<ReaderClass> &TheReader){
 //    std::cout << GridLogMessage << "Reading HMC\n";
    read(TheReader, "HMC", *this);
  }
  // Currently a no-op: every statement in the body is commented out.
  void print_parameters() const {
 //    std::cout << GridLogMessage << "[HMC parameters] Trajectories            : " << Trajectories << "\n";
 //    std::cout << GridLogMessage << "[HMC parameters] Start trajectory        : " << StartTrajectory << "\n";
 //    std::cout << GridLogMessage << "[HMC parameters] Metropolis test (on/off): " << std::boolalpha << MetropolisTest << "\n";
 //    std::cout << GridLogMessage << "[HMC parameters] Thermalization trajs    : " << NoMetropolisUntil << "\n";
 //    std::cout << GridLogMessage << "[HMC parameters] Starting type           : " << StartingType << "\n";
 //    MD.print_parameters();
  }
 };
 }
 int main(int argc, char** argv) {
  Grid_init(&argc, &argv);
  int Ndir=4;
  auto mpi_layout  = GridDefaultMpi();
  std::vector<int> nblock(4,1);
  std::vector<int> mpi_split(4,1);
 //Interested in avoiding degeneracy only for now
  nblock[3]=2;
  int mrhs=1;
  for(int i =0;i<Ndir;i++){
      mpi_split[i] = mpi_layout[i] / nblock[i];
      mrhs *= nblock[i];
  }
  GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(
      GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()),
      GridDefaultMpi());
-
+  GridRedBlackCartesian* UrbGrid =
-  GridCartesian * SGrid = new GridCartesian(GridDefaultLatt(),
+      SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
                                                    GridDefaultSimd(Nd,vComplex::Nsimd()),
                                                    mpi_split,
                                                    *UGrid);
  GridRedBlackCartesian* UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  GridCartesian* FGrid = UGrid;
  GridRedBlackCartesian* FrbGrid = UrbGrid;
-//  printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n", UGrid, UrbGrid, FGrid, FrbGrid);
+  printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n", UGrid, UrbGrid, FGrid,
         FrbGrid);
  std::vector<int> seeds4({1, 2, 3, 4});
  std::vector<int> seeds5({5, 6, 7, 8});
@@ -157,16 +61,7 @@ int main(int argc, char** argv) {
  RNG5.SeedFixedIntegers(seeds5);
  LatticeGaugeField Umu(UGrid);
-//  SU<Nc>::HotConfiguration(RNG4, Umu);
+  SU<Nc>::HotConfiguration(RNG4, Umu);
 //  SU<Nc>::ColdConfiguration(Umu);
  FieldMetaData header;
  std::string file("./config");
 //  int precision32 = 0;
 //  int tworow      = 0;
 //  NerscIO::writeConfiguration(Umu,file,tworow,precision32);
  NerscIO::readConfiguration(Umu,header,file);
 /*
  std::vector<LatticeColourMatrix> U(4, UGrid);
@@ -175,100 +70,30 @@ int main(int argc, char** argv) {
  }
 */
-  int Nstop = 10;
+  RealD mass = -0.1;
-  int Nu = 1;
+  FermionOp WilsonOperator(Umu,*FGrid,*FrbGrid,mass);
-  int Nk = 20;
+  MdagMLinearOperator<FermionOp,LatticeFermion> HermOp(WilsonOperator); /// <-----
  int Np = 80;
  int Nm = Nk + Np;
  int MaxIt = 10000;
  RealD resid = 1.0e-5;
  RealD mass = -1.0;
  LanczosParameters LanParams;
 #if 1
  {
    XmlReader  HMCrd("LanParams.xml");
    read(HMCrd,"LanczosParameters",LanParams);
  }
 #else
  {
    LanParams.mass = mass;
  }
 #endif
  std::cout << GridLogMessage<< LanParams <<std::endl;
  { 
    XmlWriter HMCwr("LanParams.xml.out");
    write(HMCwr,"LanczosParameters",LanParams);
  }
  mass=LanParams.mass;
  resid=LanParams.resid;
  Nstop=LanParams.Nstop;
  Nu = mrhs;
  Nk=LanParams.Nk;
  Np=LanParams.Np;
  Nm = Nk + Np;
 //  FermionField src(FGrid);
  std::vector<FermionField> src(Nu,FGrid);
  for(int i =0;i<Nu;i++) gaussian(RNG5, src[i]);
  if(LanParams.ReadEvec) {
    std::string evecs_file="evec_in";
    std::cout << GridLogIRL<< "Reading evecs from "<<evecs_file<<std::endl;
    emptyUserRecord record;
    Grid::ScidacReader RD;
    RD.open(evecs_file);
    RD.readScidacFieldRecord(src[0],record);
    RD.close();
  }
  std::vector<Complex> boundary = {1,1,1,-1};
 //  std::vector<Complex> boundary = {1,1,1,1};
  FermionOp::ImplParams Params(boundary);
  GridCartesian         * SFGrid   = SGrid;
  GridRedBlackCartesian * SFrbGrid  = SpaceTimeGrid::makeFourDimRedBlackGrid(SFGrid);
 //  GridRedBlackCartesian * SFrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(JP.Ls,SGrid);
  LatticeGaugeField s_Umu(SGrid);
  Grid_split  (Umu,s_Umu);
 while ( mass > - 2.0){
  FermionOp WilsonOperator(Umu,*FGrid,*FrbGrid,mass,Params);
  MdagMLinearOperator<FermionOp,FermionField> HermOp(WilsonOperator); /// <-----
  FermionOp WilsonSplit(s_Umu,*SFGrid,*SFrbGrid,mass,Params);
  MdagMLinearOperator<FermionOp,FermionField> SHermOp(WilsonSplit); /// <-----
  //SchurDiagTwoOperator<FermionOp,FermionField> HermOp(WilsonOperator);
-  Gamma5HermitianLinearOperator <FermionOp,LatticeFermion> HermOp2(WilsonOperator); /// <-----
+
  const int Nstop = 20;
  const int Nk = 60;
  const int Np = 60;
  const int Nm = Nk + Np;
  const int MaxIt = 10000;
  RealD resid = 1.0e-6;
  std::vector<double> Coeffs{0, 1.};
  Polynomial<FermionField> PolyX(Coeffs);
-//  Chebyshev<FermionField> Cheby(0.5, 60., 31);
+  Chebyshev<FermionField> Cheby(0.0, 10., 12);
 //                                  RealD, ChebyLow,
 //                                RealD, ChebyHigh,
 //                                Integer, ChebyOrder)
  Chebyshev<FermionField> Cheby(LanParams.ChebyLow,LanParams.ChebyHigh,LanParams.ChebyOrder);
  FunctionHermOp<FermionField> OpCheby(Cheby,HermOp);
     PlainHermOp<FermionField> Op     (HermOp);
     PlainHermOp<FermionField> Op2     (HermOp2);
-//  ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby, Op2, Nstop, Nk, Nm, resid, MaxIt);
+  ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby, Op, Nstop, Nk, Nm, resid, MaxIt);
-//  SimpleLanczos<FermionField> IRL(Op,Nstop, Nk, Nm, resid, MaxIt);
+
    ImplicitlyRestartedBlockLanczos<FermionField> IRBL(HermOp, SHermOp,
                                                     FrbGrid,SFrbGrid,mrhs,
                                                     Cheby,
                                                     Nstop, Nstop*2,
                                                     Nu, Nk, Nm,
                                                     resid, MaxIt,
                                                     IRBLdiagonaliseWithEigen);
  IRBL.split_test=1;
  std::vector<RealD> eval(Nm);
  FermionField src(FGrid);
  gaussian(RNG5, src);
  std::vector<FermionField> evec(Nm, FGrid);
  for (int i = 0; i < 1; i++) {
    std::cout << i << " / " << Nm << " grid pointer " << evec[i].Grid()
@@ -276,40 +101,9 @@ while ( mass > - 2.0){
  };
  int Nconv;
-//  IRL.calc(eval, evec, src, Nconv);
+  IRL.calc(eval, evec, src, Nconv);
  IRBL.calc(eval, evec, src, Nconv,LanczosType::irbl);
-  std::cout << mass <<" : " << eval << std::endl;
+  std::cout << eval << std::endl;
  Gamma g5(Gamma::Algebra::Gamma5) ;
  ComplexD dot;
  FermionField tmp(FGrid);
  FermionField sav(FGrid);
  sav=evec[0];
  for (int i = 0; i < Nstop ; i++) {
    tmp = g5*evec[i];
    dot = innerProduct(tmp,evec[i]);
    std::cout << mass << " : " << eval[i]  << " " << real(dot) << " " << imag(dot)  << std::endl ;
 //    if ( i<1)
    {
 	std::string evfile ("./evec_"+std::to_string(mass)+"_"+std::to_string(i));
        auto evdensity = localInnerProduct(evec[i],evec[i] );
 	writeFile(evdensity,evfile);
    }
    if (i>0) sav += evec[i];
  }
  {
 	std::string evfile ("./evec_"+std::to_string(mass)+"_sum");
 //        auto evdensity = localInnerProduct(evec[i],evec[i] );
 	writeFile(sav,evfile);
  }
  for(int i =0;i<Nu;i++) src[i]=evec[i];
  for(int i=Nu;i<Nstop;i++) src[i%Nu] +=evec[i];
 //  src  = evec[0]+evec[1]+evec[2];
 //  src  += evec[3]+evec[4]+evec[5];
 //  src  += evec[6]+evec[7]+evec[8];
  mass += LanParams.mstep;
 }
  Grid_finalize();
 }
@@ -27,7 +27,6 @@ directory
 *************************************************************************************/
 /*  END LEGAL */
 #include <Grid/Grid.h>
 #include <Grid/parallelIO/IldgIOtypes.h>
 using namespace std;
 using namespace Grid;
@@ -39,32 +38,11 @@ typedef typename WilsonFermionD::FermionField FermionField;
 RealD AllZero(RealD x) { return 0.; }
 template <class T> void writeFile(T& in, std::string const fname){
 #if 1
  // Ref: https://github.com/paboyle/Grid/blob/feature/scidac-wp1/tests/debug/Test_general_coarse_hdcg_phys48.cc#L111
  std::cout << Grid::GridLogMessage << "Writes to: " << fname << std::endl;
  Grid::emptyUserRecord record;
  Grid::ScidacWriter WR(in.Grid()->IsBoss());
  WR.open(fname);
  WR.writeScidacFieldRecord(in,record,0);
  WR.close();
 #endif
  // What is the appropriate way to throw error?
 }
 namespace Grid {
 struct LanczosParameters: Serializable {
  GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
 		  		RealD, mass , 
 		  		RealD, mstep , 
 				Integer, Nstop,
                                Integer, Nk,
                                Integer, Np,
                                Integer, ReadEvec,
                                Integer, maxIter,
 	  			RealD, resid,
 	  			RealD, ChebyLow,
 	  			RealD, ChebyHigh,
 	  			Integer, ChebyOrder)
@@ -137,13 +115,12 @@ int main(int argc, char** argv) {
  LatticeGaugeField Umu(UGrid);
 //  SU<Nc>::HotConfiguration(RNG4, Umu);
 //  SU<Nc>::ColdConfiguration(Umu);
  FieldMetaData header;
  std::string file("./config");
-//  int precision32 = 0;
+  int precision32 = 0;
-//  int tworow      = 0;
+  int tworow      = 0;
 //  NerscIO::writeConfiguration(Umu,file,tworow,precision32);
  NerscIO::readConfiguration(Umu,header,file);
@@ -181,32 +158,10 @@ int main(int argc, char** argv) {
  }
  mass=LanParams.mass;
  resid=LanParams.resid;
  Nstop=LanParams.Nstop;
  Nk=LanParams.Nk;
  Np=LanParams.Np;
  MaxIt=LanParams.maxIter;
  Nm = Nk + Np;
  FermionField src(FGrid);
  gaussian(RNG5, src);
  if(LanParams.ReadEvec) {
    std::string evecs_file="evec_in";
    std::cout << GridLogIRL<< "Reading evecs from "<<evecs_file<<std::endl;
    emptyUserRecord record;
    Grid::ScidacReader RD;
    RD.open(evecs_file);
    RD.readScidacFieldRecord(src,record);
    RD.close();
  }
  std::vector<Complex> boundary = {1,1,1,-1};
 //  std::vector<Complex> boundary = {1,1,1,1};
  FermionOp::ImplParams Params(boundary);
-while ( mass > - 2.0){
+while ( mass > - 5.0){
-  FermionOp WilsonOperator(Umu,*FGrid,*FrbGrid,mass,Params);
+  FermionOp WilsonOperator(Umu,*FGrid,*FrbGrid,mass);
  MdagMLinearOperator<FermionOp,FermionField> HermOp(WilsonOperator); /// <-----
  //SchurDiagTwoOperator<FermionOp,FermionField> HermOp(WilsonOperator);
  Gamma5HermitianLinearOperator <FermionOp,LatticeFermion> HermOp2(WilsonOperator); /// <-----
@@ -224,10 +179,11 @@ while ( mass > - 2.0){
     PlainHermOp<FermionField> Op     (HermOp);
     PlainHermOp<FermionField> Op2     (HermOp2);
-  ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby, Op, Nstop, Nk, Nm, resid, MaxIt);
+  ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby, Op2, Nstop, Nk, Nm, resid, MaxIt);
 //  SimpleLanczos<FermionField> IRL(Op,Nstop, Nk, Nm, resid, MaxIt);
  std::vector<RealD> eval(Nm);
  FermionField src(FGrid);
  gaussian(RNG5, src);
  std::vector<FermionField> evec(Nm, FGrid);
  for (int i = 0; i < 1; i++) {
    std::cout << i << " / " << Nm << " grid pointer " << evec[i].Grid()
@@ -236,46 +192,19 @@ while ( mass > - 2.0){
  int Nconv;
  IRL.calc(eval, evec, src, Nconv);
 //  IRL.calc(eval,  src, Nconv);
  std::cout << mass <<" : " << eval << std::endl;
  Gamma g5(Gamma::Algebra::Gamma5) ;
  ComplexD dot;
  FermionField tmp(FGrid);
  FermionField sav(FGrid);
  sav=evec[0];
  for (int i = 0; i < Nstop ; i++) {
    tmp = g5*evec[i];
    dot = innerProduct(tmp,evec[i]);
    std::cout << mass << " : " << eval[i]  << " " << real(dot) << " " << imag(dot)  << std::endl ;
 //    if ( i<1)
    {
 	std::string evfile ("./evec_"+std::to_string(mass)+"_"+std::to_string(i));
        auto evdensity = localInnerProduct(evec[i],evec[i] );
 	writeFile(evdensity,evfile);
 //  if(LanParams.ReadEvec) {
 //    std::string evecs_file="evec_in";
  {
    std::cout << GridLogIRL<< "Reading evecs from "<<evfile<<std::endl;
    emptyUserRecord record;
    Grid::ScidacReader RD;
    RD.open(evfile);
    RD.readScidacFieldRecord(evdensity,record);
    RD.close();
  }
    }
    if (i>0) sav += evec[i];
  }
  {
 	std::string evfile ("./evec_"+std::to_string(mass)+"_sum");
 //        auto evdensity = localInnerProduct(evec[i],evec[i] );
 	writeFile(sav,evfile);
  }
  src  = evec[0]+evec[1]+evec[2];
-  src  += evec[3]+evec[4]+evec[5];
+  mass += -0.1;
  src  += evec[6]+evec[7]+evec[8];
  mass += LanParams.mstep;
 }
  Grid_finalize();
@@ -0,0 +1,171 @@
 # CLAUDE.md
 This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
 ## What This Is
 VTK-based visualisation and analysis tools for Grid lattice QCD eigenvector density and HMC force data. All programmes link against both Grid (for reading Scidac/ILDG lattice files) and VTK (for rendering).
 ## Build
 ```bash
 cd /Users/peterboyle/QCD/AmSC/Grid/visualisation/build
 cmake .. -DVTK_DIR=$HOME/QCD/vtk/VTK-9.4.2-install/lib/cmake/vtk-9.4
 make <target>    # e.g. make ControlledVisualise5D
 ```
 All executables are built as macOS bundles (`.app`) except `ForceAnalysis`, `FindPeak`, and `DumpField`.
 ## Programmes
 ### ControlledVisualise5D
 Interactive VTK renderer for 5D DWF eigenvector density (`LatticeComplexD`). Driven via named pipe `/tmp/visualise_cmd`.
 **Launch script**: `/Volumes/X9Pro/visualisation/Grid/visualisation/build/Hdwf_1_long/visualise_controlled.sh`
 **Wire protocol** (one command per line to `/tmp/visualise_cmd`):
 | Command | Effect |
 |---------|--------|
 | `file <N>` / `file +N` / `file -N` | Jump to file by index or relative |
 | `slice <dim> <N>` / `slice <dim> +N` / `slice <dim> -N` | Set (`N`) or shift (`+N` / `-N`) the slice coordinate in dimension `<dim>` |
 | `spin <deg>` | Continuous azimuth rotation at deg/tick (100ms tick); `spin 0` stops |
 | `azimuth <deg>` | Single azimuth rotation step |
 | `elevation <deg>` | Single elevation rotation step |
 | `zoom <factor>` | Camera dolly (>1 = in) |
 | `iso <value>` | Isosurface threshold in RMS units |
 | `status` | Print current state |
 | `quit` | Exit |
 **Dimension indices for 5D DWF grid** (`--grid 48.32.32.32.32`):
 | dim | axis | size |
 |-----|------|------|
 | 0   | s (Ls) | 48 |
 | 1   | x    | 32   |
 | 2   | y    | 32   |
 | 3   | z    | 32   |
 | 4   | t    | 32   |
 **MD time mapping** for trajectory 702 (241 files, τ=3.3–4.0):
 - File index N → τ = 3.300000 + N × (1/480)
 - τ → file index = round((τ − 3.3) × 480)
 **Display axes**: `--xyz 0.3.4` shows s, z, t. The `--slice` argument sets initial values for all dims; dims not in `--xyz`, `--sum`, or `--loop` are the fixed slice dimensions (x=dim1, y=dim2 with `--xyz 0.3.4`).
 **Spin**: Implemented via `g_spinDeg` global applied on every 100ms poll timer tick inside `CommandHandler::Execute()`. Does not flood the pipe.
 ### FindPeak
 Reads a `LatticeComplexD` Scidac file, prints the top-N sites by real value to stderr.
 ```bash
 ./FindPeak --grid 48.32.32.32.32 --mpi 1.1.1.1.1 <file> 2>peaks.txt
 ```
 Key result: At τ=3.670833 the tunneling hotsite on the s=0 wall is (x=21, y=24, z=2, t=23).
 ### ForceAnalysis
 Reads 4D `LatticeComplexD` force snapshot files (Shuhei's snapshots at `/Volumes/X9Pro/visualisation/Shuhei/snapshots/`). Outputs TSV of RMS and hotsite value per file to stderr.
 ```bash
 ./ForceAnalysis --grid 32.32.32.32 --mpi 1.1.1.1 --hotsite 21.24.2.23 \
    <files...> 2>force.tsv 1>/dev/null
 ```
 Force components: `Gauge_lat`, `Gauge_smr`, `Jacobian_smr`, `Ferm0047_lat`, `Ferm0047_smr`.
 ### DumpField
 Reads a `LatticeComplexD` and dumps via Grid's `<<` operator to stdout for verification.
 ### TranscriptToVideo
 Renders a conversation transcript to an MP4 video (1280×720, 10 fps) with a typewriter animation effect, scrolling history, and optional captions. Does **not** link against Grid — pure VTK only.
 #### Transcript format
 ```
 [USER] First question text, possibly
 continuing on the next line.
 A blank line within a turn creates a paragraph break (visual spacer).
 [ASSISTANT] Response text.
 Multiple continuation lines are preserved
 as separate display lines, not merged.
 [CAPTION] Caption text shown at bottom of screen in white italic.
 [CAPTION]    (whitespace-only body clears the caption)
 [USER] Next question...
 ```
 - Lines beginning `[USER]`, `[ASSISTANT]`, `[CAPTION]` start a new turn.
 - Continuation lines (no `[TAG]` prefix) are joined with `\n` — each becomes its own wrapped display line.
 - Blank lines within a turn become paragraph-break spacers.
 - Markdown emphasis markers (`**`, `*`, `` ` ``) are stripped automatically.
 - UTF-8 smart quotes, em-dashes, ellipses, arrows are transliterated to ASCII.
 #### Usage
 ```bash
 cd /Users/peterboyle/QCD/AmSC/Grid/visualisation/build
 # Set runtime library paths first (see Runtime Environment below)
 ./TranscriptToVideo <transcript_file> <output.mp4>
 ```
 Transcript files live in `/Users/peterboyle/QCD/AmSC/Grid/visualisation/` (e.g. `transcript`, `transcript2`, `transcript3`).
 #### Visual layout
 | Element | Detail |
 |---------|--------|
 | Background | Near-black navy `(0.04, 0.04, 0.10)` |
 | `[USER]` text | Gold `(1.00, 0.84, 0.00)` |
 | `[ASSISTANT]` text | Steel blue `(0.68, 0.85, 0.90)` |
 | History | Up to 18 lines; brightness fades linearly from 0.85 (newest) to 0.20 (oldest) |
 | Caption | Arial italic 20pt white with shadow, centred at bottom |
 | Progress bar | Blue, top of frame |
 | Typewriter speed | 50 chars/sec (5 chars/frame at 10 fps) |
 | Pause between lines | 3 frames (0.3 s) |
 | Word-wrap column | 60 chars (body only, after prefix) |
 #### Key implementation notes
 - **Persistent render context**: a single `vtkRenderWindow` is created once and reused for all frames. Creating a new window per frame exhausts the macOS Metal GPU command buffer after ~33 frames (`MTLCommandBufferErrorDomain Code=8`).
 - **`SanitiseASCII()`**: replaces multi-byte UTF-8 sequences before passing to VTK's font renderer (which crashes on non-ASCII input).
 - Output format is MP4 via `vtkFFMPEGWriter`. `SetOffScreenRendering(1)` is required for headless rendering.
 ## Runtime Environment
 All executables in `build/` require Spack-installed HDF5/FFTW/GMP/MPFR on the dynamic linker path:
 ```bash
 SPACK=/Users/peterboyle/QCD/Spack/spack/opt/spack/darwin-m1
 export DYLD_LIBRARY_PATH=\
 $SPACK/hdf5-1.14.6-2265ms4kymgw6hcnwi6vqehslyfv74t4/lib:\
 $SPACK/fftw-3.3.10-aznn6h3nac5cycidlhrhgjxvntpcbg57/lib:\
 $SPACK/gmp-6.3.0-cwiz4n7ww33fnb3aban2iup4orcr6c7i/lib:\
 $SPACK/mpfr-4.2.1-exgbz4qshmet6tmmuttdewdlunfvtrlb/lib:\
 $DYLD_LIBRARY_PATH
 ```
 (These paths are also set by the ControlledVisualise5D launch script.)
 ## Key Physics Context
 See `/Volumes/X9Pro/visualisation/analysis_notes_20260407.md` for full analysis. Summary:
 - Near-zero mode of H_DWF localises on the two walls (s=0 and s=47) of the 5D domain wall geometry
 - Topology change transfers norm between walls, mediated by a near-zero mode of H_w (Hermitian Wilson at m=−1.8)
 - Tunneling hotsite on s=0 wall: (x=21, y=24, z=2, t=23); s=47 wall: (x=4, y=8, z=0, t=20)
 - Light fermion pseudofermion force (Ferm0047_smr) peaks at ~20× RMS at the hotsite during tunneling — this is the restoring force that causes topological bounces
 ## Grid/VTK interaction notes
 - Grid log messages go to stdout; all data output in analysis programmes uses stderr to avoid interleaving
 - `TensorRemove()` is required when extracting a scalar from `peekSite()` result: `real(TensorRemove(peekSite(field, site)))`
 - For runtime-determined grid dimensionality use `GridDefaultSimd(latt_size.size(), vComplex::Nsimd())`
 - DYLD_LIBRARY_PATH must include Spack HDF5/FFTW/GMP/MPFR paths (see launch script)
@@ -0,0 +1,891 @@
 // ControlledVisualise5D.cxx
 // Derived from Visualise5D.cxx by Peter Boyle
 //
 // A minimal-protocol rendering engine for 5D DWF eigenvector-density data.
 // Intended to be driven by an external intelligent controller (e.g. Claude)
 // that handles all natural-language interpretation and state tracking.
 //
 // Commands are sent one per line to the named pipe /tmp/visualise_cmd.
 // State is reported to stdout after every command.
 //
 // Wire protocol (all fields whitespace-separated):
 //
 //   slice <dim> <N>       set Slice[dim] = N  (0-based, wraps to lattice size)
 //   slice <dim> +<N>      increment Slice[dim] by N
 //   slice <dim> -<N>      decrement Slice[dim] by N
 //   zoom <factor>         camera Dolly by factor  (>1 = in, <1 = out)
 //   iso <value>           set isosurface threshold to <value> x RMS
 //   file <index>          jump to file by absolute index
 //   file +<N>             advance N files
 //   file -<N>             go back N files
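 //   spin <deg>            continuous azimuth rotation, <deg> per poll tick; 0 stops
 //   azimuth <deg>         single azimuth rotation step
 //   elevation <deg>       single elevation rotation step
 //   render                force a render with current state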
 //   status                print current state to stdout
 //   quit                  exit cleanly
 //
 // Dimension indices for 5D DWF grid (e.g. --grid 48.32.32.32.32):
 //   s=0 (Ls)  x=1  y=2  z=3  t=4
 // For a 4D grid (--grid 32.32.32.32):
 //   x=0  y=1  z=2  t=3
 #include <vtkActor.h>
 #include <vtkCamera.h>
 #include <vtkNamedColors.h>
 #include <vtkNew.h>
 #include <vtkOutlineFilter.h>
 #include <vtkPolyDataMapper.h>
 #include <vtkProperty.h>
 #include <vtkRenderWindow.h>
 #include <vtkRenderWindowInteractor.h>
 #include <vtkRenderer.h>
 #include <vtkStripper.h>
 #include <vtkImageData.h>
 #include <vtkCallbackCommand.h>
 #include <vtkTextActor.h>
 #include <vtkTextProperty.h>
 #include <vtkProperty2D.h>
 #include <vtkWindowToImageFilter.h>
 #define MPEG
 #ifdef MPEG
 #include <vtkFFMPEGWriter.h>
 #endif
 #include <array>
 #include <string>
 #include <vector>
 #include <queue>
 #include <mutex>
 #include <thread>
 #include <atomic>
 #include <sstream>
 #include <iostream>
 #include <fstream>
 #include <cmath>
 #include <cstdlib>
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include <Grid/Grid.h>
 #define USE_FLYING_EDGES
 #ifdef USE_FLYING_EDGES
 #include <vtkFlyingEdges3D.h>
 typedef vtkFlyingEdges3D isosurface;
 #else
 #include <vtkMarchingCubes.h>
 typedef vtkMarchingCubes isosurface;
 #endif
 // Filesystem path of the named pipe that CommandReaderThread() creates with
 // mkfifo() and reads commands from.
 #define CMD_PIPE "/tmp/visualise_cmd"
 // NOTE(review): g_mpeg / g_framerate are not used in the visible part of this
 // file; presumably an MPEG-output switch and the output frame rate — confirm
 // at the point where they are set.
 static int g_mpeg      = 0;
 static int g_framerate = 10;
 // ─── Thread-safe command queue ────────────────────────────────────────────────
 // CommandReaderThread() pushes complete command lines onto g_cmdQueue while
 // holding g_cmdMutex; g_running is the loop condition of that thread.
 static std::queue<std::string> g_cmdQueue;
 static std::mutex              g_cmdMutex;
 static std::atomic<bool>       g_running{true};
 static double                  g_spinDeg = 0.0;   // degrees per poll tick; 0 = stopped
 // ─── MPEG recording state ─────────────────────────────────────────────────────
 // NOTE(review): vtkFFMPEGWriter.h is only included under #ifdef MPEG, but
 // g_mpegWriter below is declared unconditionally, so this file relies on
 // MPEG being defined.
 static bool                        g_recording   = false;
 static vtkFFMPEGWriter*            g_mpegWriter  = nullptr;
 static vtkWindowToImageFilter*     g_imageFilter = nullptr;
 static std::string                 g_recordingFile;   // AVI filename for mux step
 // ─── Audio state (PCM audio track, synced to video frames) ───────────────────
 static const int    AUDIO_RATE        = 44100;                 // samples per second
 static const double BEEP_FREQ         = 800.0;                 // sine frequency
 static const int    BEEP_SAMPLES      = AUDIO_RATE * 4 / 100;  // 40ms beep
 static std::vector<int16_t> g_audioBuffer;                     // accumulated mono samples
 static int          g_beepRemaining   = 0;                     // beep samples still to emit
 static double       g_beepPhase       = 0.0;                   // sample counter within the beep
 static int          g_samplesPerFrame = AUDIO_RATE / 10; // updated at record start

 // Append g_samplesPerFrame samples to g_audioBuffer: one video frame of audio.
 // While a beep is pending (g_beepRemaining > 0) the samples are a sine wave at
 // BEEP_FREQ with peak amplitude 16000 and a linear 10ms fade-in and fade-out;
 // otherwise they are silence and the phase counter is reset.
 static void GenerateAudioFrame()
 {
     const int fadeLen = AUDIO_RATE / 100;  // 10ms fade
     for (int k = 0; k < g_samplesPerFrame; ++k) {
         if (g_beepRemaining <= 0) {
             // No beep in progress: emit silence and rewind the phase.
             g_beepPhase = 0.0;
             g_audioBuffer.push_back(0);
             continue;
         }
         const int elapsed = BEEP_SAMPLES - g_beepRemaining;
         double gain = 1.0;
         if (elapsed < fadeLen)         gain = (double)elapsed / fadeLen;
         // The fade-out test runs second, so it wins if both ramps apply.
         if (g_beepRemaining < fadeLen) gain = (double)g_beepRemaining / fadeLen;
         const double tone = std::sin(2.0 * M_PI * BEEP_FREQ * g_beepPhase / AUDIO_RATE);
         g_audioBuffer.push_back((int16_t)(16000.0 * gain * tone));
         g_beepPhase += 1.0;
         --g_beepRemaining;
     }
 }

 // Arm a new beep: the next BEEP_SAMPLES generated samples carry the tone.
 static void TriggerBeep()
 {
     g_beepRemaining = BEEP_SAMPLES;
 }
 // Simple mono 16-bit PCM WAV writer.
 //
 // Writes a canonical 44-byte RIFF/WAVE header followed by the samples in
 // `buf` to the file `path`.
 //
 //   path  output file; truncated if it already exists
 //   buf   mono samples, written in order
 //   rate  sample rate in Hz, stored in the "fmt " chunk
 //
 // All header fields and samples are serialised explicitly as little-endian,
 // as RIFF requires, rather than by dumping host memory; the output therefore
 // does not depend on the width or byte order of `int` on the build machine.
 // If the file cannot be opened, or the write fails, a message is printed to
 // stderr and the function returns; it never throws on its own account.
 static void WriteWAV(const std::string& path, const std::vector<int16_t>& buf, int rate)
 {
     std::ofstream f(path, std::ios::binary);
     if (!f) {
         std::cerr << "[wav] Cannot open " << path << " for writing" << std::endl;
         return;
     }

     // Little-endian field emitters.
     auto put16 = [&f](uint16_t v) {
         const char b[2] = { static_cast<char>(v & 0xFF),
                             static_cast<char>((v >> 8) & 0xFF) };
         f.write(b, 2);
     };
     auto put32 = [&f](uint32_t v) {
         const char b[4] = { static_cast<char>(v & 0xFF),
                             static_cast<char>((v >> 8) & 0xFF),
                             static_cast<char>((v >> 16) & 0xFF),
                             static_cast<char>((v >> 24) & 0xFF) };
         f.write(b, 4);
     };

     const uint32_t dataBytes = static_cast<uint32_t>(buf.size() * 2);  // 2 bytes per sample
     const uint32_t chunkSize = 36 + dataBytes;                         // header after "RIFF<size>" + data
     const uint32_t byteRate  = static_cast<uint32_t>(rate) * 2;        // mono, 16-bit

     f.write("RIFF", 4); put32(chunkSize);
     f.write("WAVE", 4);
     f.write("fmt ", 4);
     put32(16);                              // size of the fmt chunk body
     put16(1);                               // format tag 1 = PCM
     put16(1);                               // channels
     put32(static_cast<uint32_t>(rate));     // sample rate
     put32(byteRate);
     put16(2);                               // block align: bytes per sample frame
     put16(16);                              // bits per sample
     f.write("data", 4); put32(dataBytes);

     // Serialise the samples into one buffer so the payload goes out in a
     // single write.
     std::vector<char> payload;
     payload.reserve(buf.size() * 2);
     for (size_t i = 0; i < buf.size(); ++i) {
         const uint16_t u = static_cast<uint16_t>(buf[i]);
         payload.push_back(static_cast<char>(u & 0xFF));
         payload.push_back(static_cast<char>((u >> 8) & 0xFF));
     }
     if (!payload.empty()) f.write(payload.data(), static_cast<std::streamsize>(payload.size()));

     f.flush();
     if (!f) std::cerr << "[wav] Write to " << path << " failed" << std::endl;
 }
 // Play a short audible beep on the local machine (non-blocking).
 // The shell command ends in '&', so system() returns without waiting for
 // playback to finish. The exit status is deliberately ignored.
 static void PlayBeepAudible()
 {
     static const char beepCommand[] = "afplay /System/Library/Sounds/Tink.aiff -v 0.4 &";
     (void)system(beepCommand);
 }
 // ─── Grid I/O ─────────────────────────────────────────────────────────────────
 template <class T>
 void readFile(T& out, const std::string& fname)
 {
    Grid::emptyUserRecord record;
    Grid::ScidacReader RD;
    RD.open(fname);
    RD.readScidacFieldRecord(out, record);
    RD.close();
 }
 using namespace Grid;
 // ─── Command reader thread ────────────────────────────────────────────────────
 // Background thread body: creates the named pipe CMD_PIPE, then repeatedly
 // opens it, reads newline-separated commands and pushes each non-empty line
 // (with an optional trailing '\r' removed) onto g_cmdQueue under g_cmdMutex.
 // The loop runs until g_running becomes false.
 //
 // The pipe is opened with O_NONBLOCK so that open() does not wait for a
 // writer, and is then switched back to blocking mode for the reads.
 //
 // When no writer has the FIFO open, read() reports end-of-file immediately.
 // To avoid spinning through open/read/close at full speed in that state, the
 // loop pauses briefly whenever a session delivered no data at all.
 //
 // A final command that is not terminated by '\n' before the writer closes the
 // pipe is still queued rather than dropped.
 void CommandReaderThread()
 {
     // Return value not checked; NOTE(review): presumably because the pipe
     // usually exists already from an earlier run — confirm.
     mkfifo(CMD_PIPE, 0666);
     std::cout << "[cmd] Listening on " << CMD_PIPE << std::endl;

     // Strip one trailing '\r' and queue the line unless it is empty.
     auto enqueue = [](std::string line) {
         if (!line.empty() && line.back() == '\r') line.pop_back();
         if (line.empty()) return;
         std::lock_guard<std::mutex> lk(g_cmdMutex);
         g_cmdQueue.push(line);
     };

     while (g_running) {
         int fd = open(CMD_PIPE, O_RDONLY | O_NONBLOCK);
         if (fd < 0) { usleep(200000); continue; }
         int flags = fcntl(fd, F_GETFL);
         fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);

         char buf[4096];
         std::string partial;      // bytes received but not yet ended by '\n'
         bool sawData = false;     // did this open/close session deliver anything?
         ssize_t n;
         while (g_running && (n = read(fd, buf, sizeof(buf))) > 0) {
             sawData = true;
             // Append by length so an embedded NUL byte cannot truncate the data.
             partial.append(buf, static_cast<size_t>(n));
             size_t pos;
             while ((pos = partial.find('\n')) != std::string::npos) {
                 enqueue(partial.substr(0, pos));
                 partial.erase(0, pos + 1);
             }
         }
         close(fd);

         // Writer closed without a final newline: do not lose the command.
         enqueue(partial);

         // Nothing arrived: no writer was connected. Back off before reopening
         // so the thread does not busy-wait.
         if (!sawData) usleep(50000);
     }
 }
 // ─── FrameUpdater ─────────────────────────────────────────────────────────────
 class FrameUpdater : public vtkCallbackCommand
 {
 public:
    FrameUpdater() : ffile(0), TimerCount(0), old_file(-1), timerId(-2), maxCount(-1) {}
    static FrameUpdater* New() { return new FrameUpdater; }
    int ffile;
    int old_file;
    int timerId;
    int maxCount;
    Coordinate latt;
    Coordinate xyz_dims, xyz_ranges, g_xyz_ranges;
    uint64_t   xyz_vol;
    Coordinate loop_dims, loop_ranges;
    uint64_t   loop_vol;
    Coordinate sum_dims, sum_ranges;
    uint64_t   sum_vol;
    Coordinate slice_dims;
    Coordinate Slice;
    std::vector<std::string> files;
    int        Nd;
    GridBase*  grid;
    Grid::LatticeComplexD* grid_data;
    double rms;
    vtkImageData* imageData    = nullptr;
    vtkTextActor* text         = nullptr;
    isosurface*   posExtractor = nullptr;
    isosurface*   negExtractor = nullptr;
    void SetGrid(GridBase* _grid)
    {
        grid      = _grid;
        Nd        = grid->Nd();
        latt      = grid->GlobalDimensions();
        grid_data = new Grid::LatticeComplexD(grid);
    }
    void SetFiles(std::vector<std::string> list)  { files = list; old_file = -1; }
    void SetSlice(Coordinate _Slice)              { Slice = _Slice; }
    void SetSumDimensions(Coordinate _SumDims)
    {
        sum_dims = _SumDims; sum_ranges = Coordinate(Nd); sum_vol = 1;
        for (int d = 0; d < Nd; d++) { sum_ranges[d] = sum_dims[d] ? latt[d] : 1; sum_vol *= sum_ranges[d]; }
    }
    void SetLoopDimensions(Coordinate _LoopDims)
    {
        loop_dims = _LoopDims; loop_ranges = Coordinate(Nd); loop_vol = 1;
        for (int d = 0; d < Nd; d++) { loop_ranges[d] = loop_dims[d] ? latt[d] : 1; loop_vol *= loop_ranges[d]; }
    }
    void SetDisplayDimensions(Coordinate _xyz_dims)
    {
        xyz_dims = _xyz_dims; g_xyz_ranges = Coordinate(Nd); xyz_ranges = Coordinate(3); xyz_vol = 1;
        for (int d = 0; d < 3; d++) { xyz_ranges[d] = latt[xyz_dims[d]]; xyz_vol *= xyz_ranges[d]; }
        for (int d = 0; d < Nd; d++) {
            g_xyz_ranges[d] = 1;
            for (int dd = 0; dd < 3; dd++) if (xyz_dims[dd] == d) g_xyz_ranges[d] = latt[d];
        }
    }
    void SetSliceDimensions()
    {
        Coordinate sd;
        for (int d = 0; d < Nd; d++) {
            if (g_xyz_ranges[d] > 1 || loop_dims[d] || sum_dims[d]) continue;
            sd.push_back(d);
        }
        slice_dims = sd;
        std::cout << " Slice dimensions: " << slice_dims << std::endl;
    }
    void FillImageData(int loop_idx)
    {
        Coordinate loop_coor;
        Lexicographic::CoorFromIndex(loop_coor, loop_idx, loop_ranges);
        Coordinate xyz_coor(3), g_xyz_coor(Nd), sum_coor(Nd);
        for (uint64_t xyz = 0; xyz < xyz_vol; xyz++) {
            Lexicographic::CoorFromIndex(xyz_coor,   xyz, xyz_ranges);
            Lexicographic::CoorFromIndex(g_xyz_coor, xyz, g_xyz_ranges);
            RealD val = 0.0;
            for (uint64_t si = 0; si < sum_vol; si++) {
                Lexicographic::CoorFromIndex(sum_coor, si, sum_ranges);
                Coordinate site(Nd);
                for (int d = 0; d < Nd; d++)
                    site[d] = (sum_coor[d] + loop_coor[d] + g_xyz_coor[d] + Slice[d]) % latt[d];
                val += real(peekSite(*grid_data, site));
            }
            imageData->SetScalarComponentFromDouble(xyz_coor[0], xyz_coor[1], xyz_coor[2], 0, val);
        }
        imageData->Modified();
    }
    // Reload if needed, fill image, update label, render — no timer advance.
    void ForceRender(vtkRenderWindowInteractor* iren)
    {
        // The frame counter selects both the file (one file per loop_vol
        // frames, offset by ffile, wrapping round the list) and the loop index
        // within that file.
        const int frames_per_file = (int)loop_vol;
        const int file     = ((TimerCount / frames_per_file) + ffile) % (int)files.size();
        const int loop_idx = TimerCount % frames_per_file;

        // Only hit the disk when the selected file actually changed.
        const bool needs_reload = (file != old_file);
        if (needs_reload) {
            std::cout << "[render] Loading " << files[file] << std::endl;
            readFile(*grid_data, files[file]);
            old_file = file;
        }

        FillImageData(loop_idx);
        UpdateLabel(file, loop_idx);
        iren->GetRenderWindow()->Render();
    }
    virtual void Execute(vtkObject* caller, unsigned long eventId, void* callData)
    {
        // ── Keyboard ─────────────────────────────────────────────────────────
        // Up/Down step the last slice dimension, Left/Right the first one;
        // the "greater"/"less" key symbols step the file offset. Every key
        // press ends with a re-render that does not advance the frame counter.
        if (eventId == vtkCommand::KeyPressEvent) {
            auto* interactor = static_cast<vtkRenderWindowInteractor*>(caller);
            const std::string pressed = interactor->GetKeySym();
            if (slice_dims.size() > 0) {
                const int horz = slice_dims[0];
                const int vert = slice_dims[slice_dims.size() - 1];
                if      (pressed == "Up")    Slice[vert] = (Slice[vert] + 1) % latt[vert];
                else if (pressed == "Down")  Slice[vert] = (Slice[vert] + latt[vert] - 1) % latt[vert];
                else if (pressed == "Right") Slice[horz] = (Slice[horz] + 1) % latt[horz];
                else if (pressed == "Left")  Slice[horz] = (Slice[horz] + latt[horz] - 1) % latt[horz];
            }
            if      (pressed == "greater") ffile = (ffile + 1) % (int)files.size();
            else if (pressed == "less")    ffile = (ffile - 1 + (int)files.size()) % (int)files.size();
            ForceRender(interactor);
            return;
        }

        // ── Animation timer ──────────────────────────────────────────────────
        if (eventId != vtkCommand::TimerEvent) return;
        // A negative timerId means no animation timer is active: ignore the event.
        if (timerId < 0) return;
        // callData points at the id of the timer that fired; react only to ours.
        const int fired = *(reinterpret_cast<int*>(callData));
        if (fired != timerId) return;

        const int frames_per_file = (int)loop_vol;
        const int file = ((TimerCount / frames_per_file) + ffile) % (int)files.size();
        if (file != old_file) {
            readFile(*grid_data, files[file]);
            old_file = file;
        }
        const int loop_idx = TimerCount % frames_per_file;
        FillImageData(loop_idx);
        UpdateLabel(file, loop_idx);

        auto* interactor = dynamic_cast<vtkRenderWindowInteractor*>(caller);
        interactor->GetRenderWindow()->Render();

        // Advance to the next frame and stop the timer after maxCount frames.
        ++TimerCount;
        if (TimerCount >= maxCount && timerId > -1) {
            interactor->DestroyTimer(timerId);
        }
    }
 private:
    int TimerCount;
    // Refresh the on-screen label with the tau value of the current file and
    // the current slice position.
    //
    // file     : index into files of the snapshot being displayed.
    // loop_idx : loop index of the frame; kept for interface compatibility,
    //            the label text does not depend on it.
    void UpdateLabel(int file, int loop_idx)
    {
        (void)loop_idx;  // not shown in the label; the unused coordinate decode was removed
        // Extract tau value from filename (last '_'-delimited field). With no
        // '_' in the path, rfind() returns npos and npos + 1 wraps to 0, so
        // the whole path is used.
        const std::string& path = files[file];
        const std::string tau = path.substr(path.rfind('_') + 1);
        std::stringstream ss;
        ss << "tau = " << tau << "\nSlice " << Slice;
        text->SetInput(ss.str().c_str());
    }
 };
 // ─── Typewriter caption state ─────────────────────────────────────────────────
 // Each caption is stored in full (already word-wrapped) together with the
 // number of leading characters currently revealed on screen; the poll-timer
 // handler reveals more of it over time.
 // User caption (gold, upper line) — cleared on new user: instruction
 static std::string g_userCaptionFull;
 static size_t      g_userCaptionPos  = 0;
 // Claude caption (light blue, lower line) — cleared when user: arrives
 static std::string g_claudeCaptionFull;
 static size_t      g_claudeCaptionPos = 0;
 // Poll ticks counted since the last character was revealed; reset to 0
 // whenever a caption command arrives.
 static int         g_captionTick = 0;
 static const int   g_captionRate = 1;   // ticks per character (1 x 100ms = 10 chars/sec)
 // Greedy word wrap: splits s on whitespace and re-joins the words with single
 // spaces, starting a new line whenever appending the next word would push the
 // current line past maxCols characters. A single word longer than maxCols is
 // kept whole on its own line. Runs of whitespace collapse to one separator.
 static std::string WrapText(const std::string& s, int maxCols = 45) {
    std::istringstream tokens(s);
    std::string wrapped;
    std::string current;
    for (std::string tok; tokens >> tok; ) {
        if (current.empty()) {
            // First word of a line is always accepted, whatever its length.
            current = tok;
            continue;
        }
        const int widened = (int)(current.size() + 1 + tok.size());
        if (widened > maxCols) {
            wrapped += current;
            wrapped += "\n";
            current = tok;
        } else {
            current += " ";
            current += tok;
        }
    }
    // Flush the last (possibly empty) line; no trailing newline is added.
    wrapped += current;
    return wrapped;
 }
 // ─── CommandHandler ───────────────────────────────────────────────────────────
 // Minimal parser for the wire protocol. Natural-language interpretation
 // is handled externally (by Claude) before commands reach this program.
 class CommandHandler : public vtkCallbackCommand
 {
 public:
    static CommandHandler* New() { return new CommandHandler; }
    FrameUpdater*               fu;
    vtkCamera*                  camera;
    vtkRenderer*                renderer;
    vtkRenderWindowInteractor*  iren;
    vtkTextActor*               captionActor     = nullptr;  // claude (light blue, lower)
    vtkTextActor*               userCaptionActor = nullptr;  // user  (gold,       upper)
    int    pollTimerId     = -1;
    double isosurfaceLevel = 1.0;   // in RMS units
    void CaptureFrame() {
        if (g_recording && g_mpegWriter && g_imageFilter) {
            GenerateAudioFrame();
            g_imageFilter->Modified();
            g_mpegWriter->Write();
        }
    }
    void SetIsosurface(double level)
    {
        isosurfaceLevel = std::max(0.0, std::min(10.0, level));
        fu->posExtractor->SetValue(0,  isosurfaceLevel * fu->rms);
        fu->negExtractor->SetValue(0, -isosurfaceLevel * fu->rms);
        fu->posExtractor->Modified();
        fu->negExtractor->Modified();
    }
    void PrintStatus()
    {
        std::cout << "[status] file       = " << fu->ffile
                  << " : " << fu->files[fu->ffile] << "\n"
                  << "[status] Slice      = " << fu->Slice << "\n"
                  << "[status] latt       = " << fu->latt << "\n"
                  << "[status] isosurface = " << isosurfaceLevel
                  << " x RMS  (" << isosurfaceLevel * fu->rms << ")" << std::endl;
    }
    // Execute one line of the wire protocol.
    void RunLine(const std::string& line)
    {
        std::istringstream iss(line);
        std::string verb;
        if (!(iss >> verb)) return;
        // ── slice <dim> <N|+N|-N> ────────────────────────────────────────────
        if (verb == "slice") {
            int dim; std::string valstr;
            if (!(iss >> dim >> valstr)) { std::cout << "[cmd] slice: expected dim value" << std::endl; return; }
            if (dim < 0 || dim >= fu->Nd) { std::cout << "[cmd] slice: dim out of range" << std::endl; return; }
            int n = (int)fu->latt[dim];
            int newval;
            if (!valstr.empty() && (valstr[0] == '+' || valstr[0] == '-')) {
                int delta = std::stoi(valstr);
                newval = ((fu->Slice[dim] + delta) % n + n) % n;
            } else {
                newval = ((std::stoi(valstr) % n) + n) % n;
            }
            fu->Slice[dim] = newval;
            fu->ForceRender(iren);
            PrintStatus();
        }
        // ── zoom <factor> ────────────────────────────────────────────────────
        else if (verb == "zoom") {
            double factor;
            if (!(iss >> factor)) { std::cout << "[cmd] zoom: expected factor" << std::endl; return; }
            camera->Dolly(factor);
            renderer->ResetCameraClippingRange();
            iren->GetRenderWindow()->Render();
        }
        // ── azimuth <degrees> ────────────────────────────────────────────────
        else if (verb == "azimuth") {
            double deg;
            if (!(iss >> deg)) { std::cout << "[cmd] azimuth: expected degrees" << std::endl; return; }
            camera->Azimuth(deg);
            renderer->ResetCameraClippingRange();
            iren->GetRenderWindow()->Render();
        }
        // ── elevation <degrees> ──────────────────────────────────────────────
        else if (verb == "elevation") {
            double deg;
            if (!(iss >> deg)) { std::cout << "[cmd] elevation: expected degrees" << std::endl; return; }
            camera->Elevation(deg);
            renderer->ResetCameraClippingRange();
            iren->GetRenderWindow()->Render();
        }
        // ── spin <degrees_per_tick> ──────────────────────────────────────────
        // Applies azimuth rotation on every 100ms poll tick. spin 0 stops.
        else if (verb == "spin") {
            double deg;
            if (!(iss >> deg)) { std::cout << "[cmd] spin: expected degrees" << std::endl; return; }
            g_spinDeg = deg;
            std::cout << "[cmd] spin rate = " << g_spinDeg << " deg/tick" << std::endl;
        }
        // ── caption user: <text> / caption claude: <text> / caption ─────────
        // user:   clears both lines, types user text (gold) on upper line.
        // claude: keeps user line, types response (light blue) on lower line.
        // caption alone clears both immediately.
        else if (verb == "caption") {
            std::string rest;
            std::getline(iss, rest);
            if (!rest.empty() && rest[0] == ' ') rest = rest.substr(1);
            if (rest.empty()) {
                g_userCaptionFull = ""; g_userCaptionPos = 0;
                g_claudeCaptionFull = ""; g_claudeCaptionPos = 0;
                g_captionTick = 0;
                if (userCaptionActor) userCaptionActor->SetInput("");
                if (captionActor)     captionActor->SetInput("");
                iren->GetRenderWindow()->Render(); CaptureFrame();
            } else if (rest.substr(0,5) == "user:") {
                // New instruction: clear both, start typing user text
                g_claudeCaptionFull = ""; g_claudeCaptionPos = 0;
                g_userCaptionFull = WrapText(rest); g_userCaptionPos = 0;
                g_captionTick = 0;
                if (userCaptionActor) userCaptionActor->SetInput("");
                if (captionActor)     captionActor->SetInput("");
                iren->GetRenderWindow()->Render(); CaptureFrame();
            } else {
                // claude: or unlabelled — keep user line, type below
                g_claudeCaptionFull = WrapText(rest); g_claudeCaptionPos = 0;
                g_captionTick = 0;
            }
        }
        // ── record start <filename> / record stop ────────────────────────────
        else if (verb == "record") {
 #ifdef MPEG
            std::string sub;
            if (!(iss >> sub)) { std::cout << "[cmd] record: expected start <file> or stop" << std::endl; return; }
            if (sub == "stop") {
                if (g_recording && g_mpegWriter) {
                    g_mpegWriter->End();
                    g_mpegWriter->Delete(); g_mpegWriter = nullptr;
                    g_imageFilter->Delete(); g_imageFilter = nullptr;
                    g_recording = false;
                    std::cout << "[cmd] recording stopped: " << g_recordingFile << std::endl;
                    // Write WAV and mux to MP4
                    std::string wavFile = g_recordingFile + ".wav";
                    WriteWAV(wavFile, g_audioBuffer, AUDIO_RATE);
                    g_audioBuffer.clear();
                    std::string mp4File = g_recordingFile;
                    if (mp4File.size() > 4 && mp4File.substr(mp4File.size()-4) == ".avi")
                        mp4File = mp4File.substr(0, mp4File.size()-4) + ".mp4";
                    else
                        mp4File += ".mp4";
                    std::string cmd = "ffmpeg -y -i \"" + g_recordingFile + "\" -i \"" + wavFile +
                                      "\" -c:v copy -c:a aac -shortest \"" + mp4File + "\" 2>/dev/null";
                    int ret = system(cmd.c_str());
                    if (ret == 0) {
                        std::cout << "[cmd] muxed output: " << mp4File << std::endl;
                        unlink(wavFile.c_str());  // clean up intermediate WAV
                    } else {
                        std::cout << "[cmd] mux failed (ffmpeg not found?). WAV kept: " << wavFile << std::endl;
                    }
                } else {
                    std::cout << "[cmd] not recording" << std::endl;
                }
            } else if (sub == "start") {
                std::string fname = "recording.avi";
                iss >> fname;
                if (g_recording) { std::cout << "[cmd] already recording" << std::endl; return; }
                g_recordingFile    = fname;
                g_audioBuffer.clear();
                g_samplesPerFrame  = AUDIO_RATE / std::max(1, g_framerate);
                g_beepRemaining    = 0;
                g_beepPhase        = 0.0;
                g_imageFilter = vtkWindowToImageFilter::New();
                g_imageFilter->SetInput(iren->GetRenderWindow());
                g_imageFilter->SetInputBufferTypeToRGB();
                g_mpegWriter = vtkFFMPEGWriter::New();
                g_mpegWriter->SetFileName(fname.c_str());
                g_mpegWriter->SetRate(g_framerate);
                g_mpegWriter->SetInputConnection(g_imageFilter->GetOutputPort());
                g_mpegWriter->Start();
                g_recording = true;
                std::cout << "[cmd] recording started: " << fname << std::endl;
            } else {
                std::cout << "[cmd] record: unknown subcommand '" << sub << "'" << std::endl;
            }
 #else
            std::cout << "[cmd] record: MPEG support not compiled" << std::endl;
 #endif
        }
        // ── iso <value> ──────────────────────────────────────────────────────
        else if (verb == "iso") {
            double val;
            if (!(iss >> val)) { std::cout << "[cmd] iso: expected value" << std::endl; return; }
            SetIsosurface(val);
            fu->ForceRender(iren);
            PrintStatus();
        }
        // ── file <index|+N|-N> ───────────────────────────────────────────────
        else if (verb == "file") {
            std::string valstr;
            if (!(iss >> valstr)) { std::cout << "[cmd] file: expected index" << std::endl; return; }
            int n = (int)fu->files.size();
            int newval;
            if (!valstr.empty() && (valstr[0] == '+' || valstr[0] == '-')) {
                int delta = std::stoi(valstr);
                newval = ((fu->ffile + delta) % n + n) % n;
            } else {
                newval = ((std::stoi(valstr) % n) + n) % n;
            }
            fu->ffile    = newval;
            fu->old_file = -1;
            fu->ForceRender(iren);
            PrintStatus();
        }
        // ── render ───────────────────────────────────────────────────────────
        else if (verb == "render") {
            fu->ForceRender(iren);
        }
        // ── status ───────────────────────────────────────────────────────────
        else if (verb == "status") {
            PrintStatus();
        }
        // ── quit ─────────────────────────────────────────────────────────────
        else if (verb == "quit" || verb == "exit") {
            g_running = false;
            iren->TerminateApp();
        }
        else {
            std::cout << "[cmd] Unknown command: '" << line << "'" << std::endl;
        }
    }
    virtual void Execute(vtkObject*, unsigned long eventId, void* callData)
    {
        if (eventId != vtkCommand::TimerEvent) return;
        if (pollTimerId >= 0) {
            int tid = *(reinterpret_cast<int*>(callData));
            if (tid != pollTimerId) return;
        }
        std::vector<std::string> pending;
        {
            std::lock_guard<std::mutex> lk(g_cmdMutex);
            while (!g_cmdQueue.empty()) { pending.push_back(g_cmdQueue.front()); g_cmdQueue.pop(); }
        }
        for (const auto& line : pending) {
            std::cout << "[cmd] >> " << line << std::endl;
            RunLine(line);
            // CaptureFrame() called inside RunLine for caption; for other
            // rendering commands capture here (duplicate Modified() is harmless)
            CaptureFrame();
        }
        // Typewriter: advance one character every g_captionRate ticks.
        // User line types first; claude line starts once user line is complete.
        bool typing = (g_userCaptionPos < g_userCaptionFull.size()) ||
                      (g_claudeCaptionPos < g_claudeCaptionFull.size());
        if (typing) {
            if (++g_captionTick >= g_captionRate) {
                g_captionTick = 0;
                bool rendered = false;
                if (g_userCaptionPos < g_userCaptionFull.size()) {
                    ++g_userCaptionPos;
                    if (userCaptionActor)
                        userCaptionActor->SetInput(g_userCaptionFull.substr(0, g_userCaptionPos).c_str());
                    PlayBeepAudible();
                    TriggerBeep();
                    rendered = true;
                } else if (g_claudeCaptionPos < g_claudeCaptionFull.size()) {
                    ++g_claudeCaptionPos;
                    if (captionActor)
                        captionActor->SetInput(g_claudeCaptionFull.substr(0, g_claudeCaptionPos).c_str());
                    rendered = true;
                }
                if (rendered) {
                    iren->GetRenderWindow()->Render();
                    CaptureFrame();
                }
            }
        }
        // Apply continuous spin (if active) at poll-timer rate
        if (g_spinDeg != 0.0) {
            camera->Azimuth(g_spinDeg);
            renderer->ResetCameraClippingRange();
            iren->GetRenderWindow()->Render();
            CaptureFrame();
        }
    }
 };
 // ─── main ─────────────────────────────────────────────────────────────────────
 int main(int argc, char* argv[])
 {
    using namespace Grid;
    Grid_init(&argc, &argv);
    GridLogLayout();
    auto latt_size   = GridDefaultLatt();
    auto simd_layout = GridDefaultSimd(latt_size.size(), vComplex::Nsimd());
    auto mpi_layout  = GridDefaultMpi();
    GridCartesian Grid(latt_size, simd_layout, mpi_layout);
    double default_contour = 1.0;
    std::string arg;
    std::vector<std::string> file_list({"file1","file2","file3","file4",
                                        "file5","file6","file7","file8"});
    if (GridCmdOptionExists(argv, argv+argc, "--files")) {
        arg = GridCmdOptionPayload(argv, argv+argc, "--files");
        GridCmdOptionCSL(arg, file_list);
    }
 #ifdef MPEG
    if (GridCmdOptionExists(argv, argv+argc, "--mpeg")) g_mpeg = 1;
 #endif
    if (GridCmdOptionExists(argv, argv+argc, "--fps")) {
        arg = GridCmdOptionPayload(argv, argv+argc, "--fps");
        GridCmdOptionInt(arg, g_framerate);
    }
    if (GridCmdOptionExists(argv, argv+argc, "--isosurface")) {
        arg = GridCmdOptionPayload(argv, argv+argc, "--isosurface");
        GridCmdOptionFloat(arg, default_contour);
    }
    int NoTime = 0, Nd = Grid.Nd();
    Coordinate Slice(Nd,0), SumDims(Nd,0), LoopDims(Nd,0), XYZDims({0,1,2});
    if (GridCmdOptionExists(argv, argv+argc, "--slice")) {
        arg = GridCmdOptionPayload(argv, argv+argc, "--slice");
        GridCmdOptionIntVector(arg, Slice);
    }
    if (GridCmdOptionExists(argv, argv+argc, "--sum")) {
        arg = GridCmdOptionPayload(argv, argv+argc, "--sum");
        GridCmdOptionIntVector(arg, SumDims);
    }
    if (GridCmdOptionExists(argv, argv+argc, "--loop")) {
        arg = GridCmdOptionPayload(argv, argv+argc, "--loop");
        GridCmdOptionIntVector(arg, LoopDims);
    }
    if (GridCmdOptionExists(argv, argv+argc, "--xyz")) {
        arg = GridCmdOptionPayload(argv, argv+argc, "--xyz");
        GridCmdOptionIntVector(arg, XYZDims);
    }
    if (GridCmdOptionExists(argv, argv+argc, "--notime")) { NoTime = 1; }
    std::thread cmdThread(CommandReaderThread);
    cmdThread.detach();
    // ── VTK scene ────────────────────────────────────────────────────────────
    vtkNew<vtkNamedColors> colors;
    std::array<unsigned char,4> posColor{{240,184,160,255}}; colors->SetColor("posColor", posColor.data());
    std::array<unsigned char,4> bkg{{51,77,102,255}};        colors->SetColor("BkgColor", bkg.data());
    vtkNew<vtkRenderWindow>           renWin;
    vtkNew<vtkRenderWindowInteractor> iren;
    iren->SetRenderWindow(renWin);
    int frameCount = (int)file_list.size();
    for (int d = 0; d < Nd; d++) if (LoopDims[d]) frameCount *= latt_size[d];
    vtkNew<vtkCamera> aCamera;
    aCamera->SetViewUp(0,0,-1); aCamera->SetPosition(0,-1000,0); aCamera->SetFocalPoint(0,0,0);
    aCamera->ComputeViewPlaneNormal(); aCamera->Azimuth(30.0); aCamera->Elevation(30.0);
    vtkNew<vtkRenderer> aRenderer;
    renWin->AddRenderer(aRenderer);
    double nrm, rms, contour;
    { LatticeComplexD data(&Grid); readFile(data, file_list[0]); nrm = norm2(data); }
    rms     = std::sqrt(nrm / Grid.gSites());
    contour = default_contour * rms;
    vtkNew<vtkImageData> imageData;
    imageData->SetDimensions(latt_size[XYZDims[0]], latt_size[XYZDims[1]], latt_size[XYZDims[2]]);
    imageData->AllocateScalars(VTK_DOUBLE, 1);
    for (int xx=0;xx<latt_size[XYZDims[0]];xx++)
    for (int yy=0;yy<latt_size[XYZDims[1]];yy++)
    for (int zz=0;zz<latt_size[XYZDims[2]];zz++)
        imageData->SetScalarComponentFromDouble(xx,yy,zz,0,0.0);
    vtkNew<isosurface> posExtractor; posExtractor->SetInputData(imageData); posExtractor->SetValue(0,  contour);
    vtkNew<vtkStripper> posStripper; posStripper->SetInputConnection(posExtractor->GetOutputPort());
    vtkNew<vtkPolyDataMapper> posMapper; posMapper->SetInputConnection(posStripper->GetOutputPort()); posMapper->ScalarVisibilityOff();
    vtkNew<vtkActor> pos; pos->SetMapper(posMapper);
    pos->GetProperty()->SetDiffuseColor(colors->GetColor3d("posColor").GetData());
    pos->GetProperty()->SetSpecular(0.3); pos->GetProperty()->SetSpecularPower(20); pos->GetProperty()->SetOpacity(0.5);
    vtkNew<isosurface> negExtractor; negExtractor->SetInputData(imageData); negExtractor->SetValue(0, -contour);
    vtkNew<vtkStripper> negStripper; negStripper->SetInputConnection(negExtractor->GetOutputPort());
    vtkNew<vtkPolyDataMapper> negMapper; negMapper->SetInputConnection(negStripper->GetOutputPort()); negMapper->ScalarVisibilityOff();
    vtkNew<vtkActor> neg; neg->SetMapper(negMapper);
    neg->GetProperty()->SetDiffuseColor(colors->GetColor3d("Ivory").GetData());
    vtkNew<vtkOutlineFilter> outlineData; outlineData->SetInputData(imageData);
    vtkNew<vtkPolyDataMapper> mapOutline; mapOutline->SetInputConnection(outlineData->GetOutputPort());
    vtkNew<vtkActor> outline; outline->SetMapper(mapOutline);
    outline->GetProperty()->SetColor(colors->GetColor3d("Black").GetData());
    vtkNew<vtkTextActor> TextT;
    TextT->SetInput("Initialising...");
    TextT->SetPosition(10, 920);
    TextT->GetTextProperty()->SetFontSize(24);
    TextT->GetTextProperty()->SetColor(colors->GetColor3d("Gold").GetData());
    // Claude response caption (light blue, lower line)
    vtkNew<vtkTextActor> CaptionT;
    CaptionT->SetInput("");
    CaptionT->SetPosition(512, 38);
    CaptionT->GetTextProperty()->SetFontSize(32);
    CaptionT->GetTextProperty()->SetColor(0.6, 0.9, 1.0);
    CaptionT->GetTextProperty()->SetJustificationToCentered();
    CaptionT->GetTextProperty()->SetBackgroundColor(0.0, 0.0, 0.0);
    CaptionT->GetTextProperty()->SetBackgroundOpacity(0.6);
    CaptionT->GetTextProperty()->BoldOn();
    // User instruction caption (gold, upper line)
    vtkNew<vtkTextActor> UserCaptionT;
    UserCaptionT->SetInput("");
    UserCaptionT->SetPosition(512, 82);
    UserCaptionT->GetTextProperty()->SetFontSize(32);
    UserCaptionT->GetTextProperty()->SetColor(1.0, 0.85, 0.0);
    UserCaptionT->GetTextProperty()->SetJustificationToCentered();
    UserCaptionT->GetTextProperty()->SetBackgroundColor(0.0, 0.0, 0.0);
    UserCaptionT->GetTextProperty()->SetBackgroundOpacity(0.6);
    UserCaptionT->GetTextProperty()->BoldOn();
    aRenderer->AddActor(TextT); aRenderer->AddActor(CaptionT); aRenderer->AddActor(UserCaptionT); aRenderer->AddActor(outline);
    aRenderer->AddActor(pos);   aRenderer->AddActor(neg);
    vtkNew<FrameUpdater> fu;
    fu->SetGrid(&Grid); fu->SetFiles(file_list); fu->SetSlice(Slice);
    fu->SetSumDimensions(SumDims); fu->SetLoopDimensions(LoopDims);
    fu->SetDisplayDimensions(XYZDims); fu->SetSliceDimensions();
    fu->imageData = imageData; fu->text = TextT; fu->maxCount = frameCount;
    fu->posExtractor = posExtractor; fu->negExtractor = negExtractor; fu->rms = rms;
    iren->AddObserver(vtkCommand::TimerEvent,    fu);
    iren->AddObserver(vtkCommand::KeyPressEvent, fu);
    aRenderer->SetActiveCamera(aCamera); aRenderer->ResetCamera();
    aRenderer->SetBackground(colors->GetColor3d("BkgColor").GetData());
    aCamera->Dolly(1.0); aRenderer->SetViewport(0.0,0.0,1.0,1.0);
    aRenderer->ResetCameraClippingRange();
    renWin->SetSize(1024,1024); renWin->SetWindowName("ControlledFieldDensity");
    renWin->Render(); iren->Initialize();
    // CommandHandler on fast poll timer
    vtkNew<CommandHandler> cmdHandler;
    cmdHandler->fu             = fu;
    cmdHandler->camera         = aCamera;
    cmdHandler->renderer       = aRenderer;
    cmdHandler->iren           = iren;
    cmdHandler->captionActor     = CaptionT;
    cmdHandler->userCaptionActor = UserCaptionT;
    cmdHandler->isosurfaceLevel = default_contour;
    iren->AddObserver(vtkCommand::TimerEvent, cmdHandler);
    cmdHandler->pollTimerId = iren->CreateRepeatingTimer(100);
    if (g_mpeg == 0 && NoTime == 0) {
        fu->timerId = iren->CreateRepeatingTimer(10000 / g_framerate);
    }
    if (g_mpeg) {
 #ifdef MPEG
        vtkWindowToImageFilter* imageFilter = vtkWindowToImageFilter::New();
        imageFilter->SetInput(renWin); imageFilter->SetInputBufferTypeToRGB();
        vtkFFMPEGWriter* writer = vtkFFMPEGWriter::New();
        writer->SetFileName("movie.avi"); writer->SetRate(g_framerate);
        writer->SetInputConnection(imageFilter->GetOutputPort()); writer->Start();
        for (int i = 0; i < fu->maxCount; i++) {
            fu->Execute(iren, vtkCommand::TimerEvent, &fu->timerId);
            imageFilter->Modified(); writer->Write();
        }
        writer->End(); writer->Delete(); imageFilter->Delete();
 #else
        assert(-1 && "MPEG support not compiled");
 #endif
    } else {
        iren->Start();
    }
    g_running = false;
    Grid_finalize();
    return EXIT_SUCCESS;
 }
@@ -0,0 +1,633 @@
 // ForceAnalysis.cxx
 //
 // Reads a sequence of force snapshot files (LatticeComplexD, real part = force magnitude)
 // and produces two outputs:
 //
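 //   1. Tab-separated timeseries, written to stderr (std::cerr):
 //        idx  Gauge_lat_rms  Gauge_lat_hot  Gauge_smr_rms  ...
 //      where _rms is the lattice RMS of the real part and _hot is the value
 //      at --hotsite (the _hot columns appear only when --hotsite is given).
 //      A snapshot file that cannot be opened is reported as "-".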
 //
 //   2. PNG images (one per force component per snapshot) rendered via VTK
 //      as isosurfaces of the force density, using the same pipeline as
 //      Visualise5D.  Images are written to --pngdir/<label>_<idx>.png.
 //      These can be read back by Claude to interpret spatial structure.
 //
 // Usage:
 //   ForceAnalysis --grid 32.32.32.32 --mpi 1.1.1.1
 //                 --snapdir /path/to/snapshots
 //                 --first 0 --last 1920 --step 10
 //                 --hotsite x.y.z.t
 //                 --pngdir /path/to/output/pngs
 //                 --isosurface 1.0        (contour in units of field RMS)
 //                 --fixediso 0.05         (fixed absolute contour, overrides --isosurface)
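 //                 --slicedim 3            (index of the dimension to fix for 3D display, default: 3 = t)
 //                 --sliceval 2            (value of that dimension, default: 0)
 //                 --taustart 0.0 --taustep 0.01
 //                                         (label frames with tau = taustart + idx*taustep instead of idx)
 //                 --heatmap --slicedim2 2 --sliceval2 0
 //                                         (2D colour map: fix a second dimension as well)
 //                 --heatscale 0.07        (fixed symmetric colour range for --heatmap;
 //                                          default: 20 x RMS of the first field rendered)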
 //
 // Dimension order on the 32^4 lattice: x=0 y=1 z=2 t=3
 #include <vtkActor.h>
 #include <vtkActor2D.h>
 #include <vtkCamera.h>
 #include <vtkImageActor.h>
 #include <vtkImageMapper3D.h>
 #include <vtkImageData.h>
 #include <vtkImageMapToColors.h>
 #include <vtkLookupTable.h>
 #include <vtkNamedColors.h>
 #include <vtkNew.h>
 #include <vtkOutlineFilter.h>
 #include <vtkPolyData.h>
 #include <vtkPolyDataMapper.h>
 #include <vtkPolyDataMapper2D.h>
 #include <vtkProperty.h>
 #include <vtkProperty2D.h>
 #include <vtkPoints.h>
 #include <vtkCellArray.h>
 #include <vtkRenderWindow.h>
 #include <vtkRenderWindowInteractor.h>
 #include <vtkRenderer.h>
 #include <vtkStripper.h>
 #include <vtkCallbackCommand.h>
 #include <vtkTextActor.h>
 #include <vtkTextProperty.h>
 #include <vtkWindowToImageFilter.h>
 #include <vtkPNGWriter.h>
 #define USE_FLYING_EDGES
 #ifdef USE_FLYING_EDGES
 #include <vtkFlyingEdges3D.h>
 typedef vtkFlyingEdges3D isosurface;
 #else
 #include <vtkMarchingCubes.h>
 typedef vtkMarchingCubes isosurface;
 #endif
 #include <Grid/Grid.h>
 #include <iostream>
 #include <fstream>
 #include <sstream>
 #include <iomanip>
 #include <string>
 #include <vector>
 #include <cmath>
 #include <memory>
 #include <sys/stat.h>
 using namespace Grid;
 // ─── I/O ─────────────────────────────────────────────────────────────────────
 template <class T>
 bool tryReadFile(T& out, const std::string& fname)
 {
    std::ifstream test(fname);
    if (!test.good()) return false;
    test.close();
    emptyUserRecord record;
    ScidacReader RD;
    RD.open(fname);
    RD.readScidacFieldRecord(out, record);
    RD.close();
    return true;
 }
 // ─── Fill a 3D vtkImageData slice from a 4D lattice field ────────────────────
 // Copies the real part of `field` on the hyperplane site[slice_dim] == sliceval
 // into `img`.  The three remaining lattice dimensions, taken in increasing
 // order, become the image x, y and z axes.  Nothing is summed: exactly one
 // value of the sliced dimension is sampled.
 void fillImageData(vtkImageData* img,
                   LatticeComplexD& field,
                   const Coordinate& latt_size,
                   int slice_dim, int sliceval)
 {
    // Lattice dimensions that stay visible once slice_dim is pinned.
    std::vector<int> shown;
    for (int mu = 0; mu < 4; ++mu) {
        if (mu == slice_dim) continue;
        shown.push_back(mu);
    }
    const int dimX = shown[0];
    const int dimY = shown[1];
    const int dimZ = shown[2];
    const int extX = latt_size[dimX];
    const int extY = latt_size[dimY];
    const int extZ = latt_size[dimZ];
    // The pinned coordinate never changes, so set it once up front.
    Coordinate coor(4);
    coor[slice_dim] = sliceval;
    for (int i = 0; i < extX; ++i) {
        coor[dimX] = i;
        for (int j = 0; j < extY; ++j) {
            coor[dimY] = j;
            for (int k = 0; k < extZ; ++k) {
                coor[dimZ] = k;
                RealD sample = real(peekSite(field, coor));
                img->SetScalarComponentFromDouble(i, j, k, 0, sample);
            }
        }
    }
    // Tell downstream filters that the scalars changed.
    img->Modified();
 }
 // ─── 2D heatmap: persistent context ───────────────────────────────────────────
 // Owns the VTK pipeline that renders a fixed (dim1=v1, dim2=v2) slice of the
 // 4D force field as a diverging blue→white→red colour map.  The colour scale
 // is fixed and symmetric ([-scale, +scale]) so brightness directly encodes
 // force magnitude across all frames.
 // A white cross-hair marks the hotsite projection onto the slice when one is
 // supplied to init().
 struct HeatmapCtx {
    // image pipeline
    vtkNew<vtkImageData>         img;
    vtkNew<vtkLookupTable>       lut;
    vtkNew<vtkImageMapToColors>  colorMap;
    vtkNew<vtkImageActor>        imgActor;
    // colour scale legend (text, avoids needing RenderingAnnotation module)
    vtkNew<vtkTextActor>         cbar;
    // hotsite cross-hair (2D overlay actors)
    vtkNew<vtkPolyData>          crossPD;
    vtkNew<vtkPoints>            crossPts;
    vtkNew<vtkCellArray>         crossLines;
    vtkNew<vtkActor2D>           crossActor;
    // title
    vtkNew<vtkTextActor>         titleAct;
    // renderer / window
    vtkNew<vtkRenderer>          ren;
    vtkNew<vtkRenderWindow>      renWin;
    vtkNew<vtkWindowToImageFilter> w2i;
    vtkNew<vtkPNGWriter>         writer;
    int Nx = 0, Ny = 0;           // display dimensions of the slice
    double scale = 0.07;          // colour range: [-scale, +scale]
    int hotX = -1, hotY = -1;     // hotsite projection onto (Nx,Ny) plane
    // pixel coords of the image origin in the render window
    int imgOffX = 60, imgOffY = 40;
    int imgW = 0, imgH = 0;       // rendered pixel size of each lattice cell

    // Build the whole pipeline once.
    //   nx, ny : number of lattice sites along the two displayed dimensions
    //   sc     : half-width of the symmetric colour range
    //   hx, hy : hotsite coordinates in the displayed plane; pass a negative
    //            value for either to draw no cross-hair
    void init(int nx, int ny, double sc, int hx, int hy)
    {
        Nx = nx; Ny = ny; scale = sc;
        hotX = hx; hotY = hy;
        const int WIN_W = 900, WIN_H = 700;
        // Make cells square and as large as possible within the central area.
        // Clamp to one pixel so a very large lattice cannot yield zero spacing.
        int cellW = (WIN_W - 160) / Nx;
        int cellH = (WIN_H - 120) / Ny;
        imgW = std::max(1, std::min(cellW, cellH));
        imgH = imgW;
        imgOffX = (WIN_W - Nx * imgW) / 2;
        imgOffY = 60;
        // --- Image data (scalar field, one component) ---
        img->SetDimensions(Nx, Ny, 1);
        img->SetSpacing(imgW, imgH, 1);
        img->SetOrigin(imgOffX, imgOffY, 0);
        img->AllocateScalars(VTK_DOUBLE, 1);
        // --- Diverging LUT: blue(-scale) → white(0) → red(+scale) ---
        lut->SetNumberOfTableValues(512);
        lut->SetRange(-scale, scale);
        lut->SetNanColor(0.2, 0.2, 0.2, 1.0);
        for (int i = 0; i < 512; ++i) {
            double t = i / 511.0;   // 0=blue, 0.5=white, 1=red
            double r = (t > 0.5) ? 1.0 : 2.0 * t;
            double g = (t < 0.5) ? 2.0 * t : 2.0 * (1.0 - t);
            double b = (t < 0.5) ? 1.0 : 2.0 * (1.0 - t);
            lut->SetTableValue(i, r, g, b, 1.0);
        }
        lut->Build();
        // --- Colour map pipeline ---
        colorMap->SetInputData(img);
        colorMap->SetLookupTable(lut);
        colorMap->Update();
        imgActor->GetMapper()->SetInputConnection(colorMap->GetOutputPort());
        // --- Colour scale legend (text) ---
        {
            std::ostringstream ss;
            ss << std::scientific << std::setprecision(2)
               << "blue=-" << sc << "  white=0  red=+" << sc;
            cbar->SetInput(ss.str().c_str());
        }
        cbar->GetTextProperty()->SetFontFamilyToCourier();
        cbar->GetTextProperty()->SetFontSize(13);
        cbar->GetTextProperty()->SetColor(0.9, 0.9, 0.9);
        cbar->SetDisplayPosition(10, 10);
        // --- Cross-hair at hotsite (2D display coords) ---
        const bool haveCross = (hotX >= 0 && hotY >= 0);
        if (haveCross) {
            double cx = imgOffX + (hotX + 0.5) * imgW;
            double cy = imgOffY + (hotY + 0.5) * imgH;
            double arm = imgW * 0.8;
            crossPts->InsertNextPoint(cx - arm, cy, 0);
            crossPts->InsertNextPoint(cx + arm, cy, 0);
            crossPts->InsertNextPoint(cx, cy - arm, 0);
            crossPts->InsertNextPoint(cx, cy + arm, 0);
            vtkIdType seg0[2] = {0, 1};
            vtkIdType seg1[2] = {2, 3};
            crossLines->InsertNextCell(2, seg0);
            crossLines->InsertNextCell(2, seg1);
            crossPD->SetPoints(crossPts);
            crossPD->SetLines(crossLines);
            vtkNew<vtkPolyDataMapper2D> crossMap;
            crossMap->SetInputData(crossPD);
            crossActor->SetMapper(crossMap);
            crossActor->GetProperty()->SetColor(1, 1, 1);
            crossActor->GetProperty()->SetLineWidth(2.0);
        }
        // --- Title ---
        titleAct->GetTextProperty()->SetFontFamilyToCourier();
        titleAct->GetTextProperty()->SetFontSize(16);
        titleAct->GetTextProperty()->SetColor(1, 1, 0);
        titleAct->SetDisplayPosition(10, WIN_H - 30);
        // --- Renderer (2D parallel projection so image fills correctly) ---
        ren->SetBackground(0.08, 0.08, 0.12);
        ren->AddActor(imgActor);
        ren->AddActor2D(cbar);
        // Only add the cross-hair when it actually has a mapper: a vtkActor2D
        // without one is expected to report "No mapper set" on every render
        // (NOTE(review): confirm against the VTK version in use).
        if (haveCross) {
            ren->AddActor2D(crossActor);
        }
        ren->AddActor2D(titleAct);
        ren->GetActiveCamera()->ParallelProjectionOn();
        // Set up camera to look straight down at the image plane
        ren->GetActiveCamera()->SetPosition(WIN_W/2.0, WIN_H/2.0, 1000);
        ren->GetActiveCamera()->SetFocalPoint(WIN_W/2.0, WIN_H/2.0, 0);
        ren->GetActiveCamera()->SetViewUp(0, 1, 0);
        ren->GetActiveCamera()->SetParallelScale(WIN_H / 2.0);
        ren->ResetCameraClippingRange();
        renWin->AddRenderer(ren);
        renWin->SetSize(WIN_W, WIN_H);
        renWin->SetOffScreenRendering(1);
        renWin->SetMultiSamples(0);
        w2i->SetInput(renWin);
        w2i->SetInputBufferTypeToRGB();
        w2i->ReadFrontBufferOff();
    }
 };
 // Render one 2D heatmap frame to a PNG using an already-initialised HeatmapCtx.
 //
 // The real part of `field` on the plane site[dim1]==val1, site[dim2]==val2 is
 // copied into ctx.img (the two free dimensions, in increasing order, become
 // image x and y), `title` is shown in the title actor, and the off-screen
 // render is written to `outpath`.
 void renderHeatmap(HeatmapCtx& ctx,
                   LatticeComplexD& field,
                   const Coordinate& latt_size,
                   int dim1, int val1,   // first fixed dimension
                   int dim2, int val2,   // second fixed dimension
                   const std::string& title,
                   const std::string& outpath)
 {
    // The lattice dimensions that are not pinned span the image plane.
    std::vector<int> shown;
    for (int mu = 0; mu < 4; ++mu) {
        const bool pinned = (mu == dim1) || (mu == dim2);
        if (!pinned) shown.push_back(mu);
    }
    const int dimX = shown[0];
    const int dimY = shown[1];
    const int extX = latt_size[dimX];
    const int extY = latt_size[dimY];
    // Pinned coordinates are loop-invariant; set them once.
    Coordinate coor(4);
    coor[dim1] = val1;
    coor[dim2] = val2;
    for (int i = 0; i < extX; ++i) {
        coor[dimX] = i;
        for (int j = 0; j < extY; ++j) {
            coor[dimY] = j;
            RealD sample = real(TensorRemove(peekSite(field, coor)));
            ctx.img->SetScalarComponentFromDouble(i, j, 0, 0, sample);
        }
    }
    // Push the new scalars through the colour-mapping stage.
    ctx.img->Modified();
    ctx.colorMap->Update();
    ctx.titleAct->SetInput(title.c_str());
    // Render off-screen, then grab the frame and write it out.
    ctx.renWin->Render();
    ctx.w2i->Modified();
    ctx.w2i->Update();
    ctx.writer->SetFileName(outpath.c_str());
    ctx.writer->SetInputConnection(ctx.w2i->GetOutputPort());
    ctx.writer->Write();
 }
 // ─── Persistent rendering context (created once, reused every frame) ──────────
 // Avoids Metal GPU context exhaustion on macOS when rendering hundreds of frames.
 struct RenderCtx {
    vtkNew<vtkNamedColors>       colors;
    vtkNew<vtkImageData>         imageData;
    vtkNew<isosurface>           posEx, negEx;
    vtkNew<vtkStripper>          posSt, negSt;
    vtkNew<vtkPolyDataMapper>    posMap, negMap, outMap;
    vtkNew<vtkActor>             posAct, negAct, outAct;
    vtkNew<vtkOutlineFilter>     outF;
    vtkNew<vtkTextActor>         label;
    vtkNew<vtkRenderer>          ren;
    vtkNew<vtkCamera>            cam;
    vtkNew<vtkRenderWindow>      renWin;
    vtkNew<vtkWindowToImageFilter> w2i;
    vtkNew<vtkPNGWriter>         writer;
    void init(int Nx, int Ny, int Nz)
    {
        std::array<unsigned char,4> posColor{{240,184,160,255}};
        colors->SetColor("posColor", posColor.data());
        std::array<unsigned char,4> bkg{{51,77,102,255}};
        colors->SetColor("BkgColor", bkg.data());
        imageData->SetDimensions(Nx, Ny, Nz);
        imageData->AllocateScalars(VTK_DOUBLE, 1);
        posEx->SetInputData(imageData); posEx->SetValue(0,  1.0);
        posSt->SetInputConnection(posEx->GetOutputPort());
        posMap->SetInputConnection(posSt->GetOutputPort());
        posMap->ScalarVisibilityOff();
        posAct->SetMapper(posMap);
        posAct->GetProperty()->SetDiffuseColor(colors->GetColor3d("posColor").GetData());
        posAct->GetProperty()->SetSpecular(0.3);
        posAct->GetProperty()->SetSpecularPower(20);
        posAct->GetProperty()->SetOpacity(0.6);
        negEx->SetInputData(imageData); negEx->SetValue(0, -1.0);
        negSt->SetInputConnection(negEx->GetOutputPort());
        negMap->SetInputConnection(negSt->GetOutputPort());
        negMap->ScalarVisibilityOff();
        negAct->SetMapper(negMap);
        negAct->GetProperty()->SetDiffuseColor(colors->GetColor3d("Ivory").GetData());
        negAct->GetProperty()->SetOpacity(0.6);
        outF->SetInputData(imageData);
        outMap->SetInputConnection(outF->GetOutputPort());
        outAct->SetMapper(outMap);
        outAct->GetProperty()->SetColor(colors->GetColor3d("Black").GetData());
        label->SetPosition(10, 10);
        label->GetTextProperty()->SetFontFamilyToCourier();
        label->GetTextProperty()->SetFontSize(18);
        label->GetTextProperty()->SetColor(colors->GetColor3d("Gold").GetData());
        ren->AddActor(posAct);
        ren->AddActor(negAct);
        ren->AddActor(outAct);
        ren->AddActor2D(label);
        ren->SetBackground(colors->GetColor3d("BkgColor").GetData());
        cam->SetViewUp(0,0,-1);
        cam->SetPosition(0,-1000,0);
        cam->SetFocalPoint(0,0,0);
        cam->ComputeViewPlaneNormal();
        cam->Azimuth(30.0);
        cam->Elevation(30.0);
        ren->SetActiveCamera(cam);
        renWin->AddRenderer(ren);
        renWin->SetSize(800, 600);
        renWin->SetOffScreenRendering(1);
        renWin->SetMultiSamples(0);
        w2i->SetInput(renWin);
        w2i->SetInputBufferTypeToRGB();
        w2i->ReadFrontBufferOff();
    }
 };
 // ─── Render one force field snapshot to a PNG (reuses existing RenderCtx) ─────
 // Loads the chosen 3D slice of `field` into ctx.imageData, places the two
 // isosurfaces at +contour and -contour, shows `title` in the label, re-fits
 // the camera and writes the off-screen frame to `outpath`.
 void renderPNG(RenderCtx& ctx,
               LatticeComplexD& field,
               const Coordinate& latt_size,
               int slice_dim, int sliceval,
               double contour,
               const std::string& title,
               const std::string& outpath)
 {
    // New scalars for this frame.
    fillImageData(ctx.imageData, field, latt_size, slice_dim, sliceval);

    // Symmetric contour pair around zero.
    const double upper = contour;
    const double lower = -contour;
    ctx.posEx->SetValue(0, upper);
    ctx.negEx->SetValue(0, lower);

    ctx.label->SetInput(title.c_str());

    // Re-fit the camera to the (possibly new) data bounds, then move in.
    vtkRenderer* scene = ctx.ren;
    scene->ResetCamera();
    ctx.cam->Dolly(1.2);
    scene->ResetCameraClippingRange();

    // Draw, capture, write.
    ctx.renWin->Render();
    ctx.w2i->Modified();
    ctx.w2i->Update();
    ctx.writer->SetFileName(outpath.c_str());
    ctx.writer->SetInputConnection(ctx.w2i->GetOutputPort());
    ctx.writer->Write();
 }
 // ─── main ─────────────────────────────────────────────────────────────────────
 int main(int argc, char* argv[])
 {
    Grid_init(&argc, &argv);
    GridLogMessage.Active(0);
    GridLogIterative.Active(0);
    GridLogDebug.Active(0);
    GridLogPerformance.Active(0);
    GridLogComms.Active(0);
    GridLogDslash.Active(0);
    GridLogMemory.Active(0);
    // ── CLI ──────────────────────────────────────────────────────────────────
    std::string snapdir = ".";
    std::string pngdir  = "";
    int first = 0, last = 1920, step = 1;
    int slice_dim = 3, sliceval = 0;   // default: fix t=0, display xyz
    double iso_rms    = 1.0;
    double fixed_iso  = -1.0;   // if >0, use this absolute contour
    double tau_start  = -1.0;   // if >=0, display MD time tau = tau_start + idx*tau_step
    double tau_step   =  0.0;
    // Heatmap mode: fix two dimensions, show 2D colour map
    bool   do_heatmap  = false;
    int    slice_dim2  = -1, sliceval2 = 0;
    double heat_scale  = -1.0;  // if >0, fixed symmetric colour scale; else auto
    Coordinate hotsite({0,0,0,0});
    bool has_hotsite = false;
    std::string arg;
    if (GridCmdOptionExists(argv, argv+argc, "--snapdir"))
        snapdir = GridCmdOptionPayload(argv, argv+argc, "--snapdir");
    if (GridCmdOptionExists(argv, argv+argc, "--pngdir"))
        pngdir = GridCmdOptionPayload(argv, argv+argc, "--pngdir");
    if (GridCmdOptionExists(argv, argv+argc, "--first")) {
        arg = GridCmdOptionPayload(argv, argv+argc, "--first");
        GridCmdOptionInt(arg, first);
    }
    if (GridCmdOptionExists(argv, argv+argc, "--last")) {
        arg = GridCmdOptionPayload(argv, argv+argc, "--last");
        GridCmdOptionInt(arg, last);
    }
    if (GridCmdOptionExists(argv, argv+argc, "--step")) {
        arg = GridCmdOptionPayload(argv, argv+argc, "--step");
        GridCmdOptionInt(arg, step);
    }
    if (GridCmdOptionExists(argv, argv+argc, "--slicedim")) {
        arg = GridCmdOptionPayload(argv, argv+argc, "--slicedim");
        GridCmdOptionInt(arg, slice_dim);
    }
    if (GridCmdOptionExists(argv, argv+argc, "--sliceval")) {
        arg = GridCmdOptionPayload(argv, argv+argc, "--sliceval");
        GridCmdOptionInt(arg, sliceval);
    }
    if (GridCmdOptionExists(argv, argv+argc, "--isosurface")) {
        arg = GridCmdOptionPayload(argv, argv+argc, "--isosurface");
        GridCmdOptionFloat(arg, iso_rms);
    }
    if (GridCmdOptionExists(argv, argv+argc, "--fixediso")) {
        arg = GridCmdOptionPayload(argv, argv+argc, "--fixediso");
        GridCmdOptionFloat(arg, fixed_iso);
    }
    if (GridCmdOptionExists(argv, argv+argc, "--taustart")) {
        arg = GridCmdOptionPayload(argv, argv+argc, "--taustart");
        GridCmdOptionFloat(arg, tau_start);
    }
    if (GridCmdOptionExists(argv, argv+argc, "--taustep")) {
        arg = GridCmdOptionPayload(argv, argv+argc, "--taustep");
        GridCmdOptionFloat(arg, tau_step);
    }
    if (GridCmdOptionExists(argv, argv+argc, "--hotsite")) {
        arg = GridCmdOptionPayload(argv, argv+argc, "--hotsite");
        GridCmdOptionIntVector(arg, hotsite);
        has_hotsite = true;
    }
    if (GridCmdOptionExists(argv, argv+argc, "--heatmap"))
        do_heatmap = true;
    if (GridCmdOptionExists(argv, argv+argc, "--slicedim2")) {
        arg = GridCmdOptionPayload(argv, argv+argc, "--slicedim2");
        GridCmdOptionInt(arg, slice_dim2);
    }
    if (GridCmdOptionExists(argv, argv+argc, "--sliceval2")) {
        arg = GridCmdOptionPayload(argv, argv+argc, "--sliceval2");
        GridCmdOptionInt(arg, sliceval2);
    }
    if (GridCmdOptionExists(argv, argv+argc, "--heatscale")) {
        arg = GridCmdOptionPayload(argv, argv+argc, "--heatscale");
        GridCmdOptionFloat(arg, heat_scale);
    }
    bool do_png = !pngdir.empty();
    if (do_png) mkdir(pngdir.c_str(), 0755);
    // ── Grid setup ───────────────────────────────────────────────────────────
    auto latt_size   = GridDefaultLatt();
    auto simd_layout = GridDefaultSimd(Nd, vComplex::Nsimd());
    auto mpi_layout  = GridDefaultMpi();
    GridCartesian grid(latt_size, simd_layout, mpi_layout);
    LatticeComplexD field(&grid);
    // Force components
    struct ForceSpec { std::string prefix; std::string label; };
    std::vector<ForceSpec> forces = {
        { "F_IwasakiGaugeAction_lat",                                             "Gauge_lat"    },
        { "F_IwasakiGaugeAction_smr",                                             "Gauge_smr"    },
        { "F_JacobianAction_smr",                                                 "Jacobian"     },
        { "F_TwoFlavourEvenOddRatioPseudoFermionActiondet_0.0047_det_0.05_lat",  "Ferm0047_lat" },
        { "F_TwoFlavourEvenOddRatioPseudoFermionActiondet_0.0047_det_0.05_smr",  "Ferm0047_smr" },
        { "F_TwoFlavourEvenOddRatioPseudoFermionActiondet_0.05_det_0.1_lat",     "Ferm005_lat"  },
        { "F_TwoFlavourEvenOddRatioPseudoFermionActiondet_0.1_det_0.25_lat",     "Ferm01_lat"   },
        { "F_TwoFlavourEvenOddRatioPseudoFermionActiondet_0.25_det_0.5_lat",     "Ferm025_lat"  },
        { "F_TwoFlavourEvenOddRatioPseudoFermionActiondet_0.5_det_1_lat",        "Ferm05_lat"   },
    };
    // ── Stdout header ─────────────────────────────────────────────────────────
    std::cerr << "idx";
    for (auto& fs : forces) {
        std::cerr << "\t" << fs.label << "_rms";
        if (has_hotsite) std::cerr << "\t" << fs.label << "_hot";
    }
    std::cerr << "\n";
    // ── Persistent render contexts (one GPU context for all frames) ──────────
    std::unique_ptr<RenderCtx>    ctx;      // isosurface mode
    std::unique_ptr<HeatmapCtx>   hctx;    // heatmap mode
    // ── Main loop ─────────────────────────────────────────────────────────────
    for (int idx = first; idx <= last; idx += step) {
        std::cerr << idx;
        for (auto& fs : forces) {
            std::string fname = snapdir + "/" + fs.prefix + "." + std::to_string(idx);
            if (!tryReadFile(field, fname)) {
                std::cerr << "\t-";
                if (has_hotsite) std::cerr << "\t-";
                continue;
            }
            // RMS (real part)
            RealD sumsq = 0.0;
            for (int i = 0; i < grid.gSites(); i++) {
                Coordinate site;
                Lexicographic::CoorFromIndex(site, i, latt_size);
                RealD v = real(peekSite(field, site));
                sumsq += v * v;
            }
            RealD rms = std::sqrt(sumsq / grid.gSites());
            std::cerr << "\t" << rms;
            if (has_hotsite) {
                RealD hval = real(TensorRemove(peekSite(field, hotsite)));
                std::cerr << "\t" << hval;
            }
            // PNG output (isosurface or heatmap)
            if (do_png) {
                // Build title string
                std::ostringstream title;
                title << fs.label << "  ";
                if (tau_start >= 0.0 && tau_step > 0.0) {
                    double tau = tau_start + idx * tau_step;
                    title << std::fixed << std::setprecision(6) << "tau=" << tau;
                } else {
                    title << "idx=" << idx;
                }
                title << "  rms=" << std::scientific << std::setprecision(3) << rms;
                std::ostringstream outpath;
                outpath << pngdir << "/" << fs.label
                        << "_" << std::setfill('0') << std::setw(6) << idx << ".png";
                if (do_heatmap && slice_dim2 >= 0) {
                    // ── Heatmap mode ────────────────────────────────────────
                    // Display dims = the two that are NOT fixed
                    std::vector<int> disp;
                    for (int d = 0; d < 4; d++)
                        if (d != slice_dim && d != slice_dim2) disp.push_back(d);
                    if (!hctx) {
                        double sc = (heat_scale > 0) ? heat_scale : rms * 20.0;
                        // Hotsite projection onto display plane
                        int hx = -1, hy = -1;
                        if (has_hotsite) {
                            hx = hotsite[disp[0]];
                            hy = hotsite[disp[1]];
                        }
                        hctx = std::make_unique<HeatmapCtx>();
                        hctx->init(latt_size[disp[0]], latt_size[disp[1]], sc, hx, hy);
                    }
                    title << "  scale=+-" << std::fixed << std::setprecision(4) << hctx->scale;
                    renderHeatmap(*hctx, field, latt_size,
                                  slice_dim,  sliceval,
                                  slice_dim2, sliceval2,
                                  title.str(), outpath.str());
                } else {
                    // ── Isosurface mode ─────────────────────────────────────
                    double contour = (fixed_iso > 0) ? fixed_iso : iso_rms * rms;
                    title << "  iso=" << contour;
                    if (!ctx) {
                        std::vector<int> disp;
                        for (int d = 0; d < 4; d++) if (d != slice_dim) disp.push_back(d);
                        ctx = std::make_unique<RenderCtx>();
                        ctx->init(latt_size[disp[0]], latt_size[disp[1]], latt_size[disp[2]]);
                    }
                    renderPNG(*ctx, field, latt_size, slice_dim, sliceval,
                              contour, title.str(), outpath.str());
                }
            }
        }
        std::cerr << "\n";
        std::cerr.flush();
    }
    Grid_finalize();
    return EXIT_SUCCESS;
 }
@@ -0,0 +1,742 @@
 // TranscriptToVideo.cxx
 //
 // Reads a conversation transcript file with [USER] / [ASSISTANT] turns (plus
 // optional [CAPTION] lines) and renders it to an AVI using vtkFFMPEGWriter
 // at 1280x720, 10 fps.
 //
 // Transcript format:
 //   [USER] Some question or command, possibly spanning
 //          multiple continuation lines.
 //   [ASSISTANT] A response, also possibly
 //               spanning multiple lines.
 //   ...
 //
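 // Rules:
 //   - A line beginning "[USER]", "[ASSISTANT]" or "[CAPTION]" starts a new turn.
 //   - Any other non-blank line is a continuation of the previous turn
 //     (leading whitespace is stripped; see ParseTranscript).
 //   - A blank line inside a [USER] or [ASSISTANT] turn marks a paragraph
 //     break; blank lines before the first turn or after a [CAPTION] turn
 //     are ignored.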
 //
 // Usage:
 //   ./TranscriptToVideo <transcript.txt> <output.avi>
 //
 // Typewriter speed : 50 chars/sec  →  5 chars/frame at 10 fps (CHARS_PER_FRAME)
 // Pause after turn : 0.3 s         →  3 frames (PAUSE_FRAMES)
 // Word-wrap column : 60 (WRAP_COLS)
 #include <vtkActor.h>
 #include <vtkActor2D.h>
 #include <vtkCellArray.h>
 #include <vtkFFMPEGWriter.h>
 #include <vtkNamedColors.h>
 #include <vtkNew.h>
 #include <vtkPoints.h>
 #include <vtkPolyData.h>
 #include <vtkPolyDataMapper2D.h>
 #include <vtkProperty2D.h>
 #include <vtkRenderWindow.h>
 #include <vtkRenderer.h>
 #include <vtkTextActor.h>
 #include <vtkTextProperty.h>
 #include <vtkWindowToImageFilter.h>
 #include <algorithm>
 #include <fstream>
 #include <iostream>
 #include <sstream>
 #include <string>
 #include <vector>
 // ---------------------------------------------------------------------------
 // Constants
 // ---------------------------------------------------------------------------
 // Output video geometry and frame rate.
 static constexpr int WIDTH  = 1280;
 static constexpr int HEIGHT = 720;
 static constexpr int FPS    = 10;
 // Typewriter pacing.
 static constexpr int CHARS_PER_FRAME = 5;    // 50 chars/sec at 10 fps
 static constexpr int PAUSE_FRAMES    = 3;    // 0.3 s
 // Text layout.
 static constexpr int WRAP_COLS   = 60;
 static constexpr int MAX_HISTORY = 18;       // visible completed lines
 static constexpr int FONT_SIZE   = 18;
 static constexpr int TITLE_SIZE  = 23;
 static constexpr int LINE_HEIGHT = 28;       // pixels between lines
 static constexpr int MARGIN_LEFT = 48;
 static constexpr int MARGIN_TOP  = 58;       // below title bar
 // Colours (R, G, B in 0–1)
 static constexpr double COL_BG[3]     = { 0.04, 0.04, 0.10 };   // near-black navy
 static constexpr double COL_USER[3]   = { 1.00, 0.84, 0.00 };   // gold
 static constexpr double COL_CLAUDE[3] = { 0.68, 0.85, 0.90 };   // light steel blue
 static constexpr double COL_TITLE[3]  = { 1.00, 1.00, 1.00 };   // white
 static constexpr double COL_BAR[3]    = { 0.30, 0.55, 0.80 };   // progress bar blue
 static constexpr double COL_LABEL[3]  = { 0.65, 0.65, 0.65 };   // dim grey for speaker tag
 // ---------------------------------------------------------------------------
 // Structs
 // ---------------------------------------------------------------------------
 // Who a piece of transcript text belongs to.
 enum class Speaker {
    User,
    Claude,
    Caption
 };
 // One parsed transcript turn.
 struct Turn {
    Speaker     speaker;
    std::string text;   // full unwrapped text
 };
 // A rendered display line (already word-wrapped, tagged with speaker).
 // A default-constructed line belongs to Speaker::User, has empty strings and
 // all flags cleared.
 struct DisplayLine {
    Speaker     speaker   = Speaker::User;
    std::string prefix;              // "[USER]      " or "[ASSISTANT] "
    std::string body;                // wrapped segment
    bool        isFirst   = false;   // first line of this turn (prefix printed)
    bool        isBlank   = false;   // spacer between turns (no text rendered)
    bool        isCaption = false;   // caption update — body holds text (empty = clear)
    DisplayLine() = default;
 };
 // ---------------------------------------------------------------------------
 // Replace common multi-byte UTF-8 sequences with ASCII equivalents so that
 // VTK's font renderer (which only handles ASCII reliably) does not crash.
 //
 //   curly quotes → ' or "     en/em dash → -     ellipsis → ...
 //   right arrow  → ->         × → x              ÷ → /
 //
 // Any other well-formed 3-byte sequence led by 0xE2 becomes '~', any other
 // 2-byte sequence led by 0xC3 becomes '?', and every remaining non-ASCII
 // sequence (including truncated or malformed ones) becomes a single '?'.
 // ASCII bytes are never consumed as part of a multi-byte sequence, so text
 // following a stray lead byte is preserved.
 // ---------------------------------------------------------------------------
 static std::string SanitiseASCII(const std::string& s)
 {
    std::string out;
    out.reserve(s.size());
    const unsigned char* p   = reinterpret_cast<const unsigned char*>(s.data());
    const unsigned char* end = p + s.size();
    // UTF-8 continuation bytes have the bit pattern 10xxxxxx.
    auto isCont = [](unsigned char b) { return (b & 0xC0) == 0x80; };
    while (p < end) {
        const unsigned char c = *p;
        if (c < 0x80) {
            out += static_cast<char>(c);
            ++p;
        } else if (c == 0xE2 && (end - p) >= 3 && isCont(p[1]) && isCont(p[2])) {
            // Well-formed 3-byte sequence starting 0xE2
            const unsigned char b1 = p[1], b2 = p[2];
            if (b1 == 0x80 && (b2 == 0x98 || b2 == 0x99))      out += '\'';   // U+2018 / U+2019
            else if (b1 == 0x80 && (b2 == 0x9C || b2 == 0x9D)) out += '"';    // U+201C / U+201D
            else if (b1 == 0x80 && (b2 == 0x93 || b2 == 0x94)) out += '-';    // U+2013 / U+2014
            else if (b1 == 0x80 && b2 == 0xA6)                 out += "...";  // U+2026 ellipsis
            else if (b1 == 0x86 && b2 == 0x92)                 out += "->";   // U+2192 arrow
            else                                               out += '~';    // e.g. U+22xx math operators
            p += 3;
        } else if (c == 0xC3 && (end - p) >= 2 && isCont(p[1])) {
            // Well-formed 2-byte latin-1 supplement
            const unsigned char b1 = p[1];
            if (b1 == 0x97)      out += 'x';   // U+00D7 ×
            else if (b1 == 0xB7) out += '/';   // U+00F7 ÷
            else                 out += '?';
            p += 2;
        } else {
            // Unknown, truncated or malformed sequence: emit one '?' and skip
            // only the continuation bytes that actually follow the lead byte.
            out += '?';
            ++p;
            while (p < end && isCont(*p)) ++p;
        }
    }
    return out;
 }
 // ---------------------------------------------------------------------------
 // Return a copy of `s` with every '*' and '`' removed, so markdown emphasis
 // and code markers don't appear in the video.
 // ---------------------------------------------------------------------------
 static std::string StripMarkdown(const std::string& s)
 {
    std::string cleaned(s);
    const auto isMarker = [](char ch) { return ch == '*' || ch == '`'; };
    cleaned.erase(std::remove_if(cleaned.begin(), cleaned.end(), isMarker),
                  cleaned.end());
    return cleaned;
 }
 // ---------------------------------------------------------------------------
 // Word-wrap: greedily packs the whitespace-separated words of `text` into
 // lines of at most maxCols characters, joining words with single spaces.
 // A word longer than maxCols is placed on a line of its own, unbroken.
 // Always returns at least one line (an empty string for blank input).
 // ---------------------------------------------------------------------------
 static std::vector<std::string> WrapText(const std::string& text, int maxCols)
 {
    std::vector<std::string> wrapped;
    std::istringstream tokens(text);
    std::string current;
    for (std::string token; tokens >> token; ) {
        if (current.empty()) {
            // First word on a line always fits, whatever its length.
            current = token;
            continue;
        }
        const int widened = static_cast<int>(current.size() + 1 + token.size());
        if (widened > maxCols) {
            wrapped.push_back(current);
            current = token;
        } else {
            current += ' ';
            current += token;
        }
    }
    if (!current.empty()) {
        wrapped.push_back(current);
    }
    if (wrapped.empty()) {
        wrapped.push_back("");
    }
    return wrapped;
 }
 // ---------------------------------------------------------------------------
 // Parse transcript file into list of Turns.
 // ---------------------------------------------------------------------------
 static std::vector<Turn> ParseTranscript(const std::string& path)
 {
    std::ifstream f(path);
    if (!f) {
        std::cerr << "Cannot open transcript: " << path << "\n";
        std::exit(1);
    }
    std::vector<Turn> turns;
    std::string line;
    while (std::getline(f, line)) {
        // Trim trailing CR (Windows files)
        if (!line.empty() && line.back() == '\r') line.pop_back();
        if (line.empty()) {
            // Blank line within a dialogue turn = paragraph break.
            // Caption turns don't support paragraph breaks.
            if (!turns.empty() && turns.back().speaker != Speaker::Caption) {
                turns.back().text += '\n';   // '\n' sentinel: expands to blank spacer
            }
            continue;
        }
        if (line.size() >= 6 && line.substr(0, 6) == "[USER]") {
            Turn t;
            t.speaker = Speaker::User;
            t.text    = line.substr(6);
            while (!t.text.empty() && t.text.front() == ' ') t.text.erase(t.text.begin());
            t.text = SanitiseASCII(t.text);
            turns.push_back(std::move(t));
        } else if (line.size() >= 11 && line.substr(0, 11) == "[ASSISTANT]") {
            Turn t;
            t.speaker = Speaker::Claude;
            t.text    = line.substr(11);
            while (!t.text.empty() && t.text.front() == ' ') t.text.erase(t.text.begin());
            t.text = SanitiseASCII(t.text);
            turns.push_back(std::move(t));
        } else if (line.size() >= 9 && line.substr(0, 9) == "[CAPTION]") {
            Turn t;
            t.speaker = Speaker::Caption;
            t.text    = line.substr(9);
            while (!t.text.empty() && t.text.front() == ' ') t.text.erase(t.text.begin());
            t.text = SanitiseASCII(t.text);
            turns.push_back(std::move(t));
        } else if (!turns.empty()) {
            // Continuation line — strip leading whitespace, preserve as separate line
            size_t start = line.find_first_not_of(" \t");
            if (start != std::string::npos) {
                if (!turns.back().text.empty()) turns.back().text += '\n';
                turns.back().text += SanitiseASCII(line.substr(start));
            }
        }
    }
    return turns;
 }
 // ---------------------------------------------------------------------------
 // Expand all Turns into DisplayLines (word-wrapped).
 // ---------------------------------------------------------------------------
 static std::vector<DisplayLine> ExpandToDisplayLines(const std::vector<Turn>& turns)
 {
    std::vector<DisplayLine> out;
    // Prefix widths kept equal for alignment
    const std::string userPfx   = "[USER]      ";
    const std::string claudePfx = "[ASSISTANT] ";
    // Track previous non-caption speaker to know when to insert blank spacers
    Speaker prevSpeaker = Speaker::Caption; // sentinel: no spacer before first real turn
    for (size_t ti = 0; ti < turns.size(); ++ti) {
        const Turn& t = turns[ti];
        // Caption turn: emit one special DisplayLine, no spacer, no history entry
        if (t.speaker == Speaker::Caption) {
            DisplayLine dl;
            dl.isCaption = true;
            dl.speaker   = Speaker::Caption;
            // body: whitespace-only → clear; otherwise wrap lines joined with \n
            std::string trimmed = t.text;
            size_t first = trimmed.find_first_not_of(" \t\r\n");
            if (first == std::string::npos) {
                dl.body = "";   // signal to clear caption
            } else {
                // Wrap to ~90 cols for the wider caption zone
                auto lines = WrapText(trimmed, 90);
                for (size_t i = 0; i < lines.size(); ++i) {
                    if (i > 0) dl.body += "\n";
                    dl.body += lines[i];
                }
            }
            out.push_back(dl);
            continue;
        }
        // Insert a blank spacer before each new dialogue turn (not before the first)
        if (prevSpeaker != Speaker::Caption) {
            DisplayLine blank;
            blank.isBlank = true;
            blank.speaker = t.speaker;
            out.push_back(blank);
        }
        prevSpeaker = t.speaker;
        const std::string& pfx = (t.speaker == Speaker::User) ? userPfx : claudePfx;
        int bodyWidth = WRAP_COLS - (int)pfx.size();
        if (bodyWidth < 20) bodyWidth = 20;
        // Split the turn text on '\n' to get individual source lines.
        // Empty source lines become blank spacers (paragraph breaks within a turn).
        // Non-empty source lines are stripped of markdown markers and word-wrapped.
        std::vector<std::string> srcLines;
        {
            std::string seg;
            for (char ch : t.text) {
                if (ch == '\n') { srcLines.push_back(seg); seg.clear(); }
                else seg += ch;
            }
            srcLines.push_back(seg);
        }
        bool firstOfTurn = true;
        for (const auto& srcLine : srcLines) {
            // Strip markdown emphasis markers
            std::string stripped = StripMarkdown(srcLine);
            // Trim leading/trailing whitespace
            size_t f = stripped.find_first_not_of(" \t");
            if (f == std::string::npos) {
                // Blank — paragraph break spacer within the turn
                DisplayLine blank;
                blank.isBlank = true;
                blank.speaker = t.speaker;
                out.push_back(blank);
                continue;
            }
            size_t l = stripped.find_last_not_of(" \t");
            stripped = stripped.substr(f, l - f + 1);
            auto wrapped = WrapText(stripped, bodyWidth);
            for (size_t i = 0; i < wrapped.size(); ++i) {
                DisplayLine dl;
                dl.speaker = t.speaker;
                dl.prefix  = pfx;
                dl.body    = wrapped[i];
                dl.isFirst = firstOfTurn && (i == 0);
                out.push_back(dl);
            }
            firstOfTurn = false;
        }
    }
    return out;
 }
 // ---------------------------------------------------------------------------
 // Helper: apply the common text style (Courier, size, colour, no bold/italic/
 // shadow, bottom-left justification). Does not set the text or the position.
 // ---------------------------------------------------------------------------
 // Applies the shared text style to actor `a`: Courier at `fontSize`, colour
 // (r, g, b), bold/italic/shadow off, left- and bottom-justified.
 static void ConfigureTextActor(vtkTextActor* a, int fontSize,
                               double r, double g, double b)
 {
    auto* style = a->GetTextProperty();
    style->SetFontFamilyToCourier();
    style->SetFontSize(fontSize);
    style->SetColor(r, g, b);
    style->SetBold(0);
    style->SetItalic(0);
    style->ShadowOff();
    style->SetJustificationToLeft();
    style->SetVerticalJustificationToBottom();
 }
 // ---------------------------------------------------------------------------
 // Create a thin horizontal progress bar actor (2D polygon).
 // Returns the actor; caller adds to renderer.
 // ---------------------------------------------------------------------------
 // Builds a one-quad 2D actor coloured COL_BAR for the progress bar.
 //   pd  (out): the quad's vtkPolyData
 //   pts (out): its four points, which the caller rewrites to resize the bar
 // The returned actor, `pd` and `pts` each carry a reference the caller must
 // release with Delete(); the mapper and cell array are released here.
 static vtkActor2D* MakeProgressBar(vtkPolyData*& pd, vtkPoints*& pts)
 {
    // Placeholder rectangle: x in [0,100], y in [HEIGHT-8, HEIGHT-2].
    const double xLeft = 0, xRight = 100;
    const double yLow = HEIGHT - 8, yHigh = HEIGHT - 2;
    const double corners[4][2] = {
        { xLeft,  yLow  },
        { xLeft,  yHigh },
        { xRight, yHigh },
        { xRight, yLow  },
    };
    pts = vtkPoints::New();
    pts->SetNumberOfPoints(4);
    for (int i = 0; i < 4; ++i) {
        pts->SetPoint(i, corners[i][0], corners[i][1], 0);
    }
    vtkCellArray* polys = vtkCellArray::New();
    vtkIdType ids[4] = { 0, 1, 2, 3 };
    polys->InsertNextCell(4, ids);
    pd = vtkPolyData::New();
    pd->SetPoints(pts);
    pd->SetPolys(polys);
    polys->Delete();   // pd holds its own reference now
    vtkPolyDataMapper2D* barMapper = vtkPolyDataMapper2D::New();
    barMapper->SetInputData(pd);
    vtkActor2D* bar = vtkActor2D::New();
    bar->SetMapper(barMapper);
    barMapper->Delete();   // the actor keeps the mapper alive
    bar->GetProperty()->SetColor(COL_BAR[0], COL_BAR[1], COL_BAR[2]);
    return bar;
 }
 // ---------------------------------------------------------------------------
 // Main
 // ---------------------------------------------------------------------------
 // Usage: TranscriptToVideo <transcript.txt> <output.avi>
 // Parses the transcript, expands it into display lines and renders them
 // off-screen with a typewriter effect, writing every frame through
 // vtkFFMPEGWriter.  Returns 1 on bad arguments or an empty transcript,
 // 0 otherwise.
 int main(int argc, char* argv[])
 {
    if (argc < 3) {
        std::cerr << "Usage: TranscriptToVideo <transcript.txt> <output.avi>\n";
        return 1;
    }
    const std::string transcriptPath = argv[1];
    const std::string outputPath     = argv[2];
    // -----------------------------------------------------------------------
    // Parse & expand
    // -----------------------------------------------------------------------
    auto turns        = ParseTranscript(transcriptPath);
    auto displayLines = ExpandToDisplayLines(turns);
    const int totalLines = (int)displayLines.size();
    if (totalLines == 0) {
        std::cerr << "Transcript has no parseable turns.\n";
        return 1;
    }
    // Exact number of frames the animation loop below will write; used as the
    // progress-bar denominator.  Must mirror the loop:
    //   caption line -> 1 frame
    //   blank spacer -> 0 frames
    //   text line    -> ceil(chars / CHARS_PER_FRAME) + 1 typewriter frames
    //                   plus PAUSE_FRAMES
    int totalFrames = 0;
    for (const auto& dl : displayLines) {
        if (dl.isCaption) { totalFrames += 1; continue; }
        if (dl.isBlank) continue;
        const int chars = (int)dl.body.size();
        totalFrames += (chars + CHARS_PER_FRAME - 1) / CHARS_PER_FRAME + 1 + PAUSE_FRAMES;
    }
    // -----------------------------------------------------------------------
    // VTK Setup
    // -----------------------------------------------------------------------
    vtkNew<vtkRenderer>     ren;
    vtkNew<vtkRenderWindow> renWin;
    ren->SetBackground(COL_BG[0], COL_BG[1], COL_BG[2]);
    renWin->AddRenderer(ren);
    renWin->SetSize(WIDTH, HEIGHT);
    renWin->SetOffScreenRendering(1);
    renWin->SetMultiSamples(0);
    // -----------------------------------------------------------------------
    // Title actor
    // -----------------------------------------------------------------------
    vtkNew<vtkTextActor> titleActor;
    titleActor->SetInput("Transcript");
    ConfigureTextActor(titleActor, TITLE_SIZE,
                       COL_TITLE[0], COL_TITLE[1], COL_TITLE[2]);
    titleActor->GetTextProperty()->SetBold(1);
    titleActor->GetTextProperty()->SetJustificationToCentered();
    titleActor->SetDisplayPosition(WIDTH / 2, HEIGHT - 36);
    ren->AddActor2D(titleActor);
    // Thin separator line below title (drawn as a narrow quad)
    {
        vtkPoints*    lpts  = vtkPoints::New();
        vtkCellArray* lcell = vtkCellArray::New();
        lpts->InsertNextPoint(0,     HEIGHT - 44, 0);
        lpts->InsertNextPoint(WIDTH, HEIGHT - 44, 0);
        lpts->InsertNextPoint(WIDTH, HEIGHT - 42, 0);
        lpts->InsertNextPoint(0,     HEIGHT - 42, 0);
        vtkIdType q[4] = {0,1,2,3};
        lcell->InsertNextCell(4, q);
        vtkPolyData* lpd = vtkPolyData::New();
        lpd->SetPoints(lpts);
        lpd->SetPolys(lcell);
        vtkPolyDataMapper2D* lmap = vtkPolyDataMapper2D::New();
        lmap->SetInputData(lpd);
        vtkActor2D* lineActor = vtkActor2D::New();
        lineActor->SetMapper(lmap);
        lineActor->GetProperty()->SetColor(0.3, 0.3, 0.5);
        ren->AddActor2D(lineActor);
        // The renderer/actor/mapper chain now holds the references it needs.
        lpts->Delete(); lcell->Delete(); lpd->Delete();
        lmap->Delete(); lineActor->Delete();
    }
    // -----------------------------------------------------------------------
    // History text actors (MAX_HISTORY lines, reused with shifting content)
    // -----------------------------------------------------------------------
    std::vector<vtkTextActor*> histActors(MAX_HISTORY);
    for (int i = 0; i < MAX_HISTORY; ++i) {
        histActors[i] = vtkTextActor::New();
        histActors[i]->SetInput("");
        ConfigureTextActor(histActors[i], FONT_SIZE, 0.5, 0.5, 0.5);
        // Position: bottom of history area = just above current-line area
        int y = MARGIN_TOP + (MAX_HISTORY - 1 - i) * LINE_HEIGHT;
        histActors[i]->SetDisplayPosition(MARGIN_LEFT, HEIGHT - y);
        ren->AddActor2D(histActors[i]);
    }
    // Current (actively typing) line — prefix, body and cursor actors
    vtkNew<vtkTextActor> curPfxActor;   // speaker tag
    vtkNew<vtkTextActor> curBodyActor;  // body text in vivid colour
    vtkNew<vtkTextActor> cursorActor;   // blinking cursor
    ConfigureTextActor(curPfxActor,  FONT_SIZE, COL_LABEL[0], COL_LABEL[1], COL_LABEL[2]);
    ConfigureTextActor(curBodyActor, FONT_SIZE, 1, 1, 1);  // will be overridden per turn
    ConfigureTextActor(cursorActor,  FONT_SIZE, 1, 1, 1);
    cursorActor->SetInput("|");
    int curLineY = HEIGHT - (MARGIN_TOP + MAX_HISTORY * LINE_HEIGHT);
    // Place current line below history
    curPfxActor->SetDisplayPosition(MARGIN_LEFT, curLineY);
    ren->AddActor2D(curPfxActor);
    ren->AddActor2D(curBodyActor);
    ren->AddActor2D(cursorActor);
    // -----------------------------------------------------------------------
    // Progress bar
    // -----------------------------------------------------------------------
    vtkPolyData* barPD  = nullptr;
    vtkPoints*   barPts = nullptr;
    vtkActor2D*  barActor = MakeProgressBar(barPD, barPts);
    ren->AddActor2D(barActor);
    // Progress label
    vtkNew<vtkTextActor> progLabelActor;
    progLabelActor->SetInput("");
    ConfigureTextActor(progLabelActor, 13,
                       COL_LABEL[0], COL_LABEL[1], COL_LABEL[2]);
    progLabelActor->SetDisplayPosition(MARGIN_LEFT, 12);
    ren->AddActor2D(progLabelActor);
    // -----------------------------------------------------------------------
    // Caption actor — bottom-centre, Arial, white, initially empty
    // -----------------------------------------------------------------------
    vtkNew<vtkTextActor> captionActor;
    captionActor->SetInput("");
    captionActor->GetTextProperty()->SetFontFamilyToArial();
    captionActor->GetTextProperty()->SetFontSize(20);
    captionActor->GetTextProperty()->SetColor(1.0, 1.0, 1.0);
    captionActor->GetTextProperty()->SetBold(0);
    captionActor->GetTextProperty()->SetItalic(1);
    captionActor->GetTextProperty()->ShadowOn();
    captionActor->GetTextProperty()->SetShadowOffset(1, -1);
    captionActor->GetTextProperty()->SetJustificationToCentered();
    captionActor->GetTextProperty()->SetVerticalJustificationToBottom();
    // Position: centred horizontally, in the gap between typing line and progress label
    captionActor->SetDisplayPosition(WIDTH / 2, 32);
    ren->AddActor2D(captionActor);
    // -----------------------------------------------------------------------
    // FFMPEG writer
    // -----------------------------------------------------------------------
    vtkNew<vtkWindowToImageFilter> w2i;
    w2i->SetInput(renWin);
    w2i->SetScale(1);
    w2i->ReadFrontBufferOff();
    vtkNew<vtkFFMPEGWriter> writer;
    writer->SetInputConnection(w2i->GetOutputPort());
    writer->SetFileName(outputPath.c_str());
    writer->SetRate(FPS);
    writer->SetBitRate(4000);
    writer->SetBitRateTolerance(400);
    writer->Start();
    // -----------------------------------------------------------------------
    // Helper: approximate pixel width of a prefix in the typing font.
    // Courier is monospace, so width ≈ chars × CHAR_PX; 10.2 px per character
    // is the value measured for font size 17.
    // -----------------------------------------------------------------------
    const double CHAR_PX = 10.2;
    auto bodyX = [&](const std::string& pfx) -> int {
        return MARGIN_LEFT + (int)(pfx.size() * CHAR_PX);
    };
    // -----------------------------------------------------------------------
    // Render one frame
    // -----------------------------------------------------------------------
    int frameCount = 0;
    auto renderFrame = [&]() {
        renWin->Render();
        w2i->Modified();   // force the filter to grab the new image
        writer->Write();
        ++frameCount;
    };
    // -----------------------------------------------------------------------
    // Resize the progress bar to frameCount / totalFrames of the usable width
    // -----------------------------------------------------------------------
    auto updateProgressBar = [&]() {
        double frac = (totalFrames > 0) ? (double)frameCount / totalFrames : 0.0;
        frac = std::min(1.0, frac);
        const double barW = frac * (WIDTH - 2 * MARGIN_LEFT);
        barPts->SetPoint(0, MARGIN_LEFT,        HEIGHT - 8, 0);
        barPts->SetPoint(1, MARGIN_LEFT,        HEIGHT - 2, 0);
        barPts->SetPoint(2, MARGIN_LEFT + barW, HEIGHT - 2, 0);
        barPts->SetPoint(3, MARGIN_LEFT + barW, HEIGHT - 8, 0);
        barPts->Modified();
        barPD->Modified();
    };
    // -----------------------------------------------------------------------
    // History — holds completed display lines (most-recent last)
    // -----------------------------------------------------------------------
    std::vector<DisplayLine> history;
    auto refreshHistory = [&]() {
        // histActors[slot] shows the slot-th most recent history entry.
        // Brightness fades linearly from 0.85 (slot 0) to 0.20 (slot MAX-1).
        // Blank spacer lines show as empty strings.
        int n = (int)history.size();
        for (int slot = 0; slot < MAX_HISTORY; ++slot) {
            int idx = n - 1 - slot;   // slot 0 → newest, slot MAX-1 → oldest
            if (idx < 0) {
                histActors[slot]->SetInput("");
                continue;
            }
            const auto& hl = history[idx];
            if (hl.isBlank) {
                histActors[slot]->SetInput("");
                continue;
            }
            // Graduated brightness; guard the single-slot case against 0/0.
            const double age = (MAX_HISTORY > 1) ? (double)slot / (MAX_HISTORY - 1) : 0.0;
            double bodyBright = 0.20 + 0.65 * (1.0 - age);
            const double* col = (hl.speaker == Speaker::User) ? COL_USER : COL_CLAUDE;
            if (hl.isFirst) {
                // Prefix and body are one single-colour string, so the whole
                // line is drawn a little brighter to keep the tag readable.
                double pfxBright = std::min(1.0, bodyBright + 0.25);
                std::string txt = hl.prefix + hl.body;
                histActors[slot]->SetInput(txt.c_str());
                histActors[slot]->GetTextProperty()->SetColor(
                    col[0] * pfxBright, col[1] * pfxBright, col[2] * pfxBright);
            } else {
                // Continuation row: indent by the prefix width instead.
                std::string txt = std::string(hl.prefix.size(), ' ') + hl.body;
                histActors[slot]->SetInput(txt.c_str());
                histActors[slot]->GetTextProperty()->SetColor(
                    col[0] * bodyBright, col[1] * bodyBright, col[2] * bodyBright);
            }
        }
    };
    // -----------------------------------------------------------------------
    // Main animation loop
    // -----------------------------------------------------------------------
    // Each isFirst line starts one turn; used for the "Turn i / N" label.
    int totalTurns = 0;
    for (const auto& dl : displayLines) if (dl.isFirst) ++totalTurns;
    int turnsSeen = 0;
    for (int li = 0; li < totalLines; ++li) {
        const DisplayLine& dl = displayLines[li];
        // Caption update: swap the bottom caption, no history, no typewriter
        if (dl.isCaption) {
            captionActor->SetInput(dl.body.c_str());
            renderFrame();
            continue;
        }
        // Blank spacer: push to history silently with no frames written
        if (dl.isBlank) {
            history.push_back(dl);
            refreshHistory();
            continue;
        }
        // Vivid colour for current speaker
        double cr, cg, cb;
        if (dl.speaker == Speaker::User) {
            cr = COL_USER[0]; cg = COL_USER[1]; cb = COL_USER[2];
        } else {
            cr = COL_CLAUDE[0]; cg = COL_CLAUDE[1]; cb = COL_CLAUDE[2];
        }
        curBodyActor->GetTextProperty()->SetColor(cr, cg, cb);
        cursorActor->GetTextProperty()->SetColor(cr, cg, cb);
        // Prefix: vivid speaker colour for first line, indent for continuation
        if (dl.isFirst) {
            curPfxActor->SetInput(dl.prefix.c_str());
            curPfxActor->GetTextProperty()->SetColor(cr, cg, cb);
        } else {
            curPfxActor->SetInput(std::string(dl.prefix.size(), ' ').c_str());
            curPfxActor->GetTextProperty()->SetColor(0, 0, 0);
        }
        int bx = bodyX(dl.prefix);
        curBodyActor->SetDisplayPosition(bx, curLineY);
        cursorActor->SetDisplayPosition(bx, curLineY);
        if (dl.isFirst) ++turnsSeen;
        // Update progress label
        {
            char buf[64];
            std::snprintf(buf, sizeof(buf), "Turn %d / %d", turnsSeen, totalTurns);
            progLabelActor->SetInput(buf);
        }
        updateProgressBar();
        // Typewriter: reveal CHARS_PER_FRAME characters per rendered frame.
        // The loop always renders the fully-complete state last.
        const std::string& body = dl.body;
        int ci = 0;
        while (true) {
            curBodyActor->SetInput(body.substr(0, ci).c_str());
            double cx = bx + ci * CHAR_PX;
            cursorActor->SetDisplayPosition((int)cx, curLineY);
            cursorActor->SetVisibility((frameCount % 2 == 0) ? 1 : 0);   // blink
            renderFrame();
            if (ci >= (int)body.size()) break;
            ci = std::min(ci + CHARS_PER_FRAME, (int)body.size());
        }
        // Line is complete — move it to history
        history.push_back(dl);
        refreshHistory();
        // Clear current line display
        curPfxActor->SetInput("");
        curBodyActor->SetInput("");
        cursorActor->SetVisibility(0);
        // Pause frames
        for (int p = 0; p < PAUSE_FRAMES; ++p) {
            updateProgressBar();
            renderFrame();
        }
    }
    // -----------------------------------------------------------------------
    // Finish
    // -----------------------------------------------------------------------
    writer->End();
    // Tidy up manual ref-counted objects
    barActor->Delete();
    barPD->Delete();
    barPts->Delete();
    for (auto* a : histActors) a->Delete();
    std::cerr << "Wrote " << frameCount << " frames ("
              << frameCount / FPS << " s) to " << outputPath << "\n";
    return 0;
 }
@@ -0,0 +1,729 @@
 // Derived from VTK/Examples/Cxx/Medical2.cxx
 // The example reads a volume dataset, extracts two isosurfaces that
 // represent the skin and bone, and then displays them.
 //
 // Modified heavily by Peter Boyle to display lattice field theory data as movies and compare multiple files
 #include <vtkActor.h>
 #include <vtkCamera.h>
 #include <vtkMetaImageReader.h>
 #include <vtkNamedColors.h>
 #include <vtkNew.h>
 #include <vtkOutlineFilter.h>
 #include <vtkPolyDataMapper.h>
 #include <vtkProperty.h>
 #include <vtkRenderWindow.h>
 #include <vtkRenderWindowInteractor.h>
 #include <vtkRenderer.h>
 #include <vtkStripper.h>
 #include <vtkImageData.h>
 #include <vtkVersion.h>
 #include <vtkCallbackCommand.h>
 #include <vtkTextActor.h>
 #include <vtkTextProperty.h>
 #define MPEG
 #ifdef MPEG
 #include <vtkFFMPEGWriter.h>
 #endif
 #include <vtkProperty2D.h>
 #include <vtkSliderWidget.h>
 #include <vtkSliderRepresentation2D.h>
 #include <vtkWindowToImageFilter.h>
 #include <array>
 #include <string>
 #include <Grid/Grid.h>
 #define USE_FLYING_EDGES
 #ifdef USE_FLYING_EDGES
 #include <vtkFlyingEdges3D.h>
 typedef vtkFlyingEdges3D isosurface;
 #else
 #include <vtkMarchingCubes.h>
 typedef vtkMarchingCubes isosurface;
 #endif
 // File-scope settings, both mutable globals.
 // NOTE(review): neither is referenced in the portion of the file reviewed
 // here; presumably `mpeg` enables movie output and `framerate` is frames per
 // second — confirm against main().
 int mpeg = 0 ;
 int framerate = 10;
 template <class T> void readFile(T& out, std::string const fname){
  Grid::emptyUserRecord record;
  Grid::ScidacReader RD;
  RD.open(fname);
  RD.readScidacFieldRecord(out,record);
  RD.close();
 }
 using namespace Grid;
 class FrameUpdater : public vtkCallbackCommand
 {
 public:
  FrameUpdater() {
    ffile=0;
    TimerCount = 0;
    xoff       = 0;
    t          = 0;
    imageData = nullptr;
    timerId = 0;
    maxCount = -1;
    old_file=-1;
  }
  static FrameUpdater* New()
  {
    FrameUpdater* cb = new FrameUpdater;
    cb->TimerCount = 0;
    return cb;
  }
  //
  // Must map a x,y,z + frame index into
  // i)  a d-dimensional site Coordinate
  // ii) a file name
  // Need a:
  //     loop_ranges
  //     sum_ranges
  //     loop_vol  -- map loop_idx -> loop_coor
  //     sum_vol   -- map sum_idx -> sum_coor with Lexicographic
  //
  /*
   * Just set this up
   */
  int old_file ; // Cache, avoid reread
  Coordinate          latt;
  Coordinate          xyz_dims    ; // List lattice dimensions corresponding to xyz_dims displayed 
  Coordinate          xyz_ranges  ; // 3-vector
  Coordinate          g_xyz_ranges; // Nd-vector
  uint64_t            xyz_vol     ;
  Coordinate          loop_dims;    // List lattice dimensions put into movie time
  Coordinate          loop_ranges;  // movie time ranges
  uint64_t            loop_vol;
  Coordinate          sum_dims;      // List lattice dimensions summed
  Coordinate          sum_ranges;    // summation ranges
  uint64_t            sum_vol;
  Coordinate          slice_dims;      // List slice dimensions 
  Coordinate          Slice;
  std::vector<std::string> files;        // file list that is looped over
  int Nd;
  GridBase *grid;
  Grid::LatticeComplexD *grid_data;
  void SetGrid(GridBase *_grid)
  {
    grid = _grid;
    Nd=grid->Nd();
    latt = grid->GlobalDimensions();
    grid_data = new Grid::LatticeComplexD(grid);
  }
  void SetFiles(std::vector<std::string> list)      { files = list; old_file = -1; }
  void SetSlice(Coordinate _Slice)             { Slice = _Slice;} // Offset / skew for lattice coords
  void SetSumDimensions (Coordinate _SumDims ) {
    sum_ranges=Coordinate(Nd);
    sum_dims = _SumDims; // 1 hot for dimensions summed
    sum_vol = 1;
    for(int d=0;d<sum_dims.size();d++){
      if ( sum_dims[d] == 1 ) sum_ranges[d] = latt[d];
      else                    sum_ranges[d] = 1;
      sum_vol*=sum_ranges[d];
    }
  }
  void SetLoopDimensions(Coordinate _LoopDims) {
    loop_ranges=Coordinate(Nd);
    loop_dims= _LoopDims;
    loop_vol = 1;
    for(int d=0;d<loop_dims.size();d++){
      if ( loop_dims[d] == 1 ) loop_ranges[d] = latt[d];
      else                     loop_ranges[d] = 1;
      loop_vol*=loop_ranges[d];
    }
  } // 
  void SetDisplayDimensions(Coordinate _xyz_dims   ) {
    g_xyz_ranges=Coordinate(Nd);
    xyz_ranges=Coordinate(3);
    xyz_dims = _xyz_dims;
    xyz_vol  = 1;
    for(int d=0;d<3;d++){
      xyz_ranges[d] = latt[xyz_dims[d]];
      xyz_vol *= xyz_ranges[d];
    }
    // Find dim extents for grid
    int dd=0;
    for(int d=0;d<Nd;d++){
      g_xyz_ranges[d] = 1;
      for(int dd=0;dd<3;dd++) {
 	if ( xyz_dims[dd]==d ) {
 	  g_xyz_ranges[d] = latt[d];
 	}
      }
    }
  }
  void SetSliceDimensions(void) {
    Coordinate _slice_dims;
    for ( int d=0;d<Nd;d++){
      int is_slice = 1;
      if(g_xyz_ranges[d]>1) is_slice = 0;
      if(loop_dims[d]) is_slice = 0;
      if(sum_dims[d] ) is_slice = 0;
      if(is_slice) _slice_dims.push_back(d);
    }
    slice_dims = _slice_dims;
    std::cout << " Setting Slice Dimensions to "<<slice_dims<<std::endl;
  }
  virtual void Execute(vtkObject* caller, unsigned long eventId,void* vtkNotUsed(callData))
  {
    const int max=256;
    char text_string[max];
    auto latt_size = grid->GlobalDimensions();
    if ( vtkCommand::KeyPressEvent == eventId ) {
      vtkRenderWindowInteractor* iren = static_cast<vtkRenderWindowInteractor*>(caller);
      std::string key = iren->GetKeySym();
      std::cout << "Pressed: " << key << std::endl;
      if (slice_dims.size()>0) {
 	int vert = slice_dims[slice_dims.size()-1];
 	int horz = slice_dims[0];
 	if ( key == "Up" ) {
 	  Slice[vert] = (Slice[vert]+1)%latt[vert];
 	}
 	if ( key == "Down" ) {
 	  Slice[vert] = (Slice[vert]+latt[vert]-1)%latt[vert];
 	}
 	if ( key == "Right" ) {
 	  Slice[horz] = (Slice[horz]+1)%latt[horz];
 	}
 	if ( key == "Left" ) {
 	  Slice[horz] = (Slice[horz]+latt[horz]-1)%latt[horz];
 	}
      }
      if ( key == "greater" ) {
 	ffile = (ffile + 1) % files.size();
      }
      if ( key == "less" ) {
 	ffile = (ffile - 1 + files.size()) % files.size();
      }
      std::cout <<"Slice " <<Slice <<std::endl;
      std::cout <<"File  " <<ffile <<std::endl;
    }
    // Make a new frame for frame index TimerCount
    if ( vtkCommand::TimerEvent == eventId || vtkCommand::KeyPressEvent == eventId)
      {
 	int file     = ((this->TimerCount / loop_vol) + ffile )%files.size();
 	if ( file != old_file ) {
 	  readFile(*grid_data,files[file]);
 	  old_file = file;
 	}
 	RealD max, min, max_abs,min_abs;
 	Coordinate max_site;
 	Coordinate min_site;
 	Coordinate max_abs_site;
 	Coordinate min_abs_site;
 	for(int idx=0;idx<grid->gSites();idx++){
 	  Coordinate site;
 	  Lexicographic::CoorFromIndex (site,idx,latt);
 	  RealD val=real(peekSite(*grid_data,site));
 	  if (idx==0){
 	    max = min = val;
 	    max_abs = min_abs = fabs(val);
 	    max_site = site;
 	    min_site = site;
 	    min_abs_site = site;
 	    max_abs_site = site;
 	  } else {
 	    if ( val > max ) {
 	      max=val;
 	      max_site = site;
 	    }
 	    if ( fabs(val) > max_abs ) {
 	      max_abs=fabs(val);
 	      max_abs_site = site;
 	    }
 	    if ( val < min ) {
 	      min=val;
 	      min_site = site;
 	    }	    
 	    if ( fabs(val) < min_abs ) {
 	      min_abs=fabs(val);
 	      min_abs_site = site;
 	    }	    
 	  }
 	}
 	std::cout << " abs_max "<<max_abs<<" at " << max_abs_site<<std::endl;
 	std::cout << " abs_min "<<min_abs<<" at " << min_abs_site<<std::endl;
 	std::cout << " max "<<max<<" at " << max_site<<std::endl;
 	std::cout << " min "<<min<<" at " << min_site<<std::endl;
 	// Looped dimensions, map index to coordinate
 	int loop_idx = this->TimerCount % loop_vol;
 	Coordinate loop_coor;
 	Lexicographic::CoorFromIndex (loop_coor,loop_idx,loop_ranges);
 	// Loop over xyz sites
 	Coordinate xyz_coor(3);
 	Coordinate g_xyz_coor(Nd);
 	Coordinate sum_coor(Nd);
 	for(uint64_t xyz = 0 ; xyz< xyz_vol; xyz++){
 	  Lexicographic::CoorFromIndex (xyz_coor,xyz,xyz_ranges);
 	  Lexicographic::CoorFromIndex (g_xyz_coor,xyz,g_xyz_ranges);
 	  RealD sum_value = 0.0;
 	  for(uint64_t sum_idx = 0 ; sum_idx< sum_vol; sum_idx++){
 	    Lexicographic::CoorFromIndex (sum_coor,sum_idx,sum_ranges);
 	    Coordinate site(Nd);
 	    for(int d=0;d<Nd;d++){
 	      site[d] = (sum_coor[d] + loop_coor[d] + g_xyz_coor[d] + Slice[d])%latt[d];
 	    }
 	    sum_value+= real(peekSite(*grid_data,site));
 	    if(xyz==0) std::cout << "sum "<<sum_idx<<" "<<sum_value<<std::endl;
 	  }
 	  imageData->SetScalarComponentFromDouble(xyz_coor[0],xyz_coor[1],xyz_coor[2],0,sum_value);
 	}
 	imageData->Modified();
 	std::stringstream ss;
 	ss<< files[file] <<"\nSlice "<<Slice << "\nLoop  " <<loop_coor<<"\nSummed "<<sum_dims;
 	text->SetInput(ss.str().c_str());
 	vtkRenderWindowInteractor* iren = dynamic_cast<vtkRenderWindowInteractor*>(caller);
 	iren->GetRenderWindow()->Render();
      }
    if ( vtkCommand::TimerEvent == eventId ) {
      ++this->TimerCount;
      std::cout << " This was a timer event count "<<this->TimerCount << std::endl;
    }
    if (this->TimerCount >= this->maxCount) {
      vtkRenderWindowInteractor* iren = dynamic_cast<vtkRenderWindowInteractor*>(caller);
      if (this->timerId > -1)
 	{
        iren->DestroyTimer(this->timerId);
 	}
    }
  }
 private:
  int TimerCount;
  int ffile;
  int xoff;
  int t;
 public:
  vtkImageData* imageData = nullptr;
  vtkTextActor* text = nullptr;
  vtkFFMPEGWriter *writer = nullptr;
  int timerId ;
  int maxCount ;
  double rms;
  isosurface * posExtractor;
  isosurface * negExtractor;
 };
 class SliderCallback : public vtkCommand
 {
 public:
    static SliderCallback* New()
    {
        return new SliderCallback;
    }
    virtual void Execute(vtkObject* caller, unsigned long eventId, void* callData)
    {
        vtkSliderWidget *sliderWidget = vtkSliderWidget::SafeDownCast(caller);
        if (sliderWidget)
        {
 	  contour = ((vtkSliderRepresentation *)sliderWidget->GetRepresentation())->GetValue();
        }
 	fu->posExtractor->SetValue(0,  SliderCallback::contour*fu->rms);
 	fu->negExtractor->SetValue(0, -SliderCallback::contour*fu->rms);
 	fu->posExtractor->Modified();
 	fu->negExtractor->Modified();
    }    
 public:
  static double contour;
  FrameUpdater * fu;
 };
 // File-scope FrameUpdater pointer intended for keypress management.
 // NOTE(review): the only assignment visible in this chunk (`KBfu = fu` in main) is commented out.
 FrameUpdater * KBfu;
 /// Keypress callback in the vtkCallbackCommand signature: logs the key symbol
 /// reported by the interactor that raised the event.
 ///
 /// @param caller     expected to be a vtkRenderWindowInteractor; anything else
 ///                   is reported as "(none)".
 /// @param eventId    unused.
 /// @param clientData unused.
 /// @param callData   unused.
 void KeypressCallbackFunction(vtkObject* caller, long unsigned int eventId,
                               void* clientData, void* callData)
 {
   std::cout << "Keypress callback" << std::endl;

   vtkRenderWindowInteractor* iren = vtkRenderWindowInteractor::SafeDownCast(caller);

   // GetKeySym() hands back a raw C string that may be null; streaming a null
   // char pointer is undefined behaviour, so substitute a placeholder.
   const char* keySym = (iren != nullptr) ? iren->GetKeySym() : nullptr;
   std::cout << "Pressed: " << (keySym != nullptr ? keySym : "(none)") << std::endl;
 }
 // Out-of-class definition of the static SliderCallback::contour member (no explicit initializer).
 double SliderCallback::contour;
 /// Program entry point: renders positive/negative isosurfaces of a lattice
 /// field with VTK, either interactively or into a movie file.
 ///
 /// Options parsed here: --files, --mpeg (only when built with MPEG), --fps,
 /// --isosurface, --slice, --sum, --loop, --xyz, --notime.
 ///
 /// `mpeg` and `framerate` are not declared in this function; presumably they
 /// are file-scope variables defined earlier in the file.
 ///
 /// @return EXIT_SUCCESS after Grid_finalize().
 int main(int argc, char* argv[])
 {
   using namespace Grid;

   Grid_init(&argc, &argv);
   GridLogLayout();

   auto latt_size   = GridDefaultLatt();
   auto simd_layout = GridDefaultSimd(latt_size.size(), vComplex::Nsimd());
   auto mpi_layout  = GridDefaultMpi();
   GridCartesian    Grid(latt_size, simd_layout, mpi_layout);

   double default_contour = 1.0;
   std::string arg;

   std::cout << argc << " command Line arguments "<<std::endl;
   for(int c=0;c<argc;c++) {
     std::cout << " - "<<argv[c]<<std::endl;
   }

   // Placeholder file names, replaced when --files is given.
   std::vector<std::string> file_list({
       "file1",
       "file2",
       "file3",
       "file4",
       "file5",
       "file6",
       "file7",
       "file8"
     });

   if( GridCmdOptionExists(argv,argv+argc,"--files") ){
     arg=GridCmdOptionPayload(argv,argv+argc,"--files");
     GridCmdOptionCSL(arg, file_list);
   }
 #ifdef MPEG
   if( GridCmdOptionExists(argv,argv+argc,"--mpeg") ){
     mpeg = 1;
   }
 #endif
   if( GridCmdOptionExists(argv,argv+argc,"--fps") ){
     arg=GridCmdOptionPayload(argv,argv+argc,"--fps");
     GridCmdOptionInt(arg,framerate);
   }
   if( GridCmdOptionExists(argv,argv+argc,"--isosurface") ){
     arg=GridCmdOptionPayload(argv,argv+argc,"--isosurface");
     GridCmdOptionFloat(arg,default_contour);
   }
   for(const auto & fname : file_list) {
     std::cout << " file: "<<fname<<std::endl;
   }

   int NoTime = 0;
   int Nd; Nd = Grid.Nd();
   Coordinate    Slice(Nd,0);
   Coordinate  SumDims(Nd,0);
   Coordinate LoopDims(Nd,0);
   Coordinate  XYZDims({0,1,2});
   if( GridCmdOptionExists(argv,argv+argc,"--slice") ){
     arg=GridCmdOptionPayload(argv,argv+argc,"--slice");
     GridCmdOptionIntVector(arg,Slice);
   }
   if( GridCmdOptionExists(argv,argv+argc,"--sum") ){
     arg=GridCmdOptionPayload(argv,argv+argc,"--sum");
     GridCmdOptionIntVector(arg,SumDims);
   }
   if( GridCmdOptionExists(argv,argv+argc,"--loop") ){
     arg=GridCmdOptionPayload(argv,argv+argc,"--loop");
     GridCmdOptionIntVector(arg,LoopDims);
   }
   if( GridCmdOptionExists(argv,argv+argc,"--xyz") ){
     arg=GridCmdOptionPayload(argv,argv+argc,"--xyz");
     GridCmdOptionIntVector(arg,XYZDims);
     std::cout << "xyz : "<<XYZDims<<std::endl;
   }
   if( GridCmdOptionExists(argv,argv+argc,"--notime") ){
     NoTime = 1;
     std::cout << "Suppressing time loop"<<std::endl;
   }

   // Common things:
   vtkNew<vtkNamedColors> colors;
   std::array<unsigned char, 4> posColor{{240, 184, 160, 255}};  colors->SetColor("posColor", posColor.data());
   std::array<unsigned char, 4> bkg{{51, 77, 102, 255}};         colors->SetColor("BkgColor", bkg.data());

   // Create the render window and the interactor. The interactor enables
   // mouse- and keyboard-based interaction with the data within the window.
   vtkNew<vtkRenderWindow> renWin;
   vtkNew<vtkRenderWindowInteractor> iren;
   iren->SetRenderWindow(renWin);

   // One frame per file, multiplied by the extent of every looped dimension.
   int frameCount = file_list.size();
   for(int d=0;d<Grid.Nd();d++) {
     if ( LoopDims[d] ) frameCount*= latt_size[d];
   }

   // It is convenient to create an initial view of the data. The FocalPoint
   // and Position form a vector direction. Later on (ResetCamera() method)
   // this vector is used to position the camera to look at the data in
   // this direction.
   vtkNew<vtkCamera> aCamera;
   aCamera->SetViewUp(0, 0, -1);
   aCamera->SetPosition(0, -1000, 0);
   aCamera->SetFocalPoint(0, 0, 0);
   aCamera->ComputeViewPlaneNormal();
   aCamera->Azimuth(30.0);
   aCamera->Elevation(30.0);

   vtkNew<vtkRenderer> aRenderer;
   renWin->AddRenderer(aRenderer);

   // The contour scale is the RMS of the first file: sqrt(norm2 / global sites).
   double vol = Grid.gSites();
   std::cout << "Reading "<<file_list[0]<<std::endl;
   double nrm, nrmbar,rms, contour;
   {
     LatticeComplexD data(&Grid);
     readFile(data,file_list[0]);
     nrm    = norm2(data);
   }
   nrmbar = nrm/vol;
   rms    = sqrt(nrmbar);
   contour = default_contour * rms; // default to 1 x RMS

   // Scalar volume that the isosurface filters read. It is dimensioned from the
   // first three lattice extents and zero-filled here.
   vtkNew<vtkImageData> imageData;
   imageData->SetDimensions(latt_size[0],latt_size[1],latt_size[2]);
   imageData->AllocateScalars(VTK_DOUBLE, 1);
   for(int xx=0;xx<latt_size[0];xx++){
     for(int yy=0;yy<latt_size[1];yy++){
       for(int zz=0;zz<latt_size[2];zz++){
         imageData->SetScalarComponentFromDouble(xx,yy,zz,0,0.0);
   }}}

   // Positive isosurface at +contour. The triangle stripper is used to create
   // triangle strips from the isosurface; these render much faster on many systems.
   vtkNew<isosurface> posExtractor;
   posExtractor->SetInputData(imageData);
   posExtractor->SetValue(0, contour);
   vtkNew<vtkStripper> posStripper;
   posStripper->SetInputConnection(posExtractor->GetOutputPort());
   vtkNew<vtkPolyDataMapper> posMapper;
   posMapper->SetInputConnection(posStripper->GetOutputPort());
   posMapper->ScalarVisibilityOff();
   vtkNew<vtkActor> pos;
   pos->SetMapper(posMapper);
   pos->GetProperty()->SetDiffuseColor(colors->GetColor3d("posColor").GetData());
   pos->GetProperty()->SetSpecular(0.3);
   pos->GetProperty()->SetSpecularPower(20);
   pos->GetProperty()->SetOpacity(0.5);

   // Negative isosurface at -contour, built the same way.
   vtkNew<isosurface> negExtractor;
   negExtractor->SetInputData(imageData);
   negExtractor->SetValue(0, -contour);
   vtkNew<vtkStripper> negStripper;
   negStripper->SetInputConnection(negExtractor->GetOutputPort());
   vtkNew<vtkPolyDataMapper> negMapper;
   negMapper->SetInputConnection(negStripper->GetOutputPort());
   negMapper->ScalarVisibilityOff();
   vtkNew<vtkActor> neg;
   neg->SetMapper(negMapper);
   neg->GetProperty()->SetDiffuseColor(colors->GetColor3d("Ivory").GetData());

   // An outline provides context around the data.
   vtkNew<vtkOutlineFilter> outlineData;
   outlineData->SetInputData(imageData);
   vtkNew<vtkPolyDataMapper> mapOutline;
   mapOutline->SetInputConnection(outlineData->GetOutputPort());
   vtkNew<vtkActor> outline;
   outline->SetMapper(mapOutline);
   outline->GetProperty()->SetColor(colors->GetColor3d("Black").GetData());

   // This text actor is configured but not added to the renderer (see below).
   vtkNew<vtkTextActor> Text;
   //  Text->SetInput(file_list[f].c_str());
   Text->SetPosition2(0,0);
   Text->GetTextProperty()->SetFontSize(24);
   Text->GetTextProperty()->SetColor(colors->GetColor3d("Gold").GetData());

   // Text actor handed to the FrameUpdater as its `text` member.
   vtkNew<vtkTextActor> TextT;
   TextT->SetInput("T=0");
   TextT->SetPosition(0,.7*1025);
   TextT->GetTextProperty()->SetFontSize(24);
   TextT->GetTextProperty()->SetColor(colors->GetColor3d("Gold").GetData());

   // Actors are added to the renderer.
   //    aRenderer->AddActor(Text);
   aRenderer->AddActor(TextT);
   aRenderer->AddActor(outline);
   aRenderer->AddActor(pos);
   aRenderer->AddActor(neg);

   // Sign up to receive TimerEvent and KeyPressEvent
   vtkNew<FrameUpdater> fu;
   fu->SetGrid(&Grid);
   fu->SetFiles(file_list);
   fu->SetSlice(Slice);
   fu->SetSumDimensions (SumDims);
   fu->SetLoopDimensions(LoopDims);
   fu->SetDisplayDimensions(XYZDims);
   fu->SetSliceDimensions();
   fu->imageData = imageData;
   //    fu->grid_data = &data[f];
   fu->text      = TextT;
   fu->maxCount = frameCount;
   fu->posExtractor = posExtractor;
   fu->negExtractor = negExtractor;
   fu->rms = rms;
   iren->AddObserver(vtkCommand::TimerEvent, fu);
   iren->AddObserver(vtkCommand::KeyPressEvent, fu);

   // An initial camera view is created. The Dolly() method moves the camera
   // towards the FocalPoint, thereby enlarging the image.
   aRenderer->SetActiveCamera(aCamera);
   aRenderer->ResetCamera();
   aRenderer->SetBackground(colors->GetColor3d("BkgColor").GetData());
   aCamera->Dolly(1.0);
   //    double nf = file_list.size();
   //    std::cout << " Adding renderer " <<f<<" of "<<nf<<std::endl;
   aRenderer->SetViewport(0.0, 0.0,1.0 , 1.0);

   // Note that when camera movement occurs (as it does in the Dolly()
   // method), the clipping planes often need adjusting. Clipping planes
   // consist of two planes: near and far along the view direction. The
   // near plane clips out objects in front of the plane; the far plane
   // clips out objects behind the plane. This way only what is drawn
   // between the planes is actually rendered.
   aRenderer->ResetCameraClippingRange();

   // Set the size of the render window (expressed in pixels) and draw once.
   renWin->SetSize(1024, 1024);
   renWin->SetWindowName("FieldDensity");
   renWin->Render();

   // Take a pointer to the FrameUpdater for keypress mgt.
   //  KBfu = fu;
   //  vtkNew<vtkCallbackCommand> keypressCallback;
   //  keypressCallback->SetCallback(KeypressCallbackFunction);
   //  iren->AddObserver(vtkCommand::KeyPressEvent,keypressCallback);
   iren->Initialize();

   if ( mpeg ) {
 #ifdef MPEG
     // Offline mode: drive the FrameUpdater by hand, one synthetic TimerEvent
     // per frame, and append each rendered window image to the movie.
     // vtkNew releases both objects at the end of this scope.
     vtkNew<vtkWindowToImageFilter> imageFilter;
     imageFilter->SetInput( renWin );
     imageFilter->SetInputBufferTypeToRGB();

     vtkNew<vtkFFMPEGWriter> writer;
     writer->SetFileName("movie.avi");
     writer->SetRate(framerate);
     writer->SetInputConnection(imageFilter->GetOutputPort());
     writer->Start();
     for(int i=0;i<fu->maxCount;i++){
       fu->Execute(iren,vtkCommand::TimerEvent,nullptr);
       imageFilter->Modified();
       writer->Write();
     }
     writer->End();
 #else
     // The condition must be false for the assertion to fire; the message is
     // also printed so the failure is visible when asserts are compiled out.
     std::cout << "MPEG support not compiled" << std::endl;
     assert(0 && "MPEG support not compiled");
 #endif
   } else {
     // Add control of contour threshold
     // Create a slider widget
     vtkSmartPointer<vtkSliderRepresentation2D> sliderRep = vtkSmartPointer<vtkSliderRepresentation2D>::New();
     sliderRep->SetMinimumValue(0.0);
     sliderRep->SetMaximumValue(10.0);
     // Start the slider at the multiplier actually used for the initial
     // surfaces, so --isosurface and the widget agree.
     sliderRep->SetValue(default_contour);
     sliderRep->SetTitleText("Fraction RMS");

     // Set color properties:
     // Change the color of the knob that slides
     //  sliderRep->GetSliderProperty()->SetColor(colors->GetColor3d("Green").GetData());
     sliderRep->GetTitleProperty()->SetColor(colors->GetColor3d("AliceBlue").GetData());
     sliderRep->GetLabelProperty()->SetColor(colors->GetColor3d("AliceBlue").GetData());
     sliderRep->GetSelectedProperty()->SetColor(colors->GetColor3d("DeepPink").GetData());
     // Change the color of the bar
     sliderRep->GetTubeProperty()->SetColor(colors->GetColor3d("MistyRose").GetData());
     sliderRep->GetCapProperty()->SetColor(colors->GetColor3d("Yellow").GetData());
     sliderRep->SetSliderLength(0.05);
     sliderRep->SetSliderWidth(0.025);
     sliderRep->SetEndCapLength(0.02);
     sliderRep->GetPoint1Coordinate()->SetCoordinateSystemToNormalizedDisplay();
     sliderRep->GetPoint1Coordinate()->SetValue(0.1, 0.1);
     sliderRep->GetPoint2Coordinate()->SetCoordinateSystemToNormalizedDisplay();
     sliderRep->GetPoint2Coordinate()->SetValue(0.9, 0.1);

     vtkSmartPointer<vtkSliderWidget> sliderWidget = vtkSmartPointer<vtkSliderWidget>::New();
     sliderWidget->SetInteractor(iren);
     sliderWidget->SetRepresentation(sliderRep);
     sliderWidget->SetAnimationModeToAnimate();
     sliderWidget->EnabledOn();

     // Create the slider callback
     vtkSmartPointer<SliderCallback> slidercallback = vtkSmartPointer<SliderCallback>::New();
     slidercallback->fu = fu;
     sliderWidget->AddObserver(vtkCommand::InteractionEvent, slidercallback);

     if ( NoTime==0 ) {
       // NOTE(review): the timer duration is in milliseconds, so 10000/framerate
       // gives framerate/10 frames per second - confirm this is intended.
       // NOTE(review): the id is only printed, never stored in fu->timerId.
       int timerId = iren->CreateRepeatingTimer(10000/framerate);
       std::cout << "timerId "<<timerId<<std::endl;
     }

     // Start the interaction and timer
     iren->Start();
   }

   Grid_finalize();
   return EXIT_SUCCESS;
 }
Author	SHA1	Message	Date
Peter Boyle	5ce270f1de	Adding Claude related files	2026-04-21 10:41:18 -04:00
Peter Boyle	af43b067a0	New CLAUDE controllable visualiser	2026-04-10 11:23:25 -04:00
Quadro	34b44d1fee	New file for animation in MD time direction	2026-04-02 13:55:38 -04:00