Improved the Lanczos

2025-11-03 21:44:33 +00:00 · 2017-06-20 18:46:01 +01:00
parent e9cc21900f
commit 0486ff8e79
7 changed files with 211 additions and 1712 deletions
--- a/28
+++ b/28
@@ -1,24 +1,28 @@
 TODO:
 ---------------

-Peter's work list:
-1)- Precision conversion and sort out localConvert      <-- 
-2)- Remove DenseVector, DenseMatrix; Use Eigen instead. <-- 
-
-- Profile CG, BlockCG, etc... Flop count/rate -- PARTIAL, time but no flop/s yet
-- Physical propagator interface
-- Conserved currents
-- GaugeFix into central location
-- Multigrid Wilson and DWF, compare to other Multigrid implementations
-- HDCR resume
+Large item work list:
+1)- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- 
+2)- MultiRHS with spread out extra dim
+3)- BG/Q port and check
+4)- Precision conversion and sort out localConvert      <-- partial
+  - Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet
+5)- Physical propagator interface
+6)- Conserved currents
+7)- Multigrid Wilson and DWF, compare to other Multigrid implementations
+8)- HDCR resume

 Recent DONE 
+-- GaugeFix into central location                      <-- DONE
+-- Scidac and Ildg metadata handling                   <-- DONE
+-- Binary I/O MPI2 IO                                  <-- DONE
 -- Binary I/O speed up & x-strips                      <-- DONE
 -- Cut down the exterior overhead                      <-- DONE
 -- Interior legs from SHM comms                        <-- DONE
 -- Half-precision comms                                <-- DONE
-- Merge high precision reduction into develop        
-- multiRHS DWF; benchmark on Cori/BNL for comms elimination
+-- Merge high precision reduction into develop         <-- DONE
+-- BlockCG, BCGrQ                                      <-- DONE
+-- multiRHS DWF; benchmark on Cori/BNL for comms elimination <-- DONE
   -- slice* linalg routines for multiRHS, BlockCG    

 -----
--- a/lib/algorithms/densematrix/DenseMatrix.h
+++ b/lib/algorithms/densematrix/DenseMatrix.h
@@ -1,137 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/algorithms/iterative/DenseMatrix.h
-
-    Copyright (C) 2015
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-Author: paboyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#ifndef GRID_DENSE_MATRIX_H
-#define GRID_DENSE_MATRIX_H
-
-namespace Grid {
-    /////////////////////////////////////////////////////////////
-    // Matrix untils
-    /////////////////////////////////////////////////////////////
-
-template<class T> using DenseVector = std::vector<T>;
-template<class T> using DenseMatrix = DenseVector<DenseVector<T> >;
-
-template<class T> void Size(DenseVector<T> & vec, int &N) 
-{ 
-  N= vec.size();
-}
-template<class T> void Size(DenseMatrix<T> & mat, int &N,int &M) 
-{ 
-  N= mat.size();
-  M= mat[0].size();
-}
-
-template<class T> void SizeSquare(DenseMatrix<T> & mat, int &N) 
-{ 
-  int M; Size(mat,N,M);
-  assert(N==M);
-}
-
-template<class T> void Resize(DenseVector<T > & mat, int N) { 
-  mat.resize(N);
-}
-template<class T> void Resize(DenseMatrix<T > & mat, int N, int M) { 
-  mat.resize(N);
-  for(int i=0;i<N;i++){
-    mat[i].resize(M);
-  }
-}
-template<class T> void Fill(DenseMatrix<T> & mat, T&val) { 
-  int N,M;
-  Size(mat,N,M);
-  for(int i=0;i<N;i++){
-  for(int j=0;j<M;j++){
-    mat[i][j] = val;
-  }}
-}
-
-/** Transpose of a matrix **/
-template<class T> DenseMatrix<T> Transpose(DenseMatrix<T> & mat){
-  int N,M;
-  Size(mat,N,M);
-  DenseMatrix<T> C; Resize(C,M,N);
-  for(int i=0;i<M;i++){
-  for(int j=0;j<N;j++){
-    C[i][j] = mat[j][i];
-  }} 
-  return C;
-}
-/** Set DenseMatrix to unit matrix **/
-template<class T> void Unity(DenseMatrix<T> &A){
-  int N;  SizeSquare(A,N);
-  for(int i=0;i<N;i++){
-    for(int j=0;j<N;j++){
-      if ( i==j ) A[i][j] = 1;
-      else        A[i][j] = 0;
-    } 
-  } 
-}
-
-/** Add C * I to matrix **/
-template<class T>
-void PlusUnit(DenseMatrix<T> & A,T c){
-  int dim;  SizeSquare(A,dim);
-  for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;} 
-}
-
-/** return the Hermitian conjugate of matrix **/
-template<class T>
-DenseMatrix<T> HermitianConj(DenseMatrix<T> &mat){
-
-  int dim; SizeSquare(mat,dim);
-
-  DenseMatrix<T> C; Resize(C,dim,dim);
-
-  for(int i=0;i<dim;i++){
-    for(int j=0;j<dim;j++){
-      C[i][j] = conj(mat[j][i]);
-    } 
-  } 
-  return C;
-}
-/**Get a square submatrix**/
-template <class T>
-DenseMatrix<T> GetSubMtx(DenseMatrix<T> &A,int row_st, int row_end, int col_st, int col_end)
-{
-  DenseMatrix<T> H; Resize(H,row_end - row_st,col_end-col_st);
-
-  for(int i = row_st; i<row_end; i++){
-  for(int j = col_st; j<col_end; j++){
-    H[i-row_st][j-col_st]=A[i][j];
-  }}
-  return H;
-}
-
-}
-
-#include "Householder.h"
-#include "Francis.h"
-
-#endif
-
--- a/lib/algorithms/densematrix/Francis.h
+++ b/lib/algorithms/densematrix/Francis.h
@@ -1,525 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/algorithms/iterative/Francis.h
-
-    Copyright (C) 2015
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#ifndef FRANCIS_H
-#define FRANCIS_H
-
-#include <cstdlib>
-#include <string>
-#include <cmath>
-#include <iostream>
-#include <sstream>
-#include <stdexcept>
-#include <fstream>
-#include <complex>
-#include <algorithm>
-
-//#include <timer.h>
-//#include <lapacke.h>
-//#include <Eigen/Dense>
-
-namespace Grid {
-
-template <class T> int SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small);
-template <class T> int     Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small);
-
-/**
-  Find the eigenvalues of an upper hessenberg matrix using the Francis QR algorithm.
-H =
-      x  x  x  x  x  x  x  x  x
-      x  x  x  x  x  x  x  x  x
-      0  x  x  x  x  x  x  x  x
-      0  0  x  x  x  x  x  x  x
-      0  0  0  x  x  x  x  x  x
-      0  0  0  0  x  x  x  x  x
-      0  0  0  0  0  x  x  x  x
-      0  0  0  0  0  0  x  x  x
-      0  0  0  0  0  0  0  x  x
-Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary.
-**/
-template <class T>
-int QReigensystem(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small)
-{
-  DenseMatrix<T> H = Hin; 
-
-  int N ; SizeSquare(H,N);
-  int M = N;
-
-  Fill(evals,0);
-  Fill(evecs,0);
-
-  T s,t,x=0,y=0,z=0;
-  T u,d;
-  T apd,amd,bc;
-  DenseVector<T> p(N,0);
-  T nrm = Norm(H);    ///DenseMatrix Norm
-  int n, m;
-  int e = 0;
-  int it = 0;
-  int tot_it = 0;
-  int l = 0;
-  int r = 0;
-  DenseMatrix<T> P; Resize(P,N,N); Unity(P);
-  DenseVector<int> trows(N,0);
-
-  /// Check if the matrix is really hessenberg, if not abort
-  RealD sth = 0;
-  for(int j=0;j<N;j++){
-    for(int i=j+2;i<N;i++){
-      sth = abs(H[i][j]);
-      if(sth > small){
-	std::cout << "Non hessenberg H = " << sth << " > " << small << std::endl;
-	exit(1);
-      }
-    }
-  }
-
-  do{
-    std::cout << "Francis QR Step N = " << N << std::endl;
-    /** Check for convergence
-      x  x  x  x  x
-      0  x  x  x  x
-      0  0  x  x  x
-      0  0  x  x  x
-      0  0  0  0  x
-      for this matrix l = 4
-     **/
-    do{
-      l = Chop_subdiag(H,nrm,e,small);
-      r = 0;    ///May have converged on more than one eval
-      ///Single eval
-      if(l == N-1){
-        evals[e] = H[l][l];
-        N--; e++; r++; it = 0;
-      }
-      ///RealD eval
-      if(l == N-2){
-        trows[l+1] = 1;    ///Needed for UTSolve
-        apd = H[l][l] + H[l+1][l+1];
-        amd = H[l][l] - H[l+1][l+1];
-        bc =  (T)4.0*H[l+1][l]*H[l][l+1];
-        evals[e]   = (T)0.5*( apd + sqrt(amd*amd + bc) );
-        evals[e+1] = (T)0.5*( apd - sqrt(amd*amd + bc) );
-        N-=2; e+=2; r++; it = 0;
-      }
-    } while(r>0);
-
-    if(N ==0) break;
-
-    DenseVector<T > ck; Resize(ck,3);
-    DenseVector<T> v;   Resize(v,3);
-
-    for(int m = N-3; m >= l; m--){
-      ///Starting vector essentially random shift.
-      if(it%10 == 0 && N >= 3 && it > 0){
-        s = (T)1.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) );
-        t = (T)0.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) );
-        x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t;
-        y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s);
-        z = H[m+1][m]*H[m+2][m+1];
-      }
-      ///Starting vector implicit Q theorem
-      else{
-        s = (H[N-2][N-2] + H[N-1][N-1]);
-        t = (H[N-2][N-2]*H[N-1][N-1] - H[N-2][N-1]*H[N-1][N-2]);
-        x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t;
-        y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s);
-        z = H[m+1][m]*H[m+2][m+1];
-      }
-      ck[0] = x; ck[1] = y; ck[2] = z;
-
-      if(m == l) break;
-
-      /** Some stupid thing from numerical recipies, seems to work**/
-      // PAB.. for heaven's sake quote page, purpose, evidence it works.
-      //       what sort of comment is that!?!?!?
-      u=abs(H[m][m-1])*(abs(y)+abs(z));
-      d=abs(x)*(abs(H[m-1][m-1])+abs(H[m][m])+abs(H[m+1][m+1]));
-      if ((T)abs(u+d) == (T)abs(d) ){
-	l = m; break;
-      }
-
-      //if (u < small){l = m; break;}
-    }
-    if(it > 100000){
-     std::cout << "QReigensystem: bugger it got stuck after 100000 iterations" << std::endl;
-     std::cout << "got " << e << " evals " << l << " " << N << std::endl;
-      exit(1);
-    }
-    normalize(ck);    ///Normalization cancels in PHP anyway
-    T beta;
-    Householder_vector<T >(ck, 0, 2, v, beta);
-    Householder_mult<T >(H,v,beta,0,l,l+2,0);
-    Householder_mult<T >(H,v,beta,0,l,l+2,1);
-    ///Accumulate eigenvector
-    Householder_mult<T >(P,v,beta,0,l,l+2,1);
-    int sw = 0;      ///Are we on the last row?
-    for(int k=l;k<N-2;k++){
-      x = H[k+1][k];
-      y = H[k+2][k];
-      z = (T)0.0;
-      if(k+3 <= N-1){
-	z = H[k+3][k];
-      } else{
-	sw = 1; 
-	v[2] = (T)0.0;
-      }
-      ck[0] = x; ck[1] = y; ck[2] = z;
-      normalize(ck);
-      Householder_vector<T >(ck, 0, 2-sw, v, beta);
-      Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,0);
-      Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,1);
-      ///Accumulate eigenvector
-      Householder_mult<T >(P,v, beta,0,k+1,k+3-sw,1);
-    }
-    it++;
-    tot_it++;
-  }while(N > 1);
-  N = evals.size();
-  ///Annoying - UT solves in reverse order;
-  DenseVector<T> tmp; Resize(tmp,N);
-  for(int i=0;i<N;i++){
-    tmp[i] = evals[N-i-1];
-  } 
-  evals = tmp;
-  UTeigenvectors(H, trows, evals, evecs);
-  for(int i=0;i<evals.size();i++){evecs[i] = P*evecs[i]; normalize(evecs[i]);}
-  return tot_it;
-}
-
-template <class T>
-int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small)
-{
-  /**
-  Find the eigenvalues of an upper Hessenberg matrix using the Wilkinson QR algorithm.
-  H =
-  x  x  0  0  0  0
-  x  x  x  0  0  0
-  0  x  x  x  0  0
-  0  0  x  x  x  0
-  0  0  0  x  x  x
-  0  0  0  0  x  x
-  Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary.  **/
-  return my_Wilkinson(Hin, evals, evecs, small, small);
-}
-
-template <class T>
-int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small, RealD tol)
-{
-  int N; SizeSquare(Hin,N);
-  int M = N;
-
-  ///I don't want to modify the input but matricies must be passed by reference
-  //Scale a matrix by its "norm"
-  //RealD Hnorm = abs( Hin.LargestDiag() ); H =  H*(1.0/Hnorm);
-  DenseMatrix<T> H;  H = Hin;
-  
-  RealD Hnorm = abs(Norm(Hin));
-  H = H * (1.0 / Hnorm);
-
-  // TODO use openmp and memset
-  Fill(evals,0);
-  Fill(evecs,0);
-
-  T s, t, x = 0, y = 0, z = 0;
-  T u, d;
-  T apd, amd, bc;
-  DenseVector<T> p; Resize(p,N); Fill(p,0);
-
-  T nrm = Norm(H);    ///DenseMatrix Norm
-  int n, m;
-  int e = 0;
-  int it = 0;
-  int tot_it = 0;
-  int l = 0;
-  int r = 0;
-  DenseMatrix<T> P; Resize(P,N,N);
-  Unity(P);
-  DenseVector<int> trows(N, 0);
-  /// Check if the matrix is really symm tridiag
-  RealD sth = 0;
-  for(int j = 0; j < N; ++j)
-  {
-    for(int i = j + 2; i < N; ++i)
-    {
-      if(abs(H[i][j]) > tol || abs(H[j][i]) > tol)
-      {
-	std::cout << "Non Tridiagonal H(" << i << ","<< j << ") = |" << Real( real( H[j][i] ) ) << "| > " << tol << std::endl;
-	std::cout << "Warning tridiagonalize and call again" << std::endl;
-        // exit(1); // see what is going on
-        //return;
-      }
-    }
-  }
-
-  do{
-    do{
-      //Jasper
-      //Check if the subdiagonal term is small enough (<small)
-      //if true then it is converged.
-      //check start from H.dim - e - 1
-      //How to deal with more than 2 are converged?
-      //What if Chop_symm_subdiag return something int the middle?
-      //--------------
-      l = Chop_symm_subdiag(H,nrm, e, small);
-      r = 0;    ///May have converged on more than one eval
-      //Jasper
-      //In this case
-      // x  x  0  0  0  0
-      // x  x  x  0  0  0
-      // 0  x  x  x  0  0
-      // 0  0  x  x  x  0
-      // 0  0  0  x  x  0
-      // 0  0  0  0  0  x  <- l
-      //--------------
-      ///Single eval
-      if(l == N - 1)
-      {
-        evals[e] = H[l][l];
-        N--;
-        e++;
-        r++;
-        it = 0;
-      }
-      //Jasper
-      // x  x  0  0  0  0
-      // x  x  x  0  0  0
-      // 0  x  x  x  0  0
-      // 0  0  x  x  0  0
-      // 0  0  0  0  x  x  <- l
-      // 0  0  0  0  x  x
-      //--------------
-      ///RealD eval
-      if(l == N - 2)
-      {
-        trows[l + 1] = 1;    ///Needed for UTSolve
-        apd = H[l][l] + H[l + 1][ l + 1];
-        amd = H[l][l] - H[l + 1][l + 1];
-        bc =  (T) 4.0 * H[l + 1][l] * H[l][l + 1];
-        evals[e] = (T) 0.5 * (apd + sqrt(amd * amd + bc));
-        evals[e + 1] = (T) 0.5 * (apd - sqrt(amd * amd + bc));
-        N -= 2;
-        e += 2;
-        r++;
-        it = 0;
-      }
-    }while(r > 0);
-    //Jasper
-    //Already converged
-    //--------------
-    if(N == 0) break;
-
-    DenseVector<T> ck,v; Resize(ck,2); Resize(v,2);
-
-    for(int m = N - 3; m >= l; m--)
-    {
-      ///Starting vector essentially random shift.
-      if(it%10 == 0 && N >= 3 && it > 0)
-      {
-        t = abs(H[N - 1][N - 2]) + abs(H[N - 2][N - 3]);
-        x = H[m][m] - t;
-        z = H[m + 1][m];
-      } else {
-      ///Starting vector implicit Q theorem
-        d = (H[N - 2][N - 2] - H[N - 1][N - 1]) * (T) 0.5;
-        t =  H[N - 1][N - 1] - H[N - 1][N - 2] * H[N - 1][N - 2] 
-	  / (d + sign(d) * sqrt(d * d + H[N - 1][N - 2] * H[N - 1][N - 2]));
-        x = H[m][m] - t;
-        z = H[m + 1][m];
-      }
-      //Jasper
-      //why it is here????
-      //-----------------------
-      if(m == l)
-        break;
-
-      u = abs(H[m][m - 1]) * (abs(y) + abs(z));
-      d = abs(x) * (abs(H[m - 1][m - 1]) + abs(H[m][m]) + abs(H[m + 1][m + 1]));
-      if ((T)abs(u + d) == (T)abs(d))
-      {
-        l = m;
-        break;
-      }
-    }
-    //Jasper
-    if(it > 1000000)
-    {
-      std::cout << "Wilkinson: bugger it got stuck after 100000 iterations" << std::endl;
-      std::cout << "got " << e << " evals " << l << " " << N << std::endl;
-      exit(1);
-    }
-    //
-    T s, c;
-    Givens_calc<T>(x, z, c, s);
-    Givens_mult<T>(H, l, l + 1, c, -s, 0);
-    Givens_mult<T>(H, l, l + 1, c,  s, 1);
-    Givens_mult<T>(P, l, l + 1, c,  s, 1);
-    //
-    for(int k = l; k < N - 2; ++k)
-    {
-      x = H.A[k + 1][k];
-      z = H.A[k + 2][k];
-      Givens_calc<T>(x, z, c, s);
-      Givens_mult<T>(H, k + 1, k + 2, c, -s, 0);
-      Givens_mult<T>(H, k + 1, k + 2, c,  s, 1);
-      Givens_mult<T>(P, k + 1, k + 2, c,  s, 1);
-    }
-    it++;
-    tot_it++;
-  }while(N > 1);
-
-  N = evals.size();
-  ///Annoying - UT solves in reverse order;
-  DenseVector<T> tmp(N);
-  for(int i = 0; i < N; ++i)
-    tmp[i] = evals[N-i-1];
-  evals = tmp;
-  //
-  UTeigenvectors(H, trows, evals, evecs);
-  //UTSymmEigenvectors(H, trows, evals, evecs);
-  for(int i = 0; i < evals.size(); ++i)
-  {
-    evecs[i] = P * evecs[i];
-    normalize(evecs[i]);
-    evals[i] = evals[i] * Hnorm;
-  }
-  // // FIXME this is to test
-  // Hin.write("evecs3", evecs);
-  // Hin.write("evals3", evals);
-  // // check rsd
-  // for(int i = 0; i < M; i++) {
-  //   vector<T> Aevec = Hin * evecs[i];
-  //   RealD norm2(0.);
-  //   for(int j = 0; j < M; j++) {
-  //     norm2 += (Aevec[j] - evals[i] * evecs[i][j]) * (Aevec[j] - evals[i] * evecs[i][j]);
-  //   }
-  // }
-  return tot_it;
-}
-
-template <class T>
-void Hess(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){
-
-  /**
-  turn a matrix A =
-  x  x  x  x  x
-  x  x  x  x  x
-  x  x  x  x  x
-  x  x  x  x  x
-  x  x  x  x  x
-  into
-  x  x  x  x  x
-  x  x  x  x  x
-  0  x  x  x  x
-  0  0  x  x  x
-  0  0  0  x  x
-  with householder rotations
-  Slow.
-  */
-  int N ; SizeSquare(A,N);
-  DenseVector<T > p; Resize(p,N); Fill(p,0);
-
-  for(int k=start;k<N-2;k++){
-    //cerr << "hess" << k << std::endl;
-    DenseVector<T > ck,v; Resize(ck,N-k-1); Resize(v,N-k-1);
-    for(int i=k+1;i<N;i++){ck[i-k-1] = A(i,k);}  ///kth column
-    normalize(ck);    ///Normalization cancels in PHP anyway
-    T beta;
-    Householder_vector<T >(ck, 0, ck.size()-1, v, beta);  ///Householder vector
-    Householder_mult<T>(A,v,beta,start,k+1,N-1,0);  ///A -> PA
-    Householder_mult<T >(A,v,beta,start,k+1,N-1,1);  ///PA -> PAP^H
-    ///Accumulate eigenvector
-    Householder_mult<T >(Q,v,beta,start,k+1,N-1,1);  ///Q -> QP^H
-  }
-  /*for(int l=0;l<N-2;l++){
-    for(int k=l+2;k<N;k++){
-    A(0,k,l);
-    }
-    }*/
-}
-
-template <class T>
-void Tri(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){
-///Tridiagonalize a matrix
-  int N; SizeSquare(A,N);
-  Hess(A,Q,start);
-  /*for(int l=0;l<N-2;l++){
-    for(int k=l+2;k<N;k++){
-    A(0,l,k);
-    }
-    }*/
-}
-
-template <class T>
-void ForceTridiagonal(DenseMatrix<T> &A){
-///Tridiagonalize a matrix
-  int N ; SizeSquare(A,N);
-  for(int l=0;l<N-2;l++){
-    for(int k=l+2;k<N;k++){
-      A[l][k]=0;
-      A[k][l]=0;
-    }
-  }
-}
-
-template <class T>
-int my_SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
-  ///Solve a symmetric eigensystem, not necessarily in tridiagonal form
-  int N; SizeSquare(Ain,N);
-  DenseMatrix<T > A; A = Ain;
-  DenseMatrix<T > Q; Resize(Q,N,N); Unity(Q);
-  Tri(A,Q,0);
-  int it = my_Wilkinson<T>(A, evals, evecs, small);
-  for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];}
-  return it;
-}
-
-
-template <class T>
-int Wilkinson(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
-  return my_Wilkinson(Ain, evals, evecs, small);
-}
-
-template <class T>
-int SymmEigensystem(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
-  return my_SymmEigensystem(Ain, evals, evecs, small);
-}
-
-template <class T>
-int Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
-///Solve a general eigensystem, not necessarily in tridiagonal form
-  int N = Ain.dim;
-  DenseMatrix<T > A(N); A = Ain;
-  DenseMatrix<T > Q(N);Q.Unity();
-  Hess(A,Q,0);
-  int it = QReigensystem<T>(A, evals, evecs, small);
-  for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];}
-  return it;
-}
-
-}
-#endif
--- a/lib/algorithms/densematrix/Householder.h
+++ b/lib/algorithms/densematrix/Householder.h
@@ -1,242 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/algorithms/iterative/Householder.h
-
-    Copyright (C) 2015
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#ifndef HOUSEHOLDER_H
-#define HOUSEHOLDER_H
-
-#define TIMER(A) std::cout << GridLogMessage << __FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
-#define ENTER()  std::cout << GridLogMessage << "ENTRY "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
-#define LEAVE()  std::cout << GridLogMessage << "EXIT  "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
-
-#include <cstdlib>
-#include <string>
-#include <cmath>
-#include <iostream>
-#include <sstream>
-#include <stdexcept>
-#include <fstream>
-#include <complex>
-#include <algorithm>
-
-namespace Grid {
-/** Comparison function for finding the max element in a vector **/
-template <class T> bool cf(T i, T j) { 
-  return abs(i) < abs(j); 
-}
-
-/** 
-	Calculate a real Givens angle 
- **/
-template <class T> inline void Givens_calc(T y, T z, T &c, T &s){
-
-  RealD mz = (RealD)abs(z);
-  
-  if(mz==0.0){
-    c = 1; s = 0;
-  }
-  if(mz >= (RealD)abs(y)){
-    T t = -y/z;
-    s = (T)1.0 / sqrt ((T)1.0 + t * t);
-    c = s * t;
-  } else {
-    T t = -z/y;
-    c = (T)1.0 / sqrt ((T)1.0 + t * t);
-    s = c * t;
-  }
-}
-
-template <class T> inline void Givens_mult(DenseMatrix<T> &A,  int i, int k, T c, T s, int dir)
-{
-  int q ; SizeSquare(A,q);
-
-  if(dir == 0){
-    for(int j=0;j<q;j++){
-      T nu = A[i][j];
-      T w  = A[k][j];
-      A[i][j] = (c*nu + s*w);
-      A[k][j] = (-s*nu + c*w);
-    }
-  }
-
-  if(dir == 1){
-    for(int j=0;j<q;j++){
-      T nu = A[j][i];
-      T w  = A[j][k];
-      A[j][i] = (c*nu - s*w);
-      A[j][k] = (s*nu + c*w);
-    }
-  }
-}
-
-/**
-	from input = x;
-	Compute the complex Householder vector, v, such that
-	P = (I - b v transpose(v) )
-	b = 2/v.v
-
-	P | x |    | x | k = 0
-	| x |    | 0 | 
-	| x | =  | 0 |
-	| x |    | 0 | j = 3
-	| x |	   | x |
-
-	These are the "Unreduced" Householder vectors.
-
- **/
-template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, DenseVector<T> &v, T &beta)
-{
-  int N ; Size(input,N);
-  T m = *max_element(input.begin() + k, input.begin() + j + 1, cf<T> );
-
-  if(abs(m) > 0.0){
-    T alpha = 0;
-
-    for(int i=k; i<j+1; i++){
-      v[i] = input[i]/m;
-      alpha = alpha + v[i]*conj(v[i]);
-    }
-    alpha = sqrt(alpha);
-    beta = (T)1.0/(alpha*(alpha + abs(v[k]) ));
-
-    if(abs(v[k]) > 0.0)  v[k] = v[k] + (v[k]/abs(v[k]))*alpha;
-    else                 v[k] = -alpha;
-  } else{
-    for(int i=k; i<j+1; i++){
-      v[i] = 0.0;
-    } 
-  }
-}
-
-/**
-	from input = x;
-	Compute the complex Householder vector, v, such that
-	P = (I - b v transpose(v) )
-	b = 2/v.v
-
-	Px = alpha*e_dir
-
-	These are the "Unreduced" Householder vectors.
-
- **/
-
-template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, int dir, DenseVector<T> &v, T &beta)
-{
-  int N = input.size();
-  T m = *max_element(input.begin() + k, input.begin() + j + 1, cf);
-  
-  if(abs(m) > 0.0){
-    T alpha = 0;
-
-    for(int i=k; i<j+1; i++){
-      v[i] = input[i]/m;
-      alpha = alpha + v[i]*conj(v[i]);
-    }
-    
-    alpha = sqrt(alpha);
-    beta = 1.0/(alpha*(alpha + abs(v[dir]) ));
-	
-    if(abs(v[dir]) > 0.0) v[dir] = v[dir] + (v[dir]/abs(v[dir]))*alpha;
-    else                  v[dir] = -alpha;
-  }else{
-    for(int i=k; i<j+1; i++){
-      v[i] = 0.0;
-    } 
-  }
-}
-
-/**
-	Compute the product PA if trans = 0
-	AP if trans = 1
-	P = (I - b v transpose(v) )
-	b = 2/v.v
-	start at element l of matrix A
-	v is of length j - k + 1 of v are nonzero
- **/
-
-template <class T> inline void Householder_mult(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int k, int j, int trans)
-{
-  int N ; SizeSquare(A,N);
-
-  if(abs(beta) > 0.0){
-    for(int p=l; p<N; p++){
-      T s = 0;
-      if(trans==0){
-	for(int i=k;i<j+1;i++) s += conj(v[i-k])*A[i][p];
-	s *= beta;
-	for(int i=k;i<j+1;i++){ A[i][p] = A[i][p]-s*conj(v[i-k]);}
-      } else {
-	for(int i=k;i<j+1;i++){ s += conj(v[i-k])*A[p][i];}
-	s *= beta;
-	for(int i=k;i<j+1;i++){ A[p][i]=A[p][i]-s*conj(v[i-k]);}
-      }
-    }
-  }
-}
-
-/**
-	Compute the product PA if trans = 0
-	AP if trans = 1
-	P = (I - b v transpose(v) )
-	b = 2/v.v
-	start at element l of matrix A
-	v is of length j - k + 1 of v are nonzero
-	A is tridiagonal
- **/
-template <class T> inline void Householder_mult_tri(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int M, int k, int j, int trans)
-{
-  if(abs(beta) > 0.0){
-
-    int N ; SizeSquare(A,N);
-
-    DenseMatrix<T> tmp; Resize(tmp,N,N); Fill(tmp,0); 
-
-    T s;
-    for(int p=l; p<M; p++){
-      s = 0;
-      if(trans==0){
-	for(int i=k;i<j+1;i++) s = s + conj(v[i-k])*A[i][p];
-      }else{
-	for(int i=k;i<j+1;i++) s = s + v[i-k]*A[p][i];
-      }
-      s = beta*s;
-      if(trans==0){
-	for(int i=k;i<j+1;i++) tmp[i][p] = tmp(i,p) - s*v[i-k];
-      }else{
-	for(int i=k;i<j+1;i++) tmp[p][i] = tmp[p][i] - s*conj(v[i-k]);
-      }
-    }
-    for(int p=l; p<M; p++){
-      if(trans==0){
-	for(int i=k;i<j+1;i++) A[i][p] = A[i][p] + tmp[i][p];
-      }else{
-	for(int i=k;i<j+1;i++) A[p][i] = A[p][i] + tmp[p][i];
-      }
-    }
-  }
-}
-}
-#endif
--- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
+++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
--- a/lib/qcd/hmc/checkpointers/ILDGCheckpointer.h
+++ b/lib/qcd/hmc/checkpointers/ILDGCheckpointer.h
@@ -102,7 +102,7 @@ class ILDGHmcCheckpointer : public BaseHmcCheckpointer<Implementation> {
    FieldMetaData header;
    IldgReader _IldgReader;
    _IldgReader.open(config);
-    _IldgReader.readConfiguration(config,U,header);  // format from the header
+    _IldgReader.readConfiguration(U,header);  // format from the header
    _IldgReader.close();

    std::cout << GridLogMessage << "Read ILDG Configuration from " << config
--- a/tests/solver/Test_dwf_lanczos.cc
+++ b/tests/solver/Test_dwf_lanczos.cc
@@ -54,7 +54,7 @@ int main (int argc, char ** argv)
  GridParallelRNG          RNG5rb(FrbGrid);  RNG5.SeedFixedIntegers(seeds5);

  LatticeGaugeField Umu(UGrid); 
-  SU3::TepidConfiguration(RNG4, Umu);
+  SU3::HotConfiguration(RNG4, Umu);

  std::vector<LatticeColourMatrix> U(4,UGrid);
  for(int mu=0;mu<Nd;mu++){