Merge branch 'develop' into feature/hmc_generalise

2025-11-08 15:49:32 +00:00 · 2017-05-01 12:13:56 +01:00
parent 8c540333d5 99220f6531
commit 3344788fa1
69 changed files with 3971 additions and 3179 deletions
--- a/lib/algorithms/Algorithms.h
+++ b/lib/algorithms/Algorithms.h
@@ -46,7 +46,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>

 // Lanczos support
-#include <Grid/algorithms/iterative/MatrixUtils.h>
+//#include <Grid/algorithms/iterative/MatrixUtils.h>
 #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
 #include <Grid/algorithms/CoarsenedMatrix.h>
 #include <Grid/algorithms/FFT.h>
--- a/lib/algorithms/approx/.dirstamp
+++ b/lib/algorithms/approx/.dirstamp
--- a/lib/algorithms/densematrix/DenseMatrix.h
+++ b/lib/algorithms/densematrix/DenseMatrix.h
--- a/lib/algorithms/densematrix/Francis.h
+++ b/lib/algorithms/densematrix/Francis.h
--- a/lib/algorithms/densematrix/Householder.h
+++ b/lib/algorithms/densematrix/Householder.h
--- a/lib/algorithms/iterative/BlockConjugateGradient.h
+++ b/lib/algorithms/iterative/BlockConjugateGradient.h
@@ -0,0 +1,366 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./lib/algorithms/iterative/BlockConjugateGradient.h
+
+Copyright (C) 2017
+
+Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#ifndef GRID_BLOCK_CONJUGATE_GRADIENT_H
+#define GRID_BLOCK_CONJUGATE_GRADIENT_H
+
+
+namespace Grid {
+
+//////////////////////////////////////////////////////////////////////////
+// Block conjugate gradient. Dimension zero should be the block direction
+//////////////////////////////////////////////////////////////////////////
+template <class Field>
+class BlockConjugateGradient : public OperatorFunction<Field> {
+ public:
+
+  typedef typename Field::scalar_type scomplex;
+
+  const int blockDim = 0;
+
+  int Nblock;
+  bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge.
+                           // Defaults true.
+  RealD Tolerance;
+  Integer MaxIterations;
+  Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
+  
+  BlockConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
+    : Tolerance(tol),
+    MaxIterations(maxit),
+    ErrorOnNoConverge(err_on_no_conv){};
+
+void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) 
+{
+  int Orthog = 0; // First dimension is block dim
+  Nblock = Src._grid->_fdimensions[Orthog];
+
+  std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
+
+  Psi.checkerboard = Src.checkerboard;
+  conformable(Psi, Src);
+
+  Field P(Src);
+  Field AP(Src);
+  Field R(Src);
+  
+  Eigen::MatrixXcd m_pAp    = Eigen::MatrixXcd::Identity(Nblock,Nblock);
+  Eigen::MatrixXcd m_pAp_inv= Eigen::MatrixXcd::Identity(Nblock,Nblock);
+  Eigen::MatrixXcd m_rr     = Eigen::MatrixXcd::Zero(Nblock,Nblock);
+  Eigen::MatrixXcd m_rr_inv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
+
+  Eigen::MatrixXcd m_alpha      = Eigen::MatrixXcd::Zero(Nblock,Nblock);
+  Eigen::MatrixXcd m_beta   = Eigen::MatrixXcd::Zero(Nblock,Nblock);
+
+  // Initial residual computation & set up
+  std::vector<RealD> residuals(Nblock);
+  std::vector<RealD> ssq(Nblock);
+
+  sliceNorm(ssq,Src,Orthog);
+  RealD sssum=0;
+  for(int b=0;b<Nblock;b++) sssum+=ssq[b];
+
+  sliceNorm(residuals,Src,Orthog);
+  for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
+
+  sliceNorm(residuals,Psi,Orthog);
+  for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
+
+  // Initial search dir is guess
+  Linop.HermOp(Psi, AP);
+  
+
+  /************************************************************************
+   * Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980)
+   ************************************************************************
+   * O'Leary : R = B - A X
+   * O'Leary : P = M R ; preconditioner M = 1
+   * O'Leary : alpha = PAP^{-1} RMR
+   * O'Leary : beta  = RMR^{-1}_old RMR_new
+   * O'Leary : X=X+Palpha
+   * O'Leary : R_new=R_old-AP alpha
+   * O'Leary : P=MR_new+P beta
+   */
+
+  R = Src - AP;  
+  P = R;
+  sliceInnerProductMatrix(m_rr,R,R,Orthog);
+
+  GridStopWatch sliceInnerTimer;
+  GridStopWatch sliceMaddTimer;
+  GridStopWatch MatrixTimer;
+  GridStopWatch SolverTimer;
+  SolverTimer.Start();
+
+  int k;
+  for (k = 1; k <= MaxIterations; k++) {
+
+    RealD rrsum=0;
+    for(int b=0;b<Nblock;b++) rrsum+=real(m_rr(b,b));
+
+    std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
+	      <<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
+
+    MatrixTimer.Start();
+    Linop.HermOp(P, AP);
+    MatrixTimer.Stop();
+
+    // Alpha
+    sliceInnerTimer.Start();
+    sliceInnerProductMatrix(m_pAp,P,AP,Orthog);
+    sliceInnerTimer.Stop();
+    m_pAp_inv = m_pAp.inverse();
+    m_alpha   = m_pAp_inv * m_rr ;
+
+    // Psi, R update
+    sliceMaddTimer.Start();
+    sliceMaddMatrix(Psi,m_alpha, P,Psi,Orthog);     // add alpha *  P to psi
+    sliceMaddMatrix(R  ,m_alpha,AP,  R,Orthog,-1.0);// sub alpha * AP to resid
+    sliceMaddTimer.Stop();
+
+    // Beta
+    m_rr_inv = m_rr.inverse();
+    sliceInnerTimer.Start();
+    sliceInnerProductMatrix(m_rr,R,R,Orthog);
+    sliceInnerTimer.Stop();
+    m_beta = m_rr_inv *m_rr;
+
+    // Search update
+    sliceMaddTimer.Start();
+    sliceMaddMatrix(AP,m_beta,P,R,Orthog);
+    sliceMaddTimer.Stop();
+    P= AP;
+
+    /*********************
+     * convergence monitor
+     *********************
+     */
+    RealD max_resid=0;
+    for(int b=0;b<Nblock;b++){
+      RealD rr = real(m_rr(b,b))/ssq[b];
+      if ( rr > max_resid ) max_resid = rr;
+    }
+    
+    if ( max_resid < Tolerance*Tolerance ) { 
+
+      SolverTimer.Stop();
+
+      std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl;
+      for(int b=0;b<Nblock;b++){
+	std::cout << GridLogMessage<< "\t\tblock "<<b<<" resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
+      }
+      std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
+
+      Linop.HermOp(Psi, AP);
+      AP = AP-Src;
+      std::cout << GridLogMessage <<"\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
+
+      std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
+      std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl;
+      std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl;
+      std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl;
+      std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl;
+	    
+      IterationsToComplete = k;
+      return;
+    }
+
+  }
+  std::cout << GridLogMessage << "BlockConjugateGradient did NOT converge" << std::endl;
+
+  if (ErrorOnNoConverge) assert(0);
+  IterationsToComplete = k;
+}
+};
+
+
+//////////////////////////////////////////////////////////////////////////
+// multiRHS conjugate gradient. Dimension zero should be the block direction
+//////////////////////////////////////////////////////////////////////////
+template <class Field>
+class MultiRHSConjugateGradient : public OperatorFunction<Field> {
+ public:
+
+  typedef typename Field::scalar_type scomplex;
+
+  const int blockDim = 0;
+
+  int Nblock;
+  bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge.
+                           // Defaults true.
+  RealD Tolerance;
+  Integer MaxIterations;
+  Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
+  
+   MultiRHSConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
+    : Tolerance(tol),
+    MaxIterations(maxit),
+    ErrorOnNoConverge(err_on_no_conv){};
+
+void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) 
+{
+  int Orthog = 0; // First dimension is block dim
+  Nblock = Src._grid->_fdimensions[Orthog];
+
+  std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
+
+  Psi.checkerboard = Src.checkerboard;
+  conformable(Psi, Src);
+
+  Field P(Src);
+  Field AP(Src);
+  Field R(Src);
+  
+  std::vector<ComplexD> v_pAp(Nblock);
+  std::vector<RealD> v_rr (Nblock);
+  std::vector<RealD> v_rr_inv(Nblock);
+  std::vector<RealD> v_alpha(Nblock);
+  std::vector<RealD> v_beta(Nblock);
+
+  // Initial residual computation & set up
+  std::vector<RealD> residuals(Nblock);
+  std::vector<RealD> ssq(Nblock);
+
+  sliceNorm(ssq,Src,Orthog);
+  RealD sssum=0;
+  for(int b=0;b<Nblock;b++) sssum+=ssq[b];
+
+  sliceNorm(residuals,Src,Orthog);
+  for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
+
+  sliceNorm(residuals,Psi,Orthog);
+  for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
+
+  // Initial search dir is guess
+  Linop.HermOp(Psi, AP);
+
+  R = Src - AP;  
+  P = R;
+  sliceNorm(v_rr,R,Orthog);
+
+  GridStopWatch sliceInnerTimer;
+  GridStopWatch sliceMaddTimer;
+  GridStopWatch sliceNormTimer;
+  GridStopWatch MatrixTimer;
+  GridStopWatch SolverTimer;
+
+  SolverTimer.Start();
+  int k;
+  for (k = 1; k <= MaxIterations; k++) {
+
+    RealD rrsum=0;
+    for(int b=0;b<Nblock;b++) rrsum+=real(v_rr[b]);
+
+    std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
+	      <<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
+
+    MatrixTimer.Start();
+    Linop.HermOp(P, AP);
+    MatrixTimer.Stop();
+
+    // Alpha
+    //    sliceInnerProductVectorTest(v_pAp_test,P,AP,Orthog);
+    sliceInnerTimer.Start();
+    sliceInnerProductVector(v_pAp,P,AP,Orthog);
+    sliceInnerTimer.Stop();
+    for(int b=0;b<Nblock;b++){
+      //      std::cout << " "<< v_pAp[b]<<" "<< v_pAp_test[b]<<std::endl;
+      v_alpha[b] = v_rr[b]/real(v_pAp[b]);
+    }
+
+    // Psi, R update
+    sliceMaddTimer.Start();
+    sliceMaddVector(Psi,v_alpha, P,Psi,Orthog);     // add alpha *  P to psi
+    sliceMaddVector(R  ,v_alpha,AP,  R,Orthog,-1.0);// sub alpha * AP to resid
+    sliceMaddTimer.Stop();
+
+    // Beta
+    for(int b=0;b<Nblock;b++){
+      v_rr_inv[b] = 1.0/v_rr[b];
+    }
+    sliceNormTimer.Start();
+    sliceNorm(v_rr,R,Orthog);
+    sliceNormTimer.Stop();
+    for(int b=0;b<Nblock;b++){
+      v_beta[b] = v_rr_inv[b] *v_rr[b];
+    }
+
+    // Search update
+    sliceMaddTimer.Start();
+    sliceMaddVector(P,v_beta,P,R,Orthog);
+    sliceMaddTimer.Stop();
+
+    /*********************
+     * convergence monitor
+     *********************
+     */
+    RealD max_resid=0;
+    for(int b=0;b<Nblock;b++){
+      RealD rr = v_rr[b]/ssq[b];
+      if ( rr > max_resid ) max_resid = rr;
+    }
+    
+    if ( max_resid < Tolerance*Tolerance ) { 
+
+      SolverTimer.Stop();
+
+      std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl;
+      for(int b=0;b<Nblock;b++){
+	std::cout << GridLogMessage<< "\t\tBlock "<<b<<" resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl;
+      }
+      std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
+
+      Linop.HermOp(Psi, AP);
+      AP = AP-Src;
+      std::cout <<GridLogMessage << "\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
+
+      std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
+      std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl;
+      std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl;
+      std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl;
+      std::cout << GridLogMessage << "\tNorm       " << sliceNormTimer.Elapsed() <<std::endl;
+      std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl;
+
+
+      IterationsToComplete = k;
+      return;
+    }
+
+  }
+  std::cout << GridLogMessage << "MultiRHSConjugateGradient did NOT converge" << std::endl;
+
+  if (ErrorOnNoConverge) assert(0);
+  IterationsToComplete = k;
+}
+};
+
+
+
+}
+#endif
--- a/lib/algorithms/iterative/ConjugateGradient.h
+++ b/lib/algorithms/iterative/ConjugateGradient.h
@@ -78,18 +78,12 @@ class ConjugateGradient : public OperatorFunction<Field> {
    cp = a;
    ssq = norm2(src);

-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient: guess " << guess << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient:   src " << ssq << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient:    mp " << d << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient:   mmp " << b << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient:  cp,r " << cp << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient:     p " << a << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: guess " << guess << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:   src " << ssq << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:    mp " << d << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:   mmp " << b << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:  cp,r " << cp << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:     p " << a << std::endl;

    RealD rsq = Tolerance * Tolerance * ssq;

@@ -99,8 +93,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
    }

    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient: k=0 residual " << cp << " target " << rsq
-              << std::endl;
+              << "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;

    GridStopWatch LinalgTimer;
    GridStopWatch MatrixTimer;
@@ -148,19 +141,20 @@ class ConjugateGradient : public OperatorFunction<Field> {
        RealD resnorm = sqrt(norm2(p));
        RealD true_residual = resnorm / srcnorm;

-        std::cout << GridLogMessage
-                  << "ConjugateGradient: Converged on iteration " << k << std::endl;
-        std::cout << GridLogMessage << "Computed residual " << sqrt(cp / ssq)
-                  << " true residual " << true_residual << " target "
-                  << Tolerance << std::endl;
-        std::cout << GridLogMessage << "Time elapsed: Iterations "
-                  << SolverTimer.Elapsed() << " Matrix  "
-                  << MatrixTimer.Elapsed() << " Linalg "
-                  << LinalgTimer.Elapsed();
-        std::cout << std::endl;
+        std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl;
+        std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl;
+	std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
+	std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
+
+        std::cout << GridLogMessage << "Time breakdown "<<std::endl;
+	std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl;
+	std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl;
+	std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl;

        if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
+
 	IterationsToComplete = k;	
+
        return;
      }
    }
--- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
+++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
@@ -30,6 +30,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 #define GRID_IRL_H

 #include <string.h> //memset
+
 #ifdef USE_LAPACK
 void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
                   double *vl, double *vu, int *il, int *iu, double *abstol,
@@ -37,8 +38,9 @@ void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
                   double *work, int *lwork, int *iwork, int *liwork,
                   int *info);
 #endif
-#include "DenseMatrix.h"
-#include "EigenSort.h"
+
+#include <Grid/algorithms/densematrix/DenseMatrix.h>
+#include <Grid/algorithms/iterative/EigenSort.h>

 namespace Grid {

@@ -1088,8 +1090,6 @@ static void Lock(DenseMatrix<T> &H, 	// Hess mtx
 		 int dfg,
 		 bool herm)
 {	
-
-
  //ForceTridiagonal(H);

  int M = H.dim;
@@ -1121,7 +1121,6 @@ static void Lock(DenseMatrix<T> &H, 	// Hess mtx

  AH = Hermitian(QQ)*AH;
  AH = AH*QQ;
-	

  for(int i=con;i<M;i++){
    for(int j=con;j<M;j++){
--- a/lib/algorithms/iterative/Matrix.h
+++ b/lib/algorithms/iterative/Matrix.h
@@ -1,453 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/algorithms/iterative/Matrix.h
-
-    Copyright (C) 2015
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#ifndef MATRIX_H
-#define MATRIX_H
-
-#include <cstdlib>
-#include <string>
-#include <cmath>
-#include <vector>
-#include <iostream>
-#include <iomanip>
-#include <complex>
-#include <typeinfo>
-#include <Grid/Grid.h>
-
-
-/** Sign function **/
-template <class T> T sign(T p){return ( p/abs(p) );}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////
-///////////////////// Hijack STL containers for our wicked means /////////////////////////////////////////
-/////////////////////////////////////////////////////////////////////////////////////////////////////////
-template<class T> using Vector = Vector<T>;
-template<class T> using Matrix = Vector<Vector<T> >;
-
-template<class T> void Resize(Vector<T > & vec, int N) { vec.resize(N); }
-
-template<class T> void Resize(Matrix<T > & mat, int N, int M) { 
-  mat.resize(N);
-  for(int i=0;i<N;i++){
-    mat[i].resize(M);
-  }
-}
-template<class T> void Size(Vector<T> & vec, int &N) 
-{ 
-  N= vec.size();
-}
-template<class T> void Size(Matrix<T> & mat, int &N,int &M) 
-{ 
-  N= mat.size();
-  M= mat[0].size();
-}
-template<class T> void SizeSquare(Matrix<T> & mat, int &N) 
-{ 
-  int M; Size(mat,N,M);
-  assert(N==M);
-}
-template<class T> void SizeSame(Matrix<T> & mat1,Matrix<T> &mat2, int &N1,int &M1) 
-{ 
-  int N2,M2;
-  Size(mat1,N1,M1);
-  Size(mat2,N2,M2);
-  assert(N1==N2);
-  assert(M1==M2);
-}
-
-//*****************************************
-//*	(Complex) Vector operations	*
-//*****************************************
-
-/**Conj of a Vector **/
-template <class T> Vector<T> conj(Vector<T> p){
-	Vector<T> q(p.size());
-	for(int i=0;i<p.size();i++){q[i] = conj(p[i]);}
-	return q;
-}
-
-/** Norm of a Vector**/
-template <class T> T norm(Vector<T> p){
-	T sum = 0;
-	for(int i=0;i<p.size();i++){sum = sum + p[i]*conj(p[i]);}
-	return abs(sqrt(sum));
-}
-
-/** Norm squared of a Vector **/
-template <class T> T norm2(Vector<T> p){
-	T sum = 0;
-	for(int i=0;i<p.size();i++){sum = sum + p[i]*conj(p[i]);}
-	return abs((sum));
-}
-
-/** Sum elements of a Vector **/
-template <class T> T trace(Vector<T> p){
-	T sum = 0;
-	for(int i=0;i<p.size();i++){sum = sum + p[i];}
-	return sum;
-}
-
-/** Fill a Vector with constant c **/
-template <class T> void Fill(Vector<T> &p, T c){
-	for(int i=0;i<p.size();i++){p[i] = c;}
-}
-/** Normalize a Vector **/
-template <class T> void normalize(Vector<T> &p){
-	T m = norm(p);
-	if( abs(m) > 0.0) for(int i=0;i<p.size();i++){p[i] /= m;}
-}
-/** Vector by scalar **/
-template <class T, class U> Vector<T> times(Vector<T> p, U s){
-	for(int i=0;i<p.size();i++){p[i] *= s;}
-	return p;
-}
-template <class T, class U> Vector<T> times(U s, Vector<T> p){
-	for(int i=0;i<p.size();i++){p[i] *= s;}
-	return p;
-}
-/** inner product of a and b = conj(a) . b **/
-template <class T> T inner(Vector<T> a, Vector<T> b){
-	T m = 0.;
-	for(int i=0;i<a.size();i++){m = m + conj(a[i])*b[i];}
-	return m;
-}
-/** sum of a and b = a + b **/
-template <class T> Vector<T> add(Vector<T> a, Vector<T> b){
-	Vector<T> m(a.size());
-	for(int i=0;i<a.size();i++){m[i] = a[i] + b[i];}
-	return m;
-}
-/** sum of a and b = a - b **/
-template <class T> Vector<T> sub(Vector<T> a, Vector<T> b){
-	Vector<T> m(a.size());
-	for(int i=0;i<a.size();i++){m[i] = a[i] - b[i];}
-	return m;
-}
-
-/** 
- *********************************
- *	Matrices	         *
- *********************************
- **/
-
-template<class T> void Fill(Matrix<T> & mat, T&val) { 
-  int N,M;
-  Size(mat,N,M);
-  for(int i=0;i<N;i++){
-  for(int j=0;j<M;j++){
-    mat[i][j] = val;
-  }}
-}
-
-/** Transpose of a matrix **/
-Matrix<T> Transpose(Matrix<T> & mat){
-  int N,M;
-  Size(mat,N,M);
-  Matrix C; Resize(C,M,N);
-  for(int i=0;i<M;i++){
-  for(int j=0;j<N;j++){
-    C[i][j] = mat[j][i];
-  }} 
-  return C;
-}
-/** Set Matrix to unit matrix **/
-template<class T> void Unity(Matrix<T> &mat){
-  int N;  SizeSquare(mat,N);
-  for(int i=0;i<N;i++){
-    for(int j=0;j<N;j++){
-      if ( i==j ) A[i][j] = 1;
-      else        A[i][j] = 0;
-    } 
-  } 
-}
-/** Add C * I to matrix **/
-template<class T>
-void PlusUnit(Matrix<T> & A,T c){
-  int dim;  SizeSquare(A,dim);
-  for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;} 
-}
-
-/** return the Hermitian conjugate of matrix **/
-Matrix<T> HermitianConj(Matrix<T> &mat){
-
-  int dim; SizeSquare(mat,dim);
-
-  Matrix<T> C; Resize(C,dim,dim);
-
-  for(int i=0;i<dim;i++){
-    for(int j=0;j<dim;j++){
-      C[i][j] = conj(mat[j][i]);
-    } 
-  } 
-  return C;
-}
-
-/** return diagonal entries as a Vector **/
-Vector<T> diag(Matrix<T> &A)
-{
-  int dim; SizeSquare(A,dim);
-  Vector<T> d; Resize(d,dim);
-
-  for(int i=0;i<dim;i++){
-    d[i] = A[i][i];
-  }
-  return d;
-}
-
-/** Left multiply by a Vector **/
-Vector<T> operator *(Vector<T> &B,Matrix<T> &A)
-{
-  int K,M,N; 
-  Size(B,K);
-  Size(A,M,N);
-  assert(K==M);
-  
-  Vector<T> C; Resize(C,N);
-
-  for(int j=0;j<N;j++){
-    T sum = 0.0;
-    for(int i=0;i<M;i++){
-      sum += B[i] * A[i][j];
-    }
-    C[j] =  sum;
-  }
-  return C; 
-}
-
-/** return 1/diagonal entries as a Vector **/
-Vector<T> inv_diag(Matrix<T> & A){
-  int dim; SizeSquare(A,dim);
-  Vector<T> d; Resize(d,dim);
-  for(int i=0;i<dim;i++){
-    d[i] = 1.0/A[i][i];
-  }
-  return d;
-}
-/** Matrix Addition **/
-inline Matrix<T> operator + (Matrix<T> &A,Matrix<T> &B)
-{
-  int N,M  ; SizeSame(A,B,N,M);
-  Matrix C; Resize(C,N,M);
-  for(int i=0;i<N;i++){
-    for(int j=0;j<M;j++){
-      C[i][j] = A[i][j] +  B[i][j];
-    } 
-  } 
-  return C;
-} 
-/** Matrix Subtraction **/
-inline Matrix<T> operator- (Matrix<T> & A,Matrix<T> &B){
-  int N,M  ; SizeSame(A,B,N,M);
-  Matrix C; Resize(C,N,M);
-  for(int i=0;i<N;i++){
-  for(int j=0;j<M;j++){
-    C[i][j] = A[i][j] -  B[i][j];
-  }}
-  return C;
-} 
-
-/** Matrix scalar multiplication **/
-inline Matrix<T> operator* (Matrix<T> & A,T c){
-  int N,M; Size(A,N,M);
-  Matrix C; Resize(C,N,M);
-  for(int i=0;i<N;i++){
-  for(int j=0;j<M;j++){
-    C[i][j] = A[i][j]*c;
-  }} 
-  return C;
-} 
-/** Matrix Matrix multiplication **/
-inline Matrix<T> operator* (Matrix<T> &A,Matrix<T> &B){
-  int K,L,N,M;
-  Size(A,K,L);
-  Size(B,N,M); assert(L==N);
-  Matrix C; Resize(C,K,M);
-
-  for(int i=0;i<K;i++){
-    for(int j=0;j<M;j++){
-      T sum = 0.0;
-      for(int k=0;k<N;k++) sum += A[i][k]*B[k][j];
-      C[i][j] =sum;
-    }
-  }
-  return C; 
-} 
-/** Matrix Vector multiplication **/
-inline Vector<T> operator* (Matrix<T> &A,Vector<T> &B){
-  int M,N,K;
-  Size(A,N,M);
-  Size(B,K); assert(K==M);
-  Vector<T> C; Resize(C,N);
-  for(int i=0;i<N;i++){
-    T sum = 0.0;
-    for(int j=0;j<M;j++) sum += A[i][j]*B[j];
-    C[i] =  sum;
-  }
-  return C; 
-} 
-
-/** Some version of Matrix norm **/
-/*
-inline T Norm(){ // this is not a usual L2 norm
-    T norm = 0;
-    for(int i=0;i<dim;i++){
-      for(int j=0;j<dim;j++){
-	norm += abs(A[i][j]);
-    }}
-    return norm;
-  }
-*/
-
-/** Some version of Matrix norm **/
-template<class T> T LargestDiag(Matrix<T> &A)
-{
-  int dim ; SizeSquare(A,dim); 
-
-  T ld = abs(A[0][0]);
-  for(int i=1;i<dim;i++){
-    T cf = abs(A[i][i]);
-    if(abs(cf) > abs(ld) ){ld = cf;}
-  }
-  return ld;
-}
-
-/** Look for entries on the leading subdiagonal that are smaller than 'small' **/
-template <class T,class U> int Chop_subdiag(Matrix<T> &A,T norm, int offset, U small)
-{
-  int dim; SizeSquare(A,dim);
-  for(int l = dim - 1 - offset; l >= 1; l--) {             		
-    if((U)abs(A[l][l - 1]) < (U)small) {
-      A[l][l-1]=(U)0.0;
-      return l;
-    }
-  }
-  return 0;
-}
-
-/** Look for entries on the leading subdiagonal that are smaller than 'small' **/
-template <class T,class U> int Chop_symm_subdiag(Matrix<T> & A,T norm, int offset, U small) 
-{
-  int dim; SizeSquare(A,dim);
-  for(int l = dim - 1 - offset; l >= 1; l--) {
-    if((U)abs(A[l][l - 1]) < (U)small) {
-      A[l][l - 1] = (U)0.0;
-      A[l - 1][l] = (U)0.0;
-      return l;
-    }
-  }
-  return 0;
-}
-/**Assign a submatrix to a larger one**/
-template<class T>
-void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S)
-{
-  for(int i = row_st; i<row_end; i++){
-    for(int j = col_st; j<col_end; j++){
-      A[i][j] = S[i - row_st][j - col_st];
-    }
-  }
-}
-
-/**Get a square submatrix**/
-template <class T>
-Matrix<T> GetSubMtx(Matrix<T> &A,int row_st, int row_end, int col_st, int col_end)
-{
-  Matrix<T> H; Resize(row_end - row_st,col_end-col_st);
-
-  for(int i = row_st; i<row_end; i++){
-  for(int j = col_st; j<col_end; j++){
-    H[i-row_st][j-col_st]=A[i][j];
-  }}
-  return H;
-}
-  
- /**Assign a submatrix to a larger one NB remember Vector Vectors are transposes of the matricies they represent**/
-template<class T>
-void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S)
-{
-  for(int i = row_st; i<row_end; i++){
-  for(int j = col_st; j<col_end; j++){
-    A[i][j] = S[i - row_st][j - col_st];
-  }}
-}
-  
-/** compute b_i A_ij b_j **/ // surprised no Conj
-template<class T> T proj(Matrix<T> A, Vector<T> B){
-  int dim; SizeSquare(A,dim);
-  int dimB; Size(B,dimB);
-  assert(dimB==dim);
-  T C = 0;
-  for(int i=0;i<dim;i++){
-    T sum = 0.0;
-    for(int j=0;j<dim;j++){
-      sum += A[i][j]*B[j];
-    }
-    C +=  B[i]*sum; // No conj?
-  }
-  return C; 
-}
-
-
-/*
- *************************************************************
- *
- * Matrix Vector products
- *
- *************************************************************
- */
-// Instead make a linop and call my CG;
-
-/// q -> q Q
-template <class T,class Fermion> void times(Vector<Fermion> &q, Matrix<T> &Q)
-{
-  int M; SizeSquare(Q,M);
-  int N; Size(q,N); 
-  assert(M==N);
-
-  times(q,Q,N);
-}
-
-/// q -> q Q
-template <class T> void times(multi1d<LatticeFermion> &q, Matrix<T> &Q, int N)
-{
-  GridBase *grid = q[0]._grid;
-  int M; SizeSquare(Q,M);
-  int K; Size(q,K); 
-  assert(N<M);
-  assert(N<K);
-  Vector<Fermion> S(N,grid );
-  for(int j=0;j<N;j++){
-    S[j] = zero;
-    for(int k=0;k<N;k++){
-      S[j] = S[j] +  q[k]* Q[k][j]; 
-    }
-  }
-  for(int j=0;j<q.size();j++){
-    q[j] = S[j];
-  }
-}
-#endif
--- a/lib/algorithms/iterative/MatrixUtils.h
+++ b/lib/algorithms/iterative/MatrixUtils.h
@@ -1,75 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/algorithms/iterative/MatrixUtils.h
-
-    Copyright (C) 2015
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#ifndef GRID_MATRIX_UTILS_H
-#define GRID_MATRIX_UTILS_H
-
-namespace Grid {
-
-  namespace MatrixUtils { 
-
-    template<class T> inline void Size(Matrix<T>& A,int &N,int &M){
-      N=A.size(); assert(N>0);
-      M=A[0].size();
-      for(int i=0;i<N;i++){
-	assert(A[i].size()==M);
-      }
-    }
-
-    template<class T> inline void SizeSquare(Matrix<T>& A,int &N)
-    {
-      int M;
-      Size(A,N,M);
-      assert(N==M);
-    }
-
-    template<class T> inline void Fill(Matrix<T>& A,T & val)
-    { 
-      int N,M;
-      Size(A,N,M);
-      for(int i=0;i<N;i++){
-      for(int j=0;j<M;j++){
-	A[i][j]=val;
-      }}
-    }
-    template<class T> inline void Diagonal(Matrix<T>& A,T & val)
-    { 
-      int N;
-      SizeSquare(A,N);
-      for(int i=0;i<N;i++){
-	A[i][i]=val;
-      }
-    }
-    template<class T> inline void Identity(Matrix<T>& A)
-    {
-      Fill(A,0.0);
-      Diagonal(A,1.0);
-    }
-
-  };
-}
-#endif
--- a/lib/algorithms/iterative/TODO
+++ b/lib/algorithms/iterative/TODO
@@ -1,15 +0,0 @@
- ConjugateGradientMultiShift
- MCR
-
- Potentially Useful Boost libraries
-
- MultiArray
- Aligned allocator; memory pool
- Remez -- Mike or Boost?
- Multiprecision
- quaternians
- Tokenize
- Serialization
- Regex
- Proto (ET)
- uBlas
--- a/lib/algorithms/iterative/bisec.c
+++ b/lib/algorithms/iterative/bisec.c
@@ -1,122 +0,0 @@
-#include <math.h>
-#include <stdlib.h>
-#include <vector>
-
-struct Bisection {
-
-static void get_eig2(int row_num,std::vector<RealD> &ALPHA,std::vector<RealD> &BETA, std::vector<RealD> & eig)
-{
-  int i,j;
-  std::vector<RealD> evec1(row_num+3);
-  std::vector<RealD> evec2(row_num+3);
-  RealD eps2;
-  ALPHA[1]=0.;
-  BETHA[1]=0.;
-  for(i=0;i<row_num-1;i++) {
-    ALPHA[i+1] = A[i*(row_num+1)].real();
-    BETHA[i+2] = A[i*(row_num+1)+1].real();
-  }
-  ALPHA[row_num] = A[(row_num-1)*(row_num+1)].real();
-  bisec(ALPHA,BETHA,row_num,1,row_num,1e-10,1e-10,evec1,eps2);
-  bisec(ALPHA,BETHA,row_num,1,row_num,1e-16,1e-16,evec2,eps2);
-
-  // Do we really need to sort here?
-  int begin=1;
-  int end = row_num;
-  int swapped=1;
-  while(swapped) {
-    swapped=0;
-    for(i=begin;i<end;i++){
-      if(mag(evec2[i])>mag(evec2[i+1]))	{
-	swap(evec2+i,evec2+i+1);
-	swapped=1;
-      }
-    }
-    end--;
-    for(i=end-1;i>=begin;i--){
-      if(mag(evec2[i])>mag(evec2[i+1]))	{
-	swap(evec2+i,evec2+i+1);
-	swapped=1;
-      }
-    }
-    begin++;
-  }
-
-  for(i=0;i<row_num;i++){
-    for(j=0;j<row_num;j++) {
-      if(i==j) H[i*row_num+j]=evec2[i+1];
-      else H[i*row_num+j]=0.;
-    }
-  }
-}
-
-static void bisec(std::vector<RealD> &c,   
-		  std::vector<RealD> &b,
-		  int n,
-		  int m1,
-		  int m2,
-		  RealD eps1,
-		  RealD relfeh,
-		  std::vector<RealD> &x,
-		  RealD &eps2)
-{
-  std::vector<RealD> wu(n+2);
-
-  RealD h,q,x1,xu,x0,xmin,xmax; 
-  int i,a,k;
-
-  b[1]=0.0;
-  xmin=c[n]-fabs(b[n]);
-  xmax=c[n]+fabs(b[n]);
-  for(i=1;i<n;i++){
-    h=fabs(b[i])+fabs(b[i+1]);
-    if(c[i]+h>xmax) xmax= c[i]+h;
-    if(c[i]-h<xmin) xmin= c[i]-h;
-  }
-  xmax *=2.;
-
-  eps2=relfeh*((xmin+xmax)>0.0 ? xmax : -xmin);
-  if(eps1<=0.0) eps1=eps2;
-  eps2=0.5*eps1+7.0*(eps2);
-  x0=xmax;
-  for(i=m1;i<=m2;i++){
-    x[i]=xmax;
-    wu[i]=xmin;
-  }
-
-  for(k=m2;k>=m1;k--){
-    xu=xmin;
-    i=k;
-    do{
-      if(xu<wu[i]){
-	xu=wu[i];
-	i=m1-1;
-      }
-      i--;
-    }while(i>=m1);
-    if(x0>x[k]) x0=x[k];
-    while((x0-xu)>2*relfeh*(fabs(xu)+fabs(x0))+eps1){
-      x1=(xu+x0)/2;
-
-      a=0;
-      q=1.0;
-      for(i=1;i<=n;i++){
-	q=c[i]-x1-((q!=0.0)? b[i]*b[i]/q:fabs(b[i])/relfeh);
-	if(q<0) a++;
-      }
-      //			printf("x1=%e a=%d\n",x1,a);
-      if(a<k){
-	if(a<m1){
-	  xu=x1;
-	  wu[m1]=x1;
-	}else {
-	  xu=x1;
-	  wu[a+1]=x1;
-	  if(x[a]>x1) x[a]=x1;
-	}
-      }else x0=x1;
-    }
-    x[k]=(x0+xu)/2;
-  }
-}
-}
--- a/lib/algorithms/iterative/get_eig.c
+++ b/lib/algorithms/iterative/get_eig.c
@@ -1 +0,0 @@
-