Cleaning up the dense matrix and lanczos sector

2026-07-22 03:23:28 +01:00 · 2017-04-15 08:54:11 +01:00
parent a9c22d5f43
commit a8db024c92
13 changed files with 61 additions and 695 deletions
@@ -1,6 +1,27 @@
 TODO:
 ---------------
 Peter's work list:
 -- Merge high precision reduction into develop
 -- Physical propagator interface
 -- Precision conversion and sort out localConvert
 -- slice* linalg routines for multiRHS, BlockCG
 -- Profile CG, BlockCG, etc... Flop count/rate
 -- Binary I/O speed up & x-strips
 -- Half-precision comms
 -- multiRHS DWF; benchmark on Cori/BNL for comms elimination
 -- GaugeFix into central location
 -- Help Julia with NPR code
 -- Switch to measurements
 -- Multigrid Wilson and DWF, compare to other Multigrid implementations
 -- Remove DenseVector, DenseMatrix; Use Eigen instead.
 -- quaternions                 -- Might not need
 -- Conserved currents
 -----
 * Forces; the UdSdU  term in gauge force term is half of what I think it should
  be. This is a consequence of taking ONLY the first term in:
@@ -21,16 +42,8 @@ TODO:
  This means we must double the force in the Test_xxx_force routines, and is the origin of the factor of two.
  This 2x is applied by hand in the fermion routines and in the Test_rect_force routine.
 Policies:
 * Link smearing/boundary conds; Policy class based implementation ; framework more in place
 * Support different boundary conditions (finite temp, chem. potential ... )
 * Support different fermion representations? 
  - contained entirely within the integrator presently
 - Sign of force term.
 - Reversibility test.
@@ -41,11 +54,6 @@ Policies:
 - Audit oIndex usage for cb behaviour
 - Rectangle gauge actions.
  Iwasaki,
  Symanzik,
  ... etc...
 - Prepare multigrid for HMC. - Alternate setup schemes.
 - Support for ILDG --- ugly, not done
@@ -55,9 +63,11 @@ Policies:
 - FFTnD ?
 - Gparity; hand opt use template specialisation elegance to enable the optimised paths ?
 - Gparity force term; Gparity (R)HMC.
- Random number state save restore
+
 - Mobius implementation clean up to rmove #if 0 stale code sequences
 - CG -- profile carefully, kernel fusion, whole CG performance measurements.
 ================================================================
@@ -90,6 +100,7 @@ Insert/Extract
 Not sure of status of this -- reverify. Things are working nicely now though.
 * Make the Tensor types and Complex etc... play more nicely.
  - TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
    QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken TensorRemove, but I
    want to introduce a syntax that does not require this.
@@ -112,6 +123,8 @@ Not sure of status of this -- reverify. Things are working nicely now though.
 RECENT
 ---------------
  - Support different fermion representations? -- DONE
  - contained entirely within the integrator presently
  - Clean up HMC                                                             -- DONE
  - LorentzScalar<GaugeField> gets Gauge link type (cleaner).                -- DONE
  - Simplified the integrators a bit.                                        -- DONE
@@ -123,6 +136,26 @@ RECENT
  - Parallel io improvements                                  -- DONE
  - Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test. -- DONE
 DONE:
 - MultiArray -- MultiRHS done
 - ConjugateGradientMultiShift -- DONE
 - MCR                         -- DONE
 - Remez -- Mike or Boost?     -- DONE
 - Proto (ET)                  -- DONE
 - uBlas                       -- DONE ; Eigen
 - Potentially Useful Boost libraries -- DONE ; Eigen
 - Aligned allocator; memory pool -- DONE
 - Multiprecision              -- DONE
 - Serialization               -- DONE
 - Regex -- Not needed
 - Tokenize -- Why?
 - Random number state save restore -- DONE
 - Rectangle gauge actions. -- DONE
  Iwasaki,
  Symanzik,
  ... etc...
 Done: Cayley, Partial , ContFrac force terms.
 DONE
@@ -207,6 +240,7 @@ Done
 FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane)
 ======================================================================================================
 * Link smearing/boundary conds; Policy class based implementation ; framework more in place -- DONE
 * Command line args for geometry, simd, etc. layout. Is it necessary to have -- DONE
  user pass these? Is this a QCD specific?
@@ -46,7 +46,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
 // Lanczos support
-#include <Grid/algorithms/iterative/MatrixUtils.h>
+//#include <Grid/algorithms/iterative/MatrixUtils.h>
 #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
 #include <Grid/algorithms/CoarsenedMatrix.h>
 #include <Grid/algorithms/FFT.h>
@@ -30,6 +30,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 #define GRID_IRL_H
 #include <string.h> //memset
 #ifdef USE_LAPACK
 void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
                   double *vl, double *vu, int *il, int *iu, double *abstol,
@@ -37,8 +38,9 @@ void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
                   double *work, int *lwork, int *iwork, int *liwork,
                   int *info);
 #endif
-#include "DenseMatrix.h"
+
-#include "EigenSort.h"
+#include <Grid/algorithms/densematrix/DenseMatrix.h>
 #include <Grid/algorithms/iterative/EigenSort.h>
 namespace Grid {
@@ -1088,8 +1090,6 @@ static void Lock(DenseMatrix<T> &H, 	// Hess mtx
 		 int dfg,
 		 bool herm)
 {	
  //ForceTridiagonal(H);
  int M = H.dim;
@@ -1122,7 +1122,6 @@ static void Lock(DenseMatrix<T> &H, 	// Hess mtx
  AH = Hermitian(QQ)*AH;
  AH = AH*QQ;
  for(int i=con;i<M;i++){
    for(int j=con;j<M;j++){
      Q[i][j]=QQ[i-con][j-con];
@@ -1,453 +0,0 @@
    /*************************************************************************************
    Grid physics library, www.github.com/paboyle/Grid 
    Source file: ./lib/algorithms/iterative/Matrix.h
    Copyright (C) 2015
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
 #ifndef MATRIX_H
 #define MATRIX_H
 #include <cstdlib>
 #include <string>
 #include <cmath>
 #include <vector>
 #include <iostream>
 #include <iomanip>
 #include <complex>
 #include <typeinfo>
 #include <Grid/Grid.h>
 /** Sign function **/
 template <class T> T sign(T p){return ( p/abs(p) );}
 /////////////////////////////////////////////////////////////////////////////////////////////////////////
 ///////////////////// Hijack STL containers for our wicked means /////////////////////////////////////////
 /////////////////////////////////////////////////////////////////////////////////////////////////////////
 template<class T> using Vector = Vector<T>;
 template<class T> using Matrix = Vector<Vector<T> >;
 template<class T> void Resize(Vector<T > & vec, int N) { vec.resize(N); }
 template<class T> void Resize(Matrix<T > & mat, int N, int M) { 
  mat.resize(N);
  for(int i=0;i<N;i++){
    mat[i].resize(M);
  }
 }
 template<class T> void Size(Vector<T> & vec, int &N) 
 { 
  N= vec.size();
 }
 template<class T> void Size(Matrix<T> & mat, int &N,int &M) 
 { 
  N= mat.size();
  M= mat[0].size();
 }
 template<class T> void SizeSquare(Matrix<T> & mat, int &N) 
 { 
  int M; Size(mat,N,M);
  assert(N==M);
 }
 template<class T> void SizeSame(Matrix<T> & mat1,Matrix<T> &mat2, int &N1,int &M1) 
 { 
  int N2,M2;
  Size(mat1,N1,M1);
  Size(mat2,N2,M2);
  assert(N1==N2);
  assert(M1==M2);
 }
 //*****************************************
 //*	(Complex) Vector operations	*
 //*****************************************
 /**Conj of a Vector **/
 template <class T> Vector<T> conj(Vector<T> p){
 	Vector<T> q(p.size());
 	for(int i=0;i<p.size();i++){q[i] = conj(p[i]);}
 	return q;
 }
 /** Norm of a Vector**/
 template <class T> T norm(Vector<T> p){
 	T sum = 0;
 	for(int i=0;i<p.size();i++){sum = sum + p[i]*conj(p[i]);}
 	return abs(sqrt(sum));
 }
 /** Norm squared of a Vector **/
 template <class T> T norm2(Vector<T> p){
 	T sum = 0;
 	for(int i=0;i<p.size();i++){sum = sum + p[i]*conj(p[i]);}
 	return abs((sum));
 }
 /** Sum elements of a Vector **/
 template <class T> T trace(Vector<T> p){
 	T sum = 0;
 	for(int i=0;i<p.size();i++){sum = sum + p[i];}
 	return sum;
 }
 /** Fill a Vector with constant c **/
 template <class T> void Fill(Vector<T> &p, T c){
 	for(int i=0;i<p.size();i++){p[i] = c;}
 }
 /** Normalize a Vector **/
 template <class T> void normalize(Vector<T> &p){
 	T m = norm(p);
 	if( abs(m) > 0.0) for(int i=0;i<p.size();i++){p[i] /= m;}
 }
 /** Vector by scalar **/
 template <class T, class U> Vector<T> times(Vector<T> p, U s){
 	for(int i=0;i<p.size();i++){p[i] *= s;}
 	return p;
 }
 template <class T, class U> Vector<T> times(U s, Vector<T> p){
 	for(int i=0;i<p.size();i++){p[i] *= s;}
 	return p;
 }
 /** inner product of a and b = conj(a) . b **/
 template <class T> T inner(Vector<T> a, Vector<T> b){
 	T m = 0.;
 	for(int i=0;i<a.size();i++){m = m + conj(a[i])*b[i];}
 	return m;
 }
 /** sum of a and b = a + b **/
 template <class T> Vector<T> add(Vector<T> a, Vector<T> b){
 	Vector<T> m(a.size());
 	for(int i=0;i<a.size();i++){m[i] = a[i] + b[i];}
 	return m;
 }
 /** sum of a and b = a - b **/
 template <class T> Vector<T> sub(Vector<T> a, Vector<T> b){
 	Vector<T> m(a.size());
 	for(int i=0;i<a.size();i++){m[i] = a[i] - b[i];}
 	return m;
 }
 /** 
 *********************************
 *	Matrices	         *
 *********************************
 **/
 template<class T> void Fill(Matrix<T> & mat, T&val) { 
  int N,M;
  Size(mat,N,M);
  for(int i=0;i<N;i++){
  for(int j=0;j<M;j++){
    mat[i][j] = val;
  }}
 }
 /** Transpose of a matrix **/
 Matrix<T> Transpose(Matrix<T> & mat){
  int N,M;
  Size(mat,N,M);
  Matrix C; Resize(C,M,N);
  for(int i=0;i<M;i++){
  for(int j=0;j<N;j++){
    C[i][j] = mat[j][i];
  }} 
  return C;
 }
 /** Set Matrix to unit matrix **/
 template<class T> void Unity(Matrix<T> &mat){
  int N;  SizeSquare(mat,N);
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      if ( i==j ) A[i][j] = 1;
      else        A[i][j] = 0;
    } 
  } 
 }
 /** Add C * I to matrix **/
 template<class T>
 void PlusUnit(Matrix<T> & A,T c){
  int dim;  SizeSquare(A,dim);
  for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;} 
 }
 /** return the Hermitian conjugate of matrix **/
 Matrix<T> HermitianConj(Matrix<T> &mat){
  int dim; SizeSquare(mat,dim);
  Matrix<T> C; Resize(C,dim,dim);
  for(int i=0;i<dim;i++){
    for(int j=0;j<dim;j++){
      C[i][j] = conj(mat[j][i]);
    } 
  } 
  return C;
 }
 /** return diagonal entries as a Vector **/
 Vector<T> diag(Matrix<T> &A)
 {
  int dim; SizeSquare(A,dim);
  Vector<T> d; Resize(d,dim);
  for(int i=0;i<dim;i++){
    d[i] = A[i][i];
  }
  return d;
 }
 /** Left multiply by a Vector **/
 Vector<T> operator *(Vector<T> &B,Matrix<T> &A)
 {
  int K,M,N; 
  Size(B,K);
  Size(A,M,N);
  assert(K==M);
  Vector<T> C; Resize(C,N);
  for(int j=0;j<N;j++){
    T sum = 0.0;
    for(int i=0;i<M;i++){
      sum += B[i] * A[i][j];
    }
    C[j] =  sum;
  }
  return C; 
 }
 /** return 1/diagonal entries as a Vector **/
 Vector<T> inv_diag(Matrix<T> & A){
  int dim; SizeSquare(A,dim);
  Vector<T> d; Resize(d,dim);
  for(int i=0;i<dim;i++){
    d[i] = 1.0/A[i][i];
  }
  return d;
 }
 /** Matrix Addition **/
 inline Matrix<T> operator + (Matrix<T> &A,Matrix<T> &B)
 {
  int N,M  ; SizeSame(A,B,N,M);
  Matrix C; Resize(C,N,M);
  for(int i=0;i<N;i++){
    for(int j=0;j<M;j++){
      C[i][j] = A[i][j] +  B[i][j];
    } 
  } 
  return C;
 } 
 /** Matrix Subtraction **/
 inline Matrix<T> operator- (Matrix<T> & A,Matrix<T> &B){
  int N,M  ; SizeSame(A,B,N,M);
  Matrix C; Resize(C,N,M);
  for(int i=0;i<N;i++){
  for(int j=0;j<M;j++){
    C[i][j] = A[i][j] -  B[i][j];
  }}
  return C;
 } 
 /** Matrix scalar multiplication **/
 inline Matrix<T> operator* (Matrix<T> & A,T c){
  int N,M; Size(A,N,M);
  Matrix C; Resize(C,N,M);
  for(int i=0;i<N;i++){
  for(int j=0;j<M;j++){
    C[i][j] = A[i][j]*c;
  }} 
  return C;
 } 
 /** Matrix Matrix multiplication **/
 inline Matrix<T> operator* (Matrix<T> &A,Matrix<T> &B){
  int K,L,N,M;
  Size(A,K,L);
  Size(B,N,M); assert(L==N);
  Matrix C; Resize(C,K,M);
  for(int i=0;i<K;i++){
    for(int j=0;j<M;j++){
      T sum = 0.0;
      for(int k=0;k<N;k++) sum += A[i][k]*B[k][j];
      C[i][j] =sum;
    }
  }
  return C; 
 } 
 /** Matrix Vector multiplication **/
 inline Vector<T> operator* (Matrix<T> &A,Vector<T> &B){
  int M,N,K;
  Size(A,N,M);
  Size(B,K); assert(K==M);
  Vector<T> C; Resize(C,N);
  for(int i=0;i<N;i++){
    T sum = 0.0;
    for(int j=0;j<M;j++) sum += A[i][j]*B[j];
    C[i] =  sum;
  }
  return C; 
 } 
 /** Some version of Matrix norm **/
 /*
 inline T Norm(){ // this is not a usual L2 norm
    T norm = 0;
    for(int i=0;i<dim;i++){
      for(int j=0;j<dim;j++){
 	norm += abs(A[i][j]);
    }}
    return norm;
  }
 */
 /** Some version of Matrix norm **/
 template<class T> T LargestDiag(Matrix<T> &A)
 {
  int dim ; SizeSquare(A,dim); 
  T ld = abs(A[0][0]);
  for(int i=1;i<dim;i++){
    T cf = abs(A[i][i]);
    if(abs(cf) > abs(ld) ){ld = cf;}
  }
  return ld;
 }
 /** Look for entries on the leading subdiagonal that are smaller than 'small' **/
 template <class T,class U> int Chop_subdiag(Matrix<T> &A,T norm, int offset, U small)
 {
  int dim; SizeSquare(A,dim);
  for(int l = dim - 1 - offset; l >= 1; l--) {             		
    if((U)abs(A[l][l - 1]) < (U)small) {
      A[l][l-1]=(U)0.0;
      return l;
    }
  }
  return 0;
 }
 /** Look for entries on the leading subdiagonal that are smaller than 'small' **/
 template <class T,class U> int Chop_symm_subdiag(Matrix<T> & A,T norm, int offset, U small) 
 {
  int dim; SizeSquare(A,dim);
  for(int l = dim - 1 - offset; l >= 1; l--) {
    if((U)abs(A[l][l - 1]) < (U)small) {
      A[l][l - 1] = (U)0.0;
      A[l - 1][l] = (U)0.0;
      return l;
    }
  }
  return 0;
 }
 /**Assign a submatrix to a larger one**/
 template<class T>
 void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S)
 {
  for(int i = row_st; i<row_end; i++){
    for(int j = col_st; j<col_end; j++){
      A[i][j] = S[i - row_st][j - col_st];
    }
  }
 }
 /**Get a square submatrix**/
 template <class T>
 Matrix<T> GetSubMtx(Matrix<T> &A,int row_st, int row_end, int col_st, int col_end)
 {
  Matrix<T> H; Resize(row_end - row_st,col_end-col_st);
  for(int i = row_st; i<row_end; i++){
  for(int j = col_st; j<col_end; j++){
    H[i-row_st][j-col_st]=A[i][j];
  }}
  return H;
 }
 /**Assign a submatrix to a larger one NB remember Vector Vectors are transposes of the matricies they represent**/
 template<class T>
 void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S)
 {
  for(int i = row_st; i<row_end; i++){
  for(int j = col_st; j<col_end; j++){
    A[i][j] = S[i - row_st][j - col_st];
  }}
 }
 /** compute b_i A_ij b_j **/ // surprised no Conj
 template<class T> T proj(Matrix<T> A, Vector<T> B){
  int dim; SizeSquare(A,dim);
  int dimB; Size(B,dimB);
  assert(dimB==dim);
  T C = 0;
  for(int i=0;i<dim;i++){
    T sum = 0.0;
    for(int j=0;j<dim;j++){
      sum += A[i][j]*B[j];
    }
    C +=  B[i]*sum; // No conj?
  }
  return C; 
 }
 /*
 *************************************************************
 *
 * Matrix Vector products
 *
 *************************************************************
 */
 // Instead make a linop and call my CG;
 /// q -> q Q
 template <class T,class Fermion> void times(Vector<Fermion> &q, Matrix<T> &Q)
 {
  int M; SizeSquare(Q,M);
  int N; Size(q,N); 
  assert(M==N);
  times(q,Q,N);
 }
 /// q -> q Q
 template <class T> void times(multi1d<LatticeFermion> &q, Matrix<T> &Q, int N)
 {
  GridBase *grid = q[0]._grid;
  int M; SizeSquare(Q,M);
  int K; Size(q,K); 
  assert(N<M);
  assert(N<K);
  Vector<Fermion> S(N,grid );
  for(int j=0;j<N;j++){
    S[j] = zero;
    for(int k=0;k<N;k++){
      S[j] = S[j] +  q[k]* Q[k][j]; 
    }
  }
  for(int j=0;j<q.size();j++){
    q[j] = S[j];
  }
 }
 #endif
@@ -1,75 +0,0 @@
    /*************************************************************************************
    Grid physics library, www.github.com/paboyle/Grid 
    Source file: ./lib/algorithms/iterative/MatrixUtils.h
    Copyright (C) 2015
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
 #ifndef GRID_MATRIX_UTILS_H
 #define GRID_MATRIX_UTILS_H
 namespace Grid {
  namespace MatrixUtils { 
    template<class T> inline void Size(Matrix<T>& A,int &N,int &M){
      N=A.size(); assert(N>0);
      M=A[0].size();
      for(int i=0;i<N;i++){
 	assert(A[i].size()==M);
      }
    }
    template<class T> inline void SizeSquare(Matrix<T>& A,int &N)
    {
      int M;
      Size(A,N,M);
      assert(N==M);
    }
    template<class T> inline void Fill(Matrix<T>& A,T & val)
    { 
      int N,M;
      Size(A,N,M);
      for(int i=0;i<N;i++){
      for(int j=0;j<M;j++){
 	A[i][j]=val;
      }}
    }
    template<class T> inline void Diagonal(Matrix<T>& A,T & val)
    { 
      int N;
      SizeSquare(A,N);
      for(int i=0;i<N;i++){
 	A[i][i]=val;
      }
    }
    template<class T> inline void Identity(Matrix<T>& A)
    {
      Fill(A,0.0);
      Diagonal(A,1.0);
    }
  };
 }
 #endif
@@ -1,15 +0,0 @@
 - ConjugateGradientMultiShift
 - MCR
 - Potentially Useful Boost libraries
 - MultiArray
 - Aligned allocator; memory pool
 - Remez -- Mike or Boost?
 - Multiprecision
 - quaternians
 - Tokenize
 - Serialization
 - Regex
 - Proto (ET)
 - uBlas
@@ -1,122 +0,0 @@
 #include <math.h>
 #include <stdlib.h>
 #include <vector>
 struct Bisection {
 static void get_eig2(int row_num,std::vector<RealD> &ALPHA,std::vector<RealD> &BETA, std::vector<RealD> & eig)
 {
  int i,j;
  std::vector<RealD> evec1(row_num+3);
  std::vector<RealD> evec2(row_num+3);
  RealD eps2;
  ALPHA[1]=0.;
  BETHA[1]=0.;
  for(i=0;i<row_num-1;i++) {
    ALPHA[i+1] = A[i*(row_num+1)].real();
    BETHA[i+2] = A[i*(row_num+1)+1].real();
  }
  ALPHA[row_num] = A[(row_num-1)*(row_num+1)].real();
  bisec(ALPHA,BETHA,row_num,1,row_num,1e-10,1e-10,evec1,eps2);
  bisec(ALPHA,BETHA,row_num,1,row_num,1e-16,1e-16,evec2,eps2);
  // Do we really need to sort here?
  int begin=1;
  int end = row_num;
  int swapped=1;
  while(swapped) {
    swapped=0;
    for(i=begin;i<end;i++){
      if(mag(evec2[i])>mag(evec2[i+1]))	{
 	swap(evec2+i,evec2+i+1);
 	swapped=1;
      }
    }
    end--;
    for(i=end-1;i>=begin;i--){
      if(mag(evec2[i])>mag(evec2[i+1]))	{
 	swap(evec2+i,evec2+i+1);
 	swapped=1;
      }
    }
    begin++;
  }
  for(i=0;i<row_num;i++){
    for(j=0;j<row_num;j++) {
      if(i==j) H[i*row_num+j]=evec2[i+1];
      else H[i*row_num+j]=0.;
    }
  }
 }
 static void bisec(std::vector<RealD> &c,   
 		  std::vector<RealD> &b,
 		  int n,
 		  int m1,
 		  int m2,
 		  RealD eps1,
 		  RealD relfeh,
 		  std::vector<RealD> &x,
 		  RealD &eps2)
 {
  std::vector<RealD> wu(n+2);
  RealD h,q,x1,xu,x0,xmin,xmax; 
  int i,a,k;
  b[1]=0.0;
  xmin=c[n]-fabs(b[n]);
  xmax=c[n]+fabs(b[n]);
  for(i=1;i<n;i++){
    h=fabs(b[i])+fabs(b[i+1]);
    if(c[i]+h>xmax) xmax= c[i]+h;
    if(c[i]-h<xmin) xmin= c[i]-h;
  }
  xmax *=2.;
  eps2=relfeh*((xmin+xmax)>0.0 ? xmax : -xmin);
  if(eps1<=0.0) eps1=eps2;
  eps2=0.5*eps1+7.0*(eps2);
  x0=xmax;
  for(i=m1;i<=m2;i++){
    x[i]=xmax;
    wu[i]=xmin;
  }
  for(k=m2;k>=m1;k--){
    xu=xmin;
    i=k;
    do{
      if(xu<wu[i]){
 	xu=wu[i];
 	i=m1-1;
      }
      i--;
    }while(i>=m1);
    if(x0>x[k]) x0=x[k];
    while((x0-xu)>2*relfeh*(fabs(xu)+fabs(x0))+eps1){
      x1=(xu+x0)/2;
      a=0;
      q=1.0;
      for(i=1;i<=n;i++){
 	q=c[i]-x1-((q!=0.0)? b[i]*b[i]/q:fabs(b[i])/relfeh);
 	if(q<0) a++;
      }
      //			printf("x1=%e a=%d\n",x1,a);
      if(a<k){
 	if(a<m1){
 	  xu=x1;
 	  wu[m1]=x1;
 	}else {
 	  xu=x1;
 	  wu[a+1]=x1;
 	  if(x[a]>x1) x[a]=x1;
 	}
      }else x0=x1;
    }
    x[k]=(x0+xu)/2;
  }
 }
 }
@@ -1 +0,0 @@
@@ -311,8 +311,8 @@ void Grid_init(int *argc,char ***argv)
    std::cout<<GridLogMessage<<std::endl;
    std::cout<<GridLogMessage<<"Performance:"<<std::endl;
    std::cout<<GridLogMessage<<std::endl;
-    std::cout<<GridLogMessage<<"  --comms-isend   : Asynchronous MPI calls; several dirs at a time "<<std::endl;    
+    std::cout<<GridLogMessage<<"  --comms-concurrent : Asynchronous MPI calls; several dirs at a time "<<std::endl;    
-    std::cout<<GridLogMessage<<"  --comms-sendrecv: Synchronous MPI calls; one dirs at a time "<<std::endl;    
+    std::cout<<GridLogMessage<<"  --comms-sequential : Synchronous MPI calls; one dirs at a time "<<std::endl;    
    std::cout<<GridLogMessage<<"  --comms-overlap : Overlap comms with compute "<<std::endl;    
    std::cout<<GridLogMessage<<std::endl;
    std::cout<<GridLogMessage<<"  --dslash-generic: Wilson kernel for generic Nc"<<std::endl;    
@@ -115,8 +115,8 @@ int main (int argc, char ** argv)
  RNG.SeedFixedIntegers(seeds);
-  RealD alpha = 1.0;
+  RealD alpha = 1.2;
-  RealD beta  = 0.03;
+  RealD beta  = 0.1;
  RealD mu    = 0.0;
  int order = 11;
  ChebyshevLanczos<LatticeComplex> Cheby(alpha,beta,mu,order);
@@ -131,10 +131,9 @@ int main (int argc, char ** argv)
  const int Nit= 10000;
  int Nconv;
-  RealD eresid = 1.0e-8;
+  RealD eresid = 1.0e-6;
  ImplicitlyRestartedLanczos<LatticeComplex> IRL(HermOp,X,Nk,Nm,eresid,Nit);
  ImplicitlyRestartedLanczos<LatticeComplex> ChebyIRL(HermOp,Cheby,Nk,Nm,eresid,Nit);
  LatticeComplex src(grid); gaussian(RNG,src);
@@ -145,9 +144,9 @@ int main (int argc, char ** argv)
  }
  {
-    //    std::vector<RealD>          eval(Nm);
+    std::vector<RealD>          eval(Nm);
-    //    std::vector<LatticeComplex> evec(Nm,grid);
+    std::vector<LatticeComplex> evec(Nm,grid);
-    //    ChebyIRL.calc(eval,evec,src, Nconv);
+    ChebyIRL.calc(eval,evec,src, Nconv);
  }
  Grid_finalize();