From 9f0d9ade68249642215e1215b27d46d6e9ffc2b7 Mon Sep 17 00:00:00 2001
From: Jung <chulwoo@scidac1.phy.bnl.gov>
Date: Sat, 20 Feb 2016 02:50:32 -0500
Subject: [PATCH] Added configure flag for LAPACK. Tested
 ImplicitlyRestartedLanczos::calc() Checking in before cleaning up

---
 configure                                     |  56 ++++++++
 configure.ac                                  |  22 +++
 lib/algorithms/LinearOperator.h               |   9 +-
 lib/algorithms/approx/Chebyshev.h             |   2 +
 .../iterative/ConjugateGradientMultiShift.h   |   2 +-
 .../iterative/ImplicitlyRestartedLanczos.h    | 127 +++++++++++++-----
 lib/qcd/action/Actions.h                      |   2 +-
 tests/Make.inc                                |   6 +
 tests/Makefile.am                             |  10 ++
 tests/Test_dwf_lanczos.cc                     |  28 ++--
 10 files changed, 212 insertions(+), 52 deletions(-)

diff --git a/configure b/configure
index 8fca7966..9b80c73f 100755
--- a/configure
+++ b/configure
@@ -626,6 +626,10 @@ ac_subst_vars='am__EXEEXT_FALSE
 am__EXEEXT_TRUE
 LTLIBOBJS
 LIBOBJS
+USE_LAPACK_LIB_FALSE
+USE_LAPACK_LIB_TRUE
+USE_LAPACK_FALSE
+USE_LAPACK_TRUE
 BUILD_CHROMA_REGRESSION_FALSE
 BUILD_CHROMA_REGRESSION_TRUE
 BUILD_COMMS_NONE_FALSE
@@ -751,6 +755,7 @@ enable_precision
 enable_comms
 enable_rng
 enable_chroma
+enable_lapack
 '
       ac_precious_vars='build_alias
 host_alias
@@ -1399,6 +1404,7 @@ Optional Features:
   --enable-rng=ranlux48|mt19937
                           Select Random Number Generator to be used
   --enable-chroma         Expect chroma compiled under c++11
+  --enable-lapack         Location of lapack
 
 Some influential environment variables:
   CXX         C++ compiler command
@@ -6629,6 +6635,47 @@ else
 fi
 
 
+#
+# Chroma regression tests
+#
+# Check whether --enable-lapack was given.
+if test "${enable_lapack+set}" = set; then :
+  enableval=$enable_lapack; ac_LAPACK=${enable_lapack}
+else
+  ac_LAPACK=no
+fi
+
+
+case ${ac_LAPACK} in
+     yes)
+       echo Enabling lapack
+     ;;
+     no)
+       echo Disabling lapack
+     ;;
+     *)
+       echo Enabling lapack at ${ac_LAPACK}
+#     AC_MSG_ERROR([Enabli${ac_LAPACK} unsupported --enable-lapack option]);
+     ;;
+esac
+
+ if  test "X${ac_LAPACK}X" != "XnoX" ; then
+  USE_LAPACK_TRUE=
+  USE_LAPACK_FALSE='#'
+else
+  USE_LAPACK_TRUE='#'
+  USE_LAPACK_FALSE=
+fi
+
+ if  test "X${ac_LAPACK}X" != "XyesX" ; then
+  USE_LAPACK_LIB_TRUE=
+  USE_LAPACK_LIB_FALSE='#'
+else
+  USE_LAPACK_LIB_TRUE='#'
+  USE_LAPACK_LIB_FALSE=
+fi
+
+
 ###################################################################
 # Checks for doxygen support
 # if present enables the "make doxyfile" command
@@ -6808,6 +6855,14 @@ if test -z "${BUILD_CHROMA_REGRESSION_TRUE}" && test -z "${BUILD_CHROMA_REGRESSI
   as_fn_error $? "conditional \"BUILD_CHROMA_REGRESSION\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
 fi
+if test -z "${USE_LAPACK_TRUE}" && test -z "${USE_LAPACK_FALSE}"; then
+  as_fn_error $? "conditional \"USE_LAPACK\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${USE_LAPACK_LIB_TRUE}" && test -z "${USE_LAPACK_LIB_FALSE}"; then
+  as_fn_error $? "conditional \"USE_LAPACK_LIB\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
 
 : "${CONFIG_STATUS=./config.status}"
 ac_write_fail=0
@@ -8154,6 +8209,7 @@ The following features are enabled:
 - communications type           : ${ac_COMMS}
 - default precision             : ${ac_PRECISION}
 - RNG choice                    : ${ac_RNG}
+- LAPACK	                : ${ac_LAPACK}
 
 
 "
diff --git a/configure.ac b/configure.ac
index 9e6464c4..63667c5c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -222,6 +222,27 @@ esac
 
 AM_CONDITIONAL(BUILD_CHROMA_REGRESSION,[ test "X${ac_CHROMA}X" == "XyesX" ])
 
+#
+# Chroma regression tests
+#
+AC_ARG_ENABLE([lapack],[AC_HELP_STRING([--enable-lapack],[Location of lapack ])],[ac_LAPACK=${enable_lapack}],[ac_LAPACK=no])
+
+case ${ac_LAPACK} in
+     yes)
+       echo Enabling lapack
+     ;;
+     no)
+       echo Disabling lapack
+     ;;
+     *)
+       echo Enabling lapack at ${ac_LAPACK}
+#     AC_MSG_ERROR([Enabli${ac_LAPACK} unsupported --enable-lapack option]); 
+     ;;
+esac
+
+AM_CONDITIONAL(USE_LAPACK,[ test "X${ac_LAPACK}X" != "XnoX" ])
+AM_CONDITIONAL(USE_LAPACK_LIB,[ test "X${ac_LAPACK}X" != "XyesX" ])
+
 ###################################################################
 # Checks for doxygen support
 # if present enables the "make doxyfile" command
@@ -265,6 +286,7 @@ The following features are enabled:
 - communications type           : ${ac_COMMS}
 - default precision             : ${ac_PRECISION}
 - RNG choice                    : ${ac_RNG} 
+- LAPACK	                : ${ac_LAPACK} 
 
 
 "
diff --git a/lib/algorithms/LinearOperator.h b/lib/algorithms/LinearOperator.h
index 6706a0b3..ea47d43b 100644
--- a/lib/algorithms/LinearOperator.h
+++ b/lib/algorithms/LinearOperator.h
@@ -222,6 +222,7 @@ namespace Grid {
       SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
       virtual  RealD Mpc      (const Field &in, Field &out) {
 	Field tmp(in._grid);
+//	std::cout <<"grid pointers: in._grid="<< in._grid << " out._grid=" << out._grid << "  _Mat.Grid=" << _Mat.Grid() << " _Mat.RedBlackGrid=" << _Mat.RedBlackGrid() << std::endl;
 
 	_Mat.Meooe(in,tmp);
 	_Mat.MooeeInv(tmp,out);
@@ -251,10 +252,10 @@ namespace Grid {
       virtual  RealD Mpc      (const Field &in, Field &out) {
 	Field tmp(in._grid);
 
-	_Mat.Meooe(in,tmp);
-	_Mat.MooeeInv(tmp,out);
-	_Mat.Meooe(out,tmp);
-	_Mat.MooeeInv(tmp,out);
+	_Mat.Meooe(in,out);
+	_Mat.MooeeInv(out,tmp);
+	_Mat.Meooe(tmp,out);
+	_Mat.MooeeInv(out,tmp);
 
 	return axpy_norm(out,-1.0,tmp,in);
       }
diff --git a/lib/algorithms/approx/Chebyshev.h b/lib/algorithms/approx/Chebyshev.h
index 5f6c1607..96a75a92 100644
--- a/lib/algorithms/approx/Chebyshev.h
+++ b/lib/algorithms/approx/Chebyshev.h
@@ -198,6 +198,8 @@ namespace Grid {
     void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
 
       GridBase *grid=in._grid;
+//std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl;
+//<<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
 
       int vol=grid->gSites();
 
diff --git a/lib/algorithms/iterative/ConjugateGradientMultiShift.h b/lib/algorithms/iterative/ConjugateGradientMultiShift.h
index d302a5b8..a9ccfd2c 100644
--- a/lib/algorithms/iterative/ConjugateGradientMultiShift.h
+++ b/lib/algorithms/iterative/ConjugateGradientMultiShift.h
@@ -274,7 +274,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
   }
   // ugly hack
   std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
-  assert(0);
+//  assert(0);
 }
 
   };
diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
index e6227bee..bad82fbf 100644
--- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
+++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
@@ -30,7 +30,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 #define GRID_IRL_H
 
 #include <string.h> //memset
-#include <lapacke.h> //memset
+#ifdef USE_LAPACK
+#include <lapacke.h>
+#endif
 #include <algorithms/iterative/DenseMatrix.h>
 #include <algorithms/iterative/EigenSort.h>
 
@@ -60,7 +62,7 @@ public:
 
     SortEigen<Field> _sort;
 
-    GridCartesian &_fgrid;
+//    GridCartesian &_fgrid;
 
     LinearOperatorBase<Field> &_Linop;
 
@@ -72,14 +74,14 @@ public:
     void init(void){};
     void Abort(int ff, DenseVector<RealD> &evals,  DenseVector<DenseVector<RealD> > &evecs);
 
-    ImplicitlyRestartedLanczos(GridCartesian &fgrid, LinearOperatorBase<Field> &Linop, // op
+    ImplicitlyRestartedLanczos(
+				LinearOperatorBase<Field> &Linop, // op
 			       OperatorFunction<Field> & poly,   // polynmial
 			       int _Nstop, // sought vecs
 			       int _Nk, // sought vecs
 			       int _Nm, // spare vecs
 			       RealD _eresid, // resid in lmdue deficit 
 			       int _Niter) : // Max iterations
-      _fgrid(fgrid),
       _Linop(Linop),
       _poly(poly),
       Nstop(_Nstop),
@@ -91,6 +93,24 @@ public:
       Np = Nm-Nk; assert(Np>0);
     };
 
+    ImplicitlyRestartedLanczos(
+				LinearOperatorBase<Field> &Linop, // op
+			       OperatorFunction<Field> & poly,   // polynmial
+			       int _Nk, // sought vecs
+			       int _Nm, // spare vecs
+			       RealD _eresid, // resid in lmdue deficit 
+			       int _Niter) : // Max iterations
+      _Linop(Linop),
+      _poly(poly),
+      Nstop(_Nk),
+      Nk(_Nk),
+      Nm(_Nm),
+      eresid(_eresid),
+      Niter(_Niter)
+    { 
+      Np = Nm-Nk; assert(Np>0);
+    };
+
     /////////////////////////
     // Sanity checked this routine (step) against Saad.
     /////////////////////////
@@ -228,16 +248,17 @@ public:
       }
     }
 
-//int lapackcj_Wilkinson(Matrix<T> &AH, vector<T> &tevals, vector<vector<T> > &tevecs, double small) {
+#ifdef USE_LAPACK
     void diagonalize_lapack(DenseVector<RealD>& lmd,
 		     DenseVector<RealD>& lme, 
-		     int Nm2,
-		     int Nm,
-		     DenseVector<RealD>& Qt){
+		     int N1,
+		     int N2,
+		     DenseVector<RealD>& Qt,
+		     GridBase *grid){
   const int size = Nm;
 //  tevals.resize(size);
 //  tevecs.resize(size);
-  int NN = Nm;
+  int NN = N1;
   double evals_tmp[NN];
   double evec_tmp[NN][NN];
   memset(evec_tmp[0],0,sizeof(double)*NN*NN);
@@ -266,8 +287,9 @@ public:
   int info;
 //  int total = QMP_get_number_of_nodes();
 //  int node = QMP_get_node_number();
-  int total = _fgrid._Nprocessors;
-  int node = _fgrid._processor;
+//  GridBase *grid = evec[0]._grid;
+  int total = grid->_Nprocessors;
+  int node = grid->_processor;
   int interval = (NN/total)+1;
   double vl = 0.0, vu = 0.0;
   int il = interval*node+1 , iu = interval*(node+1);
@@ -298,40 +320,50 @@ public:
       {
 //        QMP_sum_double_array(evals_tmp,NN);
 //        QMP_sum_double_array((double *)evec_tmp,NN*NN);
-         _fgrid.GlobalSumVector(evals_tmp,NN);
-         _fgrid.GlobalSumVector((double*)evec_tmp,NN*NN);
+         grid->GlobalSumVector(evals_tmp,NN);
+         grid->GlobalSumVector((double*)evec_tmp,NN*NN);
       }
     } 
+// cheating a bit. It is better to sort instead of just reversing it, but the document of the routine says evals are sorted in increasing order. qr gives evals in decreasing order.
   for(int i=0;i<NN;i++){
     for(int j=0;j<NN;j++)
-      Qt[i*NN+j]=evec_tmp[i][j];
-      lmd [i]=evals_tmp[i];
+      Qt[(NN-1-i)*N2+j]=evec_tmp[i][j];
+      lmd [NN-1-i]=evals_tmp[i];
   }
 }
+#endif
 
 
     void diagonalize(DenseVector<RealD>& lmd,
 		     DenseVector<RealD>& lme, 
-		     int Nm2,
-		     int Nm,
-		     DenseVector<RealD>& Qt)
+		     int N2,
+		     int N1,
+		     DenseVector<RealD>& Qt,
+		     GridBase *grid)
     {
 
-if(1)
-{
-	DenseVector <RealD> lmd2(Nm);
-	DenseVector <RealD> lme2(Nm);
-         for(int k=0; k<Nm; ++k){
+#ifdef USE_LAPACK
+    const int check_lapack=0; // just use lapack if 0, check against lapack if 1
+
+    if(!check_lapack)
+	return diagonalize_lapack(lmd,lme,N2,N1,Qt,grid);
+
+	DenseVector <RealD> lmd2(N1);
+	DenseVector <RealD> lme2(N1);
+	DenseVector<RealD> Qt2(N1*N1);
+         for(int k=0; k<N1; ++k){
 	    lmd2[k] = lmd[k];
 	    lme2[k] = lme[k];
 	  }
+         for(int k=0; k<N1*N1; ++k)
+	Qt2[k] = Qt[k];
 
-	return diagonalize_lapack(lmd,lme,Nm2,Nm,Qt);
-}
+//	diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid);
+#endif
 
-      int Niter = 100*Nm;
+      int Niter = 100*N1;
       int kmin = 1;
-      int kmax = Nk;
+      int kmax = N2;
       // (this should be more sophisticated)
 
       for(int iter=0; iter<Niter; ++iter){
@@ -343,7 +375,7 @@ if(1)
 	// (Dsh: shift)
 	
 	// transformation
-	qr_decomp(lmd,lme,Nk,Nm,Qt,Dsh,kmin,kmax);
+	qr_decomp(lmd,lme,N2,N1,Qt,Dsh,kmin,kmax);
 	
 	// Convergence criterion (redef of kmin and kamx)
 	for(int j=kmax-1; j>= kmin; --j){
@@ -354,6 +386,23 @@ if(1)
 	  }
 	}
 	Niter = iter;
+#ifdef USE_LAPACK
+    if(check_lapack){
+	const double SMALL=1e-8;
+	diagonalize_lapack(lmd2,lme2,N2,N1,Qt2,grid);
+	DenseVector <RealD> lmd3(N2);
+         for(int k=0; k<N2; ++k) lmd3[k]=lmd[k];
+        _sort.push(lmd3,N2);
+        _sort.push(lmd2,N2);
+         for(int k=0; k<N2; ++k){
+	    if (fabs(lmd2[k] - lmd3[k]) >SMALL)  std::cout <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <<std::endl;
+//	    if (fabs(lme2[k] - lme[k]) >SMALL)  std::cout <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <<std::endl;
+	  }
+         for(int k=0; k<N1*N1; ++k){
+//	    if (fabs(Qt2[k] - Qt[k]) >SMALL)  std::cout <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] <<std::endl;
+	}
+    }
+#endif
 	return;
 
       continued:
@@ -369,6 +418,7 @@ if(1)
       abort();
     }
 
+#if 1
     static RealD normalise(Field& v) 
     {
       RealD nn = norm2(v);
@@ -430,6 +480,7 @@ until convergence
       {
 
 	GridBase *grid = evec[0]._grid;
+	assert(grid == src._grid);
 
 	std::cout << " -- Nk = " << Nk << " Np = "<< Np << std::endl;
 	std::cout << " -- Nm = " << Nm << std::endl;
@@ -460,9 +511,11 @@ until convergence
 	// (uniform vector) Why not src??
 	//	evec[0] = 1.0;
 	evec[0] = src;
-	std:: cout <<"norm2(src)= " << norm2(src) << std::endl;
+	std:: cout <<"norm2(src)= " << norm2(src)<<std::endl;
+// << src._grid  << std::endl;
 	normalise(evec[0]);
-	std:: cout <<"norm2(evec[0])= " << norm2(evec[0]) << std::endl;
+	std:: cout <<"norm2(evec[0])= " << norm2(evec[0]) <<std::endl;
+// << evec[0]._grid << std::endl;
 	
 	// Initial Nk steps
 	for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k);
@@ -494,7 +547,7 @@ until convergence
 	    lme2[k] = lme[k+k1-1];
 	  }
 	  setUnit_Qt(Nm,Qt);
-	  diagonalize(eval2,lme2,Nm,Nm,Qt);
+	  diagonalize(eval2,lme2,Nm,Nm,Qt,grid);
 
 	  // sorting
 	  _sort.push(eval2,Nm);
@@ -533,7 +586,7 @@ until convergence
 	    lme2[k] = lme[k];
 	  }
 	  setUnit_Qt(Nm,Qt);
-	  diagonalize(eval2,lme2,Nk,Nm,Qt);
+	  diagonalize(eval2,lme2,Nk,Nm,Qt,grid);
 	  
 	  for(int k = 0; k<Nk; ++k) B[k]=0.0;
 	  
@@ -541,13 +594,16 @@ until convergence
 	    for(int k = 0; k<Nk; ++k){
 	      B[j] += Qt[k+j*Nm] * evec[k];
 	    }
+//	    std::cout << "norm(B["<<j<<"])="<<norm2(B[j])<<std::endl;
 	  }
+//	_sort.push(eval2,B,Nk);
 
 	  Nconv = 0;
 	  //	  std::cout << std::setiosflags(std::ios_base::scientific);
 	  for(int i=0; i<Nk; ++i){
 
-	    _poly(_Linop,B[i],v);
+//	    _poly(_Linop,B[i],v);
+	    _Linop.HermOp(B[i],v);
 	    
 	    RealD vnum = real(innerProduct(B[i],v)); // HermOp.
 	    RealD vden = norm2(B[i]);
@@ -555,11 +611,13 @@ until convergence
 	    v -= eval2[i]*B[i];
 	    RealD vv = norm2(v);
 	    
+	    std::cout.precision(13);
 	    std::cout << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
 	    std::cout << "eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[i];
 	    std::cout <<" |H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv<< std::endl;
 	    
-	    if(vv<eresid*eresid){
+	// change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged
+	    if((vv<eresid*eresid) && (i == Nconv) ){
 	      Iconv[Nconv] = i;
 	      ++Nconv;
 	    }
@@ -1140,6 +1198,7 @@ static void Lock(DenseMatrix<T> &H, 	///Hess mtx
 
   }
 }
+#endif
 
 
  };
diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/Actions.h
index d4a605aa..eafe3f67 100644
--- a/lib/qcd/action/Actions.h
+++ b/lib/qcd/action/Actions.h
@@ -209,7 +209,7 @@ typedef WilsonFermion<GparityWilsonImplD>     GparityWilsonFermionD;
 typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR;
 typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF;
 typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD;
-#if 0
+#if 1
 typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR;
 typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF;
 typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD;
diff --git a/tests/Make.inc b/tests/Make.inc
index f7c83671..dc109d38 100644
--- a/tests/Make.inc
+++ b/tests/Make.inc
@@ -96,6 +96,9 @@ Test_dwf_hdcr_LDADD=-lGrid
 
 Test_dwf_lanczos_SOURCES=Test_dwf_lanczos.cc
 Test_dwf_lanczos_LDADD=-lGrid
+if USE_LAPACK
+Test_dwf_lanczos_LDADD +=-lGrid -llapack -llapacke
+endif
 
 
 Test_gamma_SOURCES=Test_gamma.cc
@@ -232,6 +235,9 @@ Test_stencil_LDADD=-lGrid
 
 Test_synthetic_lanczos_SOURCES=Test_synthetic_lanczos.cc
 Test_synthetic_lanczos_LDADD=-lGrid
+if USE_LAPACK
+Test_synthetic_lanczos_LDADD +=-lGrid -llapack -llapacke
+endif
 
 
 Test_wilson_cg_prec_SOURCES=Test_wilson_cg_prec.cc
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 336cb6e7..5dff7e25 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -8,6 +8,16 @@ endif
 AM_CXXFLAGS = -I$(top_srcdir)/lib
 AM_LDFLAGS = -L$(top_builddir)/lib
 
+if USE_LAPACK
+AM_CXXFLAGS += -DUSE_LAPACK
+if USE_LAPACK_LIB
+#if test "X${ac_LAPACK}X" != XyesX 
+AM_CXXFLAGS += -I$(ac_LAPACK)/include
+AM_LDFLAGS += -L$(ac_LAPACK)/lib
+#fi
+endif
+endif
+
 if BUILD_ZMM
   bin_PROGRAMS=Test_zmm
 endif
diff --git a/tests/Test_dwf_lanczos.cc b/tests/Test_dwf_lanczos.cc
index f9bf03ac..27d23fba 100644
--- a/tests/Test_dwf_lanczos.cc
+++ b/tests/Test_dwf_lanczos.cc
@@ -43,13 +43,14 @@ int main (int argc, char ** argv)
   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
+  printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n",UGrid,UrbGrid,FGrid,FrbGrid);
 
   std::vector<int> seeds4({1,2,3,4});
   std::vector<int> seeds5({5,6,7,8});
   GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
   GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
+  GridParallelRNG          RNG5rb(FrbGrid);  RNG5.SeedFixedIntegers(seeds5);
 
-  LatticeFermion    src(FGrid); gaussian(RNG5,src);
   LatticeGaugeField Umu(UGrid); 
   SU3::HotConfiguration(RNG4, Umu);
 
@@ -58,33 +59,36 @@ int main (int argc, char ** argv)
     U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
   }
   
-  RealD mass=0.1;
+  RealD mass=0.01;
   RealD M5=1.8;
   DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 
-  MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
+//  MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
+//  SchurDiagTwoOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
+  SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
 
   const int Nstop = 30;
   const int Nk = 40;
-  const int Np = 10;
+  const int Np = 40;
   const int Nm = Nk+Np;
   const int MaxIt= 10000;
   RealD resid = 1.0e-8;
 
-  std::vector<double> Coeffs { 0.,1.};
+  std::vector<double> Coeffs { 0.,-1.};
   Polynomial<LatticeFermion> PolyX(Coeffs);
-  Chebyshev<LatticeFermion> Cheb(1,80.,11);
+  Chebyshev<LatticeFermion> Cheb(1,80.,21);
 //  ChebyshevLanczos<LatticeFermion> Cheb(9.,1.,0.,20);
-  Cheb.csv(std::cout);
+//  Cheb.csv(std::cout);
 //  exit(-24);
-  ImplicitlyRestartedLanczos<LatticeFermion> IRL(*FGrid,HermOp,Cheb,Nstop,Nk,Nm,resid,MaxIt);
+  ImplicitlyRestartedLanczos<LatticeFermion> IRL(HermOp,Cheb,Nstop,Nk,Nm,resid,MaxIt);
 
   
   std::vector<RealD>          eval(Nm);
-  std::vector<LatticeFermion> evec(Nm,FGrid);
-//  for(int i=0;i<Nm;i++){
-//    std::cout << i<<" / "<< Nm<< " grid pointer "<<evec[i]._grid<<std::endl;
-//  };
+  LatticeFermion    src(FrbGrid); gaussian(RNG5rb,src);
+  std::vector<LatticeFermion> evec(Nm,FrbGrid);
+  for(int i=0;i<1;i++){
+    std::cout << i<<" / "<< Nm<< " grid pointer "<<evec[i]._grid<<std::endl;
+  };
 
   int Nconv;
   IRL.calc(eval,evec,