Added configure flag for LAPACK. Tested ImplicitlyRestartedLanczos::calc()

Checking in before cleaning up
2026-05-26 20:14:16 +01:00 · 2016-02-20 02:50:32 -05:00
parent bd84c23298
commit 9f0d9ade68
10 changed files with 212 additions and 52 deletions
@@ -626,6 +626,10 @@ ac_subst_vars='am__EXEEXT_FALSE
 am__EXEEXT_TRUE
 LTLIBOBJS
 LIBOBJS
 USE_LAPACK_LIB_FALSE
 USE_LAPACK_LIB_TRUE
 USE_LAPACK_FALSE
 USE_LAPACK_TRUE
 BUILD_CHROMA_REGRESSION_FALSE
 BUILD_CHROMA_REGRESSION_TRUE
 BUILD_COMMS_NONE_FALSE
@@ -751,6 +755,7 @@ enable_precision
 enable_comms
 enable_rng
 enable_chroma
 enable_lapack
 '
      ac_precious_vars='build_alias
 host_alias
@@ -1399,6 +1404,7 @@ Optional Features:
  --enable-rng=ranlux48|mt19937
                          Select Random Number Generator to be used
  --enable-chroma         Expect chroma compiled under c++11
  --enable-lapack         Location of lapack
 Some influential environment variables:
  CXX         C++ compiler command
@@ -6629,6 +6635,47 @@ else
 fi
 #
 # Chroma regression tests
 #
 # Check whether --enable-lapack was given.
 if test "${enable_lapack+set}" = set; then :
  enableval=$enable_lapack; ac_LAPACK=${enable_lapack}
 else
  ac_LAPACK=no
 fi
 case ${ac_LAPACK} in
     yes)
       echo Enabling lapack
     ;;
     no)
       echo Disabling lapack
     ;;
     *)
       echo Enabling lapack at ${ac_LAPACK}
 #     AC_MSG_ERROR([Enabli${ac_LAPACK} unsupported --enable-lapack option]);
     ;;
 esac
 if  test "X${ac_LAPACK}X" != "XnoX" ; then
  USE_LAPACK_TRUE=
  USE_LAPACK_FALSE='#'
 else
  USE_LAPACK_TRUE='#'
  USE_LAPACK_FALSE=
 fi
 if  test "X${ac_LAPACK}X" != "XyesX" ; then
  USE_LAPACK_LIB_TRUE=
  USE_LAPACK_LIB_FALSE='#'
 else
  USE_LAPACK_LIB_TRUE='#'
  USE_LAPACK_LIB_FALSE=
 fi
 ###################################################################
 # Checks for doxygen support
 # if present enables the "make doxyfile" command
@@ -6808,6 +6855,14 @@ if test -z "${BUILD_CHROMA_REGRESSION_TRUE}" && test -z "${BUILD_CHROMA_REGRESSI
  as_fn_error $? "conditional \"BUILD_CHROMA_REGRESSION\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
 fi
 if test -z "${USE_LAPACK_TRUE}" && test -z "${USE_LAPACK_FALSE}"; then
  as_fn_error $? "conditional \"USE_LAPACK\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
 fi
 if test -z "${USE_LAPACK_LIB_TRUE}" && test -z "${USE_LAPACK_LIB_FALSE}"; then
  as_fn_error $? "conditional \"USE_LAPACK_LIB\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
 fi
 : "${CONFIG_STATUS=./config.status}"
 ac_write_fail=0
@@ -8154,6 +8209,7 @@ The following features are enabled:
 - communications type           : ${ac_COMMS}
 - default precision             : ${ac_PRECISION}
 - RNG choice                    : ${ac_RNG}
 - LAPACK	                : ${ac_LAPACK}
 "
@@ -222,6 +222,27 @@ esac
 AM_CONDITIONAL(BUILD_CHROMA_REGRESSION,[ test "X${ac_CHROMA}X" == "XyesX" ])
 #
 # Chroma regression tests
 #
 AC_ARG_ENABLE([lapack],[AC_HELP_STRING([--enable-lapack],[Location of lapack ])],[ac_LAPACK=${enable_lapack}],[ac_LAPACK=no])
 case ${ac_LAPACK} in
     yes)
       echo Enabling lapack
     ;;
     no)
       echo Disabling lapack
     ;;
     *)
       echo Enabling lapack at ${ac_LAPACK}
 #     AC_MSG_ERROR([Enabli${ac_LAPACK} unsupported --enable-lapack option]); 
     ;;
 esac
 AM_CONDITIONAL(USE_LAPACK,[ test "X${ac_LAPACK}X" != "XnoX" ])
 AM_CONDITIONAL(USE_LAPACK_LIB,[ test "X${ac_LAPACK}X" != "XyesX" ])
 ###################################################################
 # Checks for doxygen support
 # if present enables the "make doxyfile" command
@@ -265,6 +286,7 @@ The following features are enabled:
 - communications type           : ${ac_COMMS}
 - default precision             : ${ac_PRECISION}
 - RNG choice                    : ${ac_RNG} 
 - LAPACK	                : ${ac_LAPACK} 
 "
@@ -222,6 +222,7 @@ namespace Grid {
      SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
      virtual  RealD Mpc      (const Field &in, Field &out) {
 	Field tmp(in._grid);
 //	std::cout <<"grid pointers: in._grid="<< in._grid << " out._grid=" << out._grid << "  _Mat.Grid=" << _Mat.Grid() << " _Mat.RedBlackGrid=" << _Mat.RedBlackGrid() << std::endl;
 	_Mat.Meooe(in,tmp);
 	_Mat.MooeeInv(tmp,out);
@@ -251,10 +252,10 @@ namespace Grid {
      virtual  RealD Mpc      (const Field &in, Field &out) {
 	Field tmp(in._grid);
-	_Mat.Meooe(in,tmp);
+	_Mat.Meooe(in,out);
-	_Mat.MooeeInv(tmp,out);
+	_Mat.MooeeInv(out,tmp);
-	_Mat.Meooe(out,tmp);
+	_Mat.Meooe(tmp,out);
-	_Mat.MooeeInv(tmp,out);
+	_Mat.MooeeInv(out,tmp);
 	return axpy_norm(out,-1.0,tmp,in);
      }
@@ -198,6 +198,8 @@ namespace Grid {
    void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
      GridBase *grid=in._grid;
 //std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl;
 //<<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
      int vol=grid->gSites();
@@ -274,7 +274,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
  }
  // ugly hack
  std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
-  assert(0);
+//  assert(0);
 }
  };
@@ -30,7 +30,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 #define GRID_IRL_H
 #include <string.h> //memset
-#include <lapacke.h> //memset
+#ifdef USE_LAPACK
 #include <lapacke.h>
 #endif
 #include <algorithms/iterative/DenseMatrix.h>
 #include <algorithms/iterative/EigenSort.h>
@@ -60,7 +62,7 @@ public:
    SortEigen<Field> _sort;
-    GridCartesian &_fgrid;
+//    GridCartesian &_fgrid;
    LinearOperatorBase<Field> &_Linop;
@@ -72,14 +74,14 @@ public:
    void init(void){};
    void Abort(int ff, DenseVector<RealD> &evals,  DenseVector<DenseVector<RealD> > &evecs);
-    ImplicitlyRestartedLanczos(GridCartesian &fgrid, LinearOperatorBase<Field> &Linop, // op
+    ImplicitlyRestartedLanczos(
 				LinearOperatorBase<Field> &Linop, // op
 			       OperatorFunction<Field> & poly,   // polynmial
 			       int _Nstop, // sought vecs
 			       int _Nk, // sought vecs
 			       int _Nm, // spare vecs
 			       RealD _eresid, // resid in lmdue deficit 
 			       int _Niter) : // Max iterations
      _fgrid(fgrid),
      _Linop(Linop),
      _poly(poly),
      Nstop(_Nstop),
@@ -91,6 +93,24 @@ public:
      Np = Nm-Nk; assert(Np>0);
    };
    ImplicitlyRestartedLanczos(
 				LinearOperatorBase<Field> &Linop, // op
 			       OperatorFunction<Field> & poly,   // polynmial
 			       int _Nk, // sought vecs
 			       int _Nm, // spare vecs
 			       RealD _eresid, // resid in lmdue deficit 
 			       int _Niter) : // Max iterations
      _Linop(Linop),
      _poly(poly),
      Nstop(_Nk),
      Nk(_Nk),
      Nm(_Nm),
      eresid(_eresid),
      Niter(_Niter)
    { 
      Np = Nm-Nk; assert(Np>0);
    };
    /////////////////////////
    // Sanity checked this routine (step) against Saad.
    /////////////////////////
@@ -228,16 +248,17 @@ public:
      }
    }
-//int lapackcj_Wilkinson(Matrix<T> &AH, vector<T> &tevals, vector<vector<T> > &tevecs, double small) {
+#ifdef USE_LAPACK
    void diagonalize_lapack(DenseVector<RealD>& lmd,
 		     DenseVector<RealD>& lme, 
-		     int Nm2,
+		     int N1,
-		     int Nm,
+		     int N2,
-		     DenseVector<RealD>& Qt){
+		     DenseVector<RealD>& Qt,
 		     GridBase *grid){
  const int size = Nm;
 //  tevals.resize(size);
 //  tevecs.resize(size);
-  int NN = Nm;
+  int NN = N1;
  double evals_tmp[NN];
  double evec_tmp[NN][NN];
  memset(evec_tmp[0],0,sizeof(double)*NN*NN);
@@ -266,8 +287,9 @@ public:
  int info;
 //  int total = QMP_get_number_of_nodes();
 //  int node = QMP_get_node_number();
-  int total = _fgrid._Nprocessors;
+//  GridBase *grid = evec[0]._grid;
-  int node = _fgrid._processor;
+  int total = grid->_Nprocessors;
  int node = grid->_processor;
  int interval = (NN/total)+1;
  double vl = 0.0, vu = 0.0;
  int il = interval*node+1 , iu = interval*(node+1);
@@ -298,40 +320,50 @@ public:
      {
 //        QMP_sum_double_array(evals_tmp,NN);
 //        QMP_sum_double_array((double *)evec_tmp,NN*NN);
-         _fgrid.GlobalSumVector(evals_tmp,NN);
+         grid->GlobalSumVector(evals_tmp,NN);
-         _fgrid.GlobalSumVector((double*)evec_tmp,NN*NN);
+         grid->GlobalSumVector((double*)evec_tmp,NN*NN);
      }
    } 
 // cheating a bit. It is better to sort instead of just reversing it, but the document of the routine says evals are sorted in increasing order. qr gives evals in decreasing order.
  for(int i=0;i<NN;i++){
    for(int j=0;j<NN;j++)
-      Qt[i*NN+j]=evec_tmp[i][j];
+      Qt[(NN-1-i)*N2+j]=evec_tmp[i][j];
-      lmd [i]=evals_tmp[i];
+      lmd [NN-1-i]=evals_tmp[i];
  }
 }
 #endif
    void diagonalize(DenseVector<RealD>& lmd,
 		     DenseVector<RealD>& lme, 
-		     int Nm2,
+		     int N2,
-		     int Nm,
+		     int N1,
-		     DenseVector<RealD>& Qt)
+		     DenseVector<RealD>& Qt,
 		     GridBase *grid)
    {
-if(1)
+#ifdef USE_LAPACK
-{
+    const int check_lapack=0; // just use lapack if 0, check against lapack if 1
-	DenseVector <RealD> lmd2(Nm);
+
-	DenseVector <RealD> lme2(Nm);
+    if(!check_lapack)
-         for(int k=0; k<Nm; ++k){
+	return diagonalize_lapack(lmd,lme,N2,N1,Qt,grid);
 	DenseVector <RealD> lmd2(N1);
 	DenseVector <RealD> lme2(N1);
 	DenseVector<RealD> Qt2(N1*N1);
         for(int k=0; k<N1; ++k){
 	    lmd2[k] = lmd[k];
 	    lme2[k] = lme[k];
 	  }
         for(int k=0; k<N1*N1; ++k)
 	Qt2[k] = Qt[k];
-	return diagonalize_lapack(lmd,lme,Nm2,Nm,Qt);
+//	diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid);
-}
+#endif
-      int Niter = 100*Nm;
+      int Niter = 100*N1;
      int kmin = 1;
-      int kmax = Nk;
+      int kmax = N2;
      // (this should be more sophisticated)
      for(int iter=0; iter<Niter; ++iter){
@@ -343,7 +375,7 @@ if(1)
 	// (Dsh: shift)
 	// transformation
-	qr_decomp(lmd,lme,Nk,Nm,Qt,Dsh,kmin,kmax);
+	qr_decomp(lmd,lme,N2,N1,Qt,Dsh,kmin,kmax);
 	// Convergence criterion (redef of kmin and kamx)
 	for(int j=kmax-1; j>= kmin; --j){
@@ -354,6 +386,23 @@ if(1)
 	  }
 	}
 	Niter = iter;
 #ifdef USE_LAPACK
    if(check_lapack){
 	const double SMALL=1e-8;
 	diagonalize_lapack(lmd2,lme2,N2,N1,Qt2,grid);
 	DenseVector <RealD> lmd3(N2);
         for(int k=0; k<N2; ++k) lmd3[k]=lmd[k];
        _sort.push(lmd3,N2);
        _sort.push(lmd2,N2);
         for(int k=0; k<N2; ++k){
 	    if (fabs(lmd2[k] - lmd3[k]) >SMALL)  std::cout <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <<std::endl;
 //	    if (fabs(lme2[k] - lme[k]) >SMALL)  std::cout <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <<std::endl;
 	  }
         for(int k=0; k<N1*N1; ++k){
 //	    if (fabs(Qt2[k] - Qt[k]) >SMALL)  std::cout <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] <<std::endl;
 	}
    }
 #endif
 	return;
      continued:
@@ -369,6 +418,7 @@ if(1)
      abort();
    }
 #if 1
    static RealD normalise(Field& v) 
    {
      RealD nn = norm2(v);
@@ -430,6 +480,7 @@ until convergence
      {
 	GridBase *grid = evec[0]._grid;
 	assert(grid == src._grid);
 	std::cout << " -- Nk = " << Nk << " Np = "<< Np << std::endl;
 	std::cout << " -- Nm = " << Nm << std::endl;
@@ -461,8 +512,10 @@ until convergence
 	//	evec[0] = 1.0;
 	evec[0] = src;
 	std:: cout <<"norm2(src)= " << norm2(src)<<std::endl;
 // << src._grid  << std::endl;
 	normalise(evec[0]);
 	std:: cout <<"norm2(evec[0])= " << norm2(evec[0]) <<std::endl;
 // << evec[0]._grid << std::endl;
 	// Initial Nk steps
 	for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k);
@@ -494,7 +547,7 @@ until convergence
 	    lme2[k] = lme[k+k1-1];
 	  }
 	  setUnit_Qt(Nm,Qt);
-	  diagonalize(eval2,lme2,Nm,Nm,Qt);
+	  diagonalize(eval2,lme2,Nm,Nm,Qt,grid);
 	  // sorting
 	  _sort.push(eval2,Nm);
@@ -533,7 +586,7 @@ until convergence
 	    lme2[k] = lme[k];
 	  }
 	  setUnit_Qt(Nm,Qt);
-	  diagonalize(eval2,lme2,Nk,Nm,Qt);
+	  diagonalize(eval2,lme2,Nk,Nm,Qt,grid);
 	  for(int k = 0; k<Nk; ++k) B[k]=0.0;
@@ -541,13 +594,16 @@ until convergence
 	    for(int k = 0; k<Nk; ++k){
 	      B[j] += Qt[k+j*Nm] * evec[k];
 	    }
 //	    std::cout << "norm(B["<<j<<"])="<<norm2(B[j])<<std::endl;
 	  }
 //	_sort.push(eval2,B,Nk);
 	  Nconv = 0;
 	  //	  std::cout << std::setiosflags(std::ios_base::scientific);
 	  for(int i=0; i<Nk; ++i){
-	    _poly(_Linop,B[i],v);
+//	    _poly(_Linop,B[i],v);
 	    _Linop.HermOp(B[i],v);
 	    RealD vnum = real(innerProduct(B[i],v)); // HermOp.
 	    RealD vden = norm2(B[i]);
@@ -555,11 +611,13 @@ until convergence
 	    v -= eval2[i]*B[i];
 	    RealD vv = norm2(v);
 	    std::cout.precision(13);
 	    std::cout << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
 	    std::cout << "eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[i];
 	    std::cout <<" |H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv<< std::endl;
-	    if(vv<eresid*eresid){
+	// change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged
 	    if((vv<eresid*eresid) && (i == Nconv) ){
 	      Iconv[Nconv] = i;
 	      ++Nconv;
 	    }
@@ -1140,6 +1198,7 @@ static void Lock(DenseMatrix<T> &H, 	///Hess mtx
  }
 }
 #endif
 };
@@ -209,7 +209,7 @@ typedef WilsonFermion<GparityWilsonImplD>     GparityWilsonFermionD;
 typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR;
 typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF;
 typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD;
-#if 0
+#if 1
 typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR;
 typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF;
 typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD;
@@ -96,6 +96,9 @@ Test_dwf_hdcr_LDADD=-lGrid
 Test_dwf_lanczos_SOURCES=Test_dwf_lanczos.cc
 Test_dwf_lanczos_LDADD=-lGrid
 if USE_LAPACK
 Test_dwf_lanczos_LDADD +=-lGrid -llapack -llapacke
 endif
 Test_gamma_SOURCES=Test_gamma.cc
@@ -232,6 +235,9 @@ Test_stencil_LDADD=-lGrid
 Test_synthetic_lanczos_SOURCES=Test_synthetic_lanczos.cc
 Test_synthetic_lanczos_LDADD=-lGrid
 if USE_LAPACK
 Test_synthetic_lanczos_LDADD +=-lGrid -llapack -llapacke
 endif
 Test_wilson_cg_prec_SOURCES=Test_wilson_cg_prec.cc
@@ -8,6 +8,16 @@ endif
 AM_CXXFLAGS = -I$(top_srcdir)/lib
 AM_LDFLAGS = -L$(top_builddir)/lib
 if USE_LAPACK
 AM_CXXFLAGS += -DUSE_LAPACK
 if USE_LAPACK_LIB
 #if test "X${ac_LAPACK}X" != XyesX 
 AM_CXXFLAGS += -I$(ac_LAPACK)/include
 AM_LDFLAGS += -L$(ac_LAPACK)/lib
 #fi
 endif
 endif
 if BUILD_ZMM
  bin_PROGRAMS=Test_zmm
 endif
@@ -43,13 +43,14 @@ int main (int argc, char ** argv)
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
  printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n",UGrid,UrbGrid,FGrid,FrbGrid);
  std::vector<int> seeds4({1,2,3,4});
  std::vector<int> seeds5({5,6,7,8});
  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
  GridParallelRNG          RNG5rb(FrbGrid);  RNG5.SeedFixedIntegers(seeds5);
  LatticeFermion    src(FGrid); gaussian(RNG5,src);
  LatticeGaugeField Umu(UGrid); 
  SU3::HotConfiguration(RNG4, Umu);
@@ -58,33 +59,36 @@ int main (int argc, char ** argv)
    U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
  }
-  RealD mass=0.1;
+  RealD mass=0.01;
  RealD M5=1.8;
  DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
-  MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
+//  MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
 //  SchurDiagTwoOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
  SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
  const int Nstop = 30;
  const int Nk = 40;
-  const int Np = 10;
+  const int Np = 40;
  const int Nm = Nk+Np;
  const int MaxIt= 10000;
  RealD resid = 1.0e-8;
-  std::vector<double> Coeffs { 0.,1.};
+  std::vector<double> Coeffs { 0.,-1.};
  Polynomial<LatticeFermion> PolyX(Coeffs);
-  Chebyshev<LatticeFermion> Cheb(1,80.,11);
+  Chebyshev<LatticeFermion> Cheb(1,80.,21);
 //  ChebyshevLanczos<LatticeFermion> Cheb(9.,1.,0.,20);
-  Cheb.csv(std::cout);
+//  Cheb.csv(std::cout);
 //  exit(-24);
-  ImplicitlyRestartedLanczos<LatticeFermion> IRL(*FGrid,HermOp,Cheb,Nstop,Nk,Nm,resid,MaxIt);
+  ImplicitlyRestartedLanczos<LatticeFermion> IRL(HermOp,Cheb,Nstop,Nk,Nm,resid,MaxIt);
  std::vector<RealD>          eval(Nm);
-  std::vector<LatticeFermion> evec(Nm,FGrid);
+  LatticeFermion    src(FrbGrid); gaussian(RNG5rb,src);
-//  for(int i=0;i<Nm;i++){
+  std::vector<LatticeFermion> evec(Nm,FrbGrid);
-//    std::cout << i<<" / "<< Nm<< " grid pointer "<<evec[i]._grid<<std::endl;
+  for(int i=0;i<1;i++){
-//  };
+    std::cout << i<<" / "<< Nm<< " grid pointer "<<evec[i]._grid<<std::endl;
  };
  int Nconv;
  IRL.calc(eval,evec,