From 9f0d9ade68249642215e1215b27d46d6e9ffc2b7 Mon Sep 17 00:00:00 2001 From: Jung Date: Sat, 20 Feb 2016 02:50:32 -0500 Subject: [PATCH] Added configure flag for LAPACK. Tested ImplicitlyRestartedLanczos::calc() Checking in before cleaning up --- configure | 56 ++++++++ configure.ac | 22 +++ lib/algorithms/LinearOperator.h | 9 +- lib/algorithms/approx/Chebyshev.h | 2 + .../iterative/ConjugateGradientMultiShift.h | 2 +- .../iterative/ImplicitlyRestartedLanczos.h | 127 +++++++++++++----- lib/qcd/action/Actions.h | 2 +- tests/Make.inc | 6 + tests/Makefile.am | 10 ++ tests/Test_dwf_lanczos.cc | 28 ++-- 10 files changed, 212 insertions(+), 52 deletions(-) diff --git a/configure b/configure index 8fca7966..9b80c73f 100755 --- a/configure +++ b/configure @@ -626,6 +626,10 @@ ac_subst_vars='am__EXEEXT_FALSE am__EXEEXT_TRUE LTLIBOBJS LIBOBJS +USE_LAPACK_LIB_FALSE +USE_LAPACK_LIB_TRUE +USE_LAPACK_FALSE +USE_LAPACK_TRUE BUILD_CHROMA_REGRESSION_FALSE BUILD_CHROMA_REGRESSION_TRUE BUILD_COMMS_NONE_FALSE @@ -751,6 +755,7 @@ enable_precision enable_comms enable_rng enable_chroma +enable_lapack ' ac_precious_vars='build_alias host_alias @@ -1399,6 +1404,7 @@ Optional Features: --enable-rng=ranlux48|mt19937 Select Random Number Generator to be used --enable-chroma Expect chroma compiled under c++11 + --enable-lapack Location of lapack Some influential environment variables: CXX C++ compiler command @@ -6629,6 +6635,47 @@ else fi +# +# Chroma regression tests +# +# Check whether --enable-lapack was given. +if test "${enable_lapack+set}" = set; then : + enableval=$enable_lapack; ac_LAPACK=${enable_lapack} +else + ac_LAPACK=no +fi + + +case ${ac_LAPACK} in + yes) + echo Enabling lapack + ;; + no) + echo Disabling lapack + ;; + *) + echo Enabling lapack at ${ac_LAPACK} +# AC_MSG_ERROR([Enabli${ac_LAPACK} unsupported --enable-lapack option]); + ;; +esac + + if test "X${ac_LAPACK}X" != "XnoX" ; then + USE_LAPACK_TRUE= + USE_LAPACK_FALSE='#' +else + USE_LAPACK_TRUE='#' + USE_LAPACK_FALSE= +fi + + if test "X${ac_LAPACK}X" != "XyesX" ; then + USE_LAPACK_LIB_TRUE= + USE_LAPACK_LIB_FALSE='#' +else + USE_LAPACK_LIB_TRUE='#' + USE_LAPACK_LIB_FALSE= +fi + + ################################################################### # Checks for doxygen support # if present enables the "make doxyfile" command @@ -6808,6 +6855,14 @@ if test -z "${BUILD_CHROMA_REGRESSION_TRUE}" && test -z "${BUILD_CHROMA_REGRESSI as_fn_error $? "conditional \"BUILD_CHROMA_REGRESSION\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${USE_LAPACK_TRUE}" && test -z "${USE_LAPACK_FALSE}"; then + as_fn_error $? "conditional \"USE_LAPACK\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${USE_LAPACK_LIB_TRUE}" && test -z "${USE_LAPACK_LIB_FALSE}"; then + as_fn_error $? "conditional \"USE_LAPACK_LIB\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi : "${CONFIG_STATUS=./config.status}" ac_write_fail=0 @@ -8154,6 +8209,7 @@ The following features are enabled: - communications type : ${ac_COMMS} - default precision : ${ac_PRECISION} - RNG choice : ${ac_RNG} +- LAPACK : ${ac_LAPACK} " diff --git a/configure.ac b/configure.ac index 9e6464c4..63667c5c 100644 --- a/configure.ac +++ b/configure.ac @@ -222,6 +222,27 @@ esac AM_CONDITIONAL(BUILD_CHROMA_REGRESSION,[ test "X${ac_CHROMA}X" == "XyesX" ]) +# +# Chroma regression tests +# +AC_ARG_ENABLE([lapack],[AC_HELP_STRING([--enable-lapack],[Location of lapack ])],[ac_LAPACK=${enable_lapack}],[ac_LAPACK=no]) + +case ${ac_LAPACK} in + yes) + echo Enabling lapack + ;; + no) + echo Disabling lapack + ;; + *) + echo Enabling lapack at ${ac_LAPACK} +# AC_MSG_ERROR([Enabli${ac_LAPACK} unsupported --enable-lapack option]); + ;; +esac + +AM_CONDITIONAL(USE_LAPACK,[ test "X${ac_LAPACK}X" != "XnoX" ]) +AM_CONDITIONAL(USE_LAPACK_LIB,[ test "X${ac_LAPACK}X" != "XyesX" ]) + ################################################################### # Checks for doxygen support # if present enables the "make doxyfile" command @@ -265,6 +286,7 @@ The following features are enabled: - communications type : ${ac_COMMS} - default precision : ${ac_PRECISION} - RNG choice : ${ac_RNG} +- LAPACK : ${ac_LAPACK} " diff --git a/lib/algorithms/LinearOperator.h b/lib/algorithms/LinearOperator.h index 6706a0b3..ea47d43b 100644 --- a/lib/algorithms/LinearOperator.h +++ b/lib/algorithms/LinearOperator.h @@ -222,6 +222,7 @@ namespace Grid { SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){}; virtual RealD Mpc (const Field &in, Field &out) { Field tmp(in._grid); +// std::cout <<"grid pointers: in._grid="<< in._grid << " out._grid=" << out._grid << " _Mat.Grid=" << _Mat.Grid() << " _Mat.RedBlackGrid=" << _Mat.RedBlackGrid() << std::endl; _Mat.Meooe(in,tmp); _Mat.MooeeInv(tmp,out); @@ -251,10 +252,10 @@ namespace Grid { virtual RealD Mpc (const Field &in, Field &out) { Field tmp(in._grid); - _Mat.Meooe(in,tmp); - _Mat.MooeeInv(tmp,out); - _Mat.Meooe(out,tmp); - _Mat.MooeeInv(tmp,out); + _Mat.Meooe(in,out); + _Mat.MooeeInv(out,tmp); + _Mat.Meooe(tmp,out); + _Mat.MooeeInv(out,tmp); return axpy_norm(out,-1.0,tmp,in); } diff --git a/lib/algorithms/approx/Chebyshev.h b/lib/algorithms/approx/Chebyshev.h index 5f6c1607..96a75a92 100644 --- a/lib/algorithms/approx/Chebyshev.h +++ b/lib/algorithms/approx/Chebyshev.h @@ -198,6 +198,8 @@ namespace Grid { void operator() (LinearOperatorBase &Linop, const Field &in, Field &out) { GridBase *grid=in._grid; +//std::cout << "Chevyshef(): in._grid="< #define GRID_IRL_H #include //memset -#include //memset +#ifdef USE_LAPACK +#include +#endif #include #include @@ -60,7 +62,7 @@ public: SortEigen _sort; - GridCartesian &_fgrid; +// GridCartesian &_fgrid; LinearOperatorBase &_Linop; @@ -72,14 +74,14 @@ public: void init(void){}; void Abort(int ff, DenseVector &evals, DenseVector > &evecs); - ImplicitlyRestartedLanczos(GridCartesian &fgrid, LinearOperatorBase &Linop, // op + ImplicitlyRestartedLanczos( + LinearOperatorBase &Linop, // op OperatorFunction & poly, // polynmial int _Nstop, // sought vecs int _Nk, // sought vecs int _Nm, // spare vecs RealD _eresid, // resid in lmdue deficit int _Niter) : // Max iterations - _fgrid(fgrid), _Linop(Linop), _poly(poly), Nstop(_Nstop), @@ -91,6 +93,24 @@ public: Np = Nm-Nk; assert(Np>0); }; + ImplicitlyRestartedLanczos( + LinearOperatorBase &Linop, // op + OperatorFunction & poly, // polynmial + int _Nk, // sought vecs + int _Nm, // spare vecs + RealD _eresid, // resid in lmdue deficit + int _Niter) : // Max iterations + _Linop(Linop), + _poly(poly), + Nstop(_Nk), + Nk(_Nk), + Nm(_Nm), + eresid(_eresid), + Niter(_Niter) + { + Np = Nm-Nk; assert(Np>0); + }; + ///////////////////////// // Sanity checked this routine (step) against Saad. ///////////////////////// @@ -228,16 +248,17 @@ public: } } -//int lapackcj_Wilkinson(Matrix &AH, vector &tevals, vector > &tevecs, double small) { +#ifdef USE_LAPACK void diagonalize_lapack(DenseVector& lmd, DenseVector& lme, - int Nm2, - int Nm, - DenseVector& Qt){ + int N1, + int N2, + DenseVector& Qt, + GridBase *grid){ const int size = Nm; // tevals.resize(size); // tevecs.resize(size); - int NN = Nm; + int NN = N1; double evals_tmp[NN]; double evec_tmp[NN][NN]; memset(evec_tmp[0],0,sizeof(double)*NN*NN); @@ -266,8 +287,9 @@ public: int info; // int total = QMP_get_number_of_nodes(); // int node = QMP_get_node_number(); - int total = _fgrid._Nprocessors; - int node = _fgrid._processor; +// GridBase *grid = evec[0]._grid; + int total = grid->_Nprocessors; + int node = grid->_processor; int interval = (NN/total)+1; double vl = 0.0, vu = 0.0; int il = interval*node+1 , iu = interval*(node+1); @@ -298,40 +320,50 @@ public: { // QMP_sum_double_array(evals_tmp,NN); // QMP_sum_double_array((double *)evec_tmp,NN*NN); - _fgrid.GlobalSumVector(evals_tmp,NN); - _fgrid.GlobalSumVector((double*)evec_tmp,NN*NN); + grid->GlobalSumVector(evals_tmp,NN); + grid->GlobalSumVector((double*)evec_tmp,NN*NN); } } +// cheating a bit. It is better to sort instead of just reversing it, but the document of the routine says evals are sorted in increasing order. qr gives evals in decreasing order. for(int i=0;i& lmd, DenseVector& lme, - int Nm2, - int Nm, - DenseVector& Qt) + int N2, + int N1, + DenseVector& Qt, + GridBase *grid) { -if(1) -{ - DenseVector lmd2(Nm); - DenseVector lme2(Nm); - for(int k=0; k lmd2(N1); + DenseVector lme2(N1); + DenseVector Qt2(N1*N1); + for(int k=0; k= kmin; --j){ @@ -354,6 +386,23 @@ if(1) } } Niter = iter; +#ifdef USE_LAPACK + if(check_lapack){ + const double SMALL=1e-8; + diagonalize_lapack(lmd2,lme2,N2,N1,Qt2,grid); + DenseVector lmd3(N2); + for(int k=0; kSMALL) std::cout <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <SMALL) std::cout <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <SMALL) std::cout <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] < &H, ///Hess mtx } } +#endif }; diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/Actions.h index d4a605aa..eafe3f67 100644 --- a/lib/qcd/action/Actions.h +++ b/lib/qcd/action/Actions.h @@ -209,7 +209,7 @@ typedef WilsonFermion GparityWilsonFermionD; typedef DomainWallFermion GparityDomainWallFermionR; typedef DomainWallFermion GparityDomainWallFermionF; typedef DomainWallFermion GparityDomainWallFermionD; -#if 0 +#if 1 typedef WilsonTMFermion GparityWilsonTMFermionR; typedef WilsonTMFermion GparityWilsonTMFermionF; typedef WilsonTMFermion GparityWilsonTMFermionD; diff --git a/tests/Make.inc b/tests/Make.inc index f7c83671..dc109d38 100644 --- a/tests/Make.inc +++ b/tests/Make.inc @@ -96,6 +96,9 @@ Test_dwf_hdcr_LDADD=-lGrid Test_dwf_lanczos_SOURCES=Test_dwf_lanczos.cc Test_dwf_lanczos_LDADD=-lGrid +if USE_LAPACK +Test_dwf_lanczos_LDADD +=-lGrid -llapack -llapacke +endif Test_gamma_SOURCES=Test_gamma.cc @@ -232,6 +235,9 @@ Test_stencil_LDADD=-lGrid Test_synthetic_lanczos_SOURCES=Test_synthetic_lanczos.cc Test_synthetic_lanczos_LDADD=-lGrid +if USE_LAPACK +Test_synthetic_lanczos_LDADD +=-lGrid -llapack -llapacke +endif Test_wilson_cg_prec_SOURCES=Test_wilson_cg_prec.cc diff --git a/tests/Makefile.am b/tests/Makefile.am index 336cb6e7..5dff7e25 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -8,6 +8,16 @@ endif AM_CXXFLAGS = -I$(top_srcdir)/lib AM_LDFLAGS = -L$(top_builddir)/lib +if USE_LAPACK +AM_CXXFLAGS += -DUSE_LAPACK +if USE_LAPACK_LIB +#if test "X${ac_LAPACK}X" != XyesX +AM_CXXFLAGS += -I$(ac_LAPACK)/include +AM_LDFLAGS += -L$(ac_LAPACK)/lib +#fi +endif +endif + if BUILD_ZMM bin_PROGRAMS=Test_zmm endif diff --git a/tests/Test_dwf_lanczos.cc b/tests/Test_dwf_lanczos.cc index f9bf03ac..27d23fba 100644 --- a/tests/Test_dwf_lanczos.cc +++ b/tests/Test_dwf_lanczos.cc @@ -43,13 +43,14 @@ int main (int argc, char ** argv) GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n",UGrid,UrbGrid,FGrid,FrbGrid); std::vector seeds4({1,2,3,4}); std::vector seeds5({5,6,7,8}); GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + GridParallelRNG RNG5rb(FrbGrid); RNG5.SeedFixedIntegers(seeds5); - LatticeFermion src(FGrid); gaussian(RNG5,src); LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4, Umu); @@ -58,33 +59,36 @@ int main (int argc, char ** argv) U[mu] = PeekIndex(Umu,mu); } - RealD mass=0.1; + RealD mass=0.01; RealD M5=1.8; DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); - MdagMLinearOperator HermOp(Ddwf); +// MdagMLinearOperator HermOp(Ddwf); +// SchurDiagTwoOperator HermOp(Ddwf); + SchurDiagMooeeOperator HermOp(Ddwf); const int Nstop = 30; const int Nk = 40; - const int Np = 10; + const int Np = 40; const int Nm = Nk+Np; const int MaxIt= 10000; RealD resid = 1.0e-8; - std::vector Coeffs { 0.,1.}; + std::vector Coeffs { 0.,-1.}; Polynomial PolyX(Coeffs); - Chebyshev Cheb(1,80.,11); + Chebyshev Cheb(1,80.,21); // ChebyshevLanczos Cheb(9.,1.,0.,20); - Cheb.csv(std::cout); +// Cheb.csv(std::cout); // exit(-24); - ImplicitlyRestartedLanczos IRL(*FGrid,HermOp,Cheb,Nstop,Nk,Nm,resid,MaxIt); + ImplicitlyRestartedLanczos IRL(HermOp,Cheb,Nstop,Nk,Nm,resid,MaxIt); std::vector eval(Nm); - std::vector evec(Nm,FGrid); -// for(int i=0;i evec(Nm,FrbGrid); + for(int i=0;i<1;i++){ + std::cout << i<<" / "<< Nm<< " grid pointer "<