diff --git a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h deleted file mode 100644 index de3f1790..00000000 --- a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h +++ /dev/null @@ -1,789 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h - - Copyright (C) 2015 - -Author: Peter Boyle -Author: paboyle -Author: Chulwoo Jung -Author: Christoph Lehner - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_BIRL_H -#define GRID_BIRL_H - -#include //memset -//#include -#include - - -namespace Grid { - -template -void basisOrthogonalize(std::vector &basis,Field &w,int k) -{ - for(int j=0; j -void basisRotate(std::vector &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm) -{ - typedef typename Field::vector_object vobj; - GridBase* grid = basis[0]._grid; - - parallel_region - { - std::vector < vobj > B(Nm); // Thread private - - parallel_for_internal(int ss=0;ss < grid->oSites();ss++){ - for(int j=j0; j -void basisReorderInPlace(std::vector &_v,std::vector& sort_vals, std::vector& idx) -{ - int vlen = idx.size(); - - assert(vlen>=1); - assert(vlen<=sort_vals.size()); - assert(vlen<=_v.size()); - - for (size_t i=0;i i); - ////////////////////////////////////// - // idx[i] is a table of desired sources giving a permutation. - // - // Swap v[i] with v[idx[i]]. - // - // Find j>i for which _vnew[j] = _vold[i], - // track the move idx[j] => idx[i] - // track the move idx[i] => i - ////////////////////////////////////// - size_t j; - for (j=i;j basisSortGetIndex(std::vector& sort_vals) -{ - std::vector idx(sort_vals.size()); - std::iota(idx.begin(), idx.end(), 0); - - // sort indexes based on comparing values in v - std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) { - return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]); - }); - return idx; -} - -template -void basisSortInPlace(std::vector & _v,std::vector& sort_vals, bool reverse) -{ - std::vector idx = basisSortGetIndex(sort_vals); - if (reverse) - std::reverse(idx.begin(), idx.end()); - - basisReorderInPlace(_v,sort_vals,idx); -} - -// PAB: faster to compute the inner products first then fuse loops. -// If performance critical can improve. -template -void basisDeflate(const std::vector &_v,const std::vector& eval,const Field& src_orig,Field& result) { - result = zero; - assert(_v.size()==eval.size()); - int N = (int)_v.size(); - for (int i=0;i -class BlockImplicitlyRestartedLanczos { - private: - const RealD small = 1.0e-8; - int MaxIter; - int MinRestart; // Minimum number of restarts; only check for convergence after - int Nstop; // Number of evecs checked for convergence - int Nk; // Number of converged sought - // int Np; // Np -- Number of spare vecs in krylov space // == Nm - Nk - int Nm; // Nm -- total number of vectors - IRLdiagonalisation diagonalisation; - int orth_period; - - RealD OrthoTime; - RealD eresid, betastp; - //////////////////////////////// - // Embedded objects - //////////////////////////////// - // SortEigen _sort; - LinearFunction &_HermOp; - LinearFunction &_HermOpTest; - ///////////////////////// - // Constructor - ///////////////////////// -public: - BlockImplicitlyRestartedLanczos(LinearFunction & HermOp, - LinearFunction & HermOpTest, - int _Nstop, // sought vecs - int _Nk, // sought vecs - int _Nm, // spare vecs - RealD _eresid, // resid in lmdue deficit - RealD _betastp, // if beta(k) < betastp: converged - int _MaxIter, // Max iterations - int _MinRestart, int _orth_period = 1, - IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : - _HermOp(HermOp), _HermOpTest(HermOpTest), - Nstop(_Nstop) , Nk(_Nk), Nm(_Nm), - eresid(_eresid), betastp(_betastp), - MaxIter(_MaxIter) , MinRestart(_MinRestart), - orth_period(_orth_period), diagonalisation(_diagonalisation) { }; - - //////////////////////////////// - // Helpers - //////////////////////////////// - template static RealD normalise(T& v) - { - RealD nn = norm2(v); - nn = sqrt(nn); - v = v * (1.0/nn); - return nn; - } - - void orthogonalize(Field& w, std::vector& evec,int k) - { - OrthoTime-=usecond()/1e6; - basisOrthogonalize(evec,w,k); - normalise(w); - OrthoTime+=usecond()/1e6; - } - -/* Rudy Arthur's thesis pp.137 ------------------------- -Require: M > K P = M − K † -Compute the factorization AVM = VM HM + fM eM -repeat - Q=I - for i = 1,...,P do - QiRi =HM −θiI Q = QQi - H M = Q †i H M Q i - end for - βK =HM(K+1,K) σK =Q(M,K) - r=vK+1βK +rσK - VK =VM(1:M)Q(1:M,1:K) - HK =HM(1:K,1:K) - →AVK =VKHK +fKe†K † Extend to an M = K + P step factorization AVM = VMHM + fMeM -until convergence -*/ - void calc(std::vector& eval, std::vector& evec, const Field& src, int& Nconv, bool reverse, int SkipTest) - { - GridBase *grid = src._grid; - assert(grid == evec[0]._grid); - - GridLogIRL.TimingMode(1); - std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; - std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl; - std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; - std::cout << GridLogIRL <<" -- seek Nk = " << Nk <<" vectors"<< std::endl; - std::cout << GridLogIRL <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl; - std::cout << GridLogIRL <<" -- total Nm = " << Nm <<" vectors"<< std::endl; - std::cout << GridLogIRL <<" -- size of eval = " << eval.size() << std::endl; - std::cout << GridLogIRL <<" -- size of evec = " << evec.size() << std::endl; - if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) { - std::cout << GridLogIRL << "Diagonalisation is DSTEGR "< lme(Nm); - std::vector lme2(Nm); - std::vector eval2(Nm); - std::vector eval2_copy(Nm); - Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm); - - Field f(grid); - Field v(grid); - int k1 = 1; - int k2 = Nk; - RealD beta_k; - - Nconv = 0; - - // Set initial vector - evec[0] = src; - normalise(evec[0]); - - // Initial Nk steps - OrthoTime=0.; - for(int k=0; k0); - basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis - - std::cout<= MinRestart) { - std::cout << GridLogIRL << "Rotation to test convergence " << std::endl; - - Field ev0_orig(grid); - ev0_orig = evec[0]; - - basisRotate(evec,Qt,0,Nk,0,Nk,Nm); - - { - std::cout << GridLogIRL << "Test convergence" << std::endl; - Field B(grid); - - for(int j = 0; j=Nstop || beta_k < betastp){ - goto converged; - } - - //B[j] +=Qt[k+_Nm*j] * _v[k]._odata[ss]; - { - Eigen::MatrixXd qm = Eigen::MatrixXd::Zero(Nk,Nk); // Restrict Qt to Nk x Nk - for (int k=0;k& lmd, - std::vector& lme, - std::vector& evec, - Field& w,int Nm,int k) - { - const RealD tiny = 1.0e-20; - assert( k< Nm ); - - GridStopWatch gsw_op,gsw_o; - - Field& evec_k = evec[k]; - - _HermOp(evec_k,w); - std::cout<0) w -= lme[k-1] * evec[k-1]; - - ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk) - RealD alph = real(zalph); - - w = w - alph * evec_k;// 5. wk:=wk−αkvk - - RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop - // 7. vk+1 := wk/βk+1 - - lmd[k] = alph; - lme[k] = beta; - - std::cout<0 && k % orth_period == 0) { - orthogonalize(w,evec,k); // orthonormalise - std::cout<& lmd, std::vector& lme, - int Nk, int Nm, - Eigen::MatrixXd & Qt, // Nm x Nm - GridBase *grid) - { - Eigen::MatrixXd TriDiag = Eigen::MatrixXd::Zero(Nk,Nk); - - for(int i=0;i eigensolver(TriDiag); - - for (int i = 0; i < Nk; i++) { - lmd[Nk-1-i] = eigensolver.eigenvalues()(i); - } - for (int i = 0; i < Nk; i++) { - for (int j = 0; j < Nk; j++) { - Qt(Nk-1-i,j) = eigensolver.eigenvectors()(j,i); - } - } - } - - /////////////////////////////////////////////////////////////////////////// - // File could end here if settle on Eigen ??? - /////////////////////////////////////////////////////////////////////////// - - void QR_decomp(std::vector& lmd, // Nm - std::vector& lme, // Nm - int Nk, int Nm, // Nk, Nm - Eigen::MatrixXd& Qt, // Nm x Nm matrix - RealD Dsh, int kmin, int kmax) - { - int k = kmin-1; - RealD x; - - RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]); - RealD c = ( lmd[k] -Dsh) *Fden; - RealD s = -lme[k] *Fden; - - RealD tmpa1 = lmd[k]; - RealD tmpa2 = lmd[k+1]; - RealD tmpb = lme[k]; - - lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb; - lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb; - lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb; - x =-s*lme[k+1]; - lme[k+1] = c*lme[k+1]; - - for(int i=0; i& lmd, std::vector& lme, - int Nk, int Nm, - Eigen::MatrixXd & Qt, - GridBase *grid) - { - Qt = Eigen::MatrixXd::Identity(Nm,Nm); - if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) { - diagonalize_lapack(lmd,lme,Nk,Nm,Qt,grid); - } else if ( diagonalisation == IRLdiagonaliseWithQR ) { - diagonalize_QR(lmd,lme,Nk,Nm,Qt,grid); - } else if ( diagonalisation == IRLdiagonaliseWithEigen ) { - diagonalize_Eigen(lmd,lme,Nk,Nm,Qt,grid); - } else { - assert(0); - } - } - -#ifdef USE_LAPACK -void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e, - double *vl, double *vu, int *il, int *iu, double *abstol, - int *m, double *w, double *z, int *ldz, int *isuppz, - double *work, int *lwork, int *iwork, int *liwork, - int *info); -#endif - -void diagonalize_lapack(std::vector& lmd, - std::vector& lme, - int Nk, int Nm, - Eigen::MatrixXd& Qt, - GridBase *grid) -{ -#ifdef USE_LAPACK - const int size = Nm; - int NN = Nk; - double evals_tmp[NN]; - double evec_tmp[NN][NN]; - memset(evec_tmp[0],0,sizeof(double)*NN*NN); - double DD[NN]; - double EE[NN]; - for (int i = 0; i< NN; i++) { - for (int j = i - 1; j <= i + 1; j++) { - if ( j < NN && j >= 0 ) { - if (i==j) DD[i] = lmd[i]; - if (i==j) evals_tmp[i] = lmd[i]; - if (j==(i-1)) EE[j] = lme[j]; - } - } - } - int evals_found; - int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ; - int liwork = 3+NN*10 ; - int iwork[liwork]; - double work[lwork]; - int isuppz[2*NN]; - char jobz = 'V'; // calculate evals & evecs - char range = 'I'; // calculate all evals - // char range = 'A'; // calculate all evals - char uplo = 'U'; // refer to upper half of original matrix - char compz = 'I'; // Compute eigenvectors of tridiagonal matrix - int ifail[NN]; - int info; - int total = grid->_Nprocessors; - int node = grid->_processor; - int interval = (NN/total)+1; - double vl = 0.0, vu = 0.0; - int il = interval*node+1 , iu = interval*(node+1); - if (iu > NN) iu=NN; - double tol = 0.0; - if (1) { - memset(evals_tmp,0,sizeof(double)*NN); - if ( il <= NN){ - LAPACK_dstegr(&jobz, &range, &NN, - (double*)DD, (double*)EE, - &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' - &tol, // tolerance - &evals_found, evals_tmp, (double*)evec_tmp, &NN, - isuppz, - work, &lwork, iwork, &liwork, - &info); - for (int i = iu-1; i>= il-1; i--){ - evals_tmp[i] = evals_tmp[i - (il-1)]; - if (il>1) evals_tmp[i-(il-1)]=0.; - for (int j = 0; j< NN; j++){ - evec_tmp[i][j] = evec_tmp[i - (il-1)][j]; - if (il>1) evec_tmp[i-(il-1)][j]=0.; - } - } - } - { - grid->GlobalSumVector(evals_tmp,NN); - grid->GlobalSumVector((double*)evec_tmp,NN*NN); - } - } - // Safer to sort instead of just reversing it, - // but the document of the routine says evals are sorted in increasing order. - // qr gives evals in decreasing order. - for(int i=0;i& lmd, std::vector& lme, - int Nk, int Nm, - Eigen::MatrixXd & Qt, - GridBase *grid) - { - int QRiter = 100*Nm; - int kmin = 1; - int kmax = Nk; - - // (this should be more sophisticated) - for(int iter=0; iter= kmin; --j){ - RealD dds = fabs(lmd[j-1])+fabs(lmd[j]); - if(fabs(lme[j-1])+dds > dds){ - kmax = j+1; - goto continued; - } - } - QRiter = iter; - return; - - continued: - for(int j=0; j dds){ - kmin = j+1; - break; - } - } - } - std::cout << GridLogError << "[QL method] Error - Too many iteration: "<Reset(); + StopWatch->Start(); + } + void TimingMode(int on) { + timing_mode = on; + if(on) { + StopWatch = &LocalStopWatch; + Reset(); + } + } friend std::ostream& operator<< (std::ostream& stream, Logger& log){ @@ -117,10 +131,10 @@ public: stream << log.background()<< std::left << log.topName << log.background()<< " : "; stream << log.colour() << std::left << log.name << log.background() << " : "; if ( log.timestamp ) { - StopWatch.Stop(); - GridTime now = StopWatch.Elapsed(); - if ( log.timing_mode==1 ) StopWatch.Reset(); - StopWatch.Start(); + log.StopWatch->Stop(); + GridTime now = log.StopWatch->Elapsed(); + if ( log.timing_mode==1 ) log.StopWatch->Reset(); + log.StopWatch->Start(); stream << log.evidence()<< now << log.background() << " : " ; } stream << log.colour(); diff --git a/lib/util/Init.cc b/lib/util/Init.cc index 1266d34d..031f8f5a 100644 --- a/lib/util/Init.cc +++ b/lib/util/Init.cc @@ -208,7 +208,7 @@ static int Grid_is_initialised = 0; void Grid_init(int *argc,char ***argv) { - GridLogger::StopWatch.Start(); + GridLogger::GlobalStopWatch.Start(); std::string arg; diff --git a/tests/lanczos/Test_dwf_compressed_lanczos.cc b/tests/lanczos/Test_dwf_compressed_lanczos.cc index 7fe37387..544d0358 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos.cc @@ -21,7 +21,14 @@ (ortho krylov low poly); and then fix up lowest say 200 eigenvalues by 1 run with high-degree poly (600 could be enough) */ #include -#include +#include +///////////////////////////////////////////////////////////////////////////// +// The following are now decoupled from the Lanczos and deal with grids. +// Safe to replace functionality +///////////////////////////////////////////////////////////////////////////// +#include +#include +#include #include "FieldVectorIO.h" #include "Params.h" @@ -319,7 +326,7 @@ void CoarseGridLanczos(BlockProjector& pr,RealD alpha2,RealD beta,int Npo Op2 = &Op2plain; } ProjectedHermOp,LatticeFermion> Op2nopoly(pr,HermOp); - BlockImplicitlyRestartedLanczos > IRL2(*Op2,*Op2,Nstop2,Nk2,Nm2,resid2,betastp2,MaxIt,MinRes2); + ImplicitlyRestartedLanczos > IRL2(*Op2,*Op2,Nstop2,Nk2,Nm2,resid2,betastp2,MaxIt,MinRes2); src_coarse = 1.0; @@ -350,7 +357,7 @@ void CoarseGridLanczos(BlockProjector& pr,RealD alpha2,RealD beta,int Npo ) { - IRL2.calc(eval2,coef,src_coarse,Nconv,true,SkipTest2); + IRL2.calc(eval2,coef._v,src_coarse,Nconv,true,SkipTest2); coef.resize(Nstop2); eval2.resize(Nstop2); @@ -641,7 +648,7 @@ int main (int argc, char ** argv) { } // First round of Lanczos to get low mode basis - BlockImplicitlyRestartedLanczos IRL1(Op1,Op1test,Nstop1,Nk1,Nm1,resid1,betastp1,MaxIt,MinRes1); + ImplicitlyRestartedLanczos IRL1(Op1,Op1test,Nstop1,Nk1,Nm1,resid1,betastp1,MaxIt,MinRes1); int Nconv; char tag[1024]; @@ -650,7 +657,7 @@ int main (int argc, char ** argv) { if (simple_krylov_basis) { quick_krylov_basis(evec,src,Op1,Nstop1); } else { - IRL1.calc(eval1,evec,src,Nconv,false,1); + IRL1.calc(eval1,evec._v,src,Nconv,false,1); } evec.resize(Nstop1); // and throw away superfluous eval1.resize(Nstop1);