/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h Copyright (C) 2015 Author: Peter Boyle Author: paboyle Author: Chulwoo Jung Author: Christoph Lehner This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ #ifndef GRID_BIRL_H #define GRID_BIRL_H #include //memset #include #include #include #include #include #include namespace Grid { ///////////////////////////////////////////////////////////// // Implicitly restarted lanczos ///////////////////////////////////////////////////////////// template class BlockImplicitlyRestartedLanczos { const RealD small = 1.0e-16; public: int lock; int get; int Niter; int converged; int Nminres; // Minimum number of restarts; only check for convergence after int Nstop; // Number of evecs checked for convergence int Nk; // Number of converged sought int Np; // Np -- Number of spare vecs in kryloc space int Nm; // Nm -- total number of vectors int orth_period; RealD OrthoTime; RealD eresid, betastp; SortEigen _sort; LinearFunction &_HermOp; LinearFunction &_HermOpTest; ///////////////////////// // Constructor ///////////////////////// BlockImplicitlyRestartedLanczos( LinearFunction & HermOp, LinearFunction & HermOpTest, int _Nstop, // sought vecs int _Nk, // sought vecs int _Nm, // spare vecs RealD _eresid, // resid in lmdue deficit RealD _betastp, // if beta(k) < betastp: converged int _Niter, // Max iterations int _Nminres, int _orth_period = 1) : _HermOp(HermOp), _HermOpTest(HermOpTest), Nstop(_Nstop), Nk(_Nk), Nm(_Nm), eresid(_eresid), betastp(_betastp), Niter(_Niter), Nminres(_Nminres), orth_period(_orth_period) { Np = Nm-Nk; assert(Np>0); }; BlockImplicitlyRestartedLanczos( LinearFunction & HermOp, LinearFunction & HermOpTest, int _Nk, // sought vecs int _Nm, // spare vecs RealD _eresid, // resid in lmdue deficit RealD _betastp, // if beta(k) < betastp: converged int _Niter, // Max iterations int _Nminres, int _orth_period = 1) : _HermOp(HermOp), _HermOpTest(HermOpTest), Nstop(_Nk), Nk(_Nk), Nm(_Nm), eresid(_eresid), betastp(_betastp), Niter(_Niter), Nminres(_Nminres), orth_period(_orth_period) { Np = Nm-Nk; assert(Np>0); }; /* Saad PP. 195 1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0 2. For k = 1,2,...,m Do: 3. wk:=Avk−βkv_{k−1} 4. αk:=(wk,vk) // 5. wk:=wk−αkvk // wk orthog vk 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop 7. vk+1 := wk/βk+1 8. EndDo */ void step(std::vector& lmd, std::vector& lme, BasisFieldVector& evec, Field& w,int Nm,int k) { assert( k< Nm ); GridStopWatch gsw_op,gsw_o; Field& evec_k = evec[k]; gsw_op.Start(); _HermOp(evec_k,w); gsw_op.Stop(); if(k>0){ w -= lme[k-1] * evec[k-1]; } ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk) RealD alph = real(zalph); w = w - alph * evec_k;// 5. wk:=wk−αkvk RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop // 7. vk+1 := wk/βk+1 std::cout<0 && k % orth_period == 0) { orthogonalize(w,evec,k); // orthonormalise } gsw_o.Stop(); if(k < Nm-1) { evec[k+1] = w; } std::cout << GridLogMessage << "Timing: operator=" << gsw_op.Elapsed() << " orth=" << gsw_o.Elapsed() << std::endl; } void qr_decomp(std::vector& lmd, std::vector& lme, int Nk, int Nm, std::vector& Qt, RealD Dsh, int kmin, int kmax) { int k = kmin-1; RealD x; RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]); RealD c = ( lmd[k] -Dsh) *Fden; RealD s = -lme[k] *Fden; RealD tmpa1 = lmd[k]; RealD tmpa2 = lmd[k+1]; RealD tmpb = lme[k]; lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb; lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb; lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb; x =-s*lme[k+1]; lme[k+1] = c*lme[k+1]; for(int i=0; i& lmd, std::vector& lme, int N1, int N2, std::vector& Qt, GridBase *grid){ std::cout << GridLogMessage << "diagonalize_lapack start\n"; GridStopWatch gsw; const int size = Nm; // tevals.resize(size); // tevecs.resize(size); LAPACK_INT NN = N1; std::vector evals_tmp(NN); std::vector evec_tmp(NN*NN); memset(&evec_tmp[0],0,sizeof(double)*NN*NN); // double AA[NN][NN]; std::vector DD(NN); std::vector EE(NN); for (int i = 0; i< NN; i++) for (int j = i - 1; j <= i + 1; j++) if ( j < NN && j >= 0 ) { if (i==j) DD[i] = lmd[i]; if (i==j) evals_tmp[i] = lmd[i]; if (j==(i-1)) EE[j] = lme[j]; } LAPACK_INT evals_found; LAPACK_INT lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ; LAPACK_INT liwork = 3+NN*10 ; std::vector iwork(liwork); std::vector work(lwork); std::vector isuppz(2*NN); char jobz = 'V'; // calculate evals & evecs char range = 'I'; // calculate all evals // char range = 'A'; // calculate all evals char uplo = 'U'; // refer to upper half of original matrix char compz = 'I'; // Compute eigenvectors of tridiagonal matrix std::vector ifail(NN); LAPACK_INT info; // int total = QMP_get_number_of_nodes(); // int node = QMP_get_node_number(); // GridBase *grid = evec[0]._grid; int total = grid->_Nprocessors; int node = grid->_processor; int interval = (NN/total)+1; double vl = 0.0, vu = 0.0; LAPACK_INT il = interval*node+1 , iu = interval*(node+1); if (iu > NN) iu=NN; double tol = 0.0; if (1) { memset(&evals_tmp[0],0,sizeof(double)*NN); if ( il <= NN){ std::cout << GridLogMessage << "dstegr started" << std::endl; gsw.Start(); dstegr(&jobz, &range, &NN, (double*)&DD[0], (double*)&EE[0], &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' &tol, // tolerance &evals_found, &evals_tmp[0], (double*)&evec_tmp[0], &NN, &isuppz[0], &work[0], &lwork, &iwork[0], &liwork, &info); gsw.Stop(); std::cout << GridLogMessage << "dstegr completed in " << gsw.Elapsed() << std::endl; for (int i = iu-1; i>= il-1; i--){ evals_tmp[i] = evals_tmp[i - (il-1)]; if (il>1) evals_tmp[i-(il-1)]=0.; for (int j = 0; j< NN; j++){ evec_tmp[i*NN + j] = evec_tmp[(i - (il-1)) * NN + j]; if (il>1) evec_tmp[(i-(il-1)) * NN + j]=0.; } } } { // QMP_sum_double_array(evals_tmp,NN); // QMP_sum_double_array((double *)evec_tmp,NN*NN); grid->GlobalSumVector(&evals_tmp[0],NN); grid->GlobalSumVector(&evec_tmp[0],NN*NN); } } // cheating a bit. It is better to sort instead of just reversing it, but the document of the routine says evals are sorted in increasing order. qr gives evals in decreasing order. for(int i=0;i& lmd, std::vector& lme, int N2, int N1, std::vector& Qt, GridBase *grid) { #ifdef USE_LAPACK_IRL const int check_lapack=0; // just use lapack if 0, check against lapack if 1 if(!check_lapack) return diagonalize_lapack(lmd,lme,N2,N1,Qt,grid); std::vector lmd2(N1); std::vector lme2(N1); std::vector Qt2(N1*N1); for(int k=0; k= kmin; --j){ RealD dds = fabs(lmd[j-1])+fabs(lmd[j]); if(fabs(lme[j-1])+dds > dds){ kmax = j+1; goto continued; } } Niter = iter; #ifdef USE_LAPACK_IRL if(check_lapack){ const double SMALL=1e-8; diagonalize_lapack(lmd2,lme2,N2,N1,Qt2,grid); std::vector lmd3(N2); for(int k=0; kSMALL) std::cout<SMALL) std::cout<SMALL) std::cout< dds){ kmin = j+1; break; } } } std::cout< static RealD normalise(T& v) { RealD nn = norm2(v); nn = sqrt(nn); v = v * (1.0/nn); return nn; } void orthogonalize(Field& w, BasisFieldVector& evec, int k) { double t0=-usecond()/1e6; evec.orthogonalize(w,k); normalise(w); t0+=usecond()/1e6; OrthoTime +=t0; } void setUnit_Qt(int Nm, std::vector &Qt) { for(int i=0; i K P = M − K † Compute the factorization AVM = VM HM + fM eM repeat Q=I for i = 1,...,P do QiRi =HM −θiI Q = QQi H M = Q †i H M Q i end for βK =HM(K+1,K) σK =Q(M,K) r=vK+1βK +rσK VK =VM(1:M)Q(1:M,1:K) HK =HM(1:K,1:K) →AVK =VKHK +fKe†K † Extend to an M = K + P step factorization AVM = VMHM + fMeM until convergence */ void calc(std::vector& eval, BasisFieldVector& evec, const Field& src, int& Nconv, bool reverse, int SkipTest) { GridBase *grid = evec._v[0]._grid;//evec.get(0 + evec_offset)._grid; assert(grid == src._grid); std::cout< lme(Nm); std::vector lme2(Nm); std::vector eval2(Nm); std::vector eval2_copy(Nm); std::vector Qt(Nm*Nm); Field f(grid); Field v(grid); int k1 = 1; int k2 = Nk; Nconv = 0; RealD beta_k; // Set initial vector evec[0] = src; normalise(evec[0]); std:: cout<0); evec.rotate(Qt,k1-1,k2+1,0,Nm,Nm); t1=usecond()/1e6; std::cout<= Nminres) { std::cout << GridLogMessage << "Rotation to test convergence " << std::endl; Field ev0_orig(grid); ev0_orig = evec[0]; evec.rotate(Qt,0,Nk,0,Nk,Nm); { std::cout << GridLogMessage << "Test convergence" << std::endl; Field B(grid); for(int j = 0; j=Nstop || beta_k < betastp){ goto converged; } std::cout << GridLogMessage << "Rotate back" << std::endl; //B[j] +=Qt[k+_Nm*j] * _v[k]._odata[ss]; { Eigen::MatrixXd qm = Eigen::MatrixXd::Zero(Nk,Nk); for (int k=0;k QtI(Nm*Nm); for (int k=0;k