/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h Copyright (C) 2015 Author: Peter Boyle Author: paboyle Author: Chulwoo Jung Author: Christoph Lehner This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ #ifndef GRID_BIRL_H #define GRID_BIRL_H #include //memset //#include #include namespace Grid { //////////////////////////////////////////////////////// // Move following 100 LOC to lattice/Lattice_basis.h //////////////////////////////////////////////////////// template void basisOrthogonalize(std::vector &basis,Field &w,int k) { for(int j=0; j void basisRotate(std::vector &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm) { typedef typename Field::vector_object vobj; GridBase* grid = basis[0]._grid; parallel_region { std::vector < vobj > B(Nm); // Thread private parallel_for_internal(int ss=0;ss < grid->oSites();ss++){ for(int j=j0; j void basisRotateJ(Field &result,std::vector &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm) { typedef typename Field::vector_object vobj; GridBase* grid = basis[0]._grid; result.checkerboard = basis[0].checkerboard; parallel_for(int ss=0;ss < grid->oSites();ss++){ vobj B = zero; for(int k=k0; k void basisReorderInPlace(std::vector &_v,std::vector& sort_vals, std::vector& idx) { int vlen = idx.size(); assert(vlen>=1); assert(vlen<=sort_vals.size()); assert(vlen<=_v.size()); for (size_t i=0;ii for which _vnew[j] = _vold[i], // track the move idx[j] => idx[i] // track the move idx[i] => i ////////////////////////////////////// size_t j; for (j=i;j i); assert(j!=idx.size()); assert(idx[j]==i); std::swap(_v[i]._odata,_v[idx[i]]._odata); // should use vector move constructor, no data copy std::swap(sort_vals[i],sort_vals[idx[i]]); idx[j] = idx[i]; idx[i] = i; } } } inline std::vector basisSortGetIndex(std::vector& sort_vals) { std::vector idx(sort_vals.size()); std::iota(idx.begin(), idx.end(), 0); // sort indexes based on comparing values in v std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) { return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]); }); return idx; } template void basisSortInPlace(std::vector & _v,std::vector& sort_vals, bool reverse) { std::vector idx = basisSortGetIndex(sort_vals); if (reverse) std::reverse(idx.begin(), idx.end()); basisReorderInPlace(_v,sort_vals,idx); } // PAB: faster to compute the inner products first then fuse loops. // If performance critical can improve. template void basisDeflate(const std::vector &_v,const std::vector& eval,const Field& src_orig,Field& result) { result = zero; assert(_v.size()==eval.size()); int N = (int)_v.size(); for (int i=0;i class ImplicitlyRestartedLanczosTester { public: virtual int TestConvergence(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox); virtual int ReconstructEval(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox); }; enum IRLdiagonalisation { IRLdiagonaliseWithDSTEGR, IRLdiagonaliseWithQR, IRLdiagonaliseWithEigen }; template class ImplicitlyRestartedLanczosHermOpTester : public ImplicitlyRestartedLanczosTester { public: LinearFunction &_HermOp; ImplicitlyRestartedLanczosHermOpTester(LinearFunction &HermOp) : _HermOp(HermOp) { }; int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox) { return TestConvergence(j,resid,B,eval,evalMaxApprox); } int TestConvergence(int j,RealD eresid,Field &B, RealD &eval,RealD evalMaxApprox) { Field v(B); RealD eval_poly = eval; // Apply operator _HermOp(B,v); RealD vnum = real(innerProduct(B,v)); // HermOp. RealD vden = norm2(B); RealD vv0 = norm2(v); eval = vnum/vden; v -= eval*B; RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0); std::cout.precision(13); std::cout< class ImplicitlyRestartedLanczos { private: const RealD small = 1.0e-8; int MaxIter; int MinRestart; // Minimum number of restarts; only check for convergence after int Nstop; // Number of evecs checked for convergence int Nk; // Number of converged sought // int Np; // Np -- Number of spare vecs in krylov space // == Nm - Nk int Nm; // Nm -- total number of vectors IRLdiagonalisation diagonalisation; int orth_period; RealD OrthoTime; RealD eresid, betastp; //////////////////////////////// // Embedded objects //////////////////////////////// LinearFunction &_PolyOp; LinearFunction &_HermOp; ImplicitlyRestartedLanczosTester &_Tester; // Default tester provided (we need a ref to something in default case) ImplicitlyRestartedLanczosHermOpTester SimpleTester; ///////////////////////// // Constructor ///////////////////////// public: ////////////////////////////////////////////////////////////////// // PAB: ////////////////////////////////////////////////////////////////// // Too many options & knobs. // Eliminate: // orth_period // betastp // MinRestart // // Do we really need orth_period // What is the theoretical basis & guarantees of betastp ? // Nstop=Nk viable? // MinRestart avoidable with new convergence test? // Could cut to PolyOp, HermOp, Tester, Nk, Nm, resid, maxiter (+diagonalisation) // HermOp could be eliminated if we dropped the Power method for max eval. // -- also: The eval, eval2, eval2_copy stuff is still unnecessarily unclear ////////////////////////////////////////////////////////////////// ImplicitlyRestartedLanczos(LinearFunction & PolyOp, LinearFunction & HermOp, ImplicitlyRestartedLanczosTester & Tester, int _Nstop, // sought vecs int _Nk, // sought vecs int _Nm, // spare vecs RealD _eresid, // resid in lmdue deficit int _MaxIter, // Max iterations RealD _betastp=0.0, // if beta(k) < betastp: converged int _MinRestart=1, int _orth_period = 1, IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(Tester), Nstop(_Nstop) , Nk(_Nk), Nm(_Nm), eresid(_eresid), betastp(_betastp), MaxIter(_MaxIter) , MinRestart(_MinRestart), orth_period(_orth_period), diagonalisation(_diagonalisation) { }; ImplicitlyRestartedLanczos(LinearFunction & PolyOp, LinearFunction & HermOp, int _Nstop, // sought vecs int _Nk, // sought vecs int _Nm, // spare vecs RealD _eresid, // resid in lmdue deficit int _MaxIter, // Max iterations RealD _betastp=0.0, // if beta(k) < betastp: converged int _MinRestart=1, int _orth_period = 1, IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(SimpleTester), Nstop(_Nstop) , Nk(_Nk), Nm(_Nm), eresid(_eresid), betastp(_betastp), MaxIter(_MaxIter) , MinRestart(_MinRestart), orth_period(_orth_period), diagonalisation(_diagonalisation) { }; //////////////////////////////// // Helpers //////////////////////////////// template static RealD normalise(T& v) { RealD nn = norm2(v); nn = sqrt(nn); v = v * (1.0/nn); return nn; } void orthogonalize(Field& w, std::vector& evec,int k) { OrthoTime-=usecond()/1e6; basisOrthogonalize(evec,w,k); normalise(w); OrthoTime+=usecond()/1e6; } /* Rudy Arthur's thesis pp.137 ------------------------ Require: M > K P = M − K † Compute the factorization AVM = VM HM + fM eM repeat Q=I for i = 1,...,P do QiRi =HM −θiI Q = QQi H M = Q †i H M Q i end for βK =HM(K+1,K) σK =Q(M,K) r=vK+1βK +rσK VK =VM(1:M)Q(1:M,1:K) HK =HM(1:K,1:K) →AVK =VKHK +fKe†K † Extend to an M = K + P step factorization AVM = VMHM + fMeM until convergence */ void calc(std::vector& eval, std::vector& evec, const Field& src, int& Nconv, bool reverse=false) { GridBase *grid = src._grid; assert(grid == evec[0]._grid); GridLogIRL.TimingMode(1); std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl; std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; std::cout << GridLogIRL <<" -- seek Nk = " << Nk <<" vectors"<< std::endl; std::cout << GridLogIRL <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl; std::cout << GridLogIRL <<" -- total Nm = " << Nm <<" vectors"<< std::endl; std::cout << GridLogIRL <<" -- size of eval = " << eval.size() << std::endl; std::cout << GridLogIRL <<" -- size of evec = " << evec.size() << std::endl; if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) { std::cout << GridLogIRL << "Diagonalisation is DSTEGR "< lme(Nm); std::vector lme2(Nm); std::vector eval2(Nm); std::vector eval2_copy(Nm); Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm); Field f(grid); Field v(grid); int k1 = 1; int k2 = Nk; RealD beta_k; Nconv = 0; // Set initial vector evec[0] = src; normalise(evec[0]); // Initial Nk steps OrthoTime=0.; for(int k=0; k()); std::cout<0); basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis std::cout<= MinRestart) { std::cout << GridLogIRL << "Test convergence: rotate subset of vectors to test convergence " << std::endl; Field B(grid); B.checkerboard = evec[0].checkerboard; // power of two search pattern; not every evalue in eval2 is assessed. for(int jj = 1; jj<=Nstop; jj*=2){ int j = Nstop-jj; RealD e = eval2_copy[j]; // Discard the evalue basisRotateJ(B,evec,Qt,j,0,Nk,Nm); if( _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) { if ( j > Nconv ) { Nconv=j+1; jj=Nstop; // Terminate the scan } } } // Do evec[0] for good measure { int j=0; RealD e = eval2_copy[0]; basisRotateJ(B,evec,Qt,j,0,Nk,Nm); _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox); } // test if we converged, if so, terminate std::cout<= "<=Nstop || beta_k < betastp){ if( Nconv>=Nstop){ goto converged; } } else { std::cout << GridLogIRL << "iter < MinRestart: do not yet test for convergence\n"; } // end of iter loop } std::cout<& lmd, std::vector& lme, std::vector& evec, Field& w,int Nm,int k) { const RealD tiny = 1.0e-20; assert( k< Nm ); GridStopWatch gsw_op,gsw_o; Field& evec_k = evec[k]; _PolyOp(evec_k,w); std::cout<0) w -= lme[k-1] * evec[k-1]; ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk) RealD alph = real(zalph); w = w - alph * evec_k;// 5. wk:=wk−αkvk RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop // 7. vk+1 := wk/βk+1 lmd[k] = alph; lme[k] = beta; if (k>0 && k % orth_period == 0) { orthogonalize(w,evec,k); // orthonormalise std::cout<& lmd, std::vector& lme, int Nk, int Nm, Eigen::MatrixXd & Qt, // Nm x Nm GridBase *grid) { Eigen::MatrixXd TriDiag = Eigen::MatrixXd::Zero(Nk,Nk); for(int i=0;i eigensolver(TriDiag); for (int i = 0; i < Nk; i++) { lmd[Nk-1-i] = eigensolver.eigenvalues()(i); } for (int i = 0; i < Nk; i++) { for (int j = 0; j < Nk; j++) { Qt(Nk-1-i,j) = eigensolver.eigenvectors()(j,i); } } } /////////////////////////////////////////////////////////////////////////// // File could end here if settle on Eigen ??? !!! /////////////////////////////////////////////////////////////////////////// void QR_decomp(std::vector& lmd, // Nm std::vector& lme, // Nm int Nk, int Nm, // Nk, Nm Eigen::MatrixXd& Qt, // Nm x Nm matrix RealD Dsh, int kmin, int kmax) { int k = kmin-1; RealD x; RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]); RealD c = ( lmd[k] -Dsh) *Fden; RealD s = -lme[k] *Fden; RealD tmpa1 = lmd[k]; RealD tmpa2 = lmd[k+1]; RealD tmpb = lme[k]; lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb; lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb; lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb; x =-s*lme[k+1]; lme[k+1] = c*lme[k+1]; for(int i=0; i& lmd, std::vector& lme, int Nk, int Nm, Eigen::MatrixXd & Qt, GridBase *grid) { Qt = Eigen::MatrixXd::Identity(Nm,Nm); if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) { diagonalize_lapack(lmd,lme,Nk,Nm,Qt,grid); } else if ( diagonalisation == IRLdiagonaliseWithQR ) { diagonalize_QR(lmd,lme,Nk,Nm,Qt,grid); } else if ( diagonalisation == IRLdiagonaliseWithEigen ) { diagonalize_Eigen(lmd,lme,Nk,Nm,Qt,grid); } else { assert(0); } } #ifdef USE_LAPACK void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e, double *vl, double *vu, int *il, int *iu, double *abstol, int *m, double *w, double *z, int *ldz, int *isuppz, double *work, int *lwork, int *iwork, int *liwork, int *info); #endif void diagonalize_lapack(std::vector& lmd, std::vector& lme, int Nk, int Nm, Eigen::MatrixXd& Qt, GridBase *grid) { #ifdef USE_LAPACK const int size = Nm; int NN = Nk; double evals_tmp[NN]; double evec_tmp[NN][NN]; memset(evec_tmp[0],0,sizeof(double)*NN*NN); double DD[NN]; double EE[NN]; for (int i = 0; i< NN; i++) { for (int j = i - 1; j <= i + 1; j++) { if ( j < NN && j >= 0 ) { if (i==j) DD[i] = lmd[i]; if (i==j) evals_tmp[i] = lmd[i]; if (j==(i-1)) EE[j] = lme[j]; } } } int evals_found; int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ; int liwork = 3+NN*10 ; int iwork[liwork]; double work[lwork]; int isuppz[2*NN]; char jobz = 'V'; // calculate evals & evecs char range = 'I'; // calculate all evals // char range = 'A'; // calculate all evals char uplo = 'U'; // refer to upper half of original matrix char compz = 'I'; // Compute eigenvectors of tridiagonal matrix int ifail[NN]; int info; int total = grid->_Nprocessors; int node = grid->_processor; int interval = (NN/total)+1; double vl = 0.0, vu = 0.0; int il = interval*node+1 , iu = interval*(node+1); if (iu > NN) iu=NN; double tol = 0.0; if (1) { memset(evals_tmp,0,sizeof(double)*NN); if ( il <= NN){ LAPACK_dstegr(&jobz, &range, &NN, (double*)DD, (double*)EE, &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' &tol, // tolerance &evals_found, evals_tmp, (double*)evec_tmp, &NN, isuppz, work, &lwork, iwork, &liwork, &info); for (int i = iu-1; i>= il-1; i--){ evals_tmp[i] = evals_tmp[i - (il-1)]; if (il>1) evals_tmp[i-(il-1)]=0.; for (int j = 0; j< NN; j++){ evec_tmp[i][j] = evec_tmp[i - (il-1)][j]; if (il>1) evec_tmp[i-(il-1)][j]=0.; } } } { grid->GlobalSumVector(evals_tmp,NN); grid->GlobalSumVector((double*)evec_tmp,NN*NN); } } // Safer to sort instead of just reversing it, // but the document of the routine says evals are sorted in increasing order. // qr gives evals in decreasing order. for(int i=0;i& lmd, std::vector& lme, int Nk, int Nm, Eigen::MatrixXd & Qt, GridBase *grid) { int QRiter = 100*Nm; int kmin = 1; int kmax = Nk; // (this should be more sophisticated) for(int iter=0; iter= kmin; --j){ RealD dds = fabs(lmd[j-1])+fabs(lmd[j]); if(fabs(lme[j-1])+dds > dds){ kmax = j+1; goto continued; } } QRiter = iter; return; continued: for(int j=0; j dds){ kmin = j+1; break; } } } std::cout << GridLogError << "[QL method] Error - Too many iteration: "<