/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h Copyright (C) 2015 Author: Peter Boyle Author: paboyle Author: Chulwoo Jung This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ #ifndef GRID_LANC_H #define GRID_LANC_H #include //memset #ifdef USE_LAPACK #ifdef USE_MKL #include #else void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e, double *vl, double *vu, int *il, int *iu, double *abstol, int *m, double *w, double *z, int *ldz, int *isuppz, double *work, int *lwork, int *iwork, int *liwork, int *info); //#include #endif #endif #include #include // eliminate temorary vector in calc() #define MEM_SAVE namespace Grid { struct Bisection { #if 0 static void get_eig2(int row_num,std::vector &ALPHA,std::vector &BETA, std::vector & eig) { int i,j; std::vector evec1(row_num+3); std::vector evec2(row_num+3); RealD eps2; ALPHA[1]=0.; BETHA[1]=0.; for(i=0;imag(evec2[i+1])) { swap(evec2+i,evec2+i+1); swapped=1; } } end--; for(i=end-1;i>=begin;i--){ if(mag(evec2[i])>mag(evec2[i+1])) { swap(evec2+i,evec2+i+1); swapped=1; } } begin++; } for(i=0;i &c, std::vector &b, int n, int m1, int m2, RealD eps1, RealD relfeh, std::vector &x, RealD &eps2) { std::vector wu(n+2); RealD h,q,x1,xu,x0,xmin,xmax; int i,a,k; b[1]=0.0; xmin=c[n]-fabs(b[n]); xmax=c[n]+fabs(b[n]); for(i=1;ixmax) xmax= c[i]+h; if(c[i]-h0.0 ? xmax : -xmin); if(eps1<=0.0) eps1=eps2; eps2=0.5*eps1+7.0*(eps2); x0=xmax; for(i=m1;i<=m2;i++){ x[i]=xmax; wu[i]=xmin; } for(k=m2;k>=m1;k--){ xu=xmin; i=k; do{ if(xu=m1); if(x0>x[k]) x0=x[k]; while((x0-xu)>2*relfeh*(fabs(xu)+fabs(x0))+eps1){ x1=(xu+x0)/2; a=0; q=1.0; for(i=1;i<=n;i++){ q=c[i]-x1-((q!=0.0)? b[i]*b[i]/q:fabs(b[i])/relfeh); if(q<0) a++; } // printf("x1=%0.14e a=%d\n",x1,a); if(ax1) x[a]=x1; } }else x0=x1; } printf("x0=%0.14e xu=%0.14e k=%d\n",x0,xu,k); x[k]=(x0+xu)/2; } } }; ///////////////////////////////////////////////////////////// // Implicitly restarted lanczos ///////////////////////////////////////////////////////////// template class SimpleLanczos { const RealD small = 1.0e-16; public: int lock; int get; int Niter; int converged; int Nstop; // Number of evecs checked for convergence int Nk; // Number of converged sought int Np; // Np -- Number of spare vecs in kryloc space int Nm; // Nm -- total number of vectors RealD OrthoTime; RealD eresid; SortEigen _sort; LinearOperatorBase &_Linop; OperatorFunction &_poly; ///////////////////////// // Constructor ///////////////////////// void init(void){}; void Abort(int ff, DenseVector &evals, DenseVector > &evecs); SimpleLanczos( LinearOperatorBase &Linop, // op OperatorFunction & poly, // polynmial int _Nstop, // sought vecs int _Nk, // sought vecs int _Nm, // spare vecs RealD _eresid, // resid in lmdue deficit int _Niter) : // Max iterations _Linop(Linop), _poly(poly), Nstop(_Nstop), Nk(_Nk), Nm(_Nm), eresid(_eresid), Niter(_Niter) { Np = Nm-Nk; assert(Np>0); }; ///////////////////////// // Sanity checked this routine (step) against Saad. ///////////////////////// void RitzMatrix(DenseVector& evec,int k){ if(1) return; GridBase *grid = evec[0]._grid; Field w(grid); std::cout<1 ) { if (abs(in) >1.0e-9 ) { std::cout<& lmd, DenseVector& lme, Field& last, Field& current, Field & next, uint64_t k) { if (lmd.size()<=k) lmd.resize(k+Nm); if (lme.size()<=k) lme.resize(k+Nm); _poly(_Linop,current,next ); // 3. wk:=Avk−βkv_{k−1} if(k>0){ next -= lme[k-1] * last; } // std::cout<" << innerProduct(last,next) <" << innerProduct(current,next) <& lmd, DenseVector& lme, int Nk, int Nm, DenseVector& Qt, RealD Dsh, int kmin, int kmax) { int k = kmin-1; RealD x; RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]); RealD c = ( lmd[k] -Dsh) *Fden; RealD s = -lme[k] *Fden; RealD tmpa1 = lmd[k]; RealD tmpa2 = lmd[k+1]; RealD tmpb = lme[k]; lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb; lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb; lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb; x =-s*lme[k+1]; lme[k+1] = c*lme[k+1]; for(int i=0; i& lmd, DenseVector& lme, int N1, // all int N2, // get GridBase *grid) { const int size = Nm; LAPACK_INT NN = N1; double evals_tmp[NN]; double DD[NN]; double EE[NN]; for (int i = 0; i< NN; i++) for (int j = i - 1; j <= i + 1; j++) if ( j < NN && j >= 0 ) { if (i==j) DD[i] = lmd[i]; if (i==j) evals_tmp[i] = lmd[i]; if (j==(i-1)) EE[j] = lme[j]; } LAPACK_INT evals_found; LAPACK_INT lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ; LAPACK_INT liwork = 3+NN*10 ; LAPACK_INT iwork[liwork]; double work[lwork]; LAPACK_INT isuppz[2*NN]; char jobz = 'N'; // calculate evals only char range = 'I'; // calculate il-th to iu-th evals // char range = 'A'; // calculate all evals char uplo = 'U'; // refer to upper half of original matrix char compz = 'I'; // Compute eigenvectors of tridiagonal matrix int ifail[NN]; LAPACK_INT info; // int total = QMP_get_number_of_nodes(); // int node = QMP_get_node_number(); // GridBase *grid = evec[0]._grid; int total = grid->_Nprocessors; int node = grid->_processor; int interval = (NN/total)+1; double vl = 0.0, vu = 0.0; LAPACK_INT il = interval*node+1 , iu = interval*(node+1); if (iu > NN) iu=NN; double tol = 0.0; if (1) { memset(evals_tmp,0,sizeof(double)*NN); if ( il <= NN){ printf("total=%d node=%d il=%d iu=%d\n",total,node,il,iu); #ifdef USE_MKL dstegr(&jobz, &range, &NN, #else LAPACK_dstegr(&jobz, &range, &NN, #endif (double*)DD, (double*)EE, &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' &tol, // tolerance &evals_found, evals_tmp, (double*)NULL, &NN, isuppz, work, &lwork, iwork, &liwork, &info); for (int i = iu-1; i>= il-1; i--){ printf("node=%d evals_found=%d evals_tmp[%d] = %g\n",node,evals_found, i - (il-1),evals_tmp[i - (il-1)]); evals_tmp[i] = evals_tmp[i - (il-1)]; if (il>1) evals_tmp[i-(il-1)]=0.; } } { grid->GlobalSumVector(evals_tmp,NN); } } // cheating a bit. It is better to sort instead of just reversing it, but the document of the routine says evals are sorted in increasing order. qr gives evals in decreasing order. } #undef LAPACK_INT #endif void diagonalize(DenseVector& lmd, DenseVector& lme, int N2, int N1, GridBase *grid) { #ifdef USE_LAPACK const int check_lapack=0; // just use lapack if 0, check against lapack if 1 if(!check_lapack) return diagonalize_lapack(lmd,lme,N2,N1,grid); // diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid); #endif } #if 1 static RealD normalise(Field& v) { RealD nn = norm2(v); nn = sqrt(nn); v = v * (1.0/nn); return nn; } void orthogonalize(Field& w, DenseVector& evec, int k) { double t0=-usecond()/1e6; typedef typename Field::scalar_type MyComplex; MyComplex ip; if ( 0 ) { for(int j=0; j &Qt) { for(int i=0; i& eval, const Field& src, int& Nconv) { GridBase *grid = src._grid; // assert(grid == src._grid); std::cout<_Nprocessors; int node = grid->_processor; int interval = (Nstop/total)+1; int iu = (iter+1) - (interval*node+1); int il = (iter+1) - (interval*(node+1)); RealD eps2; Bisection::bisec(lmd2,lme2,iter,il,iu,1e-16,1e-10, eval,eps2); // diagonalize(eval2,lme2,iter,Nk,grid); for(int i=il;i<=iu;i++) printf("eval[%d]=%0.14e\n",i,eval[i]); t1=usecond()/1e6; std::cout< static T orthQ(DenseMatrix &Q, DenseVector y){ int N = y.size(); //Matrix Size Fill(Q,0.0); T tau; for(int i=0;i 0.0){ T gam = conj( (y[j]/tau)/tau0 ); for(int k=0;k<=j-1;k++){ Q[k][j]=-gam*y[k]; } Q[j][j]=tau0/tau; } else { Q[j-1][j]=1.0; } tau0 = tau; } return tau; } /** There is some matrix Q such that for any vector y Q.e_k = y and Q is unitary. **/ template< class T> static T orthU(DenseMatrix &Q, DenseVector y){ T tau = orthQ(Q,y); SL(Q); return tau; } /** Wind up with a matrix with the first con rows untouched say con = 2 Q is such that Qdag H Q has {x, x, val, 0, 0, 0, 0, ...} as 1st colum and the matrix is upper hessenberg and with f and Q appropriately modidied with Q is the arnoldi factorization **/ template static void Lock(DenseMatrix &H, ///Hess mtx DenseMatrix &Q, ///Lock Transform T val, ///value to be locked int con, ///number already locked RealD small, int dfg, bool herm) { //ForceTridiagonal(H); int M = H.dim; DenseVector vec; Resize(vec,M-con); DenseMatrix AH; Resize(AH,M-con,M-con); AH = GetSubMtx(H,con, M, con, M); DenseMatrix QQ; Resize(QQ,M-con,M-con); Unity(Q); Unity(QQ); DenseVector evals; Resize(evals,M-con); DenseMatrix evecs; Resize(evecs,M-con,M-con); Wilkinson(AH, evals, evecs, small); int k=0; RealD cold = abs( val - evals[k]); for(int i=1;icon+2; j--){ DenseMatrix U; Resize(U,j-1-con,j-1-con); DenseVector z; Resize(z,j-1-con); T nm = norm(z); for(int k = con+0;k Hb; Resize(Hb,j-1-con,M); for(int a = 0;a Qb; Resize(Qb,M,M); for(int a = 0;a Hc; Resize(Hc,M,M); for(int a = 0;a