From 05d04ceff80ef9eaf4888b5c0689a6d7e7897069 Mon Sep 17 00:00:00 2001 From: Chulwoo Jung Date: Thu, 25 May 2017 12:30:47 -0400 Subject: [PATCH] Adding SimpleLanczos --- lib/algorithms/iterative/SimpleLanczos.h | 776 +++++++++++++++++++++++ 1 file changed, 776 insertions(+) create mode 100644 lib/algorithms/iterative/SimpleLanczos.h diff --git a/lib/algorithms/iterative/SimpleLanczos.h b/lib/algorithms/iterative/SimpleLanczos.h new file mode 100644 index 00000000..19ebbd26 --- /dev/null +++ b/lib/algorithms/iterative/SimpleLanczos.h @@ -0,0 +1,776 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h + + Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle +Author: Chulwoo Jung + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_LANC_H +#define GRID_LANC_H + +#include //memset + +#ifdef USE_LAPACK +#ifdef USE_MKL +#include +#else +void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e, + double *vl, double *vu, int *il, int *iu, double *abstol, + int *m, double *w, double *z, int *ldz, int *isuppz, + double *work, int *lwork, int *iwork, int *liwork, + int *info); +//#include +#endif +#endif + +#include +#include + +// eliminate temorary vector in calc() +#define MEM_SAVE + +namespace Grid { + +struct Bisection { + +#if 0 +static void get_eig2(int row_num,std::vector &ALPHA,std::vector &BETA, std::vector & eig) +{ + int i,j; + std::vector evec1(row_num+3); + std::vector evec2(row_num+3); + RealD eps2; + ALPHA[1]=0.; + BETHA[1]=0.; + for(i=0;imag(evec2[i+1])) { + swap(evec2+i,evec2+i+1); + swapped=1; + } + } + end--; + for(i=end-1;i>=begin;i--){ + if(mag(evec2[i])>mag(evec2[i+1])) { + swap(evec2+i,evec2+i+1); + swapped=1; + } + } + begin++; + } + + for(i=0;i &c, + std::vector &b, + int n, + int m1, + int m2, + RealD eps1, + RealD relfeh, + std::vector &x, + RealD &eps2) +{ + std::vector wu(n+2); + + RealD h,q,x1,xu,x0,xmin,xmax; + int i,a,k; + + b[1]=0.0; + xmin=c[n]-fabs(b[n]); + xmax=c[n]+fabs(b[n]); + for(i=1;ixmax) xmax= c[i]+h; + if(c[i]-h0.0 ? xmax : -xmin); + if(eps1<=0.0) eps1=eps2; + eps2=0.5*eps1+7.0*(eps2); + x0=xmax; + for(i=m1;i<=m2;i++){ + x[i]=xmax; + wu[i]=xmin; + } + + for(k=m2;k>=m1;k--){ + xu=xmin; + i=k; + do{ + if(xu=m1); + if(x0>x[k]) x0=x[k]; + while((x0-xu)>2*relfeh*(fabs(xu)+fabs(x0))+eps1){ + x1=(xu+x0)/2; + + a=0; + q=1.0; + for(i=1;i<=n;i++){ + q=c[i]-x1-((q!=0.0)? b[i]*b[i]/q:fabs(b[i])/relfeh); + if(q<0) a++; + } +// printf("x1=%0.14e a=%d\n",x1,a); + if(ax1) x[a]=x1; + } + }else x0=x1; + } + printf("x0=%0.14e xu=%0.14e k=%d\n",x0,xu,k); + x[k]=(x0+xu)/2; + } +} +}; + +///////////////////////////////////////////////////////////// +// Implicitly restarted lanczos +///////////////////////////////////////////////////////////// + + +template + class SimpleLanczos { + + const RealD small = 1.0e-16; +public: + int lock; + int get; + int Niter; + int converged; + + int Nstop; // Number of evecs checked for convergence + int Nk; // Number of converged sought + int Np; // Np -- Number of spare vecs in kryloc space + int Nm; // Nm -- total number of vectors + + + RealD OrthoTime; + + RealD eresid; + + SortEigen _sort; + + LinearOperatorBase &_Linop; + + OperatorFunction &_poly; + + ///////////////////////// + // Constructor + ///////////////////////// + void init(void){}; + void Abort(int ff, DenseVector &evals, DenseVector > &evecs); + + SimpleLanczos( + LinearOperatorBase &Linop, // op + OperatorFunction & poly, // polynmial + int _Nstop, // sought vecs + int _Nk, // sought vecs + int _Nm, // spare vecs + RealD _eresid, // resid in lmdue deficit + int _Niter) : // Max iterations + _Linop(Linop), + _poly(poly), + Nstop(_Nstop), + Nk(_Nk), + Nm(_Nm), + eresid(_eresid), + Niter(_Niter) + { + Np = Nm-Nk; assert(Np>0); + }; + + ///////////////////////// + // Sanity checked this routine (step) against Saad. + ///////////////////////// + void RitzMatrix(DenseVector& evec,int k){ + + if(1) return; + + GridBase *grid = evec[0]._grid; + Field w(grid); + std::cout<1 ) { + if (abs(in) >1.0e-9 ) { + std::cout<& lmd, + DenseVector& lme, + Field& last, + Field& current, + Field & next, + uint64_t k) + { + if (lmd.size()<=k) lmd.resize(k+Nm); + if (lme.size()<=k) lme.resize(k+Nm); + + + _poly(_Linop,current,next ); // 3. wk:=Avk−βkv_{k−1} + if(k>0){ + next -= lme[k-1] * last; + } +// std::cout<" << innerProduct(last,next) <" << innerProduct(current,next) <& lmd, + DenseVector& lme, + int Nk, + int Nm, + DenseVector& Qt, + RealD Dsh, + int kmin, + int kmax) + { + int k = kmin-1; + RealD x; + + RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]); + RealD c = ( lmd[k] -Dsh) *Fden; + RealD s = -lme[k] *Fden; + + RealD tmpa1 = lmd[k]; + RealD tmpa2 = lmd[k+1]; + RealD tmpb = lme[k]; + + lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb; + lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb; + lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb; + x =-s*lme[k+1]; + lme[k+1] = c*lme[k+1]; + + for(int i=0; i& lmd, + DenseVector& lme, + int N1, // all + int N2, // get + GridBase *grid) { + const int size = Nm; + LAPACK_INT NN = N1; + double evals_tmp[NN]; + double DD[NN]; + double EE[NN]; + for (int i = 0; i< NN; i++) + for (int j = i - 1; j <= i + 1; j++) + if ( j < NN && j >= 0 ) { + if (i==j) DD[i] = lmd[i]; + if (i==j) evals_tmp[i] = lmd[i]; + if (j==(i-1)) EE[j] = lme[j]; + } + LAPACK_INT evals_found; + LAPACK_INT lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ; + LAPACK_INT liwork = 3+NN*10 ; + LAPACK_INT iwork[liwork]; + double work[lwork]; + LAPACK_INT isuppz[2*NN]; + char jobz = 'N'; // calculate evals only + char range = 'I'; // calculate il-th to iu-th evals + // char range = 'A'; // calculate all evals + char uplo = 'U'; // refer to upper half of original matrix + char compz = 'I'; // Compute eigenvectors of tridiagonal matrix + int ifail[NN]; + LAPACK_INT info; +// int total = QMP_get_number_of_nodes(); +// int node = QMP_get_node_number(); +// GridBase *grid = evec[0]._grid; + int total = grid->_Nprocessors; + int node = grid->_processor; + int interval = (NN/total)+1; + double vl = 0.0, vu = 0.0; + LAPACK_INT il = interval*node+1 , iu = interval*(node+1); + if (iu > NN) iu=NN; + double tol = 0.0; + if (1) { + memset(evals_tmp,0,sizeof(double)*NN); + if ( il <= NN){ + printf("total=%d node=%d il=%d iu=%d\n",total,node,il,iu); +#ifdef USE_MKL + dstegr(&jobz, &range, &NN, +#else + LAPACK_dstegr(&jobz, &range, &NN, +#endif + (double*)DD, (double*)EE, + &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' + &tol, // tolerance + &evals_found, evals_tmp, (double*)NULL, &NN, + isuppz, + work, &lwork, iwork, &liwork, + &info); + for (int i = iu-1; i>= il-1; i--){ + printf("node=%d evals_found=%d evals_tmp[%d] = %g\n",node,evals_found, i - (il-1),evals_tmp[i - (il-1)]); + evals_tmp[i] = evals_tmp[i - (il-1)]; + if (il>1) evals_tmp[i-(il-1)]=0.; + } + } + { + grid->GlobalSumVector(evals_tmp,NN); + } + } +// cheating a bit. It is better to sort instead of just reversing it, but the document of the routine says evals are sorted in increasing order. qr gives evals in decreasing order. +} +#undef LAPACK_INT +#endif + + + void diagonalize(DenseVector& lmd, + DenseVector& lme, + int N2, + int N1, + GridBase *grid) + { + +#ifdef USE_LAPACK + const int check_lapack=0; // just use lapack if 0, check against lapack if 1 + + if(!check_lapack) + return diagonalize_lapack(lmd,lme,N2,N1,grid); + +// diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid); +#endif + } + +#if 1 + static RealD normalise(Field& v) + { + RealD nn = norm2(v); + nn = sqrt(nn); + v = v * (1.0/nn); + return nn; + } + + void orthogonalize(Field& w, + DenseVector& evec, + int k) + { + double t0=-usecond()/1e6; + typedef typename Field::scalar_type MyComplex; + MyComplex ip; + + if ( 0 ) { + for(int j=0; j &Qt) { + for(int i=0; i& eval, + const Field& src, + int& Nconv) + { + + GridBase *grid = src._grid; +// assert(grid == src._grid); + + std::cout<_Nprocessors; + int node = grid->_processor; + int interval = (Nstop/total)+1; + int iu = (iter+1) - (interval*node+1); + int il = (iter+1) - (interval*(node+1)); + RealD eps2; + Bisection::bisec(lmd2,lme2,iter,il,iu,1e-16,1e-10, eval,eps2); +// diagonalize(eval2,lme2,iter,Nk,grid); + for(int i=il;i<=iu;i++) + printf("eval[%d]=%0.14e\n",i,eval[i]); + t1=usecond()/1e6; + std::cout< + static T orthQ(DenseMatrix &Q, DenseVector y){ + int N = y.size(); //Matrix Size + Fill(Q,0.0); + T tau; + for(int i=0;i 0.0){ + + T gam = conj( (y[j]/tau)/tau0 ); + for(int k=0;k<=j-1;k++){ + Q[k][j]=-gam*y[k]; + } + Q[j][j]=tau0/tau; + } else { + Q[j-1][j]=1.0; + } + tau0 = tau; + } + return tau; + } + +/** + There is some matrix Q such that for any vector y + Q.e_k = y and Q is unitary. +**/ + template< class T> + static T orthU(DenseMatrix &Q, DenseVector y){ + T tau = orthQ(Q,y); + SL(Q); + return tau; + } + + +/** + Wind up with a matrix with the first con rows untouched + +say con = 2 + Q is such that Qdag H Q has {x, x, val, 0, 0, 0, 0, ...} as 1st colum + and the matrix is upper hessenberg + and with f and Q appropriately modidied with Q is the arnoldi factorization + +**/ + +template +static void Lock(DenseMatrix &H, ///Hess mtx + DenseMatrix &Q, ///Lock Transform + T val, ///value to be locked + int con, ///number already locked + RealD small, + int dfg, + bool herm) +{ + //ForceTridiagonal(H); + + int M = H.dim; + DenseVector vec; Resize(vec,M-con); + + DenseMatrix AH; Resize(AH,M-con,M-con); + AH = GetSubMtx(H,con, M, con, M); + + DenseMatrix QQ; Resize(QQ,M-con,M-con); + + Unity(Q); Unity(QQ); + + DenseVector evals; Resize(evals,M-con); + DenseMatrix evecs; Resize(evecs,M-con,M-con); + + Wilkinson(AH, evals, evecs, small); + + int k=0; + RealD cold = abs( val - evals[k]); + for(int i=1;icon+2; j--){ + + DenseMatrix U; Resize(U,j-1-con,j-1-con); + DenseVector z; Resize(z,j-1-con); + T nm = norm(z); + for(int k = con+0;k Hb; Resize(Hb,j-1-con,M); + + for(int a = 0;a Qb; Resize(Qb,M,M); + + for(int a = 0;a Hc; Resize(Hc,M,M); + + for(int a = 0;a