From e73e4b4002cdc4634961f341e31bb68dc6f28d9b Mon Sep 17 00:00:00 2001 From: Chulwoo Jung Date: Fri, 11 Aug 2017 01:35:25 -0400 Subject: [PATCH] Minor changes fixes --- lib/algorithms/iterative/ConjugateGradient.h | 1 + .../iterative/ConjugateGradientMixedPrec.h | 1 + lib/algorithms/iterative/SimpleLanczos.h | 1283 +++++++++-------- 3 files changed, 722 insertions(+), 563 deletions(-) diff --git a/lib/algorithms/iterative/ConjugateGradient.h b/lib/algorithms/iterative/ConjugateGradient.h index ed453161..eb813a17 100644 --- a/lib/algorithms/iterative/ConjugateGradient.h +++ b/lib/algorithms/iterative/ConjugateGradient.h @@ -52,6 +52,7 @@ class ConjugateGradient : public OperatorFunction { MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv){}; + void operator()(LinearOperatorBase &Linop, const Field &src, Field &psi) { psi.checkerboard = src.checkerboard; diff --git a/lib/algorithms/iterative/ConjugateGradientMixedPrec.h b/lib/algorithms/iterative/ConjugateGradientMixedPrec.h index c7332455..88a47088 100644 --- a/lib/algorithms/iterative/ConjugateGradientMixedPrec.h +++ b/lib/algorithms/iterative/ConjugateGradientMixedPrec.h @@ -60,6 +60,7 @@ namespace Grid { } void operator() (const FieldD &src_d_in, FieldD &sol_d){ + TotalInnerIterations = 0; GridStopWatch TotalTimer; diff --git a/lib/algorithms/iterative/SimpleLanczos.h b/lib/algorithms/iterative/SimpleLanczos.h index a69c381d..79e4bd74 100644 --- a/lib/algorithms/iterative/SimpleLanczos.h +++ b/lib/algorithms/iterative/SimpleLanczos.h @@ -30,17 +30,17 @@ Author: Chulwoo Jung #ifndef GRID_LANC_H #define GRID_LANC_H -#include //memset +#include //memset #ifdef USE_LAPACK #ifdef USE_MKL #include #else -void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e, - double *vl, double *vu, int *il, int *iu, double *abstol, - int *m, double *w, double *z, int *ldz, int *isuppz, - double *work, int *lwork, int *iwork, int *liwork, - int *info); +void LAPACK_dstegr (char *jobz, char *range, int *n, double *d, double *e, + double *vl, double *vu, int *il, int *iu, double *abstol, + int *m, double *w, double *z, int *ldz, int *isuppz, + double *work, int *lwork, int *iwork, int *liwork, + int *info); //#include #endif #endif @@ -51,315 +51,365 @@ void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e, // eliminate temorary vector in calc() #define MEM_SAVE -namespace Grid { +namespace Grid +{ -struct Bisection { + struct Bisection + { #if 0 -static void get_eig2(int row_num,std::vector &ALPHA,std::vector &BETA, std::vector & eig) -{ - int i,j; - std::vector evec1(row_num+3); - std::vector evec2(row_num+3); - RealD eps2; - ALPHA[1]=0.; - BETHA[1]=0.; - for(i=0;i &ALPHA, + std::vector < RealD > &BETA, + std::vector < RealD > &eig) + { + int i, j; + std::vector < RealD > evec1 (row_num + 3); + std::vector < RealD > evec2 (row_num + 3); + RealD eps2; + ALPHA[1] = 0.; + BETHA[1] = 0.; + for (i = 0; i < row_num - 1; i++) + { + ALPHA[i + 1] = A[i * (row_num + 1)].real (); + BETHA[i + 2] = A[i * (row_num + 1) + 1].real (); + } + ALPHA[row_num] = A[(row_num - 1) * (row_num + 1)].real (); + bisec (ALPHA, BETHA, row_num, 1, row_num, 1e-10, 1e-10, evec1, eps2); + bisec (ALPHA, BETHA, row_num, 1, row_num, 1e-16, 1e-16, evec2, eps2); - // Do we really need to sort here? - int begin=1; - int end = row_num; - int swapped=1; - while(swapped) { - swapped=0; - for(i=begin;imag(evec2[i+1])) { - swap(evec2+i,evec2+i+1); - swapped=1; - } - } - end--; - for(i=end-1;i>=begin;i--){ - if(mag(evec2[i])>mag(evec2[i+1])) { - swap(evec2+i,evec2+i+1); - swapped=1; - } - } - begin++; - } + // Do we really need to sort here? + int begin = 1; + int end = row_num; + int swapped = 1; + while (swapped) + { + swapped = 0; + for (i = begin; i < end; i++) + { + if (mag (evec2[i]) > mag (evec2[i + 1])) + { + swap (evec2 + i, evec2 + i + 1); + swapped = 1; + } + } + end--; + for (i = end - 1; i >= begin; i--) + { + if (mag (evec2[i]) > mag (evec2[i + 1])) + { + swap (evec2 + i, evec2 + i + 1); + swapped = 1; + } + } + begin++; + } - for(i=0;i &c, - std::vector &b, - int n, - int m1, - int m2, - RealD eps1, - RealD relfeh, - std::vector &x, - RealD &eps2) -{ - std::vector wu(n+2); + static void bisec (std::vector < RealD > &c, + std::vector < RealD > &b, + int n, + int m1, + int m2, + RealD eps1, + RealD relfeh, std::vector < RealD > &x, RealD & eps2) + { + std::vector < RealD > wu (n + 2); - RealD h,q,x1,xu,x0,xmin,xmax; - int i,a,k; + RealD h, q, x1, xu, x0, xmin, xmax; + int i, a, k; - b[1]=0.0; - xmin=c[n]-fabs(b[n]); - xmax=c[n]+fabs(b[n]); - for(i=1;ixmax) xmax= c[i]+h; - if(c[i]-h0.0 ? xmax : -xmin); - if(eps1<=0.0) eps1=eps2; - eps2=0.5*eps1+7.0*(eps2); - x0=xmax; - for(i=m1;i<=m2;i++){ - x[i]=xmax; - wu[i]=xmin; - } - - for(k=m2;k>=m1;k--){ - xu=xmin; - i=k; - do{ - if(xu=m1); - if(x0>x[k]) x0=x[k]; - while((x0-xu)>2*relfeh*(fabs(xu)+fabs(x0))+eps1){ - x1=(xu+x0)/2; - - a=0; - q=1.0; - for(i=1;i<=n;i++){ - q=c[i]-x1-((q!=0.0)? b[i]*b[i]/q:fabs(b[i])/relfeh); - if(q<0) a++; - } -// printf("x1=%0.14e a=%d\n",x1,a); - if(ax1) x[a]=x1; + b[1] = 0.0; + xmin = c[n] - fabs (b[n]); + xmax = c[n] + fabs (b[n]); + for (i = 1; i < n; i++) + { + h = fabs (b[i]) + fabs (b[i + 1]); + if (c[i] + h > xmax) + xmax = c[i] + h; + if (c[i] - h < xmin) + xmin = c[i] - h; + } + xmax *= 2.; + + eps2 = relfeh * ((xmin + xmax) > 0.0 ? xmax : -xmin); + if (eps1 <= 0.0) + eps1 = eps2; + eps2 = 0.5 * eps1 + 7.0 * (eps2); + x0 = xmax; + for (i = m1; i <= m2; i++) + { + x[i] = xmax; + wu[i] = xmin; + } + + for (k = m2; k >= m1; k--) + { + xu = xmin; + i = k; + do + { + if (xu < wu[i]) + { + xu = wu[i]; + i = m1 - 1; + } + i--; + } + while (i >= m1); + if (x0 > x[k]) + x0 = x[k]; + while ((x0 - xu) > 2 * relfeh * (fabs (xu) + fabs (x0)) + eps1) + { + x1 = (xu + x0) / 2; + + a = 0; + q = 1.0; + for (i = 1; i <= n; i++) + { + q = + c[i] - x1 - + ((q != 0.0) ? b[i] * b[i] / q : fabs (b[i]) / relfeh); + if (q < 0) + a++; + } +// printf("x1=%0.14e a=%d\n",x1,a); + if (a < k) + { + if (a < m1) + { + xu = x1; + wu[m1] = x1; + } + else + { + xu = x1; + wu[a + 1] = x1; + if (x[a] > x1) + x[a] = x1; + } + } + else + x0 = x1; + } + printf ("x0=%0.14e xu=%0.14e k=%d\n", x0, xu, k); + x[k] = (x0 + xu) / 2; } - }else x0=x1; } - printf("x0=%0.14e xu=%0.14e k=%d\n",x0,xu,k); - x[k]=(x0+xu)/2; - } -} -}; + }; ///////////////////////////////////////////////////////////// // Implicitly restarted lanczos ///////////////////////////////////////////////////////////// -template - class SimpleLanczos { + template < class Field > class SimpleLanczos + { const RealD small = 1.0e-16; -public: + public: int lock; int get; int Niter; int converged; - int Nstop; // Number of evecs checked for convergence - int Nk; // Number of converged sought - int Np; // Np -- Number of spare vecs in kryloc space - int Nm; // Nm -- total number of vectors + int Nstop; // Number of evecs checked for convergence + int Nk; // Number of converged sought + int Np; // Np -- Number of spare vecs in kryloc space + int Nm; // Nm -- total number of vectors RealD OrthoTime; RealD eresid; - SortEigen _sort; + SortEigen < Field > _sort; - LinearOperatorBase &_Linop; + LinearOperatorBase < Field > &_Linop; - OperatorFunction &_poly; + OperatorFunction < Field > &_poly; ///////////////////////// // Constructor ///////////////////////// - void init(void){}; - void Abort(int ff, DenseVector &evals, DenseVector > &evecs); + void init (void) + { + }; + void Abort (int ff, DenseVector < RealD > &evals, + DenseVector < DenseVector < RealD > >&evecs); - SimpleLanczos( - LinearOperatorBase &Linop, // op - OperatorFunction & poly, // polynmial - int _Nstop, // sought vecs - int _Nk, // sought vecs - int _Nm, // spare vecs - RealD _eresid, // resid in lmdue deficit - int _Niter) : // Max iterations - _Linop(Linop), - _poly(poly), - Nstop(_Nstop), - Nk(_Nk), - Nm(_Nm), - eresid(_eresid), - Niter(_Niter) - { - Np = Nm-Nk; assert(Np>0); + SimpleLanczos (LinearOperatorBase < Field > &Linop, // op + OperatorFunction < Field > &poly, // polynmial + int _Nstop, // sought vecs + int _Nk, // sought vecs + int _Nm, // spare vecs + RealD _eresid, // resid in lmdue deficit + int _Niter): // Max iterations + + _Linop (Linop), + _poly (poly), + Nstop (_Nstop), Nk (_Nk), Nm (_Nm), eresid (_eresid), Niter (_Niter) + { + Np = Nm - Nk; + assert (Np > 0); }; ///////////////////////// // Sanity checked this routine (step) against Saad. ///////////////////////// - void RitzMatrix(DenseVector& evec,int k){ + void RitzMatrix (DenseVector < Field > &evec, int k) + { - if(1) return; + if (1) + return; GridBase *grid = evec[0]._grid; - Field w(grid); - std::cout<1 ) { - if (abs(in) >1.0e-9 ) { - std::cout< 1) + { + if (abs (in) > 1.0e-9) + { + std::cout << GridLogMessage << "oops" << std::endl; + abort (); + } + else + std::cout << GridLogMessage << " 0 "; + } + else + { + std::cout << GridLogMessage << " " << in << " "; + } + } + std::cout << GridLogMessage << std::endl; } - std::cout<& lmd, - DenseVector& lme, - Field& last, - Field& current, - Field & next, - uint64_t k) + void step (DenseVector < RealD > &lmd, + DenseVector < RealD > &lme, + Field & last, Field & current, Field & next, uint64_t k) { - if (lmd.size()<=k) lmd.resize(k+Nm); - if (lme.size()<=k) lme.resize(k+Nm); - - - _poly(_Linop,current,next ); // 3. wk:=Avk−βkv_{k−1} - if(k>0){ - next -= lme[k-1] * last; - } -// std::cout<" << innerProduct(last,next) <" << innerProduct(current,next) < 0) + { + next -= lme[k - 1] * last; + } +// std::cout<" << innerProduct(last,next) <" << innerProduct(current,next) <& lmd, - DenseVector& lme, - int Nk, - int Nm, - DenseVector& Qt, - RealD Dsh, - int kmin, - int kmax) + void qr_decomp (DenseVector < RealD > &lmd, + DenseVector < RealD > &lme, + int Nk, + int Nm, + DenseVector < RealD > &Qt, RealD Dsh, int kmin, int kmax) { - int k = kmin-1; + int k = kmin - 1; RealD x; - RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]); - RealD c = ( lmd[k] -Dsh) *Fden; - RealD s = -lme[k] *Fden; - - RealD tmpa1 = lmd[k]; - RealD tmpa2 = lmd[k+1]; - RealD tmpb = lme[k]; + RealD Fden = 1.0 / hypot (lmd[k] - Dsh, lme[k]); + RealD c = (lmd[k] - Dsh) * Fden; + RealD s = -lme[k] * Fden; - lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb; - lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb; - lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb; - x =-s*lme[k+1]; - lme[k+1] = c*lme[k+1]; - - for(int i=0; i& lmd, - DenseVector& lme, - int N1, // all - int N2, // get - GridBase *grid) { - const int size = Nm; - LAPACK_INT NN = N1; - double evals_tmp[NN]; - double DD[NN]; - double EE[NN]; - for (int i = 0; i< NN; i++) - for (int j = i - 1; j <= i + 1; j++) - if ( j < NN && j >= 0 ) { - if (i==j) DD[i] = lmd[i]; - if (i==j) evals_tmp[i] = lmd[i]; - if (j==(i-1)) EE[j] = lme[j]; - } - LAPACK_INT evals_found; - LAPACK_INT lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ; - LAPACK_INT liwork = 3+NN*10 ; - LAPACK_INT iwork[liwork]; - double work[lwork]; - LAPACK_INT isuppz[2*NN]; - char jobz = 'N'; // calculate evals only - char range = 'I'; // calculate il-th to iu-th evals - // char range = 'A'; // calculate all evals - char uplo = 'U'; // refer to upper half of original matrix - char compz = 'I'; // Compute eigenvectors of tridiagonal matrix - int ifail[NN]; - LAPACK_INT info; + void diagonalize_lapack (DenseVector < RealD > &lmd, DenseVector < RealD > &lme, int N1, // all + int N2, // get + GridBase * grid) + { + const int size = Nm; + LAPACK_INT NN = N1; + double evals_tmp[NN]; + double DD[NN]; + double EE[NN]; + for (int i = 0; i < NN; i++) + for (int j = i - 1; j <= i + 1; j++) + if (j < NN && j >= 0) + { + if (i == j) + DD[i] = lmd[i]; + if (i == j) + evals_tmp[i] = lmd[i]; + if (j == (i - 1)) + EE[j] = lme[j]; + } + LAPACK_INT evals_found; + LAPACK_INT lwork = + ((18 * NN) > + (1 + 4 * NN + NN * NN) ? (18 * NN) : (1 + 4 * NN + NN * NN)); + LAPACK_INT liwork = 3 + NN * 10; + LAPACK_INT iwork[liwork]; + double work[lwork]; + LAPACK_INT isuppz[2 * NN]; + char jobz = 'N'; // calculate evals only + char range = 'I'; // calculate il-th to iu-th evals + // char range = 'A'; // calculate all evals + char uplo = 'U'; // refer to upper half of original matrix + char compz = 'I'; // Compute eigenvectors of tridiagonal matrix + int ifail[NN]; + LAPACK_INT info; // int total = QMP_get_number_of_nodes(); // int node = QMP_get_node_number(); // GridBase *grid = evec[0]._grid; - int total = grid->_Nprocessors; - int node = grid->_processor; - int interval = (NN/total)+1; - double vl = 0.0, vu = 0.0; - LAPACK_INT il = interval*node+1 , iu = interval*(node+1); - if (iu > NN) iu=NN; - double tol = 0.0; - if (1) { - memset(evals_tmp,0,sizeof(double)*NN); - if ( il <= NN){ - printf("total=%d node=%d il=%d iu=%d\n",total,node,il,iu); + int total = grid->_Nprocessors; + int node = grid->_processor; + int interval = (NN / total) + 1; + double vl = 0.0, vu = 0.0; + LAPACK_INT il = interval * node + 1, iu = interval * (node + 1); + if (iu > NN) + iu = NN; + double tol = 0.0; + if (1) + { + memset (evals_tmp, 0, sizeof (double) * NN); + if (il <= NN) + { + printf ("total=%d node=%d il=%d iu=%d\n", total, node, il, iu); #ifdef USE_MKL - dstegr(&jobz, &range, &NN, + dstegr (&jobz, &range, &NN, #else - LAPACK_dstegr(&jobz, &range, &NN, + LAPACK_dstegr (&jobz, &range, &NN, #endif - (double*)DD, (double*)EE, - &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' - &tol, // tolerance - &evals_found, evals_tmp, (double*)NULL, &NN, - isuppz, - work, &lwork, iwork, &liwork, - &info); - for (int i = iu-1; i>= il-1; i--){ - printf("node=%d evals_found=%d evals_tmp[%d] = %g\n",node,evals_found, i - (il-1),evals_tmp[i - (il-1)]); - evals_tmp[i] = evals_tmp[i - (il-1)]; - if (il>1) evals_tmp[i-(il-1)]=0.; - } - } - { - grid->GlobalSumVector(evals_tmp,NN); - } - } + (double *) DD, (double *) EE, &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' + &tol, // tolerance + &evals_found, evals_tmp, (double *) NULL, &NN, + isuppz, work, &lwork, iwork, &liwork, &info); + for (int i = iu - 1; i >= il - 1; i--) + { + printf ("node=%d evals_found=%d evals_tmp[%d] = %g\n", node, + evals_found, i - (il - 1), evals_tmp[i - (il - 1)]); + evals_tmp[i] = evals_tmp[i - (il - 1)]; + if (il > 1) + evals_tmp[i - (il - 1)] = 0.; + } + } + { + grid->GlobalSumVector (evals_tmp, NN); + } + } // cheating a bit. It is better to sort instead of just reversing it, but the document of the routine says evals are sorted in increasing order. qr gives evals in decreasing order. -} -#undef LAPACK_INT + } +#undef LAPACK_INT #endif - void diagonalize(DenseVector& lmd, - DenseVector& lme, - int N2, - int N1, - GridBase *grid) + void diagonalize (DenseVector < RealD > &lmd, + DenseVector < RealD > &lme, + int N2, int N1, GridBase * grid) { #ifdef USE_LAPACK - const int check_lapack=0; // just use lapack if 0, check against lapack if 1 + const int check_lapack = 0; // just use lapack if 0, check against lapack if 1 - if(!check_lapack) - return diagonalize_lapack(lmd,lme,N2,N1,grid); + if (!check_lapack) + return diagonalize_lapack (lmd, lme, N2, N1, grid); -// diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid); +// diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid); #endif } #if 1 - static RealD normalise(Field& v) + static RealD normalise (Field & v) { - RealD nn = norm2(v); - nn = sqrt(nn); - v = v * (1.0/nn); + RealD nn = norm2 (v); + nn = sqrt (nn); + v = v * (1.0 / nn); return nn; } - void orthogonalize(Field& w, - DenseVector& evec, - int k) + void orthogonalize (Field & w, DenseVector < Field > &evec, int k) { - double t0=-usecond()/1e6; + double t0 = -usecond () / 1e6; typedef typename Field::scalar_type MyComplex; MyComplex ip; - if ( 0 ) { - for(int j=0; j &Qt) { - for(int i=0; i &Qt) + { + for (int i = 0; i < Qt.size (); ++i) + Qt[i] = 0.0; + for (int k = 0; k < Nm; ++k) + Qt[k + k * Nm] = 1.0; } - void calc( - DenseVector& eval, - const Field& src, - int& Nconv) - { + void calc (DenseVector < RealD > &eval, const Field & src, int &Nconv) + { - GridBase *grid = src._grid; -// assert(grid == src._grid); + GridBase *grid = src._grid; +// assert(grid == src._grid); - std::cout<_Nprocessors; - int node = grid->_processor; - int interval = (Nstop/total)+1; - int iu = (iter+1) - (interval*node+1); - int il = (iter+1) - (interval*(node+1)); - RealD eps2; - Bisection::bisec(lmd2,lme2,iter,il,iu,1e-16,1e-10, eval,eps2); -// diagonalize(eval2,lme2,iter,Nk,grid); - for(int i=il;i<=iu;i++) - printf("eval[%d]=%0.14e\n",i,eval[i]); - t1=usecond()/1e6; - std::cout<_Nprocessors; + int node = grid->_processor; + int interval = (Nstop / total) + 1; + int iu = (iter + 1) - (interval * node + 1); + int il = (iter + 1) - (interval * (node + 1)); + std::vector < RealD > eval2 (iter + 3); + RealD eps2; + Bisection::bisec (lmd2, lme2, iter, il, iu, 1e-16, 1e-10, eval2, + eps2); +// diagonalize(eval2,lme2,iter,Nk,grid); + RealD diff = 0.; + for (int i = il; i <= iu; i++) { + if (initted) + diff = + fabs (eval2[i] - high[iu-i]) / (fabs (eval2[i]) + + fabs (high[iu-i])); + if (initted && (diff > eresid)) + cont = 1.; + if (initted) + printf ("eval[%d]=%0.14e %0.14e, %0.14e\n", i, eval2[i], + high[iu-i], diff); + high[iu-i] = eval2[i]; + } + il = (interval * node + 1); + iu = (interval * (node + 1)); + Bisection::bisec (lmd2, lme2, iter, il, iu, 1e-16, 1e-10, eval2, + eps2); + for (int i = il; i <= iu; i++) { + if (initted) + diff = + fabs (eval2[i] - low[i]) / (fabs (eval2[i]) + + fabs (low[i])); + if (initted && (diff > eresid)) + cont = 1.; + if (initted) + printf ("eval[%d]=%0.14e %0.14e, %0.14e\n", i, eval2[i], + low[i], diff); + low[i] = eval2[i]; + } + t1 = usecond () / 1e6; + std::cout << GridLogMessage << "IRL:: diagonalize: " << t1 - + t0 << "seconds" << std::endl; + t0 = t1; + } + + for (uint64_t k = 0; k < Nk; ++k) { +// eval[k] = eval2[k]; + } + if (initted) + { + grid->GlobalSumVector (&cont, 1); + if (cont < 1.) return; + } + initted = true; + } - for(uint64_t k=0; k - static T orthQ(DenseMatrix &Q, DenseVector y){ - int N = y.size(); //Matrix Size - Fill(Q,0.0); - T tau; - for(int i=0;i 0.0){ - - T gam = conj( (y[j]/tau)/tau0 ); - for(int k=0;k<=j-1;k++){ - Q[k][j]=-gam*y[k]; + template < class T > + static T orthQ (DenseMatrix < T > &Q, DenseVector < T > y) + { + int N = y.size (); //Matrix Size + Fill (Q, 0.0); + T tau; + for (int i = 0; i < N; i++) + { + Q[i][0] = y[i]; } - Q[j][j]=tau0/tau; - } else { - Q[j-1][j]=1.0; - } - tau0 = tau; + T sig = conj (y[0]) * y[0]; + T tau0 = fabs (sqrt (sig)); + + for (int j = 1; j < N; j++) + { + sig += conj (y[j]) * y[j]; + tau = abs (sqrt (sig)); + + if (abs (tau0) > 0.0) + { + + T gam = conj ((y[j] / tau) / tau0); + for (int k = 0; k <= j - 1; k++) + { + Q[k][j] = -gam * y[k]; + } + Q[j][j] = tau0 / tau; + } + else + { + Q[j - 1][j] = 1.0; + } + tau0 = tau; + } + return tau; } - return tau; - } /** There is some matrix Q such that for any vector y Q.e_k = y and Q is unitary. **/ - template< class T> - static T orthU(DenseMatrix &Q, DenseVector y){ - T tau = orthQ(Q,y); - SL(Q); - return tau; - } + template < class T > + static T orthU (DenseMatrix < T > &Q, DenseVector < T > y) + { + T tau = orthQ (Q, y); + SL (Q); + return tau; + } /** @@ -645,132 +765,169 @@ say con = 2 **/ -template -static void Lock(DenseMatrix &H, ///Hess mtx - DenseMatrix &Q, ///Lock Transform - T val, ///value to be locked - int con, ///number already locked - RealD small, - int dfg, - bool herm) -{ - //ForceTridiagonal(H); + template < class T > static void Lock (DenseMatrix < T > &H, ///Hess mtx + DenseMatrix < T > &Q, ///Lock Transform + T val, ///value to be locked + int con, ///number already locked + RealD small, int dfg, bool herm) + { + //ForceTridiagonal(H); - int M = H.dim; - DenseVector vec; Resize(vec,M-con); + int M = H.dim; + DenseVector < T > vec; + Resize (vec, M - con); - DenseMatrix AH; Resize(AH,M-con,M-con); - AH = GetSubMtx(H,con, M, con, M); + DenseMatrix < T > AH; + Resize (AH, M - con, M - con); + AH = GetSubMtx (H, con, M, con, M); - DenseMatrix QQ; Resize(QQ,M-con,M-con); + DenseMatrix < T > QQ; + Resize (QQ, M - con, M - con); - Unity(Q); Unity(QQ); - - DenseVector evals; Resize(evals,M-con); - DenseMatrix evecs; Resize(evecs,M-con,M-con); + Unity (Q); + Unity (QQ); - Wilkinson(AH, evals, evecs, small); + DenseVector < T > evals; + Resize (evals, M - con); + DenseMatrix < T > evecs; + Resize (evecs, M - con, M - con); - int k=0; - RealD cold = abs( val - evals[k]); - for(int i=1;i (AH, evals, evecs, small); - ComplexD tau; - orthQ(QQ,vec); - //orthQM(QQ,AH,vec); + int k = 0; + RealD cold = abs (val - evals[k]); + for (int i = 1; i < M - con; i++) + { + RealD cnew = abs (val - evals[i]); + if (cnew < cold) + { + k = i; + cold = cnew; + } + } + vec = evecs[k]; - AH = Hermitian(QQ)*AH; - AH = AH*QQ; + ComplexD tau; + orthQ (QQ, vec); + //orthQM(QQ,AH,vec); - for(int i=con;i con + 2; j--) + { + + DenseMatrix < T > U; + Resize (U, j - 1 - con, j - 1 - con); + DenseVector < T > z; + Resize (z, j - 1 - con); + T nm = norm (z); + for (int k = con + 0; k < j - 1; k++) + { + z[k - con] = conj (H (j, k + 1)); + } + normalise (z); + + RealD tmp = 0; + for (int i = 0; i < z.size () - 1; i++) + { + tmp = tmp + abs (z[i]); + } + + if (tmp < small / ((RealD) z.size () - 1.0)) + { + continue; + } + + tau = orthU (U, z); + + DenseMatrix < T > Hb; + Resize (Hb, j - 1 - con, M); + + for (int a = 0; a < M; a++) + { + for (int b = 0; b < j - 1 - con; b++) + { + T sum = 0; + for (int c = 0; c < j - 1 - con; c++) + { + sum += H[a][con + 1 + c] * U[c][b]; + } //sum += H(a,con+1+c)*U(c,b);} + Hb[b][a] = sum; + } + } + + for (int k = con + 1; k < j; k++) + { + for (int l = 0; l < M; l++) + { + H[l][k] = Hb[k - 1 - con][l]; + } + } //H(Hb[k-1-con][l] , l,k);}} + + DenseMatrix < T > Qb; + Resize (Qb, M, M); + + for (int a = 0; a < M; a++) + { + for (int b = 0; b < j - 1 - con; b++) + { + T sum = 0; + for (int c = 0; c < j - 1 - con; c++) + { + sum += Q[a][con + 1 + c] * U[c][b]; + } //sum += Q(a,con+1+c)*U(c,b);} + Qb[b][a] = sum; + } + } + + for (int k = con + 1; k < j; k++) + { + for (int l = 0; l < M; l++) + { + Q[l][k] = Qb[k - 1 - con][l]; + } + } //Q(Qb[k-1-con][l] , l,k);}} + + DenseMatrix < T > Hc; + Resize (Hc, M, M); + + for (int a = 0; a < j - 1 - con; a++) + { + for (int b = 0; b < M; b++) + { + T sum = 0; + for (int c = 0; c < j - 1 - con; c++) + { + sum += conj (U[c][a]) * H[con + 1 + c][b]; + } //sum += conj( U(c,a) )*H(con+1+c,b);} + Hc[b][a] = sum; + } + } + + for (int k = 0; k < M; k++) + { + for (int l = con + 1; l < j; l++) + { + H[l][k] = Hc[k][l - 1 - con]; + } + } //H(Hc[k][l-1-con] , l,k);}} + + } } - } - - for(int j = M-1; j>con+2; j--){ - - DenseMatrix U; Resize(U,j-1-con,j-1-con); - DenseVector z; Resize(z,j-1-con); - T nm = norm(z); - for(int k = con+0;k Hb; Resize(Hb,j-1-con,M); - - for(int a = 0;a Qb; Resize(Qb,M,M); - - for(int a = 0;a Hc; Resize(Hc,M,M); - - for(int a = 0;a