From 0486ff8e7901dccd53f47031cececf04af70f1fd Mon Sep 17 00:00:00 2001 From: paboyle Date: Tue, 20 Jun 2017 18:46:01 +0100 Subject: [PATCH 1/5] Improved the lancos --- TODO | 28 +- lib/algorithms/densematrix/DenseMatrix.h | 137 --- lib/algorithms/densematrix/Francis.h | 525 ---------- lib/algorithms/densematrix/Householder.h | 242 ----- .../iterative/ImplicitlyRestartedLanczos.h | 987 ++++-------------- lib/qcd/hmc/checkpointers/ILDGCheckpointer.h | 2 +- tests/solver/Test_dwf_lanczos.cc | 2 +- 7 files changed, 211 insertions(+), 1712 deletions(-) delete mode 100644 lib/algorithms/densematrix/DenseMatrix.h delete mode 100644 lib/algorithms/densematrix/Francis.h delete mode 100644 lib/algorithms/densematrix/Householder.h diff --git a/TODO b/TODO index a5d4cabd..eeb7dfa5 100644 --- a/TODO +++ b/TODO @@ -1,24 +1,28 @@ TODO: --------------- -Peter's work list: -1)- Precision conversion and sort out localConvert <-- -2)- Remove DenseVector, DenseMatrix; Use Eigen instead. <-- - --- Profile CG, BlockCG, etc... Flop count/rate -- PARTIAL, time but no flop/s yet --- Physical propagator interface --- Conserved currents --- GaugeFix into central location --- Multigrid Wilson and DWF, compare to other Multigrid implementations --- HDCR resume +Large item work list: +1)- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- +2)- MultiRHS with spread out extra dim +3)- BG/Q port and check +4)- Precision conversion and sort out localConvert <-- partial + - Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet +5)- Physical propagator interface +6)- Conserved currents +7)- Multigrid Wilson and DWF, compare to other Multigrid implementations +8)- HDCR resume Recent DONE +-- GaugeFix into central location <-- DONE +-- Scidac and Ildg metadata handling <-- DONE +-- Binary I/O MPI2 IO <-- DONE -- Binary I/O speed up & x-strips <-- DONE -- Cut down the exterior overhead <-- DONE -- Interior legs from SHM comms <-- DONE -- Half-precision comms <-- DONE --- Merge high precision reduction into develop --- multiRHS DWF; benchmark on Cori/BNL for comms elimination +-- Merge high precision reduction into develop <-- DONE +-- BlockCG, BCGrQ <-- DONE +-- multiRHS DWF; benchmark on Cori/BNL for comms elimination <-- DONE -- slice* linalg routines for multiRHS, BlockCG ----- diff --git a/lib/algorithms/densematrix/DenseMatrix.h b/lib/algorithms/densematrix/DenseMatrix.h deleted file mode 100644 index d86add21..00000000 --- a/lib/algorithms/densematrix/DenseMatrix.h +++ /dev/null @@ -1,137 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/algorithms/iterative/DenseMatrix.h - - Copyright (C) 2015 - -Author: Peter Boyle -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_DENSE_MATRIX_H -#define GRID_DENSE_MATRIX_H - -namespace Grid { - ///////////////////////////////////////////////////////////// - // Matrix untils - ///////////////////////////////////////////////////////////// - -template using DenseVector = std::vector; -template using DenseMatrix = DenseVector >; - -template void Size(DenseVector & vec, int &N) -{ - N= vec.size(); -} -template void Size(DenseMatrix & mat, int &N,int &M) -{ - N= mat.size(); - M= mat[0].size(); -} - -template void SizeSquare(DenseMatrix & mat, int &N) -{ - int M; Size(mat,N,M); - assert(N==M); -} - -template void Resize(DenseVector & mat, int N) { - mat.resize(N); -} -template void Resize(DenseMatrix & mat, int N, int M) { - mat.resize(N); - for(int i=0;i void Fill(DenseMatrix & mat, T&val) { - int N,M; - Size(mat,N,M); - for(int i=0;i DenseMatrix Transpose(DenseMatrix & mat){ - int N,M; - Size(mat,N,M); - DenseMatrix C; Resize(C,M,N); - for(int i=0;i void Unity(DenseMatrix &A){ - int N; SizeSquare(A,N); - for(int i=0;i -void PlusUnit(DenseMatrix & A,T c){ - int dim; SizeSquare(A,dim); - for(int i=0;i -DenseMatrix HermitianConj(DenseMatrix &mat){ - - int dim; SizeSquare(mat,dim); - - DenseMatrix C; Resize(C,dim,dim); - - for(int i=0;i -DenseMatrix GetSubMtx(DenseMatrix &A,int row_st, int row_end, int col_st, int col_end) -{ - DenseMatrix H; Resize(H,row_end - row_st,col_end-col_st); - - for(int i = row_st; i - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef FRANCIS_H -#define FRANCIS_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -//#include -//#include -//#include - -namespace Grid { - -template int SymmEigensystem(DenseMatrix &Ain, DenseVector &evals, DenseMatrix &evecs, RealD small); -template int Eigensystem(DenseMatrix &Ain, DenseVector &evals, DenseMatrix &evecs, RealD small); - -/** - Find the eigenvalues of an upper hessenberg matrix using the Francis QR algorithm. -H = - x x x x x x x x x - x x x x x x x x x - 0 x x x x x x x x - 0 0 x x x x x x x - 0 0 0 x x x x x x - 0 0 0 0 x x x x x - 0 0 0 0 0 x x x x - 0 0 0 0 0 0 x x x - 0 0 0 0 0 0 0 x x -Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary. -**/ -template -int QReigensystem(DenseMatrix &Hin, DenseVector &evals, DenseMatrix &evecs, RealD small) -{ - DenseMatrix H = Hin; - - int N ; SizeSquare(H,N); - int M = N; - - Fill(evals,0); - Fill(evecs,0); - - T s,t,x=0,y=0,z=0; - T u,d; - T apd,amd,bc; - DenseVector p(N,0); - T nrm = Norm(H); ///DenseMatrix Norm - int n, m; - int e = 0; - int it = 0; - int tot_it = 0; - int l = 0; - int r = 0; - DenseMatrix P; Resize(P,N,N); Unity(P); - DenseVector trows(N,0); - - /// Check if the matrix is really hessenberg, if not abort - RealD sth = 0; - for(int j=0;j small){ - std::cout << "Non hessenberg H = " << sth << " > " << small << std::endl; - exit(1); - } - } - } - - do{ - std::cout << "Francis QR Step N = " << N << std::endl; - /** Check for convergence - x x x x x - 0 x x x x - 0 0 x x x - 0 0 x x x - 0 0 0 0 x - for this matrix l = 4 - **/ - do{ - l = Chop_subdiag(H,nrm,e,small); - r = 0; ///May have converged on more than one eval - ///Single eval - if(l == N-1){ - evals[e] = H[l][l]; - N--; e++; r++; it = 0; - } - ///RealD eval - if(l == N-2){ - trows[l+1] = 1; ///Needed for UTSolve - apd = H[l][l] + H[l+1][l+1]; - amd = H[l][l] - H[l+1][l+1]; - bc = (T)4.0*H[l+1][l]*H[l][l+1]; - evals[e] = (T)0.5*( apd + sqrt(amd*amd + bc) ); - evals[e+1] = (T)0.5*( apd - sqrt(amd*amd + bc) ); - N-=2; e+=2; r++; it = 0; - } - } while(r>0); - - if(N ==0) break; - - DenseVector ck; Resize(ck,3); - DenseVector v; Resize(v,3); - - for(int m = N-3; m >= l; m--){ - ///Starting vector essentially random shift. - if(it%10 == 0 && N >= 3 && it > 0){ - s = (T)1.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) ); - t = (T)0.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) ); - x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t; - y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s); - z = H[m+1][m]*H[m+2][m+1]; - } - ///Starting vector implicit Q theorem - else{ - s = (H[N-2][N-2] + H[N-1][N-1]); - t = (H[N-2][N-2]*H[N-1][N-1] - H[N-2][N-1]*H[N-1][N-2]); - x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t; - y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s); - z = H[m+1][m]*H[m+2][m+1]; - } - ck[0] = x; ck[1] = y; ck[2] = z; - - if(m == l) break; - - /** Some stupid thing from numerical recipies, seems to work**/ - // PAB.. for heaven's sake quote page, purpose, evidence it works. - // what sort of comment is that!?!?!? - u=abs(H[m][m-1])*(abs(y)+abs(z)); - d=abs(x)*(abs(H[m-1][m-1])+abs(H[m][m])+abs(H[m+1][m+1])); - if ((T)abs(u+d) == (T)abs(d) ){ - l = m; break; - } - - //if (u < small){l = m; break;} - } - if(it > 100000){ - std::cout << "QReigensystem: bugger it got stuck after 100000 iterations" << std::endl; - std::cout << "got " << e << " evals " << l << " " << N << std::endl; - exit(1); - } - normalize(ck); ///Normalization cancels in PHP anyway - T beta; - Householder_vector(ck, 0, 2, v, beta); - Householder_mult(H,v,beta,0,l,l+2,0); - Householder_mult(H,v,beta,0,l,l+2,1); - ///Accumulate eigenvector - Householder_mult(P,v,beta,0,l,l+2,1); - int sw = 0; ///Are we on the last row? - for(int k=l;k(ck, 0, 2-sw, v, beta); - Householder_mult(H,v, beta,0,k+1,k+3-sw,0); - Householder_mult(H,v, beta,0,k+1,k+3-sw,1); - ///Accumulate eigenvector - Householder_mult(P,v, beta,0,k+1,k+3-sw,1); - } - it++; - tot_it++; - }while(N > 1); - N = evals.size(); - ///Annoying - UT solves in reverse order; - DenseVector tmp; Resize(tmp,N); - for(int i=0;i -int my_Wilkinson(DenseMatrix &Hin, DenseVector &evals, DenseMatrix &evecs, RealD small) -{ - /** - Find the eigenvalues of an upper Hessenberg matrix using the Wilkinson QR algorithm. - H = - x x 0 0 0 0 - x x x 0 0 0 - 0 x x x 0 0 - 0 0 x x x 0 - 0 0 0 x x x - 0 0 0 0 x x - Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary. **/ - return my_Wilkinson(Hin, evals, evecs, small, small); -} - -template -int my_Wilkinson(DenseMatrix &Hin, DenseVector &evals, DenseMatrix &evecs, RealD small, RealD tol) -{ - int N; SizeSquare(Hin,N); - int M = N; - - ///I don't want to modify the input but matricies must be passed by reference - //Scale a matrix by its "norm" - //RealD Hnorm = abs( Hin.LargestDiag() ); H = H*(1.0/Hnorm); - DenseMatrix H; H = Hin; - - RealD Hnorm = abs(Norm(Hin)); - H = H * (1.0 / Hnorm); - - // TODO use openmp and memset - Fill(evals,0); - Fill(evecs,0); - - T s, t, x = 0, y = 0, z = 0; - T u, d; - T apd, amd, bc; - DenseVector p; Resize(p,N); Fill(p,0); - - T nrm = Norm(H); ///DenseMatrix Norm - int n, m; - int e = 0; - int it = 0; - int tot_it = 0; - int l = 0; - int r = 0; - DenseMatrix P; Resize(P,N,N); - Unity(P); - DenseVector trows(N, 0); - /// Check if the matrix is really symm tridiag - RealD sth = 0; - for(int j = 0; j < N; ++j) - { - for(int i = j + 2; i < N; ++i) - { - if(abs(H[i][j]) > tol || abs(H[j][i]) > tol) - { - std::cout << "Non Tridiagonal H(" << i << ","<< j << ") = |" << Real( real( H[j][i] ) ) << "| > " << tol << std::endl; - std::cout << "Warning tridiagonalize and call again" << std::endl; - // exit(1); // see what is going on - //return; - } - } - } - - do{ - do{ - //Jasper - //Check if the subdiagonal term is small enough ( 0); - //Jasper - //Already converged - //-------------- - if(N == 0) break; - - DenseVector ck,v; Resize(ck,2); Resize(v,2); - - for(int m = N - 3; m >= l; m--) - { - ///Starting vector essentially random shift. - if(it%10 == 0 && N >= 3 && it > 0) - { - t = abs(H[N - 1][N - 2]) + abs(H[N - 2][N - 3]); - x = H[m][m] - t; - z = H[m + 1][m]; - } else { - ///Starting vector implicit Q theorem - d = (H[N - 2][N - 2] - H[N - 1][N - 1]) * (T) 0.5; - t = H[N - 1][N - 1] - H[N - 1][N - 2] * H[N - 1][N - 2] - / (d + sign(d) * sqrt(d * d + H[N - 1][N - 2] * H[N - 1][N - 2])); - x = H[m][m] - t; - z = H[m + 1][m]; - } - //Jasper - //why it is here???? - //----------------------- - if(m == l) - break; - - u = abs(H[m][m - 1]) * (abs(y) + abs(z)); - d = abs(x) * (abs(H[m - 1][m - 1]) + abs(H[m][m]) + abs(H[m + 1][m + 1])); - if ((T)abs(u + d) == (T)abs(d)) - { - l = m; - break; - } - } - //Jasper - if(it > 1000000) - { - std::cout << "Wilkinson: bugger it got stuck after 100000 iterations" << std::endl; - std::cout << "got " << e << " evals " << l << " " << N << std::endl; - exit(1); - } - // - T s, c; - Givens_calc(x, z, c, s); - Givens_mult(H, l, l + 1, c, -s, 0); - Givens_mult(H, l, l + 1, c, s, 1); - Givens_mult(P, l, l + 1, c, s, 1); - // - for(int k = l; k < N - 2; ++k) - { - x = H.A[k + 1][k]; - z = H.A[k + 2][k]; - Givens_calc(x, z, c, s); - Givens_mult(H, k + 1, k + 2, c, -s, 0); - Givens_mult(H, k + 1, k + 2, c, s, 1); - Givens_mult(P, k + 1, k + 2, c, s, 1); - } - it++; - tot_it++; - }while(N > 1); - - N = evals.size(); - ///Annoying - UT solves in reverse order; - DenseVector tmp(N); - for(int i = 0; i < N; ++i) - tmp[i] = evals[N-i-1]; - evals = tmp; - // - UTeigenvectors(H, trows, evals, evecs); - //UTSymmEigenvectors(H, trows, evals, evecs); - for(int i = 0; i < evals.size(); ++i) - { - evecs[i] = P * evecs[i]; - normalize(evecs[i]); - evals[i] = evals[i] * Hnorm; - } - // // FIXME this is to test - // Hin.write("evecs3", evecs); - // Hin.write("evals3", evals); - // // check rsd - // for(int i = 0; i < M; i++) { - // vector Aevec = Hin * evecs[i]; - // RealD norm2(0.); - // for(int j = 0; j < M; j++) { - // norm2 += (Aevec[j] - evals[i] * evecs[i][j]) * (Aevec[j] - evals[i] * evecs[i][j]); - // } - // } - return tot_it; -} - -template -void Hess(DenseMatrix &A, DenseMatrix &Q, int start){ - - /** - turn a matrix A = - x x x x x - x x x x x - x x x x x - x x x x x - x x x x x - into - x x x x x - x x x x x - 0 x x x x - 0 0 x x x - 0 0 0 x x - with householder rotations - Slow. - */ - int N ; SizeSquare(A,N); - DenseVector p; Resize(p,N); Fill(p,0); - - for(int k=start;k ck,v; Resize(ck,N-k-1); Resize(v,N-k-1); - for(int i=k+1;i(ck, 0, ck.size()-1, v, beta); ///Householder vector - Householder_mult(A,v,beta,start,k+1,N-1,0); ///A -> PA - Householder_mult(A,v,beta,start,k+1,N-1,1); ///PA -> PAP^H - ///Accumulate eigenvector - Householder_mult(Q,v,beta,start,k+1,N-1,1); ///Q -> QP^H - } - /*for(int l=0;l -void Tri(DenseMatrix &A, DenseMatrix &Q, int start){ -///Tridiagonalize a matrix - int N; SizeSquare(A,N); - Hess(A,Q,start); - /*for(int l=0;l -void ForceTridiagonal(DenseMatrix &A){ -///Tridiagonalize a matrix - int N ; SizeSquare(A,N); - for(int l=0;l -int my_SymmEigensystem(DenseMatrix &Ain, DenseVector &evals, DenseVector > &evecs, RealD small){ - ///Solve a symmetric eigensystem, not necessarily in tridiagonal form - int N; SizeSquare(Ain,N); - DenseMatrix A; A = Ain; - DenseMatrix Q; Resize(Q,N,N); Unity(Q); - Tri(A,Q,0); - int it = my_Wilkinson(A, evals, evecs, small); - for(int k=0;k -int Wilkinson(DenseMatrix &Ain, DenseVector &evals, DenseVector > &evecs, RealD small){ - return my_Wilkinson(Ain, evals, evecs, small); -} - -template -int SymmEigensystem(DenseMatrix &Ain, DenseVector &evals, DenseVector > &evecs, RealD small){ - return my_SymmEigensystem(Ain, evals, evecs, small); -} - -template -int Eigensystem(DenseMatrix &Ain, DenseVector &evals, DenseVector > &evecs, RealD small){ -///Solve a general eigensystem, not necessarily in tridiagonal form - int N = Ain.dim; - DenseMatrix A(N); A = Ain; - DenseMatrix Q(N);Q.Unity(); - Hess(A,Q,0); - int it = QReigensystem(A, evals, evecs, small); - for(int k=0;k - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef HOUSEHOLDER_H -#define HOUSEHOLDER_H - -#define TIMER(A) std::cout << GridLogMessage << __FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; -#define ENTER() std::cout << GridLogMessage << "ENTRY "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; -#define LEAVE() std::cout << GridLogMessage << "EXIT "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace Grid { -/** Comparison function for finding the max element in a vector **/ -template bool cf(T i, T j) { - return abs(i) < abs(j); -} - -/** - Calculate a real Givens angle - **/ -template inline void Givens_calc(T y, T z, T &c, T &s){ - - RealD mz = (RealD)abs(z); - - if(mz==0.0){ - c = 1; s = 0; - } - if(mz >= (RealD)abs(y)){ - T t = -y/z; - s = (T)1.0 / sqrt ((T)1.0 + t * t); - c = s * t; - } else { - T t = -z/y; - c = (T)1.0 / sqrt ((T)1.0 + t * t); - s = c * t; - } -} - -template inline void Givens_mult(DenseMatrix &A, int i, int k, T c, T s, int dir) -{ - int q ; SizeSquare(A,q); - - if(dir == 0){ - for(int j=0;j inline void Householder_vector(DenseVector input, int k, int j, DenseVector &v, T &beta) -{ - int N ; Size(input,N); - T m = *max_element(input.begin() + k, input.begin() + j + 1, cf ); - - if(abs(m) > 0.0){ - T alpha = 0; - - for(int i=k; i 0.0) v[k] = v[k] + (v[k]/abs(v[k]))*alpha; - else v[k] = -alpha; - } else{ - for(int i=k; i inline void Householder_vector(DenseVector input, int k, int j, int dir, DenseVector &v, T &beta) -{ - int N = input.size(); - T m = *max_element(input.begin() + k, input.begin() + j + 1, cf); - - if(abs(m) > 0.0){ - T alpha = 0; - - for(int i=k; i 0.0) v[dir] = v[dir] + (v[dir]/abs(v[dir]))*alpha; - else v[dir] = -alpha; - }else{ - for(int i=k; i inline void Householder_mult(DenseMatrix &A , DenseVector v, T beta, int l, int k, int j, int trans) -{ - int N ; SizeSquare(A,N); - - if(abs(beta) > 0.0){ - for(int p=l; p inline void Householder_mult_tri(DenseMatrix &A , DenseVector v, T beta, int l, int M, int k, int j, int trans) -{ - if(abs(beta) > 0.0){ - - int N ; SizeSquare(A,N); - - DenseMatrix tmp; Resize(tmp,N,N); Fill(tmp,0); - - T s; - for(int p=l; p +template using DenseVector = std::vector; + +//#include #include namespace Grid { @@ -47,104 +49,85 @@ namespace Grid { ///////////////////////////////////////////////////////////// // Implicitly restarted lanczos ///////////////////////////////////////////////////////////// - - template - class ImplicitlyRestartedLanczos { +class ImplicitlyRestartedLanczos { - const RealD small = 1.0e-16; public: - int lock; - int get; - int Niter; - int converged; + int Niter; // Max iterations + int Nstop; // Number of evecs checked for convergence + int Nk; // Number of converged sought + int Nm; // Nm -- total number of vectors - int Nstop; // Number of evecs checked for convergence - int Nk; // Number of converged sought - int Np; // Np -- Number of spare vecs in kryloc space - int Nm; // Nm -- total number of vectors + RealD eresid; - RealD eresid; + //////////////////////////////////// + // Embedded objects + //////////////////////////////////// + SortEigen _sort; + LinearOperatorBase &_Linop; + OperatorFunction &_poly; - SortEigen _sort; + ///////////////////////// + // Constructor + ///////////////////////// + ImplicitlyRestartedLanczos(LinearOperatorBase &Linop, // op + OperatorFunction & poly, // polynmial + int _Nstop, // sought vecs + int _Nk, // sought vecs + int _Nm, // total vecs + RealD _eresid, // resid in lmdue deficit + int _Niter) : // Max iterations + _Linop(Linop), _poly(poly), + Nstop(_Nstop), Nk(_Nk), Nm(_Nm), + eresid(_eresid), Niter(_Niter) { }; -// GridCartesian &_fgrid; - - LinearOperatorBase &_Linop; - - OperatorFunction &_poly; - - ///////////////////////// - // Constructor - ///////////////////////// - void init(void){}; - void Abort(int ff, DenseVector &evals, DenseVector > &evecs); - - ImplicitlyRestartedLanczos( - LinearOperatorBase &Linop, // op - OperatorFunction & poly, // polynmial - int _Nstop, // sought vecs - int _Nk, // sought vecs - int _Nm, // spare vecs - RealD _eresid, // resid in lmdue deficit - int _Niter) : // Max iterations - _Linop(Linop), - _poly(poly), - Nstop(_Nstop), - Nk(_Nk), - Nm(_Nm), - eresid(_eresid), - Niter(_Niter) - { - Np = Nm-Nk; assert(Np>0); - }; - - ImplicitlyRestartedLanczos( - LinearOperatorBase &Linop, // op +#if 0 + ImplicitlyRestartedLanczos(LinearOperatorBase &Linop, // op OperatorFunction & poly, // polynmial int _Nk, // sought vecs - int _Nm, // spare vecs + int _Nm, // total vecs RealD _eresid, // resid in lmdue deficit int _Niter) : // Max iterations - _Linop(Linop), - _poly(poly), - Nstop(_Nk), - Nk(_Nk), - Nm(_Nm), - eresid(_eresid), - Niter(_Niter) - { - Np = Nm-Nk; assert(Np>0); - }; + _Linop(Linop), _poly(poly), + Nstop(_Nk), Nk(_Nk), Nm(_Nm), + eresid(_eresid), Niter(_Niter) { }; +#endif - ///////////////////////// - // Sanity checked this routine (step) against Saad. - ///////////////////////// - void RitzMatrix(DenseVector& evec,int k){ +#if 0 + void calc(DenseVector& eval, + DenseVector& evec, + const Field& src, + int& Nconv); - if(1) return; + void step(DenseVector& lmd, + DenseVector& lme, + DenseVector& evec, + Field& w,int Nm,int k); - GridBase *grid = evec[0]._grid; - Field w(grid); - std::cout << "RitzMatrix "<1 ) { - if (abs(in) >1.0e-9 ) { - std::cout<<"oops"< &Qt) ; + + static RealD normalise(Field& v) ; + void orthogonalize(Field& w, DenseVector& evec, int k); + void diagonalize(DenseVector& lmd, + DenseVector& lme, + int N2, int N1, + DenseVector& Qt, + GridBase *grid); + + void qr_decomp(DenseVector& lmd, + DenseVector& lme, + int Nk, int Nm, + DenseVector& Qt, + RealD Dsh, int kmin, int kmax); + +#ifdef USE_LAPACK + void diagonalize_lapack(DenseVector& lmd, + DenseVector& lme, + int N1, int N2, + DenseVector& Qt, + GridBase *grid); +#endif +#endif /* Saad PP. 195 1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0 @@ -161,12 +144,12 @@ public: DenseVector& evec, Field& w,int Nm,int k) { + const RealD tiny = 1.0e-20; assert( k< Nm ); _poly(_Linop,evec[k],w); // 3. wk:=Avk−βkv_{k−1} - if(k>0){ - w -= lme[k-1] * evec[k-1]; - } + + if(k>0) w -= lme[k-1] * evec[k-1]; ComplexD zalph = innerProduct(evec[k],w); // 4. αk:=(wk,vk) RealD alph = real(zalph); @@ -176,29 +159,20 @@ public: RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop // 7. vk+1 := wk/βk+1 -// std::cout << "alpha = " << zalph << " beta "<0) { - orthogonalize(w,evec,k); // orthonormalise - } - - if(k < Nm-1) evec[k+1] = w; + if ( k > 0 ) orthogonalize(w,evec,k); // orthonormalise + if ( k < Nm-1) evec[k+1] = w; + + if ( beta < tiny ) std::cout << " beta is tiny "<& lmd, - DenseVector& lme, - int Nk, - int Nm, - DenseVector& Qt, - RealD Dsh, - int kmin, - int kmax) + + void qr_decomp(DenseVector& lmd, // Nm + DenseVector& lme, // Nm + int Nk, int Nm, + DenseVector& Qt, // Nm x Nm matrix + RealD Dsh, int kmin, int kmax) { int k = kmin-1; RealD x; @@ -218,7 +192,7 @@ public: lme[k+1] = c*lme[k+1]; for(int i=0; i& lmd, - DenseVector& lme, - int N1, - int N2, - DenseVector& Qt, - GridBase *grid){ - const int size = Nm; -// tevals.resize(size); -// tevecs.resize(size); - int NN = N1; - double evals_tmp[NN]; - double evec_tmp[NN][NN]; - memset(evec_tmp[0],0,sizeof(double)*NN*NN); -// double AA[NN][NN]; - double DD[NN]; - double EE[NN]; - for (int i = 0; i< NN; i++) - for (int j = i - 1; j <= i + 1; j++) - if ( j < NN && j >= 0 ) { - if (i==j) DD[i] = lmd[i]; - if (i==j) evals_tmp[i] = lmd[i]; - if (j==(i-1)) EE[j] = lme[j]; + DenseVector& lme, + int N1, + int N2, + DenseVector& Qt, + GridBase *grid) + { + const int size = Nm; + int NN = N1; + double evals_tmp[NN]; + double evec_tmp[NN][NN]; + memset(evec_tmp[0],0,sizeof(double)*NN*NN); + double DD[NN]; + double EE[NN]; + for (int i = 0; i< NN; i++) { + for (int j = i - 1; j <= i + 1; j++) { + if ( j < NN && j >= 0 ) { + if (i==j) DD[i] = lmd[i]; + if (i==j) evals_tmp[i] = lmd[i]; + if (j==(i-1)) EE[j] = lme[j]; + } + } } - int evals_found; - int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ; - int liwork = 3+NN*10 ; - int iwork[liwork]; - double work[lwork]; - int isuppz[2*NN]; - char jobz = 'V'; // calculate evals & evecs - char range = 'I'; // calculate all evals - // char range = 'A'; // calculate all evals - char uplo = 'U'; // refer to upper half of original matrix - char compz = 'I'; // Compute eigenvectors of tridiagonal matrix - int ifail[NN]; - int info; -// int total = QMP_get_number_of_nodes(); -// int node = QMP_get_node_number(); -// GridBase *grid = evec[0]._grid; - int total = grid->_Nprocessors; - int node = grid->_processor; - int interval = (NN/total)+1; - double vl = 0.0, vu = 0.0; - int il = interval*node+1 , iu = interval*(node+1); - if (iu > NN) iu=NN; - double tol = 0.0; - if (1) { - memset(evals_tmp,0,sizeof(double)*NN); - if ( il <= NN){ - printf("total=%d node=%d il=%d iu=%d\n",total,node,il,iu); - LAPACK_dstegr(&jobz, &range, &NN, - (double*)DD, (double*)EE, - &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' - &tol, // tolerance - &evals_found, evals_tmp, (double*)evec_tmp, &NN, - isuppz, - work, &lwork, iwork, &liwork, - &info); - for (int i = iu-1; i>= il-1; i--){ - printf("node=%d evals_found=%d evals_tmp[%d] = %g\n",node,evals_found, i - (il-1),evals_tmp[i - (il-1)]); - evals_tmp[i] = evals_tmp[i - (il-1)]; - if (il>1) evals_tmp[i-(il-1)]=0.; - for (int j = 0; j< NN; j++){ - evec_tmp[i][j] = evec_tmp[i - (il-1)][j]; - if (il>1) evec_tmp[i-(il-1)][j]=0.; - } - } + int evals_found; + int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ; + int liwork = 3+NN*10 ; + int iwork[liwork]; + double work[lwork]; + int isuppz[2*NN]; + char jobz = 'V'; // calculate evals & evecs + char range = 'I'; // calculate all evals + // char range = 'A'; // calculate all evals + char uplo = 'U'; // refer to upper half of original matrix + char compz = 'I'; // Compute eigenvectors of tridiagonal matrix + int ifail[NN]; + int info; + int total = grid->_Nprocessors; + int node = grid->_processor; + int interval = (NN/total)+1; + double vl = 0.0, vu = 0.0; + int il = interval*node+1 , iu = interval*(node+1); + if (iu > NN) iu=NN; + double tol = 0.0; + if (1) { + memset(evals_tmp,0,sizeof(double)*NN); + if ( il <= NN){ + LAPACK_dstegr(&jobz, &range, &NN, + (double*)DD, (double*)EE, + &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' + &tol, // tolerance + &evals_found, evals_tmp, (double*)evec_tmp, &NN, + isuppz, + work, &lwork, iwork, &liwork, + &info); + for (int i = iu-1; i>= il-1; i--){ + evals_tmp[i] = evals_tmp[i - (il-1)]; + if (il>1) evals_tmp[i-(il-1)]=0.; + for (int j = 0; j< NN; j++){ + evec_tmp[i][j] = evec_tmp[i - (il-1)][j]; + if (il>1) evec_tmp[i-(il-1)][j]=0.; + } + } + } + { + grid->GlobalSumVector(evals_tmp,NN); + grid->GlobalSumVector((double*)evec_tmp,NN*NN); + } + } + // cheating a bit. + // It is better to sort instead of just reversing it, + // but the document of the routine says evals are sorted in increasing order. + // qr gives evals in decreasing order. + for(int i=0;iGlobalSumVector(evals_tmp,NN); - grid->GlobalSumVector((double*)evec_tmp,NN*NN); - } - } -// cheating a bit. It is better to sort instead of just reversing it, but the document of the routine says evals are sorted in increasing order. qr gives evals in decreasing order. - for(int i=0;i& lmd, DenseVector& lme, int N2, @@ -354,24 +324,23 @@ public: if(!check_lapack) return diagonalize_lapack(lmd,lme,N2,N1,Qt,grid); - DenseVector lmd2(N1); - DenseVector lme2(N1); - DenseVector Qt2(N1*N1); - for(int k=0; k lmd2(N1); + DenseVector lme2(N1); + DenseVector Qt2(N1*N1); + for(int k=0; k lmd3(N2); - for(int k=0; k lmd3(N2); + for(int k=0; kSMALL) std::cout <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <SMALL) std::cout <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <SMALL) std::cout <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] < &Qt) { for(int i=0; i & bq, - Field &bf, - DenseMatrix &H){ - - GridBase *grid = bq[0]._grid; - - RealD beta; - RealD sqbt; - RealD alpha; - - for(int i=start;i 1) std::cout << "orthagonality refined " << re << " times" < evals, - DenseVector evecs){ - int N= evals.size(); - _sort.push(evals,evecs, evals.size(),N); - } - - void ImplicitRestart(int TM, DenseVector &evals, DenseVector > &evecs, DenseVector &bq, Field &bf, int cont) - { - std::cout << "ImplicitRestart begin. Eigensort starting\n"; - - DenseMatrix H; Resize(H,Nm,Nm); - - EigenSort(evals, evecs); - - ///Assign shifts - int K=Nk; - int M=Nm; - int P=Np; - int converged=0; - if(K - converged < 4) P = (M - K-1); //one - // DenseVector shifts(P + shift_extra.size()); - DenseVector shifts(P); - for(int k = 0; k < P; ++k) - shifts[k] = evals[k]; - - /// Shift to form a new H and q - DenseMatrix Q; Resize(Q,TM,TM); - Unity(Q); - Shift(Q, shifts); // H is implicitly passed in in Rudy's Shift routine - - int ff = K; - - /// Shifted H defines a new K step Arnoldi factorization - RealD beta = H[ff][ff-1]; - RealD sig = Q[TM - 1][ff - 1]; - std::cout << "beta = " << beta << " sig = " << real(sig) < q Q - times_real(bq, Q, TM); - - std::cout << norm2(bq[0]) << " -- after " << ff < &bq, Field &bf, DenseVector > & evecs,DenseVector &evals) - { - init(); - - int M=Nm; - - DenseMatrix H; Resize(H,Nm,Nm); - Resize(evals,Nm); - Resize(evecs,Nm); - - int ff = Lanczos_Factor(0, M, cont, bq,bf,H); // 0--M to begin with - - if(ff < M) { - std::cout << "Krylov: aborting ff "< " << it << std::endl; - int lock_num = lock ? converged : 0; - DenseVector tevals(M - lock_num ); - DenseMatrix tevecs; Resize(tevecs,M - lock_num,M - lock_num); - - //check residual of polynominal - TestConv(H,M, tevals, tevecs); - - if(converged >= Nk) - break; - - ImplicitRestart(ff, tevals,tevecs,H); - } - Wilkinson(H, evals, evecs, small); - // Check(); - - std::cout << "Done "< & H,DenseMatrix &Q, DenseVector shifts) { - - int P; Size(shifts,P); - int M; SizeSquare(Q,M); - - Unity(Q); - - int lock_num = lock ? converged : 0; - - RealD t_Househoulder_vector(0.0); - RealD t_Househoulder_mult(0.0); - - for(int i=0;i ck(3), v(3); - - x = H[lock_num+0][lock_num+0]-shifts[i]; - y = H[lock_num+1][lock_num+0]; - ck[0] = x; ck[1] = y; ck[2] = 0; - - normalise(ck); ///Normalization cancels in PHP anyway - RealD beta; - - Householder_vector(ck, 0, 2, v, beta); - Householder_mult(H,v,beta,0,lock_num+0,lock_num+2,0); - Householder_mult(H,v,beta,0,lock_num+0,lock_num+2,1); - ///Accumulate eigenvector - Householder_mult(Q,v,beta,0,lock_num+0,lock_num+2,1); - - int sw = 0; - for(int k=lock_num+0;k(ck, 0, 2-sw, v, beta); - Householder_mult(H,v, beta,0,k+1,k+3-sw,0); - Householder_mult(H,v, beta,0,k+1,k+3-sw,1); - ///Accumulate eigenvector - Householder_mult(Q,v, beta,0,k+1,k+3-sw,1); - } - } - } - - void TestConv(DenseMatrix & H,int SS, - DenseVector &bq, Field &bf, - DenseVector &tevals, DenseVector > &tevecs, - int lock, int converged) - { - std::cout << "Converged " << converged << " so far." << std::endl; - int lock_num = lock ? converged : 0; - int M = Nm; - - ///Active Factorization - DenseMatrix AH; Resize(AH,SS - lock_num,SS - lock_num ); - - AH = GetSubMtx(H,lock_num, SS, lock_num, SS); - - int NN=tevals.size(); - int AHsize=SS-lock_num; - - RealD small=1.0e-16; - Wilkinson(AH, tevals, tevecs, small); - - EigenSort(tevals, tevecs); - - RealD resid_nrm= norm2(bf); - - if(!lock) converged = 0; -#if 0 - for(int i = SS - lock_num - 1; i >= SS - Nk && i >= 0; --i){ - - RealD diff = 0; - diff = abs( tevecs[i][Nm - 1 - lock_num] ) * resid_nrm; - - std::cout << "residual estimate " << SS-1-i << " " << diff << " of (" << tevals[i] << ")" << std::endl; - - if(diff < converged) { - - if(lock) { - - DenseMatrix Q; Resize(Q,M,M); - bool herm = true; - - Lock(H, Q, tevals[i], converged, small, SS, herm); - - times_real(bq, Q, bq.size()); - bf = Q[M - 1][M - 1]* bf; - lock_num++; - } - converged++; - std::cout << " converged on eval " << converged << " of " << Nk << std::endl; - } else { - break; - } - } -#endif - std::cout << "Got " << converged << " so far " < &evals, - DenseVector > &evecs) { - - DenseVector goodval(this->get); - - EigenSort(evals,evecs); - - int NM = Nm; - - DenseVector< DenseVector > V; Size(V,NM); - DenseVector QZ(NM*NM); - - for(int i = 0; i < NM; i++){ - for(int j = 0; j < NM; j++){ - // evecs[i][j]; - } - } - } - - -/** - There is some matrix Q such that for any vector y - Q.e_1 = y and Q is unitary. -**/ - template - static T orthQ(DenseMatrix &Q, DenseVector y){ - int N = y.size(); //Matrix Size - Fill(Q,0.0); - T tau; - for(int i=0;i 0.0){ - - T gam = conj( (y[j]/tau)/tau0 ); - for(int k=0;k<=j-1;k++){ - Q[k][j]=-gam*y[k]; - } - Q[j][j]=tau0/tau; - } else { - Q[j-1][j]=1.0; - } - tau0 = tau; - } - return tau; - } - -/** - There is some matrix Q such that for any vector y - Q.e_k = y and Q is unitary. -**/ - template< class T> - static T orthU(DenseMatrix &Q, DenseVector y){ - T tau = orthQ(Q,y); - SL(Q); - return tau; - } - - -/** - Wind up with a matrix with the first con rows untouched - -say con = 2 - Q is such that Qdag H Q has {x, x, val, 0, 0, 0, 0, ...} as 1st colum - and the matrix is upper hessenberg - and with f and Q appropriately modidied with Q is the arnoldi factorization - -**/ - -template -static void Lock(DenseMatrix &H, // Hess mtx - DenseMatrix &Q, // Lock Transform - T val, // value to be locked - int con, // number already locked - RealD small, - int dfg, - bool herm) -{ - //ForceTridiagonal(H); - - int M = H.dim; - DenseVector vec; Resize(vec,M-con); - - DenseMatrix AH; Resize(AH,M-con,M-con); - AH = GetSubMtx(H,con, M, con, M); - - DenseMatrix QQ; Resize(QQ,M-con,M-con); - - Unity(Q); Unity(QQ); - - DenseVector evals; Resize(evals,M-con); - DenseMatrix evecs; Resize(evecs,M-con,M-con); - - Wilkinson(AH, evals, evecs, small); - - int k=0; - RealD cold = abs( val - evals[k]); - for(int i=1;icon+2; j--){ - - DenseMatrix U; Resize(U,j-1-con,j-1-con); - DenseVector z; Resize(z,j-1-con); - T nm = norm(z); - for(int k = con+0;k Hb; Resize(Hb,j-1-con,M); - - for(int a = 0;a Qb; Resize(Qb,M,M); - - for(int a = 0;a Hc; Resize(Hc,M,M); - - for(int a = 0;a { FieldMetaData header; IldgReader _IldgReader; _IldgReader.open(config); - _IldgReader.readConfiguration(config,U,header); // format from the header + _IldgReader.readConfiguration(U,header); // format from the header _IldgReader.close(); std::cout << GridLogMessage << "Read ILDG Configuration from " << config diff --git a/tests/solver/Test_dwf_lanczos.cc b/tests/solver/Test_dwf_lanczos.cc index bb978186..48cca378 100644 --- a/tests/solver/Test_dwf_lanczos.cc +++ b/tests/solver/Test_dwf_lanczos.cc @@ -54,7 +54,7 @@ int main (int argc, char ** argv) GridParallelRNG RNG5rb(FrbGrid); RNG5.SeedFixedIntegers(seeds5); LatticeGaugeField Umu(UGrid); - SU3::TepidConfiguration(RNG4, Umu); + SU3::HotConfiguration(RNG4, Umu); std::vector U(4,UGrid); for(int mu=0;mu Date: Wed, 21 Jun 2017 02:26:03 +0100 Subject: [PATCH 2/5] Simplified lanczos, added Eigen diagonalisation. Curious if we can deprecate dependencly on BLAS. Will see when we get 48^3 running on our BG/Q port --- .../iterative/BlockConjugateGradient.h | 7 +- lib/algorithms/iterative/EigenSort.h | 81 -- .../iterative/ImplicitlyRestartedLanczos.h | 1074 +++++++++-------- tests/solver/Test_dwf_lanczos.cc | 9 +- 4 files changed, 547 insertions(+), 624 deletions(-) delete mode 100644 lib/algorithms/iterative/EigenSort.h diff --git a/lib/algorithms/iterative/BlockConjugateGradient.h b/lib/algorithms/iterative/BlockConjugateGradient.h index f8b83b1f..9418f63c 100644 --- a/lib/algorithms/iterative/BlockConjugateGradient.h +++ b/lib/algorithms/iterative/BlockConjugateGradient.h @@ -56,11 +56,8 @@ class BlockConjugateGradient : public OperatorFunction { Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true) - : Tolerance(tol), - CGtype(cgtype), - blockDim(_Orthog), - MaxIterations(maxit), - ErrorOnNoConverge(err_on_no_conv){}; + : Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv) + {}; //////////////////////////////////////////////////////////////////////////////////////////////////// // Thin QR factorisation (google it) diff --git a/lib/algorithms/iterative/EigenSort.h b/lib/algorithms/iterative/EigenSort.h deleted file mode 100644 index 23621544..00000000 --- a/lib/algorithms/iterative/EigenSort.h +++ /dev/null @@ -1,81 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/algorithms/iterative/EigenSort.h - - Copyright (C) 2015 - -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_EIGENSORT_H -#define GRID_EIGENSORT_H - - -namespace Grid { - ///////////////////////////////////////////////////////////// - // Eigen sorter to begin with - ///////////////////////////////////////////////////////////// - -template -class SortEigen { - private: - -//hacking for testing for now - private: - static bool less_lmd(RealD left,RealD right){ - return left > right; - } - static bool less_pair(std::pair& left, - std::pair& right){ - return left.first > (right.first); - } - - - public: - - void push(DenseVector& lmd, - DenseVector& evec,int N) { - DenseVector cpy(lmd.size(),evec[0]._grid); - for(int i=0;i > emod(lmd.size()); - for(int i=0;i(lmd[i],&cpy[i]); - - partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair); - - typename DenseVector >::iterator it = emod.begin(); - for(int i=0;ifirst; - evec[i]=*(it->second); - ++it; - } - } - void push(DenseVector& lmd,int N) { - std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd); - } - bool saturated(RealD lmd, RealD thrs) { - return fabs(lmd) > fabs(thrs); - } -}; - -} -#endif diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index acd67592..571bf1b2 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -7,7 +7,8 @@ Copyright (C) 2015 Author: Peter Boyle -Author: paboyle +Author: Chulwoo Jung +Author: Guido Cossu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,35 +32,71 @@ Author: paboyle #include //memset -#ifdef USE_LAPACK -void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e, - double *vl, double *vu, int *il, int *iu, double *abstol, - int *m, double *w, double *z, int *ldz, int *isuppz, - double *work, int *lwork, int *iwork, int *liwork, - int *info); -#endif - -template using DenseVector = std::vector; - -//#include -#include - namespace Grid { + enum IRLdiagonalisation { + IRLdiagonaliseWithDSTEGR, + IRLdiagonaliseWithQR, + IRLdiagonaliseWithEigen + }; + //////////////////////////////////////////////////////////////////////////////// + // Helper class for sorting the evalues AND evectors by Field + // Use pointer swizzle on vectors + //////////////////////////////////////////////////////////////////////////////// +template +class SortEigen { + private: + static bool less_lmd(RealD left,RealD right){ + return left > right; + } + static bool less_pair(std::pair& left, + std::pair& right){ + return left.first > (right.first); + } + + public: + void push(std::vector& lmd,std::vector& evec,int N) { + + //////////////////////////////////////////////////////////////////////// + // PAB: FIXME: VERY VERY VERY wasteful: takes a copy of the entire vector set. + // : The vector reorder should be done by pointer swizzle somehow + //////////////////////////////////////////////////////////////////////// + std::vector cpy(lmd.size(),evec[0]._grid); + for(int i=0;i > emod(lmd.size()); + + for(int i=0;i(lmd[i],&cpy[i]); + + partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair); + + typename std::vector >::iterator it = emod.begin(); + for(int i=0;ifirst; + evec[i]=*(it->second); + ++it; + } + } + void push(std::vector& lmd,int N) { + std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd); + } + bool saturated(RealD lmd, RealD thrs) { + return fabs(lmd) > fabs(thrs); + } +}; + ///////////////////////////////////////////////////////////// // Implicitly restarted lanczos ///////////////////////////////////////////////////////////// template class ImplicitlyRestartedLanczos { - -public: - int Niter; // Max iterations - int Nstop; // Number of evecs checked for convergence - int Nk; // Number of converged sought - int Nm; // Nm -- total number of vectors - +private: + int MaxIter; // Max iterations + int Nstop; // Number of evecs checked for convergence + int Nk; // Number of converged sought + int Nm; // Nm -- total number of vectors RealD eresid; - + IRLdiagonalisation diagonalisation; //////////////////////////////////// // Embedded objects //////////////////////////////////// @@ -70,362 +107,20 @@ public: ///////////////////////// // Constructor ///////////////////////// +public: ImplicitlyRestartedLanczos(LinearOperatorBase &Linop, // op - OperatorFunction & poly, // polynmial - int _Nstop, // sought vecs + OperatorFunction & poly, // polynomial + int _Nstop, // really sought vecs int _Nk, // sought vecs int _Nm, // total vecs - RealD _eresid, // resid in lmdue deficit - int _Niter) : // Max iterations + RealD _eresid, // resid in lmd deficit + int _MaxIter, // Max iterations + IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen ) : _Linop(Linop), _poly(poly), - Nstop(_Nstop), Nk(_Nk), Nm(_Nm), - eresid(_eresid), Niter(_Niter) { }; - -#if 0 - ImplicitlyRestartedLanczos(LinearOperatorBase &Linop, // op - OperatorFunction & poly, // polynmial - int _Nk, // sought vecs - int _Nm, // total vecs - RealD _eresid, // resid in lmdue deficit - int _Niter) : // Max iterations - _Linop(Linop), _poly(poly), - Nstop(_Nk), Nk(_Nk), Nm(_Nm), - eresid(_eresid), Niter(_Niter) { }; -#endif - -#if 0 - void calc(DenseVector& eval, - DenseVector& evec, - const Field& src, - int& Nconv); - - void step(DenseVector& lmd, - DenseVector& lme, - DenseVector& evec, - Field& w,int Nm,int k); - - void setUnit_Qt(int Nm, DenseVector &Qt) ; - - static RealD normalise(Field& v) ; - void orthogonalize(Field& w, DenseVector& evec, int k); - void diagonalize(DenseVector& lmd, - DenseVector& lme, - int N2, int N1, - DenseVector& Qt, - GridBase *grid); - - void qr_decomp(DenseVector& lmd, - DenseVector& lme, - int Nk, int Nm, - DenseVector& Qt, - RealD Dsh, int kmin, int kmax); - -#ifdef USE_LAPACK - void diagonalize_lapack(DenseVector& lmd, - DenseVector& lme, - int N1, int N2, - DenseVector& Qt, - GridBase *grid); -#endif -#endif - -/* Saad PP. 195 -1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0 -2. For k = 1,2,...,m Do: -3. wk:=Avk−βkv_{k−1} -4. αk:=(wk,vk) // -5. wk:=wk−αkvk // wk orthog vk -6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop -7. vk+1 := wk/βk+1 -8. EndDo - */ - void step(DenseVector& lmd, - DenseVector& lme, - DenseVector& evec, - Field& w,int Nm,int k) - { - const RealD tiny = 1.0e-20; - assert( k< Nm ); - - _poly(_Linop,evec[k],w); // 3. wk:=Avk−βkv_{k−1} - - if(k>0) w -= lme[k-1] * evec[k-1]; - - ComplexD zalph = innerProduct(evec[k],w); // 4. αk:=(wk,vk) - RealD alph = real(zalph); - - w = w - alph * evec[k];// 5. wk:=wk−αkvk - - RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop - // 7. vk+1 := wk/βk+1 - - lmd[k] = alph; - lme[k] = beta; - - if ( k > 0 ) orthogonalize(w,evec,k); // orthonormalise - if ( k < Nm-1) evec[k+1] = w; - - if ( beta < tiny ) std::cout << " beta is tiny "<& lmd, // Nm - DenseVector& lme, // Nm - int Nk, int Nm, - DenseVector& Qt, // Nm x Nm matrix - RealD Dsh, int kmin, int kmax) - { - int k = kmin-1; - RealD x; - - RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]); - RealD c = ( lmd[k] -Dsh) *Fden; - RealD s = -lme[k] *Fden; - - RealD tmpa1 = lmd[k]; - RealD tmpa2 = lmd[k+1]; - RealD tmpb = lme[k]; - - lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb; - lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb; - lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb; - x =-s*lme[k+1]; - lme[k+1] = c*lme[k+1]; - - for(int i=0; i& lmd, - DenseVector& lme, - int N1, - int N2, - DenseVector& Qt, - GridBase *grid) - { - const int size = Nm; - int NN = N1; - double evals_tmp[NN]; - double evec_tmp[NN][NN]; - memset(evec_tmp[0],0,sizeof(double)*NN*NN); - double DD[NN]; - double EE[NN]; - for (int i = 0; i< NN; i++) { - for (int j = i - 1; j <= i + 1; j++) { - if ( j < NN && j >= 0 ) { - if (i==j) DD[i] = lmd[i]; - if (i==j) evals_tmp[i] = lmd[i]; - if (j==(i-1)) EE[j] = lme[j]; - } - } - } - int evals_found; - int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ; - int liwork = 3+NN*10 ; - int iwork[liwork]; - double work[lwork]; - int isuppz[2*NN]; - char jobz = 'V'; // calculate evals & evecs - char range = 'I'; // calculate all evals - // char range = 'A'; // calculate all evals - char uplo = 'U'; // refer to upper half of original matrix - char compz = 'I'; // Compute eigenvectors of tridiagonal matrix - int ifail[NN]; - int info; - int total = grid->_Nprocessors; - int node = grid->_processor; - int interval = (NN/total)+1; - double vl = 0.0, vu = 0.0; - int il = interval*node+1 , iu = interval*(node+1); - if (iu > NN) iu=NN; - double tol = 0.0; - if (1) { - memset(evals_tmp,0,sizeof(double)*NN); - if ( il <= NN){ - LAPACK_dstegr(&jobz, &range, &NN, - (double*)DD, (double*)EE, - &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' - &tol, // tolerance - &evals_found, evals_tmp, (double*)evec_tmp, &NN, - isuppz, - work, &lwork, iwork, &liwork, - &info); - for (int i = iu-1; i>= il-1; i--){ - evals_tmp[i] = evals_tmp[i - (il-1)]; - if (il>1) evals_tmp[i-(il-1)]=0.; - for (int j = 0; j< NN; j++){ - evec_tmp[i][j] = evec_tmp[i - (il-1)][j]; - if (il>1) evec_tmp[i-(il-1)][j]=0.; - } - } - } - { - grid->GlobalSumVector(evals_tmp,NN); - grid->GlobalSumVector((double*)evec_tmp,NN*NN); - } - } - // cheating a bit. - // It is better to sort instead of just reversing it, - // but the document of the routine says evals are sorted in increasing order. - // qr gives evals in decreasing order. - for(int i=0;i& lmd, - DenseVector& lme, - int N2, - int N1, - DenseVector& Qt, - GridBase *grid) - { - -#ifdef USE_LAPACK - const int check_lapack=0; // just use lapack if 0, check against lapack if 1 - - if(!check_lapack) - return diagonalize_lapack(lmd,lme,N2,N1,Qt,grid); - - DenseVector lmd2(N1); - DenseVector lme2(N1); - DenseVector Qt2(N1*N1); - for(int k=0; k= kmin; --j){ - RealD dds = fabs(lmd[j-1])+fabs(lmd[j]); - if(fabs(lme[j-1])+dds > dds){ - kmax = j+1; - goto continued; - } - } - Niter = iter; -#ifdef USE_LAPACK - if(check_lapack){ - const double SMALL=1e-8; - diagonalize_lapack(lmd2,lme2,N2,N1,Qt2,grid); - DenseVector lmd3(N2); - for(int k=0; kSMALL) std::cout <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] < dds){ - kmin = j+1; - break; - } - } - } - std::cout << "[QL method] Error - Too many iteration: "<& evec, - int k) - { - typedef typename Field::scalar_type MyComplex; - MyComplex ip; - - if ( 0 ) { - for(int j=0; j &Qt) { - for(int i=0; i& eval, - DenseVector& evec, - const Field& src, - int& Nconv) - { - - GridBase *grid = evec[0]._grid; - assert(grid == src._grid); - - std::cout << " -- seek Nk = " << Nk <<" vectors"<< std::endl; - std::cout << " -- accept Nstop = " << Nstop <<" vectors"<< std::endl; - std::cout << " -- total Nm = " << Nm <<" vectors"<< std::endl; - std::cout << " -- size of eval = " << eval.size() << std::endl; - std::cout << " -- size of evec = " << evec.size() << std::endl; - - assert(Nm == evec.size() && Nm == eval.size()); - - DenseVector lme(Nm); - DenseVector lme2(Nm); - DenseVector eval2(Nm); - DenseVector Qt(Nm*Nm); - DenseVector Iconv(Nm); - - DenseVector B(Nm,grid); // waste of space replicating - - Field f(grid); - Field v(grid); - - int k1 = 1; - int k2 = Nk; - - Nconv = 0; - - RealD beta_k; - - // Set initial vector - evec[0] = src; - std:: cout <<"norm2(src)= " << norm2(src)<& eval, std::vector& evec, const Field& src, int& Nconv) + { - for(int i=0; i<(Nk+1); ++i) B[i] = 0.0; - - for(int j=k1-1; j=Nstop ){ - goto converged; - } - } // end of iter loop + GridBase *grid = evec[0]._grid; + assert(grid == src._grid); + + std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; + std::cout << GridLogMessage <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl; + std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; + std::cout << GridLogMessage <<" -- seek Nk = " << Nk <<" vectors"<< std::endl; + std::cout << GridLogMessage <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl; + std::cout << GridLogMessage <<" -- total Nm = " << Nm <<" vectors"<< std::endl; + std::cout << GridLogMessage <<" -- size of eval = " << eval.size() << std::endl; + std::cout << GridLogMessage <<" -- size of evec = " << evec.size() << std::endl; + if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) { + std::cout << GridLogMessage << "Diagonalisation is DSTEGR "< lme(Nm); + std::vector lme2(Nm); + std::vector eval2(Nm); + Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm); + std::vector Iconv(Nm); + + std::vector B(Nm,grid); // waste of space replicating + + Field f(grid); + Field v(grid); + + int k1 = 1; + int k2 = Nk; + + Nconv = 0; + + RealD beta_k; + + // Set initial vector + evec[0] = src; + std::cout << GridLogMessage <<"norm2(src)= " << norm2(src)<=Nstop ){ + goto converged; + } + } // end of iter loop + + std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; + std::cout<< GridLogError <<" ImplicitlyRestartedLanczos::calc() NOT converged."; + std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; + abort(); + + converged: + // Sorting + eval.resize(Nconv); + evec.resize(Nconv,grid); + for(int i=0; i& lmd, + std::vector& lme, + std::vector& evec, + Field& w,int Nm,int k) + { + const RealD tiny = 1.0e-20; + assert( k< Nm ); + + _poly(_Linop,evec[k],w); // 3. wk:=Avk−βkv_{k−1} + + if(k>0) w -= lme[k-1] * evec[k-1]; + + ComplexD zalph = innerProduct(evec[k],w); // 4. αk:=(wk,vk) + RealD alph = real(zalph); + + w = w - alph * evec[k];// 5. wk:=wk−αkvk + + RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop + // 7. vk+1 := wk/βk+1 + + lmd[k] = alph; + lme[k] = beta; + + if ( k > 0 ) orthogonalize(w,evec,k); // orthonormalise + if ( k < Nm-1) evec[k+1] = w; + + if ( beta < tiny ) std::cout << GridLogMessage << " beta is tiny "<& lmd, // Nm + std::vector& lme, // Nm + int Nk, int Nm, // Nk, Nm + Eigen::MatrixXd& Qt, // Nm x Nm matrix + RealD Dsh, int kmin, int kmax) + { + int k = kmin-1; + RealD x; + + RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]); + RealD c = ( lmd[k] -Dsh) *Fden; + RealD s = -lme[k] *Fden; + + RealD tmpa1 = lmd[k]; + RealD tmpa2 = lmd[k+1]; + RealD tmpb = lme[k]; + + lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb; + lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb; + lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb; + x =-s*lme[k+1]; + lme[k+1] = c*lme[k+1]; + + for(int i=0; i& lmd, std::vector& lme, + int Nk, int Nm, + Eigen::MatrixXd & Qt, + GridBase *grid) + { + Qt = Eigen::MatrixXd::Identity(Nm,Nm); + if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) { + diagonalize_lapack(lmd,lme,Nk,Nm,Qt,grid); + } else if ( diagonalisation == IRLdiagonaliseWithQR ) { + diagonalize_QR(lmd,lme,Nk,Nm,Qt,grid); + } else if ( diagonalisation == IRLdiagonaliseWithEigen ) { + diagonalize_Eigen(lmd,lme,Nk,Nm,Qt,grid); + } else { + assert(0); + } + } + +#ifdef USE_LAPACK +void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e, + double *vl, double *vu, int *il, int *iu, double *abstol, + int *m, double *w, double *z, int *ldz, int *isuppz, + double *work, int *lwork, int *iwork, int *liwork, + int *info); #endif +void diagonalize_lapack(std::vector& lmd, + std::vector& lme, + int Nk, int Nm, + Eigen::MatrixXd& Qt, + GridBase *grid) +{ +#ifdef USE_LAPACK + const int size = Nm; + int NN = Nk; + double evals_tmp[NN]; + double evec_tmp[NN][NN]; + memset(evec_tmp[0],0,sizeof(double)*NN*NN); + double DD[NN]; + double EE[NN]; + for (int i = 0; i< NN; i++) { + for (int j = i - 1; j <= i + 1; j++) { + if ( j < NN && j >= 0 ) { + if (i==j) DD[i] = lmd[i]; + if (i==j) evals_tmp[i] = lmd[i]; + if (j==(i-1)) EE[j] = lme[j]; + } + } + } + int evals_found; + int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ; + int liwork = 3+NN*10 ; + int iwork[liwork]; + double work[lwork]; + int isuppz[2*NN]; + char jobz = 'V'; // calculate evals & evecs + char range = 'I'; // calculate all evals + // char range = 'A'; // calculate all evals + char uplo = 'U'; // refer to upper half of original matrix + char compz = 'I'; // Compute eigenvectors of tridiagonal matrix + int ifail[NN]; + int info; + int total = grid->_Nprocessors; + int node = grid->_processor; + int interval = (NN/total)+1; + double vl = 0.0, vu = 0.0; + int il = interval*node+1 , iu = interval*(node+1); + if (iu > NN) iu=NN; + double tol = 0.0; + if (1) { + memset(evals_tmp,0,sizeof(double)*NN); + if ( il <= NN){ + LAPACK_dstegr(&jobz, &range, &NN, + (double*)DD, (double*)EE, + &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' + &tol, // tolerance + &evals_found, evals_tmp, (double*)evec_tmp, &NN, + isuppz, + work, &lwork, iwork, &liwork, + &info); + for (int i = iu-1; i>= il-1; i--){ + evals_tmp[i] = evals_tmp[i - (il-1)]; + if (il>1) evals_tmp[i-(il-1)]=0.; + for (int j = 0; j< NN; j++){ + evec_tmp[i][j] = evec_tmp[i - (il-1)][j]; + if (il>1) evec_tmp[i-(il-1)][j]=0.; + } + } + } + { + grid->GlobalSumVector(evals_tmp,NN); + grid->GlobalSumVector((double*)evec_tmp,NN*NN); + } + } + // Safer to sort instead of just reversing it, + // but the document of the routine says evals are sorted in increasing order. + // qr gives evals in decreasing order. + for(int i=0;i& lmd, std::vector& lme, + int Nk, int Nm, + Eigen::MatrixXd & Qt, + GridBase *grid) + { + int Niter = 100*Nm; + int kmin = 1; + int kmax = Nk; + + // (this should be more sophisticated) + for(int iter=0; iter= kmin; --j){ + RealD dds = fabs(lmd[j-1])+fabs(lmd[j]); + if(fabs(lme[j-1])+dds > dds){ + kmax = j+1; + goto continued; + } + } + Niter = iter; + return; + + continued: + for(int j=0; j dds){ + kmin = j+1; + break; + } + } + } + std::cout << GridLogError << "[QL method] Error - Too many iteration: "<& lmd, std::vector& lme, + int Nk, int Nm, + Eigen::MatrixXd & Qt, // Nm x Nm + GridBase *grid) + { + Eigen::MatrixXd TriDiag = Eigen::MatrixXd::Zero(Nk,Nk); + + for(int i=0;i eigensolver(TriDiag); + + for (int i = 0; i < Nk; i++) { + lmd[Nk-1-i] = eigensolver.eigenvalues()(i); + } + for (int i = 0; i < Nk; i++) { + for (int j = 0; j < Nk; j++) { + Qt(Nk-1-i,j) = eigensolver.eigenvectors()(j,i); + } + } + } + + + static RealD normalise(Field& v) + { + RealD nn = norm2(v); + nn = sqrt(nn); + v = v * (1.0/nn); + return nn; + } + + void orthogonalize(Field& w, std::vector& evec, int k) + { + typedef typename Field::scalar_type MyComplex; + MyComplex ip; + + for(int j=0; j eval(Nm); - FermionField src(FrbGrid); gaussian(RNG5rb,src); + FermionField src(FrbGrid); + gaussian(RNG5rb,src); std::vector evec(Nm,FrbGrid); for(int i=0;i<1;i++){ - std::cout << i<<" / "<< Nm<< " grid pointer "< Date: Wed, 21 Jun 2017 02:50:09 +0100 Subject: [PATCH 3/5] Clean up finished. Could shrink Lanczos to around 400 lines at a push --- .../iterative/ImplicitlyRestartedLanczos.h | 114 +++++++++--------- tests/debug/Test_synthetic_lanczos.cc | 4 +- 2 files changed, 62 insertions(+), 56 deletions(-) diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index 571bf1b2..a8723f32 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -39,10 +39,11 @@ namespace Grid { IRLdiagonaliseWithQR, IRLdiagonaliseWithEigen }; - //////////////////////////////////////////////////////////////////////////////// - // Helper class for sorting the evalues AND evectors by Field - // Use pointer swizzle on vectors - //////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////// +// Helper class for sorting the evalues AND evectors by Field +// Use pointer swizzle on vectors +//////////////////////////////////////////////////////////////////////////////// template class SortEigen { private: @@ -90,7 +91,9 @@ class SortEigen { ///////////////////////////////////////////////////////////// template class ImplicitlyRestartedLanczos { + private: + int MaxIter; // Max iterations int Nstop; // Number of evecs checked for convergence int Nk; // Number of converged sought @@ -122,6 +125,29 @@ public: diagonalisation(_diagonalisation) { }; + //////////////////////////////// + // Helpers + //////////////////////////////// + static RealD normalise(Field& v) + { + RealD nn = norm2(v); + nn = sqrt(nn); + v = v * (1.0/nn); + return nn; + } + + void orthogonalize(Field& w, std::vector& evec, int k) + { + typedef typename Field::scalar_type MyComplex; + MyComplex ip; + + for(int j=0; j K P = M − K † @@ -167,9 +193,10 @@ until convergence std::vector lme(Nm); std::vector lme2(Nm); std::vector eval2(Nm); - Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm); - std::vector Iconv(Nm); + Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm); + + std::vector Iconv(Nm); std::vector B(Nm,grid); // waste of space replicating Field f(grid); @@ -218,6 +245,7 @@ until convergence // Implicitly shifted QR transformations Qt = Eigen::MatrixXd::Identity(Nm,Nm); for(int ip=k2; ip& lmd, std::vector& lme, + int Nk, int Nm, + Eigen::MatrixXd & Qt, // Nm x Nm + GridBase *grid) + { + Eigen::MatrixXd TriDiag = Eigen::MatrixXd::Zero(Nk,Nk); + + for(int i=0;i eigensolver(TriDiag); + + for (int i = 0; i < Nk; i++) { + lmd[Nk-1-i] = eigensolver.eigenvalues()(i); + } + for (int i = 0; i < Nk; i++) { + for (int j = 0; j < Nk; j++) { + Qt(Nk-1-i,j) = eigensolver.eigenvectors()(j,i); + } + } + } + /////////////////////////////////////////////////////////////////////////// + // File could end here if settle on Eigen ??? + /////////////////////////////////////////////////////////////////////////// + void qr_decomp(std::vector& lmd, // Nm std::vector& lme, // Nm int Nk, int Nm, // Nk, Nm @@ -570,50 +620,6 @@ void diagonalize_lapack(std::vector& lmd, abort(); } - void diagonalize_Eigen(std::vector& lmd, std::vector& lme, - int Nk, int Nm, - Eigen::MatrixXd & Qt, // Nm x Nm - GridBase *grid) - { - Eigen::MatrixXd TriDiag = Eigen::MatrixXd::Zero(Nk,Nk); - - for(int i=0;i eigensolver(TriDiag); - - for (int i = 0; i < Nk; i++) { - lmd[Nk-1-i] = eigensolver.eigenvalues()(i); - } - for (int i = 0; i < Nk; i++) { - for (int j = 0; j < Nk; j++) { - Qt(Nk-1-i,j) = eigensolver.eigenvectors()(j,i); - } - } - } - - - static RealD normalise(Field& v) - { - RealD nn = norm2(v); - nn = sqrt(nn); - v = v * (1.0/nn); - return nn; - } - - void orthogonalize(Field& w, std::vector& evec, int k) - { - typedef typename Field::scalar_type MyComplex; - MyComplex ip; - - for(int j=0; j IRL(HermOp,X,Nk,Nm,eresid,Nit); - ImplicitlyRestartedLanczos ChebyIRL(HermOp,Cheby,Nk,Nm,eresid,Nit); + ImplicitlyRestartedLanczos IRL(HermOp,X,Nk,Nk,Nm,eresid,Nit); + ImplicitlyRestartedLanczos ChebyIRL(HermOp,Cheby,Nk,Nk,Nm,eresid,Nit); LatticeComplex src(grid); gaussian(RNG,src); { From ef4f2b8c410d449ff0beea1682cfc3de9bda3f79 Mon Sep 17 00:00:00 2001 From: paboyle Date: Wed, 21 Jun 2017 09:22:20 +0100 Subject: [PATCH 4/5] todo update --- TODO | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/TODO b/TODO index eeb7dfa5..8f80903e 100644 --- a/TODO +++ b/TODO @@ -2,8 +2,8 @@ TODO: --------------- Large item work list: -1)- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- -2)- MultiRHS with spread out extra dim +1)- MultiRHS with spread out extra dim +2)- Christoph's local basis expansion Lanczos 3)- BG/Q port and check 4)- Precision conversion and sort out localConvert <-- partial - Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet @@ -13,6 +13,7 @@ Large item work list: 8)- HDCR resume Recent DONE +-- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE -- GaugeFix into central location <-- DONE -- Scidac and Ildg metadata handling <-- DONE -- Binary I/O MPI2 IO <-- DONE From 9e56c6573007ccc857571aefa2ce3b6851f7b891 Mon Sep 17 00:00:00 2001 From: paboyle Date: Wed, 21 Jun 2017 14:02:58 +0100 Subject: [PATCH 5/5] Updated TODO list --- TODO | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/TODO b/TODO index 8f80903e..001c6c0c 100644 --- a/TODO +++ b/TODO @@ -2,7 +2,8 @@ TODO: --------------- Large item work list: -1)- MultiRHS with spread out extra dim +1)- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O + 2)- Christoph's local basis expansion Lanczos 3)- BG/Q port and check 4)- Precision conversion and sort out localConvert <-- partial