diff --git a/TODO b/TODO index a5d4cabd..eeb7dfa5 100644 --- a/TODO +++ b/TODO @@ -1,24 +1,28 @@ TODO: --------------- -Peter's work list: -1)- Precision conversion and sort out localConvert <-- -2)- Remove DenseVector, DenseMatrix; Use Eigen instead. <-- - --- Profile CG, BlockCG, etc... Flop count/rate -- PARTIAL, time but no flop/s yet --- Physical propagator interface --- Conserved currents --- GaugeFix into central location --- Multigrid Wilson and DWF, compare to other Multigrid implementations --- HDCR resume +Large item work list: +1)- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- +2)- MultiRHS with spread out extra dim +3)- BG/Q port and check +4)- Precision conversion and sort out localConvert <-- partial + - Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet +5)- Physical propagator interface +6)- Conserved currents +7)- Multigrid Wilson and DWF, compare to other Multigrid implementations +8)- HDCR resume Recent DONE +-- GaugeFix into central location <-- DONE +-- Scidac and Ildg metadata handling <-- DONE +-- Binary I/O MPI2 IO <-- DONE -- Binary I/O speed up & x-strips <-- DONE -- Cut down the exterior overhead <-- DONE -- Interior legs from SHM comms <-- DONE -- Half-precision comms <-- DONE --- Merge high precision reduction into develop --- multiRHS DWF; benchmark on Cori/BNL for comms elimination +-- Merge high precision reduction into develop <-- DONE +-- BlockCG, BCGrQ <-- DONE +-- multiRHS DWF; benchmark on Cori/BNL for comms elimination <-- DONE -- slice* linalg routines for multiRHS, BlockCG ----- diff --git a/lib/algorithms/densematrix/DenseMatrix.h b/lib/algorithms/densematrix/DenseMatrix.h deleted file mode 100644 index d86add21..00000000 --- a/lib/algorithms/densematrix/DenseMatrix.h +++ /dev/null @@ -1,137 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/algorithms/iterative/DenseMatrix.h - - Copyright (C) 2015 - -Author: Peter Boyle -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_DENSE_MATRIX_H -#define GRID_DENSE_MATRIX_H - -namespace Grid { - ///////////////////////////////////////////////////////////// - // Matrix untils - ///////////////////////////////////////////////////////////// - -template using DenseVector = std::vector; -template using DenseMatrix = DenseVector >; - -template void Size(DenseVector & vec, int &N) -{ - N= vec.size(); -} -template void Size(DenseMatrix & mat, int &N,int &M) -{ - N= mat.size(); - M= mat[0].size(); -} - -template void SizeSquare(DenseMatrix & mat, int &N) -{ - int M; Size(mat,N,M); - assert(N==M); -} - -template void Resize(DenseVector & mat, int N) { - mat.resize(N); -} -template void Resize(DenseMatrix & mat, int N, int M) { - mat.resize(N); - for(int i=0;i void Fill(DenseMatrix & mat, T&val) { - int N,M; - Size(mat,N,M); - for(int i=0;i DenseMatrix Transpose(DenseMatrix & mat){ - int N,M; - Size(mat,N,M); - DenseMatrix C; Resize(C,M,N); - for(int i=0;i void Unity(DenseMatrix &A){ - int N; SizeSquare(A,N); - for(int i=0;i -void PlusUnit(DenseMatrix & A,T c){ - int dim; SizeSquare(A,dim); - for(int i=0;i -DenseMatrix HermitianConj(DenseMatrix &mat){ - - int dim; SizeSquare(mat,dim); - - DenseMatrix C; Resize(C,dim,dim); - - for(int i=0;i -DenseMatrix GetSubMtx(DenseMatrix &A,int row_st, int row_end, int col_st, int col_end) -{ - DenseMatrix H; Resize(H,row_end - row_st,col_end-col_st); - - for(int i = row_st; i - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef FRANCIS_H -#define FRANCIS_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -//#include -//#include -//#include - -namespace Grid { - -template int SymmEigensystem(DenseMatrix &Ain, DenseVector &evals, DenseMatrix &evecs, RealD small); -template int Eigensystem(DenseMatrix &Ain, DenseVector &evals, DenseMatrix &evecs, RealD small); - -/** - Find the eigenvalues of an upper hessenberg matrix using the Francis QR algorithm. -H = - x x x x x x x x x - x x x x x x x x x - 0 x x x x x x x x - 0 0 x x x x x x x - 0 0 0 x x x x x x - 0 0 0 0 x x x x x - 0 0 0 0 0 x x x x - 0 0 0 0 0 0 x x x - 0 0 0 0 0 0 0 x x -Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary. -**/ -template -int QReigensystem(DenseMatrix &Hin, DenseVector &evals, DenseMatrix &evecs, RealD small) -{ - DenseMatrix H = Hin; - - int N ; SizeSquare(H,N); - int M = N; - - Fill(evals,0); - Fill(evecs,0); - - T s,t,x=0,y=0,z=0; - T u,d; - T apd,amd,bc; - DenseVector p(N,0); - T nrm = Norm(H); ///DenseMatrix Norm - int n, m; - int e = 0; - int it = 0; - int tot_it = 0; - int l = 0; - int r = 0; - DenseMatrix P; Resize(P,N,N); Unity(P); - DenseVector trows(N,0); - - /// Check if the matrix is really hessenberg, if not abort - RealD sth = 0; - for(int j=0;j small){ - std::cout << "Non hessenberg H = " << sth << " > " << small << std::endl; - exit(1); - } - } - } - - do{ - std::cout << "Francis QR Step N = " << N << std::endl; - /** Check for convergence - x x x x x - 0 x x x x - 0 0 x x x - 0 0 x x x - 0 0 0 0 x - for this matrix l = 4 - **/ - do{ - l = Chop_subdiag(H,nrm,e,small); - r = 0; ///May have converged on more than one eval - ///Single eval - if(l == N-1){ - evals[e] = H[l][l]; - N--; e++; r++; it = 0; - } - ///RealD eval - if(l == N-2){ - trows[l+1] = 1; ///Needed for UTSolve - apd = H[l][l] + H[l+1][l+1]; - amd = H[l][l] - H[l+1][l+1]; - bc = (T)4.0*H[l+1][l]*H[l][l+1]; - evals[e] = (T)0.5*( apd + sqrt(amd*amd + bc) ); - evals[e+1] = (T)0.5*( apd - sqrt(amd*amd + bc) ); - N-=2; e+=2; r++; it = 0; - } - } while(r>0); - - if(N ==0) break; - - DenseVector ck; Resize(ck,3); - DenseVector v; Resize(v,3); - - for(int m = N-3; m >= l; m--){ - ///Starting vector essentially random shift. - if(it%10 == 0 && N >= 3 && it > 0){ - s = (T)1.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) ); - t = (T)0.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) ); - x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t; - y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s); - z = H[m+1][m]*H[m+2][m+1]; - } - ///Starting vector implicit Q theorem - else{ - s = (H[N-2][N-2] + H[N-1][N-1]); - t = (H[N-2][N-2]*H[N-1][N-1] - H[N-2][N-1]*H[N-1][N-2]); - x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t; - y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s); - z = H[m+1][m]*H[m+2][m+1]; - } - ck[0] = x; ck[1] = y; ck[2] = z; - - if(m == l) break; - - /** Some stupid thing from numerical recipies, seems to work**/ - // PAB.. for heaven's sake quote page, purpose, evidence it works. - // what sort of comment is that!?!?!? - u=abs(H[m][m-1])*(abs(y)+abs(z)); - d=abs(x)*(abs(H[m-1][m-1])+abs(H[m][m])+abs(H[m+1][m+1])); - if ((T)abs(u+d) == (T)abs(d) ){ - l = m; break; - } - - //if (u < small){l = m; break;} - } - if(it > 100000){ - std::cout << "QReigensystem: bugger it got stuck after 100000 iterations" << std::endl; - std::cout << "got " << e << " evals " << l << " " << N << std::endl; - exit(1); - } - normalize(ck); ///Normalization cancels in PHP anyway - T beta; - Householder_vector(ck, 0, 2, v, beta); - Householder_mult(H,v,beta,0,l,l+2,0); - Householder_mult(H,v,beta,0,l,l+2,1); - ///Accumulate eigenvector - Householder_mult(P,v,beta,0,l,l+2,1); - int sw = 0; ///Are we on the last row? - for(int k=l;k(ck, 0, 2-sw, v, beta); - Householder_mult(H,v, beta,0,k+1,k+3-sw,0); - Householder_mult(H,v, beta,0,k+1,k+3-sw,1); - ///Accumulate eigenvector - Householder_mult(P,v, beta,0,k+1,k+3-sw,1); - } - it++; - tot_it++; - }while(N > 1); - N = evals.size(); - ///Annoying - UT solves in reverse order; - DenseVector tmp; Resize(tmp,N); - for(int i=0;i -int my_Wilkinson(DenseMatrix &Hin, DenseVector &evals, DenseMatrix &evecs, RealD small) -{ - /** - Find the eigenvalues of an upper Hessenberg matrix using the Wilkinson QR algorithm. - H = - x x 0 0 0 0 - x x x 0 0 0 - 0 x x x 0 0 - 0 0 x x x 0 - 0 0 0 x x x - 0 0 0 0 x x - Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary. **/ - return my_Wilkinson(Hin, evals, evecs, small, small); -} - -template -int my_Wilkinson(DenseMatrix &Hin, DenseVector &evals, DenseMatrix &evecs, RealD small, RealD tol) -{ - int N; SizeSquare(Hin,N); - int M = N; - - ///I don't want to modify the input but matricies must be passed by reference - //Scale a matrix by its "norm" - //RealD Hnorm = abs( Hin.LargestDiag() ); H = H*(1.0/Hnorm); - DenseMatrix H; H = Hin; - - RealD Hnorm = abs(Norm(Hin)); - H = H * (1.0 / Hnorm); - - // TODO use openmp and memset - Fill(evals,0); - Fill(evecs,0); - - T s, t, x = 0, y = 0, z = 0; - T u, d; - T apd, amd, bc; - DenseVector p; Resize(p,N); Fill(p,0); - - T nrm = Norm(H); ///DenseMatrix Norm - int n, m; - int e = 0; - int it = 0; - int tot_it = 0; - int l = 0; - int r = 0; - DenseMatrix P; Resize(P,N,N); - Unity(P); - DenseVector trows(N, 0); - /// Check if the matrix is really symm tridiag - RealD sth = 0; - for(int j = 0; j < N; ++j) - { - for(int i = j + 2; i < N; ++i) - { - if(abs(H[i][j]) > tol || abs(H[j][i]) > tol) - { - std::cout << "Non Tridiagonal H(" << i << ","<< j << ") = |" << Real( real( H[j][i] ) ) << "| > " << tol << std::endl; - std::cout << "Warning tridiagonalize and call again" << std::endl; - // exit(1); // see what is going on - //return; - } - } - } - - do{ - do{ - //Jasper - //Check if the subdiagonal term is small enough ( 0); - //Jasper - //Already converged - //-------------- - if(N == 0) break; - - DenseVector ck,v; Resize(ck,2); Resize(v,2); - - for(int m = N - 3; m >= l; m--) - { - ///Starting vector essentially random shift. - if(it%10 == 0 && N >= 3 && it > 0) - { - t = abs(H[N - 1][N - 2]) + abs(H[N - 2][N - 3]); - x = H[m][m] - t; - z = H[m + 1][m]; - } else { - ///Starting vector implicit Q theorem - d = (H[N - 2][N - 2] - H[N - 1][N - 1]) * (T) 0.5; - t = H[N - 1][N - 1] - H[N - 1][N - 2] * H[N - 1][N - 2] - / (d + sign(d) * sqrt(d * d + H[N - 1][N - 2] * H[N - 1][N - 2])); - x = H[m][m] - t; - z = H[m + 1][m]; - } - //Jasper - //why it is here???? - //----------------------- - if(m == l) - break; - - u = abs(H[m][m - 1]) * (abs(y) + abs(z)); - d = abs(x) * (abs(H[m - 1][m - 1]) + abs(H[m][m]) + abs(H[m + 1][m + 1])); - if ((T)abs(u + d) == (T)abs(d)) - { - l = m; - break; - } - } - //Jasper - if(it > 1000000) - { - std::cout << "Wilkinson: bugger it got stuck after 100000 iterations" << std::endl; - std::cout << "got " << e << " evals " << l << " " << N << std::endl; - exit(1); - } - // - T s, c; - Givens_calc(x, z, c, s); - Givens_mult(H, l, l + 1, c, -s, 0); - Givens_mult(H, l, l + 1, c, s, 1); - Givens_mult(P, l, l + 1, c, s, 1); - // - for(int k = l; k < N - 2; ++k) - { - x = H.A[k + 1][k]; - z = H.A[k + 2][k]; - Givens_calc(x, z, c, s); - Givens_mult(H, k + 1, k + 2, c, -s, 0); - Givens_mult(H, k + 1, k + 2, c, s, 1); - Givens_mult(P, k + 1, k + 2, c, s, 1); - } - it++; - tot_it++; - }while(N > 1); - - N = evals.size(); - ///Annoying - UT solves in reverse order; - DenseVector tmp(N); - for(int i = 0; i < N; ++i) - tmp[i] = evals[N-i-1]; - evals = tmp; - // - UTeigenvectors(H, trows, evals, evecs); - //UTSymmEigenvectors(H, trows, evals, evecs); - for(int i = 0; i < evals.size(); ++i) - { - evecs[i] = P * evecs[i]; - normalize(evecs[i]); - evals[i] = evals[i] * Hnorm; - } - // // FIXME this is to test - // Hin.write("evecs3", evecs); - // Hin.write("evals3", evals); - // // check rsd - // for(int i = 0; i < M; i++) { - // vector Aevec = Hin * evecs[i]; - // RealD norm2(0.); - // for(int j = 0; j < M; j++) { - // norm2 += (Aevec[j] - evals[i] * evecs[i][j]) * (Aevec[j] - evals[i] * evecs[i][j]); - // } - // } - return tot_it; -} - -template -void Hess(DenseMatrix &A, DenseMatrix &Q, int start){ - - /** - turn a matrix A = - x x x x x - x x x x x - x x x x x - x x x x x - x x x x x - into - x x x x x - x x x x x - 0 x x x x - 0 0 x x x - 0 0 0 x x - with householder rotations - Slow. - */ - int N ; SizeSquare(A,N); - DenseVector p; Resize(p,N); Fill(p,0); - - for(int k=start;k ck,v; Resize(ck,N-k-1); Resize(v,N-k-1); - for(int i=k+1;i(ck, 0, ck.size()-1, v, beta); ///Householder vector - Householder_mult(A,v,beta,start,k+1,N-1,0); ///A -> PA - Householder_mult(A,v,beta,start,k+1,N-1,1); ///PA -> PAP^H - ///Accumulate eigenvector - Householder_mult(Q,v,beta,start,k+1,N-1,1); ///Q -> QP^H - } - /*for(int l=0;l -void Tri(DenseMatrix &A, DenseMatrix &Q, int start){ -///Tridiagonalize a matrix - int N; SizeSquare(A,N); - Hess(A,Q,start); - /*for(int l=0;l -void ForceTridiagonal(DenseMatrix &A){ -///Tridiagonalize a matrix - int N ; SizeSquare(A,N); - for(int l=0;l -int my_SymmEigensystem(DenseMatrix &Ain, DenseVector &evals, DenseVector > &evecs, RealD small){ - ///Solve a symmetric eigensystem, not necessarily in tridiagonal form - int N; SizeSquare(Ain,N); - DenseMatrix A; A = Ain; - DenseMatrix Q; Resize(Q,N,N); Unity(Q); - Tri(A,Q,0); - int it = my_Wilkinson(A, evals, evecs, small); - for(int k=0;k -int Wilkinson(DenseMatrix &Ain, DenseVector &evals, DenseVector > &evecs, RealD small){ - return my_Wilkinson(Ain, evals, evecs, small); -} - -template -int SymmEigensystem(DenseMatrix &Ain, DenseVector &evals, DenseVector > &evecs, RealD small){ - return my_SymmEigensystem(Ain, evals, evecs, small); -} - -template -int Eigensystem(DenseMatrix &Ain, DenseVector &evals, DenseVector > &evecs, RealD small){ -///Solve a general eigensystem, not necessarily in tridiagonal form - int N = Ain.dim; - DenseMatrix A(N); A = Ain; - DenseMatrix Q(N);Q.Unity(); - Hess(A,Q,0); - int it = QReigensystem(A, evals, evecs, small); - for(int k=0;k - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef HOUSEHOLDER_H -#define HOUSEHOLDER_H - -#define TIMER(A) std::cout << GridLogMessage << __FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; -#define ENTER() std::cout << GridLogMessage << "ENTRY "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; -#define LEAVE() std::cout << GridLogMessage << "EXIT "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace Grid { -/** Comparison function for finding the max element in a vector **/ -template bool cf(T i, T j) { - return abs(i) < abs(j); -} - -/** - Calculate a real Givens angle - **/ -template inline void Givens_calc(T y, T z, T &c, T &s){ - - RealD mz = (RealD)abs(z); - - if(mz==0.0){ - c = 1; s = 0; - } - if(mz >= (RealD)abs(y)){ - T t = -y/z; - s = (T)1.0 / sqrt ((T)1.0 + t * t); - c = s * t; - } else { - T t = -z/y; - c = (T)1.0 / sqrt ((T)1.0 + t * t); - s = c * t; - } -} - -template inline void Givens_mult(DenseMatrix &A, int i, int k, T c, T s, int dir) -{ - int q ; SizeSquare(A,q); - - if(dir == 0){ - for(int j=0;j inline void Householder_vector(DenseVector input, int k, int j, DenseVector &v, T &beta) -{ - int N ; Size(input,N); - T m = *max_element(input.begin() + k, input.begin() + j + 1, cf ); - - if(abs(m) > 0.0){ - T alpha = 0; - - for(int i=k; i 0.0) v[k] = v[k] + (v[k]/abs(v[k]))*alpha; - else v[k] = -alpha; - } else{ - for(int i=k; i inline void Householder_vector(DenseVector input, int k, int j, int dir, DenseVector &v, T &beta) -{ - int N = input.size(); - T m = *max_element(input.begin() + k, input.begin() + j + 1, cf); - - if(abs(m) > 0.0){ - T alpha = 0; - - for(int i=k; i 0.0) v[dir] = v[dir] + (v[dir]/abs(v[dir]))*alpha; - else v[dir] = -alpha; - }else{ - for(int i=k; i inline void Householder_mult(DenseMatrix &A , DenseVector v, T beta, int l, int k, int j, int trans) -{ - int N ; SizeSquare(A,N); - - if(abs(beta) > 0.0){ - for(int p=l; p inline void Householder_mult_tri(DenseMatrix &A , DenseVector v, T beta, int l, int M, int k, int j, int trans) -{ - if(abs(beta) > 0.0){ - - int N ; SizeSquare(A,N); - - DenseMatrix tmp; Resize(tmp,N,N); Fill(tmp,0); - - T s; - for(int p=l; p +template using DenseVector = std::vector; + +//#include #include namespace Grid { @@ -47,104 +49,85 @@ namespace Grid { ///////////////////////////////////////////////////////////// // Implicitly restarted lanczos ///////////////////////////////////////////////////////////// - - template - class ImplicitlyRestartedLanczos { +class ImplicitlyRestartedLanczos { - const RealD small = 1.0e-16; public: - int lock; - int get; - int Niter; - int converged; + int Niter; // Max iterations + int Nstop; // Number of evecs checked for convergence + int Nk; // Number of converged sought + int Nm; // Nm -- total number of vectors - int Nstop; // Number of evecs checked for convergence - int Nk; // Number of converged sought - int Np; // Np -- Number of spare vecs in kryloc space - int Nm; // Nm -- total number of vectors + RealD eresid; - RealD eresid; + //////////////////////////////////// + // Embedded objects + //////////////////////////////////// + SortEigen _sort; + LinearOperatorBase &_Linop; + OperatorFunction &_poly; - SortEigen _sort; + ///////////////////////// + // Constructor + ///////////////////////// + ImplicitlyRestartedLanczos(LinearOperatorBase &Linop, // op + OperatorFunction & poly, // polynmial + int _Nstop, // sought vecs + int _Nk, // sought vecs + int _Nm, // total vecs + RealD _eresid, // resid in lmdue deficit + int _Niter) : // Max iterations + _Linop(Linop), _poly(poly), + Nstop(_Nstop), Nk(_Nk), Nm(_Nm), + eresid(_eresid), Niter(_Niter) { }; -// GridCartesian &_fgrid; - - LinearOperatorBase &_Linop; - - OperatorFunction &_poly; - - ///////////////////////// - // Constructor - ///////////////////////// - void init(void){}; - void Abort(int ff, DenseVector &evals, DenseVector > &evecs); - - ImplicitlyRestartedLanczos( - LinearOperatorBase &Linop, // op - OperatorFunction & poly, // polynmial - int _Nstop, // sought vecs - int _Nk, // sought vecs - int _Nm, // spare vecs - RealD _eresid, // resid in lmdue deficit - int _Niter) : // Max iterations - _Linop(Linop), - _poly(poly), - Nstop(_Nstop), - Nk(_Nk), - Nm(_Nm), - eresid(_eresid), - Niter(_Niter) - { - Np = Nm-Nk; assert(Np>0); - }; - - ImplicitlyRestartedLanczos( - LinearOperatorBase &Linop, // op +#if 0 + ImplicitlyRestartedLanczos(LinearOperatorBase &Linop, // op OperatorFunction & poly, // polynmial int _Nk, // sought vecs - int _Nm, // spare vecs + int _Nm, // total vecs RealD _eresid, // resid in lmdue deficit int _Niter) : // Max iterations - _Linop(Linop), - _poly(poly), - Nstop(_Nk), - Nk(_Nk), - Nm(_Nm), - eresid(_eresid), - Niter(_Niter) - { - Np = Nm-Nk; assert(Np>0); - }; + _Linop(Linop), _poly(poly), + Nstop(_Nk), Nk(_Nk), Nm(_Nm), + eresid(_eresid), Niter(_Niter) { }; +#endif - ///////////////////////// - // Sanity checked this routine (step) against Saad. - ///////////////////////// - void RitzMatrix(DenseVector& evec,int k){ +#if 0 + void calc(DenseVector& eval, + DenseVector& evec, + const Field& src, + int& Nconv); - if(1) return; + void step(DenseVector& lmd, + DenseVector& lme, + DenseVector& evec, + Field& w,int Nm,int k); - GridBase *grid = evec[0]._grid; - Field w(grid); - std::cout << "RitzMatrix "<1 ) { - if (abs(in) >1.0e-9 ) { - std::cout<<"oops"< &Qt) ; + + static RealD normalise(Field& v) ; + void orthogonalize(Field& w, DenseVector& evec, int k); + void diagonalize(DenseVector& lmd, + DenseVector& lme, + int N2, int N1, + DenseVector& Qt, + GridBase *grid); + + void qr_decomp(DenseVector& lmd, + DenseVector& lme, + int Nk, int Nm, + DenseVector& Qt, + RealD Dsh, int kmin, int kmax); + +#ifdef USE_LAPACK + void diagonalize_lapack(DenseVector& lmd, + DenseVector& lme, + int N1, int N2, + DenseVector& Qt, + GridBase *grid); +#endif +#endif /* Saad PP. 195 1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0 @@ -161,12 +144,12 @@ public: DenseVector& evec, Field& w,int Nm,int k) { + const RealD tiny = 1.0e-20; assert( k< Nm ); _poly(_Linop,evec[k],w); // 3. wk:=Avk−βkv_{k−1} - if(k>0){ - w -= lme[k-1] * evec[k-1]; - } + + if(k>0) w -= lme[k-1] * evec[k-1]; ComplexD zalph = innerProduct(evec[k],w); // 4. αk:=(wk,vk) RealD alph = real(zalph); @@ -176,29 +159,20 @@ public: RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop // 7. vk+1 := wk/βk+1 -// std::cout << "alpha = " << zalph << " beta "<0) { - orthogonalize(w,evec,k); // orthonormalise - } - - if(k < Nm-1) evec[k+1] = w; + if ( k > 0 ) orthogonalize(w,evec,k); // orthonormalise + if ( k < Nm-1) evec[k+1] = w; + + if ( beta < tiny ) std::cout << " beta is tiny "<& lmd, - DenseVector& lme, - int Nk, - int Nm, - DenseVector& Qt, - RealD Dsh, - int kmin, - int kmax) + + void qr_decomp(DenseVector& lmd, // Nm + DenseVector& lme, // Nm + int Nk, int Nm, + DenseVector& Qt, // Nm x Nm matrix + RealD Dsh, int kmin, int kmax) { int k = kmin-1; RealD x; @@ -218,7 +192,7 @@ public: lme[k+1] = c*lme[k+1]; for(int i=0; i& lmd, - DenseVector& lme, - int N1, - int N2, - DenseVector& Qt, - GridBase *grid){ - const int size = Nm; -// tevals.resize(size); -// tevecs.resize(size); - int NN = N1; - double evals_tmp[NN]; - double evec_tmp[NN][NN]; - memset(evec_tmp[0],0,sizeof(double)*NN*NN); -// double AA[NN][NN]; - double DD[NN]; - double EE[NN]; - for (int i = 0; i< NN; i++) - for (int j = i - 1; j <= i + 1; j++) - if ( j < NN && j >= 0 ) { - if (i==j) DD[i] = lmd[i]; - if (i==j) evals_tmp[i] = lmd[i]; - if (j==(i-1)) EE[j] = lme[j]; + DenseVector& lme, + int N1, + int N2, + DenseVector& Qt, + GridBase *grid) + { + const int size = Nm; + int NN = N1; + double evals_tmp[NN]; + double evec_tmp[NN][NN]; + memset(evec_tmp[0],0,sizeof(double)*NN*NN); + double DD[NN]; + double EE[NN]; + for (int i = 0; i< NN; i++) { + for (int j = i - 1; j <= i + 1; j++) { + if ( j < NN && j >= 0 ) { + if (i==j) DD[i] = lmd[i]; + if (i==j) evals_tmp[i] = lmd[i]; + if (j==(i-1)) EE[j] = lme[j]; + } + } } - int evals_found; - int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ; - int liwork = 3+NN*10 ; - int iwork[liwork]; - double work[lwork]; - int isuppz[2*NN]; - char jobz = 'V'; // calculate evals & evecs - char range = 'I'; // calculate all evals - // char range = 'A'; // calculate all evals - char uplo = 'U'; // refer to upper half of original matrix - char compz = 'I'; // Compute eigenvectors of tridiagonal matrix - int ifail[NN]; - int info; -// int total = QMP_get_number_of_nodes(); -// int node = QMP_get_node_number(); -// GridBase *grid = evec[0]._grid; - int total = grid->_Nprocessors; - int node = grid->_processor; - int interval = (NN/total)+1; - double vl = 0.0, vu = 0.0; - int il = interval*node+1 , iu = interval*(node+1); - if (iu > NN) iu=NN; - double tol = 0.0; - if (1) { - memset(evals_tmp,0,sizeof(double)*NN); - if ( il <= NN){ - printf("total=%d node=%d il=%d iu=%d\n",total,node,il,iu); - LAPACK_dstegr(&jobz, &range, &NN, - (double*)DD, (double*)EE, - &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' - &tol, // tolerance - &evals_found, evals_tmp, (double*)evec_tmp, &NN, - isuppz, - work, &lwork, iwork, &liwork, - &info); - for (int i = iu-1; i>= il-1; i--){ - printf("node=%d evals_found=%d evals_tmp[%d] = %g\n",node,evals_found, i - (il-1),evals_tmp[i - (il-1)]); - evals_tmp[i] = evals_tmp[i - (il-1)]; - if (il>1) evals_tmp[i-(il-1)]=0.; - for (int j = 0; j< NN; j++){ - evec_tmp[i][j] = evec_tmp[i - (il-1)][j]; - if (il>1) evec_tmp[i-(il-1)][j]=0.; - } - } + int evals_found; + int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ; + int liwork = 3+NN*10 ; + int iwork[liwork]; + double work[lwork]; + int isuppz[2*NN]; + char jobz = 'V'; // calculate evals & evecs + char range = 'I'; // calculate all evals + // char range = 'A'; // calculate all evals + char uplo = 'U'; // refer to upper half of original matrix + char compz = 'I'; // Compute eigenvectors of tridiagonal matrix + int ifail[NN]; + int info; + int total = grid->_Nprocessors; + int node = grid->_processor; + int interval = (NN/total)+1; + double vl = 0.0, vu = 0.0; + int il = interval*node+1 , iu = interval*(node+1); + if (iu > NN) iu=NN; + double tol = 0.0; + if (1) { + memset(evals_tmp,0,sizeof(double)*NN); + if ( il <= NN){ + LAPACK_dstegr(&jobz, &range, &NN, + (double*)DD, (double*)EE, + &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' + &tol, // tolerance + &evals_found, evals_tmp, (double*)evec_tmp, &NN, + isuppz, + work, &lwork, iwork, &liwork, + &info); + for (int i = iu-1; i>= il-1; i--){ + evals_tmp[i] = evals_tmp[i - (il-1)]; + if (il>1) evals_tmp[i-(il-1)]=0.; + for (int j = 0; j< NN; j++){ + evec_tmp[i][j] = evec_tmp[i - (il-1)][j]; + if (il>1) evec_tmp[i-(il-1)][j]=0.; + } + } + } + { + grid->GlobalSumVector(evals_tmp,NN); + grid->GlobalSumVector((double*)evec_tmp,NN*NN); + } + } + // cheating a bit. + // It is better to sort instead of just reversing it, + // but the document of the routine says evals are sorted in increasing order. + // qr gives evals in decreasing order. + for(int i=0;iGlobalSumVector(evals_tmp,NN); - grid->GlobalSumVector((double*)evec_tmp,NN*NN); - } - } -// cheating a bit. It is better to sort instead of just reversing it, but the document of the routine says evals are sorted in increasing order. qr gives evals in decreasing order. - for(int i=0;i& lmd, DenseVector& lme, int N2, @@ -354,24 +324,23 @@ public: if(!check_lapack) return diagonalize_lapack(lmd,lme,N2,N1,Qt,grid); - DenseVector lmd2(N1); - DenseVector lme2(N1); - DenseVector Qt2(N1*N1); - for(int k=0; k lmd2(N1); + DenseVector lme2(N1); + DenseVector Qt2(N1*N1); + for(int k=0; k lmd3(N2); - for(int k=0; k lmd3(N2); + for(int k=0; kSMALL) std::cout <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <SMALL) std::cout <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <SMALL) std::cout <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] < &Qt) { for(int i=0; i & bq, - Field &bf, - DenseMatrix &H){ - - GridBase *grid = bq[0]._grid; - - RealD beta; - RealD sqbt; - RealD alpha; - - for(int i=start;i 1) std::cout << "orthagonality refined " << re << " times" < evals, - DenseVector evecs){ - int N= evals.size(); - _sort.push(evals,evecs, evals.size(),N); - } - - void ImplicitRestart(int TM, DenseVector &evals, DenseVector > &evecs, DenseVector &bq, Field &bf, int cont) - { - std::cout << "ImplicitRestart begin. Eigensort starting\n"; - - DenseMatrix H; Resize(H,Nm,Nm); - - EigenSort(evals, evecs); - - ///Assign shifts - int K=Nk; - int M=Nm; - int P=Np; - int converged=0; - if(K - converged < 4) P = (M - K-1); //one - // DenseVector shifts(P + shift_extra.size()); - DenseVector shifts(P); - for(int k = 0; k < P; ++k) - shifts[k] = evals[k]; - - /// Shift to form a new H and q - DenseMatrix Q; Resize(Q,TM,TM); - Unity(Q); - Shift(Q, shifts); // H is implicitly passed in in Rudy's Shift routine - - int ff = K; - - /// Shifted H defines a new K step Arnoldi factorization - RealD beta = H[ff][ff-1]; - RealD sig = Q[TM - 1][ff - 1]; - std::cout << "beta = " << beta << " sig = " << real(sig) < q Q - times_real(bq, Q, TM); - - std::cout << norm2(bq[0]) << " -- after " << ff < &bq, Field &bf, DenseVector > & evecs,DenseVector &evals) - { - init(); - - int M=Nm; - - DenseMatrix H; Resize(H,Nm,Nm); - Resize(evals,Nm); - Resize(evecs,Nm); - - int ff = Lanczos_Factor(0, M, cont, bq,bf,H); // 0--M to begin with - - if(ff < M) { - std::cout << "Krylov: aborting ff "< " << it << std::endl; - int lock_num = lock ? converged : 0; - DenseVector tevals(M - lock_num ); - DenseMatrix tevecs; Resize(tevecs,M - lock_num,M - lock_num); - - //check residual of polynominal - TestConv(H,M, tevals, tevecs); - - if(converged >= Nk) - break; - - ImplicitRestart(ff, tevals,tevecs,H); - } - Wilkinson(H, evals, evecs, small); - // Check(); - - std::cout << "Done "< & H,DenseMatrix &Q, DenseVector shifts) { - - int P; Size(shifts,P); - int M; SizeSquare(Q,M); - - Unity(Q); - - int lock_num = lock ? converged : 0; - - RealD t_Househoulder_vector(0.0); - RealD t_Househoulder_mult(0.0); - - for(int i=0;i ck(3), v(3); - - x = H[lock_num+0][lock_num+0]-shifts[i]; - y = H[lock_num+1][lock_num+0]; - ck[0] = x; ck[1] = y; ck[2] = 0; - - normalise(ck); ///Normalization cancels in PHP anyway - RealD beta; - - Householder_vector(ck, 0, 2, v, beta); - Householder_mult(H,v,beta,0,lock_num+0,lock_num+2,0); - Householder_mult(H,v,beta,0,lock_num+0,lock_num+2,1); - ///Accumulate eigenvector - Householder_mult(Q,v,beta,0,lock_num+0,lock_num+2,1); - - int sw = 0; - for(int k=lock_num+0;k(ck, 0, 2-sw, v, beta); - Householder_mult(H,v, beta,0,k+1,k+3-sw,0); - Householder_mult(H,v, beta,0,k+1,k+3-sw,1); - ///Accumulate eigenvector - Householder_mult(Q,v, beta,0,k+1,k+3-sw,1); - } - } - } - - void TestConv(DenseMatrix & H,int SS, - DenseVector &bq, Field &bf, - DenseVector &tevals, DenseVector > &tevecs, - int lock, int converged) - { - std::cout << "Converged " << converged << " so far." << std::endl; - int lock_num = lock ? converged : 0; - int M = Nm; - - ///Active Factorization - DenseMatrix AH; Resize(AH,SS - lock_num,SS - lock_num ); - - AH = GetSubMtx(H,lock_num, SS, lock_num, SS); - - int NN=tevals.size(); - int AHsize=SS-lock_num; - - RealD small=1.0e-16; - Wilkinson(AH, tevals, tevecs, small); - - EigenSort(tevals, tevecs); - - RealD resid_nrm= norm2(bf); - - if(!lock) converged = 0; -#if 0 - for(int i = SS - lock_num - 1; i >= SS - Nk && i >= 0; --i){ - - RealD diff = 0; - diff = abs( tevecs[i][Nm - 1 - lock_num] ) * resid_nrm; - - std::cout << "residual estimate " << SS-1-i << " " << diff << " of (" << tevals[i] << ")" << std::endl; - - if(diff < converged) { - - if(lock) { - - DenseMatrix Q; Resize(Q,M,M); - bool herm = true; - - Lock(H, Q, tevals[i], converged, small, SS, herm); - - times_real(bq, Q, bq.size()); - bf = Q[M - 1][M - 1]* bf; - lock_num++; - } - converged++; - std::cout << " converged on eval " << converged << " of " << Nk << std::endl; - } else { - break; - } - } -#endif - std::cout << "Got " << converged << " so far " < &evals, - DenseVector > &evecs) { - - DenseVector goodval(this->get); - - EigenSort(evals,evecs); - - int NM = Nm; - - DenseVector< DenseVector > V; Size(V,NM); - DenseVector QZ(NM*NM); - - for(int i = 0; i < NM; i++){ - for(int j = 0; j < NM; j++){ - // evecs[i][j]; - } - } - } - - -/** - There is some matrix Q such that for any vector y - Q.e_1 = y and Q is unitary. -**/ - template - static T orthQ(DenseMatrix &Q, DenseVector y){ - int N = y.size(); //Matrix Size - Fill(Q,0.0); - T tau; - for(int i=0;i 0.0){ - - T gam = conj( (y[j]/tau)/tau0 ); - for(int k=0;k<=j-1;k++){ - Q[k][j]=-gam*y[k]; - } - Q[j][j]=tau0/tau; - } else { - Q[j-1][j]=1.0; - } - tau0 = tau; - } - return tau; - } - -/** - There is some matrix Q such that for any vector y - Q.e_k = y and Q is unitary. -**/ - template< class T> - static T orthU(DenseMatrix &Q, DenseVector y){ - T tau = orthQ(Q,y); - SL(Q); - return tau; - } - - -/** - Wind up with a matrix with the first con rows untouched - -say con = 2 - Q is such that Qdag H Q has {x, x, val, 0, 0, 0, 0, ...} as 1st colum - and the matrix is upper hessenberg - and with f and Q appropriately modidied with Q is the arnoldi factorization - -**/ - -template -static void Lock(DenseMatrix &H, // Hess mtx - DenseMatrix &Q, // Lock Transform - T val, // value to be locked - int con, // number already locked - RealD small, - int dfg, - bool herm) -{ - //ForceTridiagonal(H); - - int M = H.dim; - DenseVector vec; Resize(vec,M-con); - - DenseMatrix AH; Resize(AH,M-con,M-con); - AH = GetSubMtx(H,con, M, con, M); - - DenseMatrix QQ; Resize(QQ,M-con,M-con); - - Unity(Q); Unity(QQ); - - DenseVector evals; Resize(evals,M-con); - DenseMatrix evecs; Resize(evecs,M-con,M-con); - - Wilkinson(AH, evals, evecs, small); - - int k=0; - RealD cold = abs( val - evals[k]); - for(int i=1;icon+2; j--){ - - DenseMatrix U; Resize(U,j-1-con,j-1-con); - DenseVector z; Resize(z,j-1-con); - T nm = norm(z); - for(int k = con+0;k Hb; Resize(Hb,j-1-con,M); - - for(int a = 0;a Qb; Resize(Qb,M,M); - - for(int a = 0;a Hc; Resize(Hc,M,M); - - for(int a = 0;a { FieldMetaData header; IldgReader _IldgReader; _IldgReader.open(config); - _IldgReader.readConfiguration(config,U,header); // format from the header + _IldgReader.readConfiguration(U,header); // format from the header _IldgReader.close(); std::cout << GridLogMessage << "Read ILDG Configuration from " << config diff --git a/tests/solver/Test_dwf_lanczos.cc b/tests/solver/Test_dwf_lanczos.cc index bb978186..48cca378 100644 --- a/tests/solver/Test_dwf_lanczos.cc +++ b/tests/solver/Test_dwf_lanczos.cc @@ -54,7 +54,7 @@ int main (int argc, char ** argv) GridParallelRNG RNG5rb(FrbGrid); RNG5.SeedFixedIntegers(seeds5); LatticeGaugeField Umu(UGrid); - SU3::TepidConfiguration(RNG4, Umu); + SU3::HotConfiguration(RNG4, Umu); std::vector U(4,UGrid); for(int mu=0;mu