From cb9ff20249d90f528ba1b2609f4cbe3e62b1f437 Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Fri, 13 Oct 2017 11:30:50 +0100 Subject: [PATCH 01/45] Approx tests and lanczos improvement --- lib/algorithms/approx/Chebyshev.h | 6 +- .../BlockImplicitlyRestartedLanczos.h | 1399 +++++++++-------- .../FieldBasisVector.h | 5 +- .../iterative/ImplicitlyRestartedLanczos.h | 3 +- lib/log/Log.cc | 10 +- lib/log/Log.h | 13 +- lib/threads/Threads.h | 2 + tests/debug/Test_cheby.cc | 36 +- tests/hmc/Test_remez.cc | 61 +- 9 files changed, 823 insertions(+), 712 deletions(-) diff --git a/lib/algorithms/approx/Chebyshev.h b/lib/algorithms/approx/Chebyshev.h index f8c21a05..5088c51b 100644 --- a/lib/algorithms/approx/Chebyshev.h +++ b/lib/algorithms/approx/Chebyshev.h @@ -83,8 +83,10 @@ namespace Grid { public: void csv(std::ostream &out){ - RealD diff = hi-lo; - for (RealD x=lo-0.2*diff; x<hi+0.2*diff; x+=(hi-lo)/1000) { + RealD diff = hi-lo; + RealD delta = (hi-lo)*1.0e-9; + for (RealD x=lo; x<hi; x+=delta) { + delta*=1.1; RealD f = approx(x); out<< x<<" "<<f<<std::endl; } diff --git a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h index 55a85552..90d45193 100644 --- a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h @@ -32,8 +32,7 @@ Author: Christoph Lehner <clehner@bnl.gov> #define GRID_BIRL_H #include <string.h> //memset - -#include <zlib.h> +//#include <zlib.h> #include <sys/stat.h> #include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockedGrid.h> @@ -42,420 +41,185 @@ Author: Christoph Lehner <clehner@bnl.gov> namespace Grid { +template<class Field> +void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k) +{ + for(int j=0; j<k; ++j){ + auto ip = innerProduct(basis[j],w); + w = w - ip*basis[j]; + } +} + +template<class Field> +void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm) +{ + typedef typename Field::vector_object vobj; + GridBase* grid = basis[0]._grid; + + parallel_region + { + std::vector < vobj > B(Nm); // Thread private + + parallel_for_internal(int ss=0;ss < grid->oSites();ss++){ + for(int j=j0; j<j1; ++j) B[j]=0.; + + for(int j=j0; j<j1; ++j){ + for(int k=k0; k<k1; ++k){ + B[j] +=Qt(j,k) * basis[k]._odata[ss]; + } + } + for(int j=j0; j<j1; ++j){ + basis[j]._odata[ss] = B[j]; + } + } + } +} + +template<class Field> +void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, std::vector<int>& idx) +{ + int vlen = idx.size(); + + assert(vlen>=1); + assert(vlen<=sort_vals.size()); + assert(vlen<=_v.size()); + + for (size_t i=0;i<vlen;i++) { + + if (idx[i] != i) { + + assert(idx[i] > i); + ////////////////////////////////////// + // idx[i] is a table of desired sources giving a permutation. + // + // Swap v[i] with v[idx[i]]. 
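+	      // (worked example: idx = {2,0,1} -- pass i=0 swaps v[0]<->v[2] and
+	      //  re-points the slot that wanted old v[0] at its new home; pass i=1
+	      //  swaps v[1]<->v[2], leaving {old v[2], old v[0], old v[1]} as requested)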
+ // + // Find j>i for which _vnew[j] = _vold[i], + // track the move idx[j] => idx[i] + // track the move idx[i] => i + ////////////////////////////////////// + size_t j; + for (j=i;j<idx.size();j++) + if (idx[j]==i) + break; + + assert(j!=idx.size()); + assert(idx[j]==i); + + std::swap(_v[i]._odata,_v[idx[i]]._odata); // should use vector move constructor, no data copy + std::swap(sort_vals[i],sort_vals[idx[i]]); + + idx[j] = idx[i]; + idx[i] = i; + } + } +} + +std::vector<int> basisSortGetIndex(std::vector<RealD>& sort_vals) +{ + std::vector<int> idx(sort_vals.size()); + std::iota(idx.begin(), idx.end(), 0); + + // sort indexes based on comparing values in v + std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) { + return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]); + }); + return idx; +} + +template<class Field> +void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, bool reverse) +{ + std::vector<int> idx = basisSortGetIndex(sort_vals); + if (reverse) + std::reverse(idx.begin(), idx.end()); + + basisReorderInPlace(_v,sort_vals,idx); +} + +// PAB: faster to compute the inner products first then fuse loops. +// If performance critical can improve. +template<class Field> +void basisDeflate(const std::vector<Field> &_v,const std::vector<RealD>& eval,const Field& src_orig,Field& result) { + result = zero; + assert(_v.size()==eval.size()); + int N = (int)_v.size(); + for (int i=0;i<N;i++) { + Field& tmp = _v[i]; + axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result); + } +} + + /* enum IRLdiagonalisation { + IRLdiagonaliseWithDSTEGR, + IRLdiagonaliseWithQR, + IRLdiagonaliseWithEigen + };*/ + ///////////////////////////////////////////////////////////// // Implicitly restarted lanczos ///////////////////////////////////////////////////////////// - template<class Field> - class BlockImplicitlyRestartedLanczos { - - const RealD small = 1.0e-16; +template<class Field> +class BlockImplicitlyRestartedLanczos { + private: + const RealD small = 1.0e-8; + int MaxIter; + int MinRestart; // Minimum number of restarts; only check for convergence after + int Nstop; // Number of evecs checked for convergence + int Nk; // Number of converged sought + // int Np; // Np -- Number of spare vecs in krylov space // == Nm - Nk + int Nm; // Nm -- total number of vectors + IRLdiagonalisation diagonalisation; + int orth_period; + + RealD OrthoTime; + RealD eresid, betastp; + //////////////////////////////// + // Embedded objects + //////////////////////////////// + SortEigen<Field> _sort; + LinearFunction<Field> &_HermOp; + LinearFunction<Field> &_HermOpTest; + ///////////////////////// + // Constructor + ///////////////////////// public: - int lock; - int get; - int Niter; - int converged; + BlockImplicitlyRestartedLanczos(LinearFunction<Field> & HermOp, + LinearFunction<Field> & HermOpTest, + int _Nstop, // sought vecs + int _Nk, // sought vecs + int _Nm, // spare vecs + RealD _eresid, // resid in lmdue deficit + RealD _betastp, // if beta(k) < betastp: converged + int _MaxIter, // Max iterations + int _MinRestart, int _orth_period = 1, + IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : + _HermOp(HermOp), _HermOpTest(HermOpTest), + Nstop(_Nstop) , Nk(_Nk), Nm(_Nm), + eresid(_eresid), betastp(_betastp), + MaxIter(_MaxIter) , MinRestart(_MinRestart), + orth_period(_orth_period), diagonalisation(_diagonalisation) { }; - int Nminres; // Minimum number of restarts; only check for convergence after - int Nstop; // Number of evecs checked for 
convergence - int Nk; // Number of converged sought - int Np; // Np -- Number of spare vecs in kryloc space - int Nm; // Nm -- total number of vectors + //////////////////////////////// + // Helpers + //////////////////////////////// + template<typename T> static RealD normalise(T& v) + { + RealD nn = norm2(v); + nn = sqrt(nn); + v = v * (1.0/nn); + return nn; + } - int orth_period; - - RealD OrthoTime; - - RealD eresid, betastp; - SortEigen<Field> _sort; - LinearFunction<Field> &_HermOp; - LinearFunction<Field> &_HermOpTest; - ///////////////////////// - // Constructor - ///////////////////////// - - BlockImplicitlyRestartedLanczos( - LinearFunction<Field> & HermOp, - LinearFunction<Field> & HermOpTest, - int _Nstop, // sought vecs - int _Nk, // sought vecs - int _Nm, // spare vecs - RealD _eresid, // resid in lmdue deficit - RealD _betastp, // if beta(k) < betastp: converged - int _Niter, // Max iterations - int _Nminres, int _orth_period = 1) : - _HermOp(HermOp), - _HermOpTest(HermOpTest), - Nstop(_Nstop), - Nk(_Nk), - Nm(_Nm), - eresid(_eresid), - betastp(_betastp), - Niter(_Niter), - Nminres(_Nminres), - orth_period(_orth_period) - { - Np = Nm-Nk; assert(Np>0); - }; - - BlockImplicitlyRestartedLanczos( - LinearFunction<Field> & HermOp, - LinearFunction<Field> & HermOpTest, - int _Nk, // sought vecs - int _Nm, // spare vecs - RealD _eresid, // resid in lmdue deficit - RealD _betastp, // if beta(k) < betastp: converged - int _Niter, // Max iterations - int _Nminres, - int _orth_period = 1) : - _HermOp(HermOp), - _HermOpTest(HermOpTest), - Nstop(_Nk), - Nk(_Nk), - Nm(_Nm), - eresid(_eresid), - betastp(_betastp), - Niter(_Niter), - Nminres(_Nminres), - orth_period(_orth_period) - { - Np = Nm-Nk; assert(Np>0); - }; - - -/* Saad PP. 195 -1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0 -2. For k = 1,2,...,m Do: -3. wk:=Avk−βkv_{k−1} -4. αk:=(wk,vk) // -5. wk:=wk−αkvk // wk orthog vk -6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop -7. vk+1 := wk/βk+1 -8. EndDo - */ - void step(std::vector<RealD>& lmd, - std::vector<RealD>& lme, - BasisFieldVector<Field>& evec, - Field& w,int Nm,int k) - { - assert( k< Nm ); - - GridStopWatch gsw_op,gsw_o; - - Field& evec_k = evec[k]; - - gsw_op.Start(); - _HermOp(evec_k,w); - gsw_op.Stop(); - - if(k>0){ - w -= lme[k-1] * evec[k-1]; - } - - ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk) - RealD alph = real(zalph); - - w = w - alph * evec_k;// 5. wk:=wk−αkvk - - RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop - // 7. 
vk+1 := wk/βk+1 - - std::cout<<GridLogMessage << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl; - const RealD tiny = 1.0e-20; - if ( beta < tiny ) { - std::cout<<GridLogMessage << " beta is tiny "<<beta<<std::endl; - } - lmd[k] = alph; - lme[k] = beta; - - gsw_o.Start(); - if (k>0 && k % orth_period == 0) { - orthogonalize(w,evec,k); // orthonormalise - } - gsw_o.Stop(); - - if(k < Nm-1) { - evec[k+1] = w; - } - - std::cout << GridLogMessage << "Timing: operator=" << gsw_op.Elapsed() << - " orth=" << gsw_o.Elapsed() << std::endl; - - } - - void qr_decomp(std::vector<RealD>& lmd, - std::vector<RealD>& lme, - int Nk, - int Nm, - std::vector<RealD>& Qt, - RealD Dsh, - int kmin, - int kmax) - { - int k = kmin-1; - RealD x; - - RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]); - RealD c = ( lmd[k] -Dsh) *Fden; - RealD s = -lme[k] *Fden; - - RealD tmpa1 = lmd[k]; - RealD tmpa2 = lmd[k+1]; - RealD tmpb = lme[k]; - - lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb; - lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb; - lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb; - x =-s*lme[k+1]; - lme[k+1] = c*lme[k+1]; - - for(int i=0; i<Nk; ++i){ - RealD Qtmp1 = Qt[i+Nm*k ]; - RealD Qtmp2 = Qt[i+Nm*(k+1)]; - Qt[i+Nm*k ] = c*Qtmp1 - s*Qtmp2; - Qt[i+Nm*(k+1)] = s*Qtmp1 + c*Qtmp2; - } - - // Givens transformations - for(int k = kmin; k < kmax-1; ++k){ - - RealD Fden = 1.0/hypot(x,lme[k-1]); - RealD c = lme[k-1]*Fden; - RealD s = - x*Fden; - - RealD tmpa1 = lmd[k]; - RealD tmpa2 = lmd[k+1]; - RealD tmpb = lme[k]; - - lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb; - lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb; - lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb; - lme[k-1] = c*lme[k-1] -s*x; - - if(k != kmax-2){ - x = -s*lme[k+1]; - lme[k+1] = c*lme[k+1]; - } - - for(int i=0; i<Nk; ++i){ - RealD Qtmp1 = Qt[i+Nm*k ]; - RealD Qtmp2 = Qt[i+Nm*(k+1)]; - Qt[i+Nm*k ] = c*Qtmp1 -s*Qtmp2; - Qt[i+Nm*(k+1)] = s*Qtmp1 +c*Qtmp2; - } - } - } - -#ifdef USE_LAPACK_IRL -#define LAPACK_INT int -//long long - void diagonalize_lapack(std::vector<RealD>& lmd, - std::vector<RealD>& lme, - int N1, - int N2, - std::vector<RealD>& Qt, - GridBase *grid){ - - std::cout << GridLogMessage << "diagonalize_lapack start\n"; - GridStopWatch gsw; - - const int size = Nm; - // tevals.resize(size); - // tevecs.resize(size); - LAPACK_INT NN = N1; - std::vector<double> evals_tmp(NN); - std::vector<double> evec_tmp(NN*NN); - memset(&evec_tmp[0],0,sizeof(double)*NN*NN); - // double AA[NN][NN]; - std::vector<double> DD(NN); - std::vector<double> EE(NN); - for (int i = 0; i< NN; i++) - for (int j = i - 1; j <= i + 1; j++) - if ( j < NN && j >= 0 ) { - if (i==j) DD[i] = lmd[i]; - if (i==j) evals_tmp[i] = lmd[i]; - if (j==(i-1)) EE[j] = lme[j]; - } - LAPACK_INT evals_found; - LAPACK_INT lwork = ( (18*NN) > (1+4*NN+NN*NN)? 
(18*NN):(1+4*NN+NN*NN)) ; - LAPACK_INT liwork = 3+NN*10 ; - std::vector<LAPACK_INT> iwork(liwork); - std::vector<double> work(lwork); - std::vector<LAPACK_INT> isuppz(2*NN); - char jobz = 'V'; // calculate evals & evecs - char range = 'I'; // calculate all evals - // char range = 'A'; // calculate all evals - char uplo = 'U'; // refer to upper half of original matrix - char compz = 'I'; // Compute eigenvectors of tridiagonal matrix - std::vector<int> ifail(NN); - LAPACK_INT info; - // int total = QMP_get_number_of_nodes(); - // int node = QMP_get_node_number(); - // GridBase *grid = evec[0]._grid; - int total = grid->_Nprocessors; - int node = grid->_processor; - int interval = (NN/total)+1; - double vl = 0.0, vu = 0.0; - LAPACK_INT il = interval*node+1 , iu = interval*(node+1); - if (iu > NN) iu=NN; - double tol = 0.0; - if (1) { - memset(&evals_tmp[0],0,sizeof(double)*NN); - if ( il <= NN){ - std::cout << GridLogMessage << "dstegr started" << std::endl; - gsw.Start(); - dstegr(&jobz, &range, &NN, - (double*)&DD[0], (double*)&EE[0], - &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' - &tol, // tolerance - &evals_found, &evals_tmp[0], (double*)&evec_tmp[0], &NN, - &isuppz[0], - &work[0], &lwork, &iwork[0], &liwork, - &info); - gsw.Stop(); - std::cout << GridLogMessage << "dstegr completed in " << gsw.Elapsed() << std::endl; - for (int i = iu-1; i>= il-1; i--){ - evals_tmp[i] = evals_tmp[i - (il-1)]; - if (il>1) evals_tmp[i-(il-1)]=0.; - for (int j = 0; j< NN; j++){ - evec_tmp[i*NN + j] = evec_tmp[(i - (il-1)) * NN + j]; - if (il>1) evec_tmp[(i-(il-1)) * NN + j]=0.; - } - } - } - { - // QMP_sum_double_array(evals_tmp,NN); - // QMP_sum_double_array((double *)evec_tmp,NN*NN); - grid->GlobalSumVector(&evals_tmp[0],NN); - grid->GlobalSumVector(&evec_tmp[0],NN*NN); - } - } - // cheating a bit. It is better to sort instead of just reversing it, but the document of the routine says evals are sorted in increasing order. qr gives evals in decreasing order. 
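-	// (hence the copy below reverses the order: LAPACK row i lands in
-	//  Qt row NN-1-i, and evals_tmp[i] in lmd[NN-1-i])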
- for(int i=0;i<NN;i++){ - for(int j=0;j<NN;j++) - Qt[(NN-1-i)*N2+j]=evec_tmp[i*NN + j]; - lmd [NN-1-i]=evals_tmp[i]; - } - - std::cout << GridLogMessage << "diagonalize_lapack complete\n"; - } -#undef LAPACK_INT -#endif - - - void diagonalize(std::vector<RealD>& lmd, - std::vector<RealD>& lme, - int N2, - int N1, - std::vector<RealD>& Qt, - GridBase *grid) - { - -#ifdef USE_LAPACK_IRL - const int check_lapack=0; // just use lapack if 0, check against lapack if 1 - - if(!check_lapack) - return diagonalize_lapack(lmd,lme,N2,N1,Qt,grid); - - std::vector <RealD> lmd2(N1); - std::vector <RealD> lme2(N1); - std::vector<RealD> Qt2(N1*N1); - for(int k=0; k<N1; ++k){ - lmd2[k] = lmd[k]; - lme2[k] = lme[k]; - } - for(int k=0; k<N1*N1; ++k) - Qt2[k] = Qt[k]; - -// diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid); -#endif - - int Niter = 10000*N1; - int kmin = 1; - int kmax = N2; - // (this should be more sophisticated) - - for(int iter=0; ; ++iter){ - if ( (iter+1)%(100*N1)==0) - std::cout<<GridLogMessage << "[QL method] Not converged - iteration "<<iter+1<<"\n"; - - // determination of 2x2 leading submatrix - RealD dsub = lmd[kmax-1]-lmd[kmax-2]; - RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]); - RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub))); - // (Dsh: shift) - - // transformation - qr_decomp(lmd,lme,N2,N1,Qt,Dsh,kmin,kmax); - - // Convergence criterion (redef of kmin and kamx) - for(int j=kmax-1; j>= kmin; --j){ - RealD dds = fabs(lmd[j-1])+fabs(lmd[j]); - if(fabs(lme[j-1])+dds > dds){ - kmax = j+1; - goto continued; - } - } - Niter = iter; -#ifdef USE_LAPACK_IRL - if(check_lapack){ - const double SMALL=1e-8; - diagonalize_lapack(lmd2,lme2,N2,N1,Qt2,grid); - std::vector <RealD> lmd3(N2); - for(int k=0; k<N2; ++k) lmd3[k]=lmd[k]; - _sort.push(lmd3,N2); - _sort.push(lmd2,N2); - for(int k=0; k<N2; ++k){ - if (fabs(lmd2[k] - lmd3[k]) >SMALL) std::cout<<GridLogMessage <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <<std::endl; -// if (fabs(lme2[k] - lme[k]) >SMALL) std::cout<<GridLogMessage <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <<std::endl; - } - for(int k=0; k<N1*N1; ++k){ -// if (fabs(Qt2[k] - Qt[k]) >SMALL) std::cout<<GridLogMessage <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] <<std::endl; - } - } -#endif - return; - - continued: - for(int j=0; j<kmax-1; ++j){ - RealD dds = fabs(lmd[j])+fabs(lmd[j+1]); - if(fabs(lme[j])+dds > dds){ - kmin = j+1; - break; - } - } - } - std::cout<<GridLogMessage << "[QL method] Error - Too many iteration: "<<Niter<<"\n"; - abort(); - } - -#if 1 - template<typename T> - static RealD normalise(T& v) - { - RealD nn = norm2(v); - nn = sqrt(nn); - v = v * (1.0/nn); - return nn; - } - - void orthogonalize(Field& w, - BasisFieldVector<Field>& evec, - int k) - { - double t0=-usecond()/1e6; - - evec.orthogonalize(w,k); - - normalise(w); - t0+=usecond()/1e6; - OrthoTime +=t0; - } - - void setUnit_Qt(int Nm, std::vector<RealD> &Qt) { - for(int i=0; i<Qt.size(); ++i) Qt[i] = 0.0; - for(int k=0; k<Nm; ++k) Qt[k + k*Nm] = 1.0; - } + void orthogonalize(Field& w, BasisFieldVector<Field>& evec,int k) + { + OrthoTime-=usecond()/1e6; + //evec.orthogonalize(w,k); + basisOrthogonalize(evec._v,w,k); + normalise(w); + OrthoTime+=usecond()/1e6; + } /* Rudy Arthur's thesis pp.137 ------------------------ @@ -474,280 +238,555 @@ repeat →AVK =VKHK +fKe†K † Extend to an M = K + P step factorization AVM = VMHM + fMeM until convergence */ - - void calc(std::vector<RealD>& eval, - BasisFieldVector<Field>& evec, - const Field& 
src, - int& Nconv, - bool reverse, - int SkipTest) - { - - GridBase *grid = evec._v[0]._grid;//evec.get(0 + evec_offset)._grid; - assert(grid == src._grid); - - std::cout<<GridLogMessage << " -- Nk = " << Nk << " Np = "<< Np << std::endl; - std::cout<<GridLogMessage << " -- Nm = " << Nm << std::endl; - std::cout<<GridLogMessage << " -- size of eval = " << eval.size() << std::endl; - std::cout<<GridLogMessage << " -- size of evec = " << evec.size() << std::endl; + void calc(std::vector<RealD>& eval, BasisFieldVector<Field>& evec, const Field& src, int& Nconv, bool reverse, int SkipTest) + { + GridBase *grid = src._grid; + assert(grid == evec[0]._grid); + + GridLogIRL.TimingMode(1); + std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; + std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl; + std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; + std::cout << GridLogIRL <<" -- seek Nk = " << Nk <<" vectors"<< std::endl; + std::cout << GridLogIRL <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl; + std::cout << GridLogIRL <<" -- total Nm = " << Nm <<" vectors"<< std::endl; + std::cout << GridLogIRL <<" -- size of eval = " << eval.size() << std::endl; + std::cout << GridLogIRL <<" -- size of evec = " << evec.size() << std::endl; + if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) { + std::cout << GridLogIRL << "Diagonalisation is DSTEGR "<<std::endl; + } else if ( diagonalisation == IRLdiagonaliseWithQR ) { + std::cout << GridLogIRL << "Diagonalisation is QR "<<std::endl; + } else if ( diagonalisation == IRLdiagonaliseWithEigen ) { + std::cout << GridLogIRL << "Diagonalisation is Eigen "<<std::endl; + } + std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; - assert(Nm <= evec.size() && Nm <= eval.size()); - - // quickly get an idea of the largest eigenvalue to more properly normalize the residuum - RealD evalMaxApprox = 0.0; - { - auto src_n = src; - auto tmp = src; - const int _MAX_ITER_IRL_MEVAPP_ = 50; - for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) { - _HermOpTest(src_n,tmp); - RealD vnum = real(innerProduct(src_n,tmp)); // HermOp. 
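-	  // (power-iteration estimate: na is the Rayleigh quotient (x,Ax)/(x,x);
-	  //  the loop exits early once successive estimates agree to within 5%)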
- RealD vden = norm2(src_n); - RealD na = vnum/vden; - if (fabs(evalMaxApprox/na - 1.0) < 0.05) - i=_MAX_ITER_IRL_MEVAPP_; - evalMaxApprox = na; - std::cout << GridLogMessage << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl; - src_n = tmp; - } - } - - std::vector<RealD> lme(Nm); - std::vector<RealD> lme2(Nm); - std::vector<RealD> eval2(Nm); - std::vector<RealD> eval2_copy(Nm); - std::vector<RealD> Qt(Nm*Nm); - - - Field f(grid); - Field v(grid); - - int k1 = 1; - int k2 = Nk; - - Nconv = 0; - - RealD beta_k; - - // Set initial vector - evec[0] = src; - normalise(evec[0]); - std:: cout<<GridLogMessage <<"norm2(evec[0])= " << norm2(evec[0])<<std::endl; - - // Initial Nk steps - OrthoTime=0.; - double t0=usecond()/1e6; - for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k); - double t1=usecond()/1e6; - std::cout<<GridLogMessage <<"IRL::Initial steps: "<<t1-t0<< "seconds"<<std::endl; t0=t1; - std::cout<<GridLogMessage <<"IRL::Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl; - t1=usecond()/1e6; - - // Restarting loop begins - for(int iter = 0; iter<Niter; ++iter){ - - std::cout<<GridLogMessage<<"\n Restart iteration = "<< iter << std::endl; - - // - // Rudy does a sort first which looks very different. Getting fed up with sorting out the algo defs. - // We loop over - // - OrthoTime=0.; - for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k); - t1=usecond()/1e6; - std::cout<<GridLogMessage <<"IRL:: "<<Np <<" steps: "<<t1-t0<< "seconds"<<std::endl; t0=t1; - std::cout<<GridLogMessage <<"IRL::Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl; - f *= lme[Nm-1]; - - t1=usecond()/1e6; - - - // getting eigenvalues - for(int k=0; k<Nm; ++k){ - eval2[k] = eval[k+k1-1]; - lme2[k] = lme[k+k1-1]; - } - setUnit_Qt(Nm,Qt); - diagonalize(eval2,lme2,Nm,Nm,Qt,grid); - t1=usecond()/1e6; - std::cout<<GridLogMessage <<"IRL:: diagonalize: "<<t1-t0<< "seconds"<<std::endl; t0=t1; - - // sorting - eval2_copy = eval2; - - _sort.push(eval2,Nm); - t1=usecond()/1e6; - std::cout<<GridLogMessage <<"IRL:: eval sorting: "<<t1-t0<< "seconds"<<std::endl; t0=t1; - - // Implicitly shifted QR transformations - setUnit_Qt(Nm,Qt); - for(int ip=0; ip<k2; ++ip){ - std::cout<<GridLogMessage << "eval "<< ip << " "<< eval2[ip] << std::endl; - } - - for(int ip=k2; ip<Nm; ++ip){ - std::cout<<GridLogMessage << "qr_decomp "<< ip << " "<< eval2[ip] << std::endl; - qr_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm); - - } - t1=usecond()/1e6; - std::cout<<GridLogMessage <<"IRL::qr_decomp: "<<t1-t0<< "seconds"<<std::endl; t0=t1; - assert(k2<Nm); - - - assert(k2<Nm); - assert(k1>0); - evec.rotate(Qt,k1-1,k2+1,0,Nm,Nm); - - t1=usecond()/1e6; - std::cout<<GridLogMessage <<"IRL::QR rotation: "<<t1-t0<< "seconds"<<std::endl; t0=t1; - fflush(stdout); - - // Compressed vector f and beta(k2) - f *= Qt[Nm-1+Nm*(k2-1)]; - f += lme[k2-1] * evec[k2]; - beta_k = norm2(f); - beta_k = sqrt(beta_k); - std::cout<<GridLogMessage<<" beta(k) = "<<beta_k<<std::endl; - - RealD betar = 1.0/beta_k; - evec[k2] = betar * f; - lme[k2-1] = beta_k; - - // Convergence test - for(int k=0; k<Nm; ++k){ - eval2[k] = eval[k]; - lme2[k] = lme[k]; - - std::cout<<GridLogMessage << "eval2[" << k << "] = " << eval2[k] << std::endl; - } - setUnit_Qt(Nm,Qt); - diagonalize(eval2,lme2,Nk,Nm,Qt,grid); - t1=usecond()/1e6; - std::cout<<GridLogMessage <<"IRL::diagonalize: "<<t1-t0<< "seconds"<<std::endl; t0=t1; - - - Nconv = 0; - - if (iter >= Nminres) { - std::cout << GridLogMessage << "Rotation to test convergence " << std::endl; - - Field 
ev0_orig(grid); - ev0_orig = evec[0]; - - evec.rotate(Qt,0,Nk,0,Nk,Nm); - - { - std::cout << GridLogMessage << "Test convergence" << std::endl; - Field B(grid); - - for(int j = 0; j<Nk; j+=SkipTest){ - B=evec[j]; - //std::cout << "Checkerboard: " << evec[j].checkerboard << std::endl; - B.checkerboard = evec[0].checkerboard; - - _HermOpTest(B,v); - - RealD vnum = real(innerProduct(B,v)); // HermOp. - RealD vden = norm2(B); - RealD vv0 = norm2(v); - eval2[j] = vnum/vden; - v -= eval2[j]*B; - RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0); - std::cout.precision(13); - std::cout<<GridLogMessage << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<j<<"] " - <<"eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[j] << " (" << eval2_copy[j] << ")" - <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv - <<" "<< vnum/(sqrt(vden)*sqrt(vv0)) - << " norm(B["<<j<<"])="<< vden <<std::endl; - - // change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged - if((vv<eresid*eresid) && (j == Nconv) ){ - Nconv+=SkipTest; - } - } - - // test if we converged, if so, terminate - t1=usecond()/1e6; - std::cout<<GridLogMessage <<"IRL::convergence testing: "<<t1-t0<< "seconds"<<std::endl; t0=t1; - - std::cout<<GridLogMessage<<" #modes converged: "<<Nconv<<std::endl; - - if( Nconv>=Nstop || beta_k < betastp){ - goto converged; - } - - std::cout << GridLogMessage << "Rotate back" << std::endl; - //B[j] +=Qt[k+_Nm*j] * _v[k]._odata[ss]; - { - Eigen::MatrixXd qm = Eigen::MatrixXd::Zero(Nk,Nk); - for (int k=0;k<Nk;k++) - for (int j=0;j<Nk;j++) - qm(j,k) = Qt[k+Nm*j]; - GridStopWatch timeInv; - timeInv.Start(); - Eigen::MatrixXd qmI = qm.inverse(); - timeInv.Stop(); - std::vector<RealD> QtI(Nm*Nm); - for (int k=0;k<Nk;k++) - for (int j=0;j<Nk;j++) - QtI[k+Nm*j] = qmI(j,k); - - RealD res_check_rotate_inverse = (qm*qmI - Eigen::MatrixXd::Identity(Nk,Nk)).norm(); // sqrt( |X|^2 ) - assert(res_check_rotate_inverse < 1e-7); - evec.rotate(QtI,0,Nk,0,Nk,Nm); - - axpy(ev0_orig,-1.0,evec[0],ev0_orig); - std::cout << GridLogMessage << "Rotation done (in " << timeInv.Elapsed() << " = " << timeInv.useconds() << " us" << - ", error = " << res_check_rotate_inverse << - "); | evec[0] - evec[0]_orig | = " << ::sqrt(norm2(ev0_orig)) << std::endl; - } - } - } else { - std::cout << GridLogMessage << "iter < Nminres: do not yet test for convergence\n"; - } // end of iter loop - } - - std::cout<<GridLogMessage<<"\n NOT converged.\n"; - abort(); - - converged: - - if (SkipTest == 1) { - eval = eval2; - } else { - - // test quickly - for (int j=0;j<Nstop;j+=SkipTest) { - std::cout<<GridLogMessage << "Eigenvalue[" << j << "] = " << eval2[j] << " (" << eval2_copy[j] << ")" << std::endl; - } - - eval2_copy.resize(eval2.size()); - eval = eval2_copy; - } - - evec.sortInPlace(eval,reverse); - - { - - // test - for (int j=0;j<Nstop;j++) { - std::cout<<GridLogMessage << " |e[" << j << "]|^2 = " << norm2(evec[j]) << std::endl; - } - } - - //_sort.push(eval,evec,Nconv); - //evec.sort(eval,Nconv); - - std::cout<<GridLogMessage << "\n Converged\n Summary :\n"; - std::cout<<GridLogMessage << " -- Iterations = "<< Nconv << "\n"; - std::cout<<GridLogMessage << " -- beta(k) = "<< beta_k << "\n"; - std::cout<<GridLogMessage << " -- Nconv = "<< Nconv << "\n"; + assert(Nm <= evec.size() && Nm <= eval.size()); + + // quickly get an idea of the largest eigenvalue to more properly normalize the residuum + RealD 
evalMaxApprox = 0.0; + { + auto src_n = src; + auto tmp = src; + const int _MAX_ITER_IRL_MEVAPP_ = 50; + for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) { + _HermOpTest(src_n,tmp); + RealD vnum = real(innerProduct(src_n,tmp)); // HermOp. + RealD vden = norm2(src_n); + RealD na = vnum/vden; + if (fabs(evalMaxApprox/na - 1.0) < 0.05) + i=_MAX_ITER_IRL_MEVAPP_; + evalMaxApprox = na; + std::cout << GridLogIRL << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl; + src_n = tmp; } + } + + std::vector<RealD> lme(Nm); + std::vector<RealD> lme2(Nm); + std::vector<RealD> eval2(Nm); + std::vector<RealD> eval2_copy(Nm); + Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm); + + Field f(grid); + Field v(grid); + int k1 = 1; + int k2 = Nk; + RealD beta_k; + + Nconv = 0; + + // Set initial vector + evec[0] = src; + normalise(evec[0]); + + // Initial Nk steps + OrthoTime=0.; + for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k); + std::cout<<GridLogIRL <<"Initial "<< Nk <<"steps done "<<std::endl; + std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl; + + ////////////////////////////////// + // Restarting loop begins + ////////////////////////////////// + int iter; + for(iter = 0; iter<MaxIter; ++iter){ + + OrthoTime=0.; + + std::cout<< GridLogMessage <<" **********************"<< std::endl; + std::cout<< GridLogMessage <<" Restart iteration = "<< iter << std::endl; + std::cout<< GridLogMessage <<" **********************"<< std::endl; + + std::cout<<GridLogIRL <<" running "<<Nm-Nk <<" steps: "<<std::endl; + for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k); + f *= lme[Nm-1]; + + std::cout<<GridLogIRL <<" "<<Nm-Nk <<" steps done "<<std::endl; + std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl; + + ////////////////////////////////// + // getting eigenvalues + ////////////////////////////////// + for(int k=0; k<Nm; ++k){ + eval2[k] = eval[k+k1-1]; + lme2[k] = lme[k+k1-1]; + } + Qt = Eigen::MatrixXd::Identity(Nm,Nm); + diagonalize(eval2,lme2,Nm,Nm,Qt,grid); + std::cout<<GridLogIRL <<" diagonalized "<<std::endl; + + ////////////////////////////////// + // sorting + ////////////////////////////////// + eval2_copy = eval2; + + _sort.push(eval2,Nm); + + std::cout<<GridLogIRL <<" evals sorted "<<std::endl; + for(int ip=0; ip<k2; ++ip) std::cout<<GridLogIRL << "eval "<< ip << " "<< eval2[ip] << std::endl; + + ////////////////////////////////// + // Implicitly shifted QR transformations + ////////////////////////////////// + Qt = Eigen::MatrixXd::Identity(Nm,Nm); + std::cout<<GridLogIRL << "QR decompose " << std::endl; + for(int ip=k2; ip<Nm; ++ip){ + QR_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm); + } + std::cout<<GridLogIRL <<"QR decompose done "<<std::endl; + + assert(k2<Nm); + assert(k2<Nm); + assert(k1>0); + // evec.rotate(Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis + basisRotate(evec._v,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis + + std::cout<<GridLogIRL <<"QR rotation done "<<std::endl; + + //////////////////////////////////////////////////// + // Compressed vector f and beta(k2) + //////////////////////////////////////////////////// + f *= Qt(k2-1,Nm-1); + f += lme[k2-1] * evec[k2]; + beta_k = norm2(f); + beta_k = sqrt(beta_k); + std::cout<<GridLogIRL<<" beta(k) = "<<beta_k<<std::endl; + + RealD betar = 1.0/beta_k; + evec[k2] = betar * f; + lme[k2-1] = beta_k; + + //////////////////////////////////////////////////// + // Convergence test + //////////////////////////////////////////////////// + for(int k=0; 
k<Nm; ++k){ + eval2[k] = eval[k]; + lme2[k] = lme[k]; + // std::cout<<GridLogIRL << "eval2[" << k << "] = " << eval2[k] << std::endl; + } + Qt = Eigen::MatrixXd::Identity(Nm,Nm); + diagonalize(eval2,lme2,Nk,Nm,Qt,grid); + std::cout<<GridLogIRL <<" Diagonalized "<<std::endl; + + Nconv = 0; + if (iter >= MinRestart) { + std::cout << GridLogIRL << "Rotation to test convergence " << std::endl; + + Field ev0_orig(grid); + ev0_orig = evec[0]; + + // evec.rotate(Qt,0,Nk,0,Nk,Nm); + basisRotate(evec._v,Qt,0,Nk,0,Nk,Nm); + + { + std::cout << GridLogIRL << "Test convergence" << std::endl; + Field B(grid); + + for(int j = 0; j<Nk; j+=SkipTest){ + B=evec[j]; + + //std::cout << "Checkerboard: " << evec[j].checkerboard << std::endl; + B.checkerboard = evec[0].checkerboard; + + _HermOpTest(B,v); + + RealD vnum = real(innerProduct(B,v)); // HermOp. + RealD vden = norm2(B); + RealD vv0 = norm2(v); + eval2[j] = vnum/vden; + v -= eval2[j]*B; + RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0); + std::cout.precision(13); + std::cout<<GridLogIRL << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<j<<"] " + <<"eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[j] << " (" << eval2_copy[j] << ")" + <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv + <<" "<< vnum/(sqrt(vden)*sqrt(vv0)) + <<std::endl; + + // change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged + if((vv<eresid*eresid) && (j == Nconv) ){ + Nconv+=SkipTest; + } + } + + // test if we converged, if so, terminate + std::cout<<GridLogIRL<<" #modes converged: "<<Nconv<<std::endl; + if( Nconv>=Nstop || beta_k < betastp){ + goto converged; + } + + std::cout << GridLogIRL << "Convergence testing: Rotating back" << std::endl; + //B[j] +=Qt[k+_Nm*j] * _v[k]._odata[ss]; + { + Eigen::MatrixXd qm = Eigen::MatrixXd::Zero(Nk,Nk); // Restrict Qt to Nk x Nk + for (int k=0;k<Nk;k++) + for (int j=0;j<Nk;j++) + qm(j,k) = Qt(j,k); + + Eigen::MatrixXd qmI = qm.inverse(); + std::cout << GridLogIRL << "Inverted ("<<Nk<<"x"<<Nk<<") matrix " << std::endl; + + + RealD res_check_rotate_inverse = (qm*qmI - Eigen::MatrixXd::Identity(Nk,Nk)).norm(); // sqrt( |X|^2 ) + assert(res_check_rotate_inverse < 1e-7); + //evec.rotate(qmI,0,Nk,0,Nk,Nm); + basisRotate(evec._v,qmI,0,Nk,0,Nk,Nm); + + axpy(ev0_orig,-1.0,evec[0],ev0_orig); + std::cout << GridLogIRL << "Rotation done ; error = " << res_check_rotate_inverse << ");"<<std::endl; + std::cout << GridLogIRL << " | evec[0] - evec[0]_orig | = " << ::sqrt(norm2(ev0_orig)) << std::endl; + } + } + } else { + std::cout << GridLogIRL << "iter < MinRestart: do not yet test for convergence\n"; + } // end of iter loop + } + + std::cout<<GridLogError<<"\n NOT converged.\n"; + abort(); + + converged: + + if (SkipTest == 1) { + eval = eval2; + } else { + // test quickly + for (int j=0;j<Nstop;j+=SkipTest) { + std::cout<<GridLogIRL << "Eigenvalue[" << j << "] = " << eval2[j] << " (" << eval2_copy[j] << ")" << std::endl; + } + eval2_copy.resize(eval2.size()); + eval = eval2_copy; + } + // evec.sortInPlace(eval,reverse); + basisSortInPlace(evec._v,eval,reverse); + // test // PAB -- what does this test ? 
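+	// (as written it prints |evec[j]|^2 for each accepted vector, so unit
+	//  normalisation after the final rotate/sort can be eyeballed; a fuller
+	//  check might also verify innerProduct(evec[i],evec[j]) ~ delta_ij)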
+ for (int j=0;j<Nstop;j++) { + std::cout<<GridLogIRL << " |e[" << j << "]|^2 = " << norm2(evec[j]) << std::endl; + } + + std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; + std::cout << GridLogIRL << "ImplicitlyRestartedLanczos CONVERGED ; Summary :\n"; + std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; + std::cout << GridLogIRL << " -- Iterations = "<< iter << "\n"; + std::cout << GridLogIRL << " -- beta(k) = "<< beta_k << "\n"; + std::cout << GridLogIRL << " -- Nconv = "<< Nconv << "\n"; + std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; + } + + private: +/* Saad PP. 195 +1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0 +2. For k = 1,2,...,m Do: +3. wk:=Avk−βkv_{k−1} +4. αk:=(wk,vk) // +5. wk:=wk−αkvk // wk orthog vk +6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop +7. vk+1 := wk/βk+1 +8. EndDo + */ + void step(std::vector<RealD>& lmd, + std::vector<RealD>& lme, + BasisFieldVector<Field>& evec, + Field& w,int Nm,int k) + { + const RealD tiny = 1.0e-20; + assert( k< Nm ); + + GridStopWatch gsw_op,gsw_o; + + Field& evec_k = evec[k]; + + _HermOp(evec_k,w); + std::cout<<GridLogIRL << "_HermOp (poly)" <<std::endl; + + if(k>0) w -= lme[k-1] * evec[k-1]; + + ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk) + RealD alph = real(zalph); + + w = w - alph * evec_k;// 5. wk:=wk−αkvk + + RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop + // 7. vk+1 := wk/βk+1 + + lmd[k] = alph; + lme[k] = beta; + + std::cout<<GridLogIRL << "linalg " <<std::endl; + + if (k>0 && k % orth_period == 0) { + orthogonalize(w,evec,k); // orthonormalise + std::cout<<GridLogIRL << "orthogonalised " <<std::endl; + } + + if(k < Nm-1) evec[k+1] = w; + + std::cout<<GridLogIRL << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl; + if ( beta < tiny ) + std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl; + } + + void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme, + int Nk, int Nm, + Eigen::MatrixXd & Qt, // Nm x Nm + GridBase *grid) + { + Eigen::MatrixXd TriDiag = Eigen::MatrixXd::Zero(Nk,Nk); + + for(int i=0;i<Nk;i++) TriDiag(i,i) = lmd[i]; + for(int i=0;i<Nk-1;i++) TriDiag(i,i+1) = lme[i]; + for(int i=0;i<Nk-1;i++) TriDiag(i+1,i) = lme[i]; + + Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> eigensolver(TriDiag); + + for (int i = 0; i < Nk; i++) { + lmd[Nk-1-i] = eigensolver.eigenvalues()(i); + } + for (int i = 0; i < Nk; i++) { + for (int j = 0; j < Nk; j++) { + Qt(Nk-1-i,j) = eigensolver.eigenvectors()(j,i); + } + } + } + + /////////////////////////////////////////////////////////////////////////// + // File could end here if settle on Eigen ??? 
+ /////////////////////////////////////////////////////////////////////////// + + void QR_decomp(std::vector<RealD>& lmd, // Nm + std::vector<RealD>& lme, // Nm + int Nk, int Nm, // Nk, Nm + Eigen::MatrixXd& Qt, // Nm x Nm matrix + RealD Dsh, int kmin, int kmax) + { + int k = kmin-1; + RealD x; + + RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]); + RealD c = ( lmd[k] -Dsh) *Fden; + RealD s = -lme[k] *Fden; + + RealD tmpa1 = lmd[k]; + RealD tmpa2 = lmd[k+1]; + RealD tmpb = lme[k]; + + lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb; + lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb; + lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb; + x =-s*lme[k+1]; + lme[k+1] = c*lme[k+1]; + + for(int i=0; i<Nk; ++i){ + RealD Qtmp1 = Qt(k,i); + RealD Qtmp2 = Qt(k+1,i); + Qt(k,i) = c*Qtmp1 - s*Qtmp2; + Qt(k+1,i)= s*Qtmp1 + c*Qtmp2; + } + + // Givens transformations + for(int k = kmin; k < kmax-1; ++k){ + + RealD Fden = 1.0/hypot(x,lme[k-1]); + RealD c = lme[k-1]*Fden; + RealD s = - x*Fden; + + RealD tmpa1 = lmd[k]; + RealD tmpa2 = lmd[k+1]; + RealD tmpb = lme[k]; + + lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb; + lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb; + lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb; + lme[k-1] = c*lme[k-1] -s*x; + + if(k != kmax-2){ + x = -s*lme[k+1]; + lme[k+1] = c*lme[k+1]; + } + + for(int i=0; i<Nk; ++i){ + RealD Qtmp1 = Qt(k,i); + RealD Qtmp2 = Qt(k+1,i); + Qt(k,i) = c*Qtmp1 -s*Qtmp2; + Qt(k+1,i) = s*Qtmp1 +c*Qtmp2; + } + } + } + + void diagonalize(std::vector<RealD>& lmd, std::vector<RealD>& lme, + int Nk, int Nm, + Eigen::MatrixXd & Qt, + GridBase *grid) + { + Qt = Eigen::MatrixXd::Identity(Nm,Nm); + if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) { + diagonalize_lapack(lmd,lme,Nk,Nm,Qt,grid); + } else if ( diagonalisation == IRLdiagonaliseWithQR ) { + diagonalize_QR(lmd,lme,Nk,Nm,Qt,grid); + } else if ( diagonalisation == IRLdiagonaliseWithEigen ) { + diagonalize_Eigen(lmd,lme,Nk,Nm,Qt,grid); + } else { + assert(0); + } + } + +#ifdef USE_LAPACK +void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e, + double *vl, double *vu, int *il, int *iu, double *abstol, + int *m, double *w, double *z, int *ldz, int *isuppz, + double *work, int *lwork, int *iwork, int *liwork, + int *info); #endif - }; - +void diagonalize_lapack(std::vector<RealD>& lmd, + std::vector<RealD>& lme, + int Nk, int Nm, + Eigen::MatrixXd& Qt, + GridBase *grid) +{ +#ifdef USE_LAPACK + const int size = Nm; + int NN = Nk; + double evals_tmp[NN]; + double evec_tmp[NN][NN]; + memset(evec_tmp[0],0,sizeof(double)*NN*NN); + double DD[NN]; + double EE[NN]; + for (int i = 0; i< NN; i++) { + for (int j = i - 1; j <= i + 1; j++) { + if ( j < NN && j >= 0 ) { + if (i==j) DD[i] = lmd[i]; + if (i==j) evals_tmp[i] = lmd[i]; + if (j==(i-1)) EE[j] = lme[j]; + } + } + } + int evals_found; + int lwork = ( (18*NN) > (1+4*NN+NN*NN)? 
(18*NN):(1+4*NN+NN*NN)) ; + int liwork = 3+NN*10 ; + int iwork[liwork]; + double work[lwork]; + int isuppz[2*NN]; + char jobz = 'V'; // calculate evals & evecs + char range = 'I'; // calculate all evals + // char range = 'A'; // calculate all evals + char uplo = 'U'; // refer to upper half of original matrix + char compz = 'I'; // Compute eigenvectors of tridiagonal matrix + int ifail[NN]; + int info; + int total = grid->_Nprocessors; + int node = grid->_processor; + int interval = (NN/total)+1; + double vl = 0.0, vu = 0.0; + int il = interval*node+1 , iu = interval*(node+1); + if (iu > NN) iu=NN; + double tol = 0.0; + if (1) { + memset(evals_tmp,0,sizeof(double)*NN); + if ( il <= NN){ + LAPACK_dstegr(&jobz, &range, &NN, + (double*)DD, (double*)EE, + &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' + &tol, // tolerance + &evals_found, evals_tmp, (double*)evec_tmp, &NN, + isuppz, + work, &lwork, iwork, &liwork, + &info); + for (int i = iu-1; i>= il-1; i--){ + evals_tmp[i] = evals_tmp[i - (il-1)]; + if (il>1) evals_tmp[i-(il-1)]=0.; + for (int j = 0; j< NN; j++){ + evec_tmp[i][j] = evec_tmp[i - (il-1)][j]; + if (il>1) evec_tmp[i-(il-1)][j]=0.; + } + } + } + { + grid->GlobalSumVector(evals_tmp,NN); + grid->GlobalSumVector((double*)evec_tmp,NN*NN); + } + } + // Safer to sort instead of just reversing it, + // but the document of the routine says evals are sorted in increasing order. + // qr gives evals in decreasing order. + for(int i=0;i<NN;i++){ + lmd [NN-1-i]=evals_tmp[i]; + for(int j=0;j<NN;j++){ + Qt((NN-1-i),j)=evec_tmp[i][j]; + } + } +#else + assert(0); +#endif } -#endif + void diagonalize_QR(std::vector<RealD>& lmd, std::vector<RealD>& lme, + int Nk, int Nm, + Eigen::MatrixXd & Qt, + GridBase *grid) + { + int QRiter = 100*Nm; + int kmin = 1; + int kmax = Nk; + + // (this should be more sophisticated) + for(int iter=0; iter<QRiter; ++iter){ + + // determination of 2x2 leading submatrix + RealD dsub = lmd[kmax-1]-lmd[kmax-2]; + RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]); + RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub))); + // (Dsh: shift) + + // transformation + QR_decomp(lmd,lme,Nk,Nm,Qt,Dsh,kmin,kmax); // Nk, Nm + + // Convergence criterion (redef of kmin and kamx) + for(int j=kmax-1; j>= kmin; --j){ + RealD dds = fabs(lmd[j-1])+fabs(lmd[j]); + if(fabs(lme[j-1])+dds > dds){ + kmax = j+1; + goto continued; + } + } + QRiter = iter; + return; + + continued: + for(int j=0; j<kmax-1; ++j){ + RealD dds = fabs(lmd[j])+fabs(lmd[j+1]); + if(fabs(lme[j])+dds > dds){ + kmin = j+1; + break; + } + } + } + std::cout << GridLogError << "[QL method] Error - Too many iteration: "<<QRiter<<"\n"; + abort(); + } + + }; +} + +#endif diff --git a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h index e715fc25..3ad516ef 100644 --- a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h +++ b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h @@ -48,7 +48,7 @@ class BasisFieldVector { } } - void rotate(std::vector<RealD>& Qt,int j0, int j1, int k0,int k1,int Nm) { + void rotate(Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm) { GridBase* grid = _v[0]._grid; @@ -62,7 +62,7 @@ class BasisFieldVector { for(int j=j0; j<j1; ++j){ for(int k=k0; k<k1; ++k){ - B[j] +=Qt[k+Nm*j] * _v[k]._odata[ss]; + B[j] +=Qt(j,k) * _v[k]._odata[ss]; } } for(int j=j0; j<j1; ++j){ @@ -70,7 +70,6 @@ class BasisFieldVector { } 
} } - } size_t size() const { diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index a8723f32..7668765b 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -244,6 +244,7 @@ until convergence // Implicitly shifted QR transformations Qt = Eigen::MatrixXd::Identity(Nm,Nm); + for(int ip=k2; ip<Nm; ++ip){ // Eigen replacement for qr_decomp ??? qr_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm); @@ -319,7 +320,7 @@ until convergence } // end of iter loop std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; - std::cout<< GridLogError <<" ImplicitlyRestartedLanczos::calc() NOT converged."; + std::cout << GridLogError <<" ImplicitlyRestartedLanczos::calc() NOT converged."; std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; abort(); diff --git a/lib/log/Log.cc b/lib/log/Log.cc index 65dc2812..6c7d9459 100644 --- a/lib/log/Log.cc +++ b/lib/log/Log.cc @@ -59,13 +59,15 @@ void GridLogTimestamp(int on){ } Colours GridLogColours(0); -GridLogger GridLogError(1, "Error", GridLogColours, "RED"); +GridLogger GridLogIRL (1, "IRL" , GridLogColours, "NORMAL"); +GridLogger GridLogSolver (1, "Solver", GridLogColours, "NORMAL"); +GridLogger GridLogError (1, "Error" , GridLogColours, "RED"); GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW"); GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL"); -GridLogger GridLogDebug(1, "Debug", GridLogColours, "PURPLE"); +GridLogger GridLogDebug (1, "Debug", GridLogColours, "PURPLE"); GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN"); -GridLogger GridLogIterative(1, "Iterative", GridLogColours, "BLUE"); -GridLogger GridLogIntegrator(1, "Integrator", GridLogColours, "BLUE"); +GridLogger GridLogIterative (1, "Iterative", GridLogColours, "BLUE"); +GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE"); void GridLogConfigure(std::vector<std::string> &logstreams) { GridLogError.Active(0); diff --git a/lib/log/Log.h b/lib/log/Log.h index 74d080bb..8db83266 100644 --- a/lib/log/Log.h +++ b/lib/log/Log.h @@ -85,6 +85,7 @@ class Logger { protected: Colours &Painter; int active; + int timing_mode; static int timestamp; std::string name, topName; std::string COLOUR; @@ -101,20 +102,24 @@ public: name(nm), topName(topNm), Painter(col_class), + timing_mode(0), COLOUR(col) {} ; void Active(int on) {active = on;}; int isActive(void) {return active;}; static void Timestamp(int on) {timestamp = on;}; - + void Reset(void) { StopWatch.Reset(); } + void TimingMode(int on) { timing_mode = on; if(on) Reset(); } + friend std::ostream& operator<< (std::ostream& stream, Logger& log){ if ( log.active ) { - stream << log.background()<< std::setw(8) << std::left << log.topName << log.background()<< " : "; - stream << log.colour() << std::setw(10) << std::left << log.name << log.background() << " : "; + stream << log.background()<< std::left << log.topName << log.background()<< " : "; + stream << log.colour() << std::left << log.name << log.background() << " : "; if ( log.timestamp ) { StopWatch.Stop(); GridTime now = StopWatch.Elapsed(); + if ( log.timing_mode==1 ) StopWatch.Reset(); StopWatch.Start(); stream << log.evidence()<< now << log.background() << " : " ; } @@ -135,6 +140,8 @@ public: void GridLogConfigure(std::vector<std::string> &logstreams); +extern 
GridLogger GridLogIRL; +extern GridLogger GridLogSolver; extern GridLogger GridLogError; extern GridLogger GridLogWarning; extern GridLogger GridLogMessage; diff --git a/lib/threads/Threads.h b/lib/threads/Threads.h index d15f15ce..36daf2af 100644 --- a/lib/threads/Threads.h +++ b/lib/threads/Threads.h @@ -51,7 +51,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk> #define PARALLEL_CRITICAL #endif +#define parallel_region PARALLEL_REGION #define parallel_for PARALLEL_FOR_LOOP for +#define parallel_for_internal PARALLEL_FOR_LOOP_INTERN for #define parallel_for_nest2 PARALLEL_NESTED_LOOP2 for namespace Grid { diff --git a/tests/debug/Test_cheby.cc b/tests/debug/Test_cheby.cc index 40544c56..72d07885 100644 --- a/tests/debug/Test_cheby.cc +++ b/tests/debug/Test_cheby.cc @@ -37,8 +37,15 @@ RealD InverseApproximation(RealD x){ RealD SqrtApproximation(RealD x){ return std::sqrt(x); } +RealD Approximation32(RealD x){ + return std::pow(x,-1.0/32.0); +} +RealD Approximation2(RealD x){ + return std::pow(x,-1.0/2.0); +} + RealD StepFunction(RealD x){ - if ( x<0.1 ) return 1.0; + if ( x<10.0 ) return 1.0; else return 0.0; } @@ -56,7 +63,6 @@ int main (int argc, char ** argv) Chebyshev<LatticeFermion> ChebyInv(lo,hi,2000,InverseApproximation); - { std::ofstream of("chebyinv"); ChebyInv.csv(of); @@ -78,7 +84,6 @@ int main (int argc, char ** argv) ChebyStep.JacksonSmooth(); - { std::ofstream of("chebystepjack"); ChebyStep.csv(of); @@ -100,5 +105,30 @@ int main (int argc, char ** argv) ChebyNE.csv(of); } + lo=0.0; + hi=4.0; + Chebyshev<LatticeFermion> Cheby32(lo,hi,2000,Approximation32); + { + std::ofstream of("cheby32"); + Cheby32.csv(of); + } + Cheby32.JacksonSmooth(); + { + std::ofstream of("cheby32jack"); + Cheby32.csv(of); + } + + Chebyshev<LatticeFermion> ChebySqrt(lo,hi,2000,Approximation2); + { + std::ofstream of("chebysqrt"); + ChebySqrt.csv(of); + } + ChebySqrt.JacksonSmooth(); + { + std::ofstream of("chebysqrtjack"); + ChebySqrt.csv(of); + } + + Grid_finalize(); } diff --git a/tests/hmc/Test_remez.cc b/tests/hmc/Test_remez.cc index bc851173..5f4b0a25 100644 --- a/tests/hmc/Test_remez.cc +++ b/tests/hmc/Test_remez.cc @@ -38,11 +38,11 @@ int main (int argc, char ** argv) std::cout<<GridLogMessage << "Testing Remez"<<std::endl; - double lo=0.01; - double hi=1.0; + double lo=1.0e-3; + double hi=5.0; int precision=64; - int degree=10; - AlgRemez remez(0.001,1.0,precision); + int degree=16; + AlgRemez remez(lo,hi,precision); //////////////////////////////////////// // sqrt and inverse sqrt @@ -50,21 +50,50 @@ int main (int argc, char ** argv) std::cout<<GridLogMessage << "Generating degree "<<degree<<" for x^(1/2)"<<std::endl; remez.generateApprox(degree,1,2); - MultiShiftFunction Sqrt(remez,1.0,false); - MultiShiftFunction InvSqrt(remez,1.0,true); + MultiShiftFunction Root2(remez,1.0,false); + MultiShiftFunction InvRoot2(remez,1.0,true); std::cout<<GridLogMessage << "Generating degree "<<degree<<" for x^(1/4)"<<std::endl; remez.generateApprox(degree,1,4); - MultiShiftFunction SqrtSqrt(remez,1.0,false); - MultiShiftFunction InvSqrtSqrt(remez,1.0,true); + MultiShiftFunction Root4(remez,1.0,false); + MultiShiftFunction InvRoot4(remez,1.0,true); + std::cout<<GridLogMessage << "Generating degree "<<degree<<" for x^(1/8)"<<std::endl; + remez.generateApprox(degree,1,8); + MultiShiftFunction Root8(remez,1.0,false); + MultiShiftFunction InvRoot8(remez,1.0,true); + + std::cout<<GridLogMessage << "Generating degree "<<degree<<" for x^(1/16)"<<std::endl; + remez.generateApprox(degree,1,16); + MultiShiftFunction 
Root16(remez,1.0,false); + MultiShiftFunction InvRoot16(remez,1.0,true); + + std::cout<<GridLogMessage << "Generating degree "<<degree<<" for x^(1/32)"<<std::endl; + remez.generateApprox(degree,1,32); + MultiShiftFunction Root32(remez,1.0,false); + MultiShiftFunction InvRoot32(remez,1.0,true); - ofstream gnuplot(std::string("Sqrt.gnu"),std::ios::out|std::ios::trunc); - Sqrt.gnuplot(gnuplot); + ofstream gnuplot(std::string("Root2.gnu"),std::ios::out|std::ios::trunc); + Root2.gnuplot(gnuplot); + + ofstream gnuplot_i2(std::string("InvRoot2.gnu"),std::ios::out|std::ios::trunc); + InvRoot2.gnuplot(gnuplot_i2); + + ofstream gnuplot_i4(std::string("InvRoot4.gnu"),std::ios::out|std::ios::trunc); + InvRoot4.gnuplot(gnuplot_i4); + + ofstream gnuplot_i8(std::string("InvRoot8.gnu"),std::ios::out|std::ios::trunc); + InvRoot8.gnuplot(gnuplot_i8); + + ofstream gnuplot_i16(std::string("InvRoot16.gnu"),std::ios::out|std::ios::trunc); + InvRoot16.gnuplot(gnuplot_i16); + + ofstream gnuplot_i32(std::string("InvRoot32.gnu"),std::ios::out|std::ios::trunc); + InvRoot32.gnuplot(gnuplot_i32); + + - ofstream gnuplot_inv(std::string("InvSqrt.gnu"),std::ios::out|std::ios::trunc); - InvSqrt.gnuplot(gnuplot); double x=0.6789; double sx=std::sqrt(x); @@ -72,10 +101,10 @@ int main (int argc, char ** argv) double isx=1.0/sx; double issx=1.0/ssx; - double asx =Sqrt.approx(x); - double assx =SqrtSqrt.approx(x); - double aisx =InvSqrt.approx(x); - double aissx=InvSqrtSqrt.approx(x); + double asx =Root2.approx(x); + double assx =Root4.approx(x); + double aisx =InvRoot2.approx(x); + double aissx=InvRoot4.approx(x); std::cout<<GridLogMessage << "x^(1/2) : "<<sx<<" "<<asx<<std::endl; std::cout<<GridLogMessage << "x^(1/4) : "<<ssx<<" "<<assx<<std::endl; From 9aff354ab5c4a1e6bab3f0847bd1e3944e5ddc44 Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Fri, 13 Oct 2017 13:22:26 +0100 Subject: [PATCH 02/45] Final version prior to reunification --- .../BlockImplicitlyRestartedLanczos.h | 45 +++++++++---------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h index 90d45193..de3f1790 100644 --- a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h @@ -35,9 +35,6 @@ Author: Christoph Lehner <clehner@bnl.gov> //#include <zlib.h> #include <sys/stat.h> -#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockedGrid.h> -#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h> -#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockProjector.h> namespace Grid { @@ -178,7 +175,7 @@ class BlockImplicitlyRestartedLanczos { //////////////////////////////// // Embedded objects //////////////////////////////// - SortEigen<Field> _sort; + // SortEigen<Field> _sort; LinearFunction<Field> &_HermOp; LinearFunction<Field> &_HermOpTest; ///////////////////////// @@ -212,11 +209,10 @@ public: return nn; } - void orthogonalize(Field& w, BasisFieldVector<Field>& evec,int k) + void orthogonalize(Field& w, std::vector<Field>& evec,int k) { OrthoTime-=usecond()/1e6; - //evec.orthogonalize(w,k); - basisOrthogonalize(evec._v,w,k); + basisOrthogonalize(evec,w,k); normalise(w); OrthoTime+=usecond()/1e6; } @@ -238,7 +234,7 @@ repeat →AVK =VKHK +fKe†K † 
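   (i.e. the length-K Lanczos factorization A V_K = V_K H_K + f_K e_K†)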
Extend to an M = K + P step factorization AVM = VMHM + fMeM until convergence */ - void calc(std::vector<RealD>& eval, BasisFieldVector<Field>& evec, const Field& src, int& Nconv, bool reverse, int SkipTest) + void calc(std::vector<RealD>& eval, std::vector<Field>& evec, const Field& src, int& Nconv, bool reverse, int SkipTest) { GridBase *grid = src._grid; assert(grid == evec[0]._grid); @@ -341,7 +337,8 @@ until convergence ////////////////////////////////// eval2_copy = eval2; - _sort.push(eval2,Nm); + // _sort.push(eval2,Nm); + std::partial_sort(eval2.begin(),eval2.begin()+Nm,eval2.end()); std::cout<<GridLogIRL <<" evals sorted "<<std::endl; for(int ip=0; ip<k2; ++ip) std::cout<<GridLogIRL << "eval "<< ip << " "<< eval2[ip] << std::endl; @@ -359,8 +356,7 @@ until convergence assert(k2<Nm); assert(k2<Nm); assert(k1>0); - // evec.rotate(Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis - basisRotate(evec._v,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis + basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis std::cout<<GridLogIRL <<"QR rotation done "<<std::endl; @@ -396,8 +392,7 @@ until convergence Field ev0_orig(grid); ev0_orig = evec[0]; - // evec.rotate(Qt,0,Nk,0,Nk,Nm); - basisRotate(evec._v,Qt,0,Nk,0,Nk,Nm); + basisRotate(evec,Qt,0,Nk,0,Nk,Nm); { std::cout << GridLogIRL << "Test convergence" << std::endl; @@ -436,7 +431,6 @@ until convergence goto converged; } - std::cout << GridLogIRL << "Convergence testing: Rotating back" << std::endl; //B[j] +=Qt[k+_Nm*j] * _v[k]._odata[ss]; { Eigen::MatrixXd qm = Eigen::MatrixXd::Zero(Nk,Nk); // Restrict Qt to Nk x Nk @@ -445,16 +439,17 @@ until convergence qm(j,k) = Qt(j,k); Eigen::MatrixXd qmI = qm.inverse(); - std::cout << GridLogIRL << "Inverted ("<<Nk<<"x"<<Nk<<") matrix " << std::endl; - RealD res_check_rotate_inverse = (qm*qmI - Eigen::MatrixXd::Identity(Nk,Nk)).norm(); // sqrt( |X|^2 ) + + std::cout << GridLogIRL << "\tInverted ("<<Nk<<"x"<<Nk<<") Qt matrix " << " error = " << res_check_rotate_inverse <<std::endl; + assert(res_check_rotate_inverse < 1e-7); - //evec.rotate(qmI,0,Nk,0,Nk,Nm); - basisRotate(evec._v,qmI,0,Nk,0,Nk,Nm); - + + basisRotate(evec,qmI,0,Nk,0,Nk,Nm); + std::cout << GridLogIRL << "\t Basis rotation done "<<std::endl; + axpy(ev0_orig,-1.0,evec[0],ev0_orig); - std::cout << GridLogIRL << "Rotation done ; error = " << res_check_rotate_inverse << ");"<<std::endl; std::cout << GridLogIRL << " | evec[0] - evec[0]_orig | = " << ::sqrt(norm2(ev0_orig)) << std::endl; } } @@ -471,15 +466,17 @@ until convergence if (SkipTest == 1) { eval = eval2; } else { - // test quickly + // test quickly + // PAB -- what precisely does this test? for (int j=0;j<Nstop;j+=SkipTest) { std::cout<<GridLogIRL << "Eigenvalue[" << j << "] = " << eval2[j] << " (" << eval2_copy[j] << ")" << std::endl; } eval2_copy.resize(eval2.size()); eval = eval2_copy; } - // evec.sortInPlace(eval,reverse); - basisSortInPlace(evec._v,eval,reverse); + + basisSortInPlace(evec,eval,reverse); + // test // PAB -- what does this test ? 
for (int j=0;j<Nstop;j++) { std::cout<<GridLogIRL << " |e[" << j << "]|^2 = " << norm2(evec[j]) << std::endl; @@ -507,7 +504,7 @@ until convergence */ void step(std::vector<RealD>& lmd, std::vector<RealD>& lme, - BasisFieldVector<Field>& evec, + std::vector<Field>& evec, Field& w,int Nm,int k) { const RealD tiny = 1.0e-20; From 4b4d18793535b43e4389dc09f6fe1de59640fb13 Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Fri, 13 Oct 2017 13:22:44 +0100 Subject: [PATCH 03/45] Reunified the Lanczos implementations --- .../iterative/ImplicitlyRestartedLanczos.h | 668 +++++++++++------- 1 file changed, 414 insertions(+), 254 deletions(-) diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index 7668765b..f32e4fa5 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -7,8 +7,9 @@ Copyright (C) 2015 Author: Peter Boyle <paboyle@ph.ed.ac.uk> -Author: Chulwoo Jung -Author: Guido Cossu +Author: paboyle <paboyle@ph.ed.ac.uk> +Author: Chulwoo Jung <chulwoo@bnl.gov> +Author: Christoph Lehner <clehner@bnl.gov> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,125 +28,191 @@ Author: Guido Cossu See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#ifndef GRID_IRL_H -#define GRID_IRL_H +#ifndef GRID_BIRL_H +#define GRID_BIRL_H #include <string.h> //memset +//#include <zlib.h> +#include <sys/stat.h> -namespace Grid { +namespace Grid { - enum IRLdiagonalisation { - IRLdiagonaliseWithDSTEGR, - IRLdiagonaliseWithQR, - IRLdiagonaliseWithEigen - }; - -//////////////////////////////////////////////////////////////////////////////// -// Helper class for sorting the evalues AND evectors by Field -// Use pointer swizzle on vectors -//////////////////////////////////////////////////////////////////////////////// template<class Field> -class SortEigen { - private: - static bool less_lmd(RealD left,RealD right){ - return left > right; - } - static bool less_pair(std::pair<RealD,Field const*>& left, - std::pair<RealD,Field const*>& right){ - return left.first > (right.first); - } - - public: - void push(std::vector<RealD>& lmd,std::vector<Field>& evec,int N) { - - //////////////////////////////////////////////////////////////////////// - // PAB: FIXME: VERY VERY VERY wasteful: takes a copy of the entire vector set. 
- // : The vector reorder should be done by pointer swizzle somehow - //////////////////////////////////////////////////////////////////////// - std::vector<Field> cpy(lmd.size(),evec[0]._grid); - for(int i=0;i<lmd.size();i++) cpy[i] = evec[i]; - - std::vector<std::pair<RealD, Field const*> > emod(lmd.size()); +void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k) +{ + for(int j=0; j<k; ++j){ + auto ip = innerProduct(basis[j],w); + w = w - ip*basis[j]; + } +} - for(int i=0;i<lmd.size();++i) emod[i] = std::pair<RealD,Field const*>(lmd[i],&cpy[i]); - - partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair); - - typename std::vector<std::pair<RealD, Field const*> >::iterator it = emod.begin(); - for(int i=0;i<N;++i){ - lmd[i]=it->first; - evec[i]=*(it->second); - ++it; +template<class Field> +void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm) +{ + typedef typename Field::vector_object vobj; + GridBase* grid = basis[0]._grid; + + parallel_region + { + std::vector < vobj > B(Nm); // Thread private + + parallel_for_internal(int ss=0;ss < grid->oSites();ss++){ + for(int j=j0; j<j1; ++j) B[j]=0.; + + for(int j=j0; j<j1; ++j){ + for(int k=k0; k<k1; ++k){ + B[j] +=Qt(j,k) * basis[k]._odata[ss]; + } + } + for(int j=j0; j<j1; ++j){ + basis[j]._odata[ss] = B[j]; + } } } - void push(std::vector<RealD>& lmd,int N) { - std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd); +} + +template<class Field> +void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, std::vector<int>& idx) +{ + int vlen = idx.size(); + + assert(vlen>=1); + assert(vlen<=sort_vals.size()); + assert(vlen<=_v.size()); + + for (size_t i=0;i<vlen;i++) { + + if (idx[i] != i) { + + assert(idx[i] > i); + ////////////////////////////////////// + // idx[i] is a table of desired sources giving a permutation. + // + // Swap v[i] with v[idx[i]]. + // + // Find j>i for which _vnew[j] = _vold[i], + // track the move idx[j] => idx[i] + // track the move idx[i] => i + ////////////////////////////////////// + size_t j; + for (j=i;j<idx.size();j++) + if (idx[j]==i) + break; + + assert(j!=idx.size()); + assert(idx[j]==i); + + std::swap(_v[i]._odata,_v[idx[i]]._odata); // should use vector move constructor, no data copy + std::swap(sort_vals[i],sort_vals[idx[i]]); + + idx[j] = idx[i]; + idx[i] = i; + } } - bool saturated(RealD lmd, RealD thrs) { - return fabs(lmd) > fabs(thrs); +} + +inline std::vector<int> basisSortGetIndex(std::vector<RealD>& sort_vals) +{ + std::vector<int> idx(sort_vals.size()); + std::iota(idx.begin(), idx.end(), 0); + + // sort indexes based on comparing values in v + std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) { + return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]); + }); + return idx; +} + +template<class Field> +void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, bool reverse) +{ + std::vector<int> idx = basisSortGetIndex(sort_vals); + if (reverse) + std::reverse(idx.begin(), idx.end()); + + basisReorderInPlace(_v,sort_vals,idx); +} + +// PAB: faster to compute the inner products first then fuse loops. +// If performance critical can improve. 
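// PAB's note above suggests gathering all the inner products first and then
// fusing the accumulation loops. A sketch of that fused variant of the
// basisDeflate routine that follows, with plain std::vector<double> standing
// in for Grid fields (an illustrative assumption; the real code would keep
// the parallel innerProduct/axpy primitives):

#include <numeric>
#include <vector>

using Vec = std::vector<double>;

void deflateFused(const std::vector<Vec>& v, const std::vector<double>& eval,
                  const Vec& src, Vec& result) {
  const size_t N = v.size(), L = src.size();
  std::vector<double> coef(N);
  for (size_t i = 0; i < N; i++)   // pass 1: all projection coefficients
    coef[i] = std::inner_product(v[i].begin(), v[i].end(), src.begin(), 0.0) / eval[i];
  result.assign(L, 0.0);
  for (size_t s = 0; s < L; s++)   // pass 2: one fused accumulation sweep over sites
    for (size_t i = 0; i < N; i++)
      result[s] += coef[i] * v[i][s];
}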
+template<class Field> +void basisDeflate(const std::vector<Field> &_v,const std::vector<RealD>& eval,const Field& src_orig,Field& result) { + result = zero; + assert(_v.size()==eval.size()); + int N = (int)_v.size(); + for (int i=0;i<N;i++) { + Field& tmp = _v[i]; + axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result); } +} + +enum IRLdiagonalisation { + IRLdiagonaliseWithDSTEGR, + IRLdiagonaliseWithQR, + IRLdiagonaliseWithEigen }; ///////////////////////////////////////////////////////////// // Implicitly restarted lanczos ///////////////////////////////////////////////////////////// + template<class Field> class ImplicitlyRestartedLanczos { - -private: - - int MaxIter; // Max iterations - int Nstop; // Number of evecs checked for convergence - int Nk; // Number of converged sought - int Nm; // Nm -- total number of vectors - RealD eresid; + private: + const RealD small = 1.0e-8; + int MaxIter; + int MinRestart; // Minimum number of restarts; only check for convergence after + int Nstop; // Number of evecs checked for convergence + int Nk; // Number of converged sought + // int Np; // Np -- Number of spare vecs in krylov space // == Nm - Nk + int Nm; // Nm -- total number of vectors IRLdiagonalisation diagonalisation; - //////////////////////////////////// + int orth_period; + + RealD OrthoTime; + RealD eresid, betastp; + //////////////////////////////// // Embedded objects - //////////////////////////////////// - SortEigen<Field> _sort; - LinearOperatorBase<Field> &_Linop; - OperatorFunction<Field> &_poly; - + //////////////////////////////// + LinearFunction<Field> &_HermOp; + LinearFunction<Field> &_HermOpTest; ///////////////////////// // Constructor ///////////////////////// public: - ImplicitlyRestartedLanczos(LinearOperatorBase<Field> &Linop, // op - OperatorFunction<Field> & poly, // polynomial - int _Nstop, // really sought vecs - int _Nk, // sought vecs - int _Nm, // total vecs - RealD _eresid, // resid in lmd deficit - int _MaxIter, // Max iterations - IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen ) : - _Linop(Linop), _poly(poly), - Nstop(_Nstop), Nk(_Nk), Nm(_Nm), - eresid(_eresid), MaxIter(_MaxIter), - diagonalisation(_diagonalisation) - { }; + ImplicitlyRestartedLanczos(LinearFunction<Field> & HermOp, + LinearFunction<Field> & HermOpTest, + int _Nstop, // sought vecs + int _Nk, // sought vecs + int _Nm, // spare vecs + RealD _eresid, // resid in lmdue deficit + RealD _betastp, // if beta(k) < betastp: converged + int _MaxIter, // Max iterations + int _MinRestart, int _orth_period = 1, + IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : + _HermOp(HermOp), _HermOpTest(HermOpTest), + Nstop(_Nstop) , Nk(_Nk), Nm(_Nm), + eresid(_eresid), betastp(_betastp), + MaxIter(_MaxIter) , MinRestart(_MinRestart), + orth_period(_orth_period), diagonalisation(_diagonalisation) { }; //////////////////////////////// // Helpers //////////////////////////////// - static RealD normalise(Field& v) + template<typename T> static RealD normalise(T& v) { RealD nn = norm2(v); nn = sqrt(nn); v = v * (1.0/nn); return nn; } - - void orthogonalize(Field& w, std::vector<Field>& evec, int k) + + void orthogonalize(Field& w, std::vector<Field>& evec,int k) { - typedef typename Field::scalar_type MyComplex; - MyComplex ip; - - for(int j=0; j<k; ++j){ - ip = innerProduct(evec[j],w); - w = w - ip * evec[j]; - } + OrthoTime-=usecond()/1e6; + basisOrthogonalize(evec,w,k); normalise(w); + OrthoTime+=usecond()/1e6; } /* Rudy Arthur's thesis pp.137 @@ -165,185 
+232,265 @@ repeat →AVK =VKHK +fKe†K † Extend to an M = K + P step factorization AVM = VMHM + fMeM until convergence */ - void calc(std::vector<RealD>& eval, std::vector<Field>& evec, const Field& src, int& Nconv) + void calc(std::vector<RealD>& eval, std::vector<Field>& evec, const Field& src, int& Nconv, bool reverse, int SkipTest) { + GridBase *grid = src._grid; + assert(grid == evec[0]._grid); - GridBase *grid = evec[0]._grid; - assert(grid == src._grid); - - std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; - std::cout << GridLogMessage <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl; - std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; - std::cout << GridLogMessage <<" -- seek Nk = " << Nk <<" vectors"<< std::endl; - std::cout << GridLogMessage <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl; - std::cout << GridLogMessage <<" -- total Nm = " << Nm <<" vectors"<< std::endl; - std::cout << GridLogMessage <<" -- size of eval = " << eval.size() << std::endl; - std::cout << GridLogMessage <<" -- size of evec = " << evec.size() << std::endl; + GridLogIRL.TimingMode(1); + std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; + std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl; + std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; + std::cout << GridLogIRL <<" -- seek Nk = " << Nk <<" vectors"<< std::endl; + std::cout << GridLogIRL <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl; + std::cout << GridLogIRL <<" -- total Nm = " << Nm <<" vectors"<< std::endl; + std::cout << GridLogIRL <<" -- size of eval = " << eval.size() << std::endl; + std::cout << GridLogIRL <<" -- size of evec = " << evec.size() << std::endl; if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) { - std::cout << GridLogMessage << "Diagonalisation is DSTEGR "<<std::endl; + std::cout << GridLogIRL << "Diagonalisation is DSTEGR "<<std::endl; } else if ( diagonalisation == IRLdiagonaliseWithQR ) { - std::cout << GridLogMessage << "Diagonalisation is QR "<<std::endl; + std::cout << GridLogIRL << "Diagonalisation is QR "<<std::endl; } else if ( diagonalisation == IRLdiagonaliseWithEigen ) { - std::cout << GridLogMessage << "Diagonalisation is Eigen "<<std::endl; + std::cout << GridLogIRL << "Diagonalisation is Eigen "<<std::endl; } - std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; + std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; + + assert(Nm <= evec.size() && Nm <= eval.size()); - assert(Nm == evec.size() && Nm == eval.size()); + // quickly get an idea of the largest eigenvalue to more properly normalize the residuum + RealD evalMaxApprox = 0.0; + { + auto src_n = src; + auto tmp = src; + const int _MAX_ITER_IRL_MEVAPP_ = 50; + for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) { + _HermOpTest(src_n,tmp); + RealD vnum = real(innerProduct(src_n,tmp)); // HermOp. 
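// The surrounding block estimates the largest eigenvalue by plain power
// iteration with a 5% relative-change stopping test; the estimate is later
// used to normalise the convergence residuals. A self-contained sketch of
// the same logic, with an Eigen matrix standing in for the Grid operator
// (an illustrative assumption):

#include <Eigen/Dense>
#include <cmath>
#include <iostream>

int main() {
  const int n = 100;
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(n, n);
  A = 0.5 * (A + A.transpose());              // Hermitian, like _HermOpTest
  Eigen::VectorXd v = Eigen::VectorXd::Random(n);
  double na = 0.0, prev = 0.0;
  for (int i = 0; i < 50; i++) {              // cf. _MAX_ITER_IRL_MEVAPP_ = 50
    Eigen::VectorXd w = A * v;
    na = w.dot(v) / v.dot(v);                 // Rayleigh quotient vnum/vden
    if (i > 0 && std::fabs(prev / na - 1.0) < 0.05) break;  // 5% stopping test
    prev = na;
    v = w;                                    // src_n = tmp (no normalisation)
  }
  std::cout << "approximate largest eigenvalue: " << na << std::endl;
}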
+ RealD vden = norm2(src_n); + RealD na = vnum/vden; + if (fabs(evalMaxApprox/na - 1.0) < 0.05) + i=_MAX_ITER_IRL_MEVAPP_; + evalMaxApprox = na; + std::cout << GridLogIRL << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl; + src_n = tmp; + } + } std::vector<RealD> lme(Nm); std::vector<RealD> lme2(Nm); std::vector<RealD> eval2(Nm); + std::vector<RealD> eval2_copy(Nm); + Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm); - Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm); - - std::vector<int> Iconv(Nm); - std::vector<Field> B(Nm,grid); // waste of space replicating - Field f(grid); Field v(grid); - int k1 = 1; int k2 = Nk; - - Nconv = 0; - RealD beta_k; + + Nconv = 0; // Set initial vector evec[0] = src; - std::cout << GridLogMessage <<"norm2(src)= " << norm2(src)<<std::endl; - normalise(evec[0]); - std::cout << GridLogMessage <<"norm2(evec[0])= " << norm2(evec[0]) <<std::endl; - + // Initial Nk steps + OrthoTime=0.; for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k); - + std::cout<<GridLogIRL <<"Initial "<< Nk <<"steps done "<<std::endl; + std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl; + + ////////////////////////////////// // Restarting loop begins + ////////////////////////////////// int iter; for(iter = 0; iter<MaxIter; ++iter){ + OrthoTime=0.; + std::cout<< GridLogMessage <<" **********************"<< std::endl; std::cout<< GridLogMessage <<" Restart iteration = "<< iter << std::endl; std::cout<< GridLogMessage <<" **********************"<< std::endl; - + + std::cout<<GridLogIRL <<" running "<<Nm-Nk <<" steps: "<<std::endl; for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k); - f *= lme[Nm-1]; - + + std::cout<<GridLogIRL <<" "<<Nm-Nk <<" steps done "<<std::endl; + std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl; + + ////////////////////////////////// // getting eigenvalues + ////////////////////////////////// for(int k=0; k<Nm; ++k){ eval2[k] = eval[k+k1-1]; lme2[k] = lme[k+k1-1]; } Qt = Eigen::MatrixXd::Identity(Nm,Nm); diagonalize(eval2,lme2,Nm,Nm,Qt,grid); + std::cout<<GridLogIRL <<" diagonalized "<<std::endl; + ////////////////////////////////// // sorting - _sort.push(eval2,Nm); - - // Implicitly shifted QR transformations - Qt = Eigen::MatrixXd::Identity(Nm,Nm); + ////////////////////////////////// + eval2_copy = eval2; + // _sort.push(eval2,Nm); + std::partial_sort(eval2.begin(),eval2.begin()+Nm,eval2.end()); + + std::cout<<GridLogIRL <<" evals sorted "<<std::endl; + for(int ip=0; ip<k2; ++ip) std::cout<<GridLogIRL << "eval "<< ip << " "<< eval2[ip] << std::endl; + + ////////////////////////////////// + // Implicitly shifted QR transformations + ////////////////////////////////// + Qt = Eigen::MatrixXd::Identity(Nm,Nm); + std::cout<<GridLogIRL << "QR decompose " << std::endl; for(int ip=k2; ip<Nm; ++ip){ - // Eigen replacement for qr_decomp ??? 
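// On the "Eigen replacement" question above: a dense-Eigen stand-in for one
// implicitly shifted QR step is sketched below (an illustration only -- the
// hand-rolled Givens sweep in QR_decomp never forms Q explicitly and costs
// O(m) per rotation). One step with shift s maps T to Q^T T Q, where
// T - s I = Q R, which stays tridiagonal up to roundoff:

#include <Eigen/Dense>
#include <iostream>

int main() {
  const int m = 6;
  Eigen::VectorXd lmd = Eigen::VectorXd::Random(m);        // diagonal of T
  Eigen::VectorXd lme = Eigen::VectorXd::Random(m - 1);    // off-diagonal of T
  Eigen::MatrixXd T = Eigen::MatrixXd::Zero(m, m);
  for (int i = 0; i < m; i++) T(i, i) = lmd(i);
  for (int i = 0; i < m - 1; i++) T(i, i + 1) = T(i + 1, i) = lme(i);
  double shift = T(m - 1, m - 1);                          // e.g. an unwanted Ritz value
  Eigen::MatrixXd I = Eigen::MatrixXd::Identity(m, m);
  Eigen::HouseholderQR<Eigen::MatrixXd> qr(T - shift * I); // T - s I = Q R
  Eigen::MatrixXd Q = qr.householderQ();
  Eigen::MatrixXd Tnew = Q.transpose() * T * Q;            // = R Q + s I
  std::cout << "last off-diagonal after one shifted step: "
            << Tnew(m - 1, m - 2) << std::endl;            // driven toward 0
}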
- qr_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm); + QR_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm); } - - for(int i=0; i<(Nk+1); ++i) B[i] = 0.0; - - for(int j=k1-1; j<k2+1; ++j){ - for(int k=0; k<Nm; ++k){ - B[j].checkerboard = evec[k].checkerboard; - B[j] += Qt(j,k) * evec[k]; - } - } - for(int j=k1-1; j<k2+1; ++j) evec[j] = B[j]; + std::cout<<GridLogIRL <<"QR decompose done "<<std::endl; + + assert(k2<Nm); + assert(k2<Nm); + assert(k1>0); + basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis + + std::cout<<GridLogIRL <<"QR rotation done "<<std::endl; + //////////////////////////////////////////////////// // Compressed vector f and beta(k2) + //////////////////////////////////////////////////// f *= Qt(k2-1,Nm-1); f += lme[k2-1] * evec[k2]; beta_k = norm2(f); beta_k = sqrt(beta_k); - std::cout<< GridLogMessage<<" beta(k) = "<<beta_k<<std::endl; - + std::cout<<GridLogIRL<<" beta(k) = "<<beta_k<<std::endl; + RealD betar = 1.0/beta_k; evec[k2] = betar * f; lme[k2-1] = beta_k; - + + //////////////////////////////////////////////////// // Convergence test + //////////////////////////////////////////////////// for(int k=0; k<Nm; ++k){ eval2[k] = eval[k]; lme2[k] = lme[k]; + // std::cout<<GridLogIRL << "eval2[" << k << "] = " << eval2[k] << std::endl; } Qt = Eigen::MatrixXd::Identity(Nm,Nm); diagonalize(eval2,lme2,Nk,Nm,Qt,grid); - - for(int k = 0; k<Nk; ++k) B[k]=0.0; - - for(int j = 0; j<Nk; ++j){ - for(int k = 0; k<Nk; ++k){ - B[j].checkerboard = evec[k].checkerboard; - B[j] += Qt(j,k) * evec[k]; - } - } - + std::cout<<GridLogIRL <<" Diagonalized "<<std::endl; + Nconv = 0; - for(int i=0; i<Nk; ++i){ + if (iter >= MinRestart) { + std::cout << GridLogIRL << "Rotation to test convergence " << std::endl; - _Linop.HermOp(B[i],v); + Field ev0_orig(grid); + ev0_orig = evec[0]; - RealD vnum = real(innerProduct(B[i],v)); // HermOp. - RealD vden = norm2(B[i]); - eval2[i] = vnum/vden; - v -= eval2[i]*B[i]; - RealD vv = norm2(v); - - std::cout.precision(13); - std::cout << GridLogMessage << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<i<<"] "; - std::cout << "eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[i]; - std::cout << " |H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv<< std::endl; - - // change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged - if((vv<eresid*eresid) && (i == Nconv) ){ - Iconv[Nconv] = i; - ++Nconv; - } - - } // i-loop end - - std::cout<< GridLogMessage <<" #modes converged: "<<Nconv<<std::endl; + basisRotate(evec,Qt,0,Nk,0,Nk,Nm); - if( Nconv>=Nstop ){ - goto converged; - } - } // end of iter loop - - std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; - std::cout << GridLogError <<" ImplicitlyRestartedLanczos::calc() NOT converged."; - std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; + { + std::cout << GridLogIRL << "Test convergence" << std::endl; + Field B(grid); + + for(int j = 0; j<Nk; j+=SkipTest){ + B=evec[j]; + + //std::cout << "Checkerboard: " << evec[j].checkerboard << std::endl; + B.checkerboard = evec[0].checkerboard; + + _HermOpTest(B,v); + + RealD vnum = real(innerProduct(B,v)); // HermOp. 
+ RealD vden = norm2(B); + RealD vv0 = norm2(v); + eval2[j] = vnum/vden; + v -= eval2[j]*B; + RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0); + std::cout.precision(13); + std::cout<<GridLogIRL << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<j<<"] " + <<"eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[j] << " (" << eval2_copy[j] << ")" + <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv + <<" "<< vnum/(sqrt(vden)*sqrt(vv0)) + <<std::endl; + + // change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged + if((vv<eresid*eresid) && (j == Nconv) ){ + Nconv+=SkipTest; + } + } + + // test if we converged, if so, terminate + std::cout<<GridLogIRL<<" #modes converged: "<<Nconv<<std::endl; + if( Nconv>=Nstop || beta_k < betastp){ + goto converged; + } + + //B[j] +=Qt[k+_Nm*j] * _v[k]._odata[ss]; + { + Eigen::MatrixXd qm = Eigen::MatrixXd::Zero(Nk,Nk); // Restrict Qt to Nk x Nk + for (int k=0;k<Nk;k++) + for (int j=0;j<Nk;j++) + qm(j,k) = Qt(j,k); + + Eigen::MatrixXd qmI = qm.inverse(); + + RealD res_check_rotate_inverse = (qm*qmI - Eigen::MatrixXd::Identity(Nk,Nk)).norm(); // sqrt( |X|^2 ) + + std::cout << GridLogIRL << "\tInverted ("<<Nk<<"x"<<Nk<<") Qt matrix " << " error = " << res_check_rotate_inverse <<std::endl; + + assert(res_check_rotate_inverse < 1e-7); + + basisRotate(evec,qmI,0,Nk,0,Nk,Nm); + std::cout << GridLogIRL << "\t Basis rotation done "<<std::endl; + + axpy(ev0_orig,-1.0,evec[0],ev0_orig); + std::cout << GridLogIRL << " | evec[0] - evec[0]_orig | = " << ::sqrt(norm2(ev0_orig)) << std::endl; + } + } + } else { + std::cout << GridLogIRL << "iter < MinRestart: do not yet test for convergence\n"; + } // end of iter loop + } + + std::cout<<GridLogError<<"\n NOT converged.\n"; abort(); converged: - // Sorting - eval.resize(Nconv); - evec.resize(Nconv,grid); - for(int i=0; i<Nconv; ++i){ - eval[i] = eval2[Iconv[i]]; - evec[i] = B[Iconv[i]]; + + if (SkipTest == 1) { + eval = eval2; + } else { + ////////////////////////////////////////////// + // test quickly + // PAB -- what precisely does this test? Don't like this eval2, eval2_copy etc... 
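// A plausible reading of the question above: eval2 holds the values from the
// final diagonalisation (overwritten with direct Rayleigh quotients for the
// modes actually tested), while eval2_copy keeps the Ritz values saved at the
// last restart before sorting; printing both for every SkipTest-th mode is a
// consistency check that the two estimates agree. The acceptance test applied
// in the loop further up can be restated as a self-contained check, with
// Eigen standing in for the Grid operator and fields (an illustrative
// assumption): accept (eval, B) when |H B - eval B|^2 / evalMax^2 < eresid^2.

#include <Eigen/Dense>
#include <algorithm>
#include <cmath>
#include <iostream>

int main() {
  const int n = 64;
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(n, n);
  A = 0.5 * (A + A.transpose());                  // Hermitian test matrix
  Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> es(A);
  double evalMax = std::max(std::fabs(es.eigenvalues()(0)),
                            std::fabs(es.eigenvalues()(n - 1)));
  const double eresid = 1e-8;
  Eigen::VectorXd v = es.eigenvectors().col(0);   // an exact eigenvector: should pass
  double lam = v.dot(A * v) / v.squaredNorm();    // Rayleigh quotient, as for eval2[j]
  double vv = (A * v - lam * v).squaredNorm() / (evalMax * evalMax);
  std::cout << "|H B - eval B|^2 / evalMax^2 = " << vv
            << (vv < eresid * eresid ? " (converged)" : " (not converged)") << std::endl;
}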
+ ////////////////////////////////////////////// + for (int j=0;j<Nstop;j+=SkipTest) { + std::cout<<GridLogIRL << "Eigenvalue[" << j << "] = " << eval2[j] << " (" << eval2_copy[j] << ")" << std::endl; + } + eval2_copy.resize(eval2.size()); + eval = eval2_copy; } - _sort.push(eval,evec,Nconv); - - std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; - std::cout << GridLogMessage << "ImplicitlyRestartedLanczos CONVERGED ; Summary :\n"; - std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; - std::cout << GridLogMessage << " -- Iterations = "<< iter << "\n"; - std::cout << GridLogMessage << " -- beta(k) = "<< beta_k << "\n"; - std::cout << GridLogMessage << " -- Nconv = "<< Nconv << "\n"; - std::cout << GridLogMessage <<"**************************************************************************"<< std::endl; + + basisSortInPlace(evec,eval,reverse); + + for (int j=0;j<Nstop;j++) { + std::cout<<GridLogIRL << " |e[" << j << "]|^2 = " << norm2(evec[j]) << std::endl; + } + + std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; + std::cout << GridLogIRL << "ImplicitlyRestartedLanczos CONVERGED ; Summary :\n"; + std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; + std::cout << GridLogIRL << " -- Iterations = "<< iter << "\n"; + std::cout << GridLogIRL << " -- beta(k) = "<< beta_k << "\n"; + std::cout << GridLogIRL << " -- Nconv = "<< Nconv << "\n"; + std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; } -private: + private: /* Saad PP. 195 1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0 2. For k = 1,2,...,m Do: @@ -361,28 +508,41 @@ private: { const RealD tiny = 1.0e-20; assert( k< Nm ); - - _poly(_Linop,evec[k],w); // 3. wk:=Avk−βkv_{k−1} - + + GridStopWatch gsw_op,gsw_o; + + Field& evec_k = evec[k]; + + _HermOp(evec_k,w); + std::cout<<GridLogIRL << "_HermOp (poly)" <<std::endl; + if(k>0) w -= lme[k-1] * evec[k-1]; - - ComplexD zalph = innerProduct(evec[k],w); // 4. αk:=(wk,vk) + + ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk) RealD alph = real(zalph); - - w = w - alph * evec[k];// 5. wk:=wk−αkvk - + + w = w - alph * evec_k;// 5. wk:=wk−αkvk + RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop // 7. vk+1 := wk/βk+1 - + lmd[k] = alph; lme[k] = beta; - - if ( k > 0 ) orthogonalize(w,evec,k); // orthonormalise - if ( k < Nm-1) evec[k+1] = w; - - if ( beta < tiny ) std::cout << GridLogMessage << " beta is tiny "<<beta<<std::endl; + + std::cout<<GridLogIRL << "linalg " <<std::endl; + + if (k>0 && k % orth_period == 0) { + orthogonalize(w,evec,k); // orthonormalise + std::cout<<GridLogIRL << "orthogonalised " <<std::endl; + } + + if(k < Nm-1) evec[k+1] = w; + + std::cout<<GridLogIRL << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl; + if ( beta < tiny ) + std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl; } - + void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme, int Nk, int Nm, Eigen::MatrixXd & Qt, // Nm x Nm @@ -405,11 +565,12 @@ private: } } } + /////////////////////////////////////////////////////////////////////////// // File could end here if settle on Eigen ??? 
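// The step() routine above is the textbook three-term recurrence (Saad's
// steps 3-7), and diagonalize_Eigen then solves the small tridiagonal matrix
// T built from (lmd, lme). A toy end-to-end sketch with Eigen standing in
// for the Grid operator and fields (an illustrative assumption):

#include <Eigen/Dense>
#include <iostream>
#include <vector>

int main() {
  const int n = 200, m = 30;
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(n, n);
  A = 0.5 * (A + A.transpose());                     // Hermitian operator
  std::vector<double> lmd(m), lme(m);
  std::vector<Eigen::VectorXd> evec;
  evec.push_back(Eigen::VectorXd::Random(n).normalized());
  for (int k = 0; k < m; k++) {
    Eigen::VectorXd w = A * evec[k];                 // 3. w := A v_k
    if (k > 0) w -= lme[k - 1] * evec[k - 1];        //        - beta_k v_{k-1}
    double alph = evec[k].dot(w);                    // 4. alpha_k := (w, v_k)
    w -= alph * evec[k];                             // 5. w := w - alpha_k v_k
    for (const auto& v : evec) w -= v.dot(w) * v;    //    re-orthogonalise (cf. orth_period)
    double beta = w.norm();                          // 6. beta_{k+1} := |w|
    lmd[k] = alph; lme[k] = beta;
    if (k < m - 1) evec.push_back(w / beta);         // 7. v_{k+1} := w / beta_{k+1}
  }
  Eigen::MatrixXd T = Eigen::MatrixXd::Zero(m, m);   // as in diagonalize_Eigen
  for (int i = 0; i < m; i++) T(i, i) = lmd[i];
  for (int i = 0; i < m - 1; i++) T(i, i + 1) = T(i + 1, i) = lme[i];
  Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> es(T);
  Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> esA(A);
  std::cout << "largest Ritz value " << es.eigenvalues()(m - 1)
            << " vs exact " << esA.eigenvalues()(n - 1) << std::endl;
}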
/////////////////////////////////////////////////////////////////////////// - void qr_decomp(std::vector<RealD>& lmd, // Nm + void QR_decomp(std::vector<RealD>& lmd, // Nm std::vector<RealD>& lme, // Nm int Nk, int Nm, // Nk, Nm Eigen::MatrixXd& Qt, // Nm x Nm matrix @@ -576,51 +737,50 @@ void diagonalize_lapack(std::vector<RealD>& lmd, #endif } - void diagonalize_QR(std::vector<RealD>& lmd, std::vector<RealD>& lme, - int Nk, int Nm, - Eigen::MatrixXd & Qt, - GridBase *grid) - { - int Niter = 100*Nm; - int kmin = 1; - int kmax = Nk; - - // (this should be more sophisticated) - for(int iter=0; iter<Niter; ++iter){ - - // determination of 2x2 leading submatrix - RealD dsub = lmd[kmax-1]-lmd[kmax-2]; - RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]); - RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub))); - // (Dsh: shift) - - // transformation - qr_decomp(lmd,lme,Nk,Nm,Qt,Dsh,kmin,kmax); // Nk, Nm - - // Convergence criterion (redef of kmin and kamx) - for(int j=kmax-1; j>= kmin; --j){ - RealD dds = fabs(lmd[j-1])+fabs(lmd[j]); - if(fabs(lme[j-1])+dds > dds){ - kmax = j+1; - goto continued; - } - } - Niter = iter; - return; - - continued: - for(int j=0; j<kmax-1; ++j){ - RealD dds = fabs(lmd[j])+fabs(lmd[j+1]); - if(fabs(lme[j])+dds > dds){ - kmin = j+1; - break; - } +void diagonalize_QR(std::vector<RealD>& lmd, std::vector<RealD>& lme, + int Nk, int Nm, + Eigen::MatrixXd & Qt, + GridBase *grid) +{ + int QRiter = 100*Nm; + int kmin = 1; + int kmax = Nk; + + // (this should be more sophisticated) + for(int iter=0; iter<QRiter; ++iter){ + + // determination of 2x2 leading submatrix + RealD dsub = lmd[kmax-1]-lmd[kmax-2]; + RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]); + RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub))); + // (Dsh: shift) + + // transformation + QR_decomp(lmd,lme,Nk,Nm,Qt,Dsh,kmin,kmax); // Nk, Nm + + // Convergence criterion (redef of kmin and kamx) + for(int j=kmax-1; j>= kmin; --j){ + RealD dds = fabs(lmd[j-1])+fabs(lmd[j]); + if(fabs(lme[j-1])+dds > dds){ + kmax = j+1; + goto continued; + } + } + QRiter = iter; + return; + + continued: + for(int j=0; j<kmax-1; ++j){ + RealD dds = fabs(lmd[j])+fabs(lmd[j+1]); + if(fabs(lme[j])+dds > dds){ + kmin = j+1; + break; } } - std::cout << GridLogError << "[QL method] Error - Too many iteration: "<<Niter<<"\n"; - abort(); } - - }; + std::cout << GridLogError << "[QL method] Error - Too many iteration: "<<QRiter<<"\n"; + abort(); +} +}; } #endif From 47af3565f4cf5a0568dab5f64fc4e2f992385759 Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Fri, 13 Oct 2017 13:23:07 +0100 Subject: [PATCH 04/45] Logging improvement; reunified the Lanczos codes --- .../BlockImplicitlyRestartedLanczos.h | 789 ------------------ lib/log/Log.cc | 2 +- lib/log/Log.h | 30 +- lib/util/Init.cc | 2 +- tests/lanczos/Test_dwf_compressed_lanczos.cc | 17 +- 5 files changed, 36 insertions(+), 804 deletions(-) delete mode 100644 lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h diff --git a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h deleted file mode 100644 index de3f1790..00000000 --- a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h +++ /dev/null @@ -1,789 +0,0 @@ - /************************************************************************************* - - Grid physics library, 
www.github.com/paboyle/Grid - - Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h - - Copyright (C) 2015 - -Author: Peter Boyle <paboyle@ph.ed.ac.uk> -Author: paboyle <paboyle@ph.ed.ac.uk> -Author: Chulwoo Jung <chulwoo@bnl.gov> -Author: Christoph Lehner <clehner@bnl.gov> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_BIRL_H -#define GRID_BIRL_H - -#include <string.h> //memset -//#include <zlib.h> -#include <sys/stat.h> - - -namespace Grid { - -template<class Field> -void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k) -{ - for(int j=0; j<k; ++j){ - auto ip = innerProduct(basis[j],w); - w = w - ip*basis[j]; - } -} - -template<class Field> -void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm) -{ - typedef typename Field::vector_object vobj; - GridBase* grid = basis[0]._grid; - - parallel_region - { - std::vector < vobj > B(Nm); // Thread private - - parallel_for_internal(int ss=0;ss < grid->oSites();ss++){ - for(int j=j0; j<j1; ++j) B[j]=0.; - - for(int j=j0; j<j1; ++j){ - for(int k=k0; k<k1; ++k){ - B[j] +=Qt(j,k) * basis[k]._odata[ss]; - } - } - for(int j=j0; j<j1; ++j){ - basis[j]._odata[ss] = B[j]; - } - } - } -} - -template<class Field> -void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, std::vector<int>& idx) -{ - int vlen = idx.size(); - - assert(vlen>=1); - assert(vlen<=sort_vals.size()); - assert(vlen<=_v.size()); - - for (size_t i=0;i<vlen;i++) { - - if (idx[i] != i) { - - assert(idx[i] > i); - ////////////////////////////////////// - // idx[i] is a table of desired sources giving a permutation. - // - // Swap v[i] with v[idx[i]]. 
- // - // Find j>i for which _vnew[j] = _vold[i], - // track the move idx[j] => idx[i] - // track the move idx[i] => i - ////////////////////////////////////// - size_t j; - for (j=i;j<idx.size();j++) - if (idx[j]==i) - break; - - assert(j!=idx.size()); - assert(idx[j]==i); - - std::swap(_v[i]._odata,_v[idx[i]]._odata); // should use vector move constructor, no data copy - std::swap(sort_vals[i],sort_vals[idx[i]]); - - idx[j] = idx[i]; - idx[i] = i; - } - } -} - -std::vector<int> basisSortGetIndex(std::vector<RealD>& sort_vals) -{ - std::vector<int> idx(sort_vals.size()); - std::iota(idx.begin(), idx.end(), 0); - - // sort indexes based on comparing values in v - std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) { - return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]); - }); - return idx; -} - -template<class Field> -void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, bool reverse) -{ - std::vector<int> idx = basisSortGetIndex(sort_vals); - if (reverse) - std::reverse(idx.begin(), idx.end()); - - basisReorderInPlace(_v,sort_vals,idx); -} - -// PAB: faster to compute the inner products first then fuse loops. -// If performance critical can improve. -template<class Field> -void basisDeflate(const std::vector<Field> &_v,const std::vector<RealD>& eval,const Field& src_orig,Field& result) { - result = zero; - assert(_v.size()==eval.size()); - int N = (int)_v.size(); - for (int i=0;i<N;i++) { - Field& tmp = _v[i]; - axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result); - } -} - - /* enum IRLdiagonalisation { - IRLdiagonaliseWithDSTEGR, - IRLdiagonaliseWithQR, - IRLdiagonaliseWithEigen - };*/ - -///////////////////////////////////////////////////////////// -// Implicitly restarted lanczos -///////////////////////////////////////////////////////////// - -template<class Field> -class BlockImplicitlyRestartedLanczos { - private: - const RealD small = 1.0e-8; - int MaxIter; - int MinRestart; // Minimum number of restarts; only check for convergence after - int Nstop; // Number of evecs checked for convergence - int Nk; // Number of converged sought - // int Np; // Np -- Number of spare vecs in krylov space // == Nm - Nk - int Nm; // Nm -- total number of vectors - IRLdiagonalisation diagonalisation; - int orth_period; - - RealD OrthoTime; - RealD eresid, betastp; - //////////////////////////////// - // Embedded objects - //////////////////////////////// - // SortEigen<Field> _sort; - LinearFunction<Field> &_HermOp; - LinearFunction<Field> &_HermOpTest; - ///////////////////////// - // Constructor - ///////////////////////// -public: - BlockImplicitlyRestartedLanczos(LinearFunction<Field> & HermOp, - LinearFunction<Field> & HermOpTest, - int _Nstop, // sought vecs - int _Nk, // sought vecs - int _Nm, // spare vecs - RealD _eresid, // resid in lmdue deficit - RealD _betastp, // if beta(k) < betastp: converged - int _MaxIter, // Max iterations - int _MinRestart, int _orth_period = 1, - IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : - _HermOp(HermOp), _HermOpTest(HermOpTest), - Nstop(_Nstop) , Nk(_Nk), Nm(_Nm), - eresid(_eresid), betastp(_betastp), - MaxIter(_MaxIter) , MinRestart(_MinRestart), - orth_period(_orth_period), diagonalisation(_diagonalisation) { }; - - //////////////////////////////// - // Helpers - //////////////////////////////// - template<typename T> static RealD normalise(T& v) - { - RealD nn = norm2(v); - nn = sqrt(nn); - v = v * (1.0/nn); - return nn; - } - - void orthogonalize(Field& w, std::vector<Field>& 
evec,int k) - { - OrthoTime-=usecond()/1e6; - basisOrthogonalize(evec,w,k); - normalise(w); - OrthoTime+=usecond()/1e6; - } - -/* Rudy Arthur's thesis pp.137 ------------------------- -Require: M > K P = M − K † -Compute the factorization AVM = VM HM + fM eM -repeat - Q=I - for i = 1,...,P do - QiRi =HM −θiI Q = QQi - H M = Q †i H M Q i - end for - βK =HM(K+1,K) σK =Q(M,K) - r=vK+1βK +rσK - VK =VM(1:M)Q(1:M,1:K) - HK =HM(1:K,1:K) - →AVK =VKHK +fKe†K † Extend to an M = K + P step factorization AVM = VMHM + fMeM -until convergence -*/ - void calc(std::vector<RealD>& eval, std::vector<Field>& evec, const Field& src, int& Nconv, bool reverse, int SkipTest) - { - GridBase *grid = src._grid; - assert(grid == evec[0]._grid); - - GridLogIRL.TimingMode(1); - std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; - std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl; - std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; - std::cout << GridLogIRL <<" -- seek Nk = " << Nk <<" vectors"<< std::endl; - std::cout << GridLogIRL <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl; - std::cout << GridLogIRL <<" -- total Nm = " << Nm <<" vectors"<< std::endl; - std::cout << GridLogIRL <<" -- size of eval = " << eval.size() << std::endl; - std::cout << GridLogIRL <<" -- size of evec = " << evec.size() << std::endl; - if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) { - std::cout << GridLogIRL << "Diagonalisation is DSTEGR "<<std::endl; - } else if ( diagonalisation == IRLdiagonaliseWithQR ) { - std::cout << GridLogIRL << "Diagonalisation is QR "<<std::endl; - } else if ( diagonalisation == IRLdiagonaliseWithEigen ) { - std::cout << GridLogIRL << "Diagonalisation is Eigen "<<std::endl; - } - std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; - - assert(Nm <= evec.size() && Nm <= eval.size()); - - // quickly get an idea of the largest eigenvalue to more properly normalize the residuum - RealD evalMaxApprox = 0.0; - { - auto src_n = src; - auto tmp = src; - const int _MAX_ITER_IRL_MEVAPP_ = 50; - for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) { - _HermOpTest(src_n,tmp); - RealD vnum = real(innerProduct(src_n,tmp)); // HermOp. 
- RealD vden = norm2(src_n); - RealD na = vnum/vden; - if (fabs(evalMaxApprox/na - 1.0) < 0.05) - i=_MAX_ITER_IRL_MEVAPP_; - evalMaxApprox = na; - std::cout << GridLogIRL << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl; - src_n = tmp; - } - } - - std::vector<RealD> lme(Nm); - std::vector<RealD> lme2(Nm); - std::vector<RealD> eval2(Nm); - std::vector<RealD> eval2_copy(Nm); - Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm); - - Field f(grid); - Field v(grid); - int k1 = 1; - int k2 = Nk; - RealD beta_k; - - Nconv = 0; - - // Set initial vector - evec[0] = src; - normalise(evec[0]); - - // Initial Nk steps - OrthoTime=0.; - for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k); - std::cout<<GridLogIRL <<"Initial "<< Nk <<"steps done "<<std::endl; - std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl; - - ////////////////////////////////// - // Restarting loop begins - ////////////////////////////////// - int iter; - for(iter = 0; iter<MaxIter; ++iter){ - - OrthoTime=0.; - - std::cout<< GridLogMessage <<" **********************"<< std::endl; - std::cout<< GridLogMessage <<" Restart iteration = "<< iter << std::endl; - std::cout<< GridLogMessage <<" **********************"<< std::endl; - - std::cout<<GridLogIRL <<" running "<<Nm-Nk <<" steps: "<<std::endl; - for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k); - f *= lme[Nm-1]; - - std::cout<<GridLogIRL <<" "<<Nm-Nk <<" steps done "<<std::endl; - std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl; - - ////////////////////////////////// - // getting eigenvalues - ////////////////////////////////// - for(int k=0; k<Nm; ++k){ - eval2[k] = eval[k+k1-1]; - lme2[k] = lme[k+k1-1]; - } - Qt = Eigen::MatrixXd::Identity(Nm,Nm); - diagonalize(eval2,lme2,Nm,Nm,Qt,grid); - std::cout<<GridLogIRL <<" diagonalized "<<std::endl; - - ////////////////////////////////// - // sorting - ////////////////////////////////// - eval2_copy = eval2; - - // _sort.push(eval2,Nm); - std::partial_sort(eval2.begin(),eval2.begin()+Nm,eval2.end()); - - std::cout<<GridLogIRL <<" evals sorted "<<std::endl; - for(int ip=0; ip<k2; ++ip) std::cout<<GridLogIRL << "eval "<< ip << " "<< eval2[ip] << std::endl; - - ////////////////////////////////// - // Implicitly shifted QR transformations - ////////////////////////////////// - Qt = Eigen::MatrixXd::Identity(Nm,Nm); - std::cout<<GridLogIRL << "QR decompose " << std::endl; - for(int ip=k2; ip<Nm; ++ip){ - QR_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm); - } - std::cout<<GridLogIRL <<"QR decompose done "<<std::endl; - - assert(k2<Nm); - assert(k2<Nm); - assert(k1>0); - basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis - - std::cout<<GridLogIRL <<"QR rotation done "<<std::endl; - - //////////////////////////////////////////////////// - // Compressed vector f and beta(k2) - //////////////////////////////////////////////////// - f *= Qt(k2-1,Nm-1); - f += lme[k2-1] * evec[k2]; - beta_k = norm2(f); - beta_k = sqrt(beta_k); - std::cout<<GridLogIRL<<" beta(k) = "<<beta_k<<std::endl; - - RealD betar = 1.0/beta_k; - evec[k2] = betar * f; - lme[k2-1] = beta_k; - - //////////////////////////////////////////////////// - // Convergence test - //////////////////////////////////////////////////// - for(int k=0; k<Nm; ++k){ - eval2[k] = eval[k]; - lme2[k] = lme[k]; - // std::cout<<GridLogIRL << "eval2[" << k << "] = " << eval2[k] << std::endl; - } - Qt = Eigen::MatrixXd::Identity(Nm,Nm); - diagonalize(eval2,lme2,Nk,Nm,Qt,grid); - 
std::cout<<GridLogIRL <<" Diagonalized "<<std::endl; - - Nconv = 0; - if (iter >= MinRestart) { - std::cout << GridLogIRL << "Rotation to test convergence " << std::endl; - - Field ev0_orig(grid); - ev0_orig = evec[0]; - - basisRotate(evec,Qt,0,Nk,0,Nk,Nm); - - { - std::cout << GridLogIRL << "Test convergence" << std::endl; - Field B(grid); - - for(int j = 0; j<Nk; j+=SkipTest){ - B=evec[j]; - - //std::cout << "Checkerboard: " << evec[j].checkerboard << std::endl; - B.checkerboard = evec[0].checkerboard; - - _HermOpTest(B,v); - - RealD vnum = real(innerProduct(B,v)); // HermOp. - RealD vden = norm2(B); - RealD vv0 = norm2(v); - eval2[j] = vnum/vden; - v -= eval2[j]*B; - RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0); - std::cout.precision(13); - std::cout<<GridLogIRL << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<j<<"] " - <<"eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[j] << " (" << eval2_copy[j] << ")" - <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv - <<" "<< vnum/(sqrt(vden)*sqrt(vv0)) - <<std::endl; - - // change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged - if((vv<eresid*eresid) && (j == Nconv) ){ - Nconv+=SkipTest; - } - } - - // test if we converged, if so, terminate - std::cout<<GridLogIRL<<" #modes converged: "<<Nconv<<std::endl; - if( Nconv>=Nstop || beta_k < betastp){ - goto converged; - } - - //B[j] +=Qt[k+_Nm*j] * _v[k]._odata[ss]; - { - Eigen::MatrixXd qm = Eigen::MatrixXd::Zero(Nk,Nk); // Restrict Qt to Nk x Nk - for (int k=0;k<Nk;k++) - for (int j=0;j<Nk;j++) - qm(j,k) = Qt(j,k); - - Eigen::MatrixXd qmI = qm.inverse(); - - RealD res_check_rotate_inverse = (qm*qmI - Eigen::MatrixXd::Identity(Nk,Nk)).norm(); // sqrt( |X|^2 ) - - std::cout << GridLogIRL << "\tInverted ("<<Nk<<"x"<<Nk<<") Qt matrix " << " error = " << res_check_rotate_inverse <<std::endl; - - assert(res_check_rotate_inverse < 1e-7); - - basisRotate(evec,qmI,0,Nk,0,Nk,Nm); - std::cout << GridLogIRL << "\t Basis rotation done "<<std::endl; - - axpy(ev0_orig,-1.0,evec[0],ev0_orig); - std::cout << GridLogIRL << " | evec[0] - evec[0]_orig | = " << ::sqrt(norm2(ev0_orig)) << std::endl; - } - } - } else { - std::cout << GridLogIRL << "iter < MinRestart: do not yet test for convergence\n"; - } // end of iter loop - } - - std::cout<<GridLogError<<"\n NOT converged.\n"; - abort(); - - converged: - - if (SkipTest == 1) { - eval = eval2; - } else { - // test quickly - // PAB -- what precisely does this test? - for (int j=0;j<Nstop;j+=SkipTest) { - std::cout<<GridLogIRL << "Eigenvalue[" << j << "] = " << eval2[j] << " (" << eval2_copy[j] << ")" << std::endl; - } - eval2_copy.resize(eval2.size()); - eval = eval2_copy; - } - - basisSortInPlace(evec,eval,reverse); - - // test // PAB -- what does this test ? 
- for (int j=0;j<Nstop;j++) { - std::cout<<GridLogIRL << " |e[" << j << "]|^2 = " << norm2(evec[j]) << std::endl; - } - - std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; - std::cout << GridLogIRL << "ImplicitlyRestartedLanczos CONVERGED ; Summary :\n"; - std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; - std::cout << GridLogIRL << " -- Iterations = "<< iter << "\n"; - std::cout << GridLogIRL << " -- beta(k) = "<< beta_k << "\n"; - std::cout << GridLogIRL << " -- Nconv = "<< Nconv << "\n"; - std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; - } - - private: -/* Saad PP. 195 -1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0 -2. For k = 1,2,...,m Do: -3. wk:=Avk−βkv_{k−1} -4. αk:=(wk,vk) // -5. wk:=wk−αkvk // wk orthog vk -6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop -7. vk+1 := wk/βk+1 -8. EndDo - */ - void step(std::vector<RealD>& lmd, - std::vector<RealD>& lme, - std::vector<Field>& evec, - Field& w,int Nm,int k) - { - const RealD tiny = 1.0e-20; - assert( k< Nm ); - - GridStopWatch gsw_op,gsw_o; - - Field& evec_k = evec[k]; - - _HermOp(evec_k,w); - std::cout<<GridLogIRL << "_HermOp (poly)" <<std::endl; - - if(k>0) w -= lme[k-1] * evec[k-1]; - - ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk) - RealD alph = real(zalph); - - w = w - alph * evec_k;// 5. wk:=wk−αkvk - - RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop - // 7. vk+1 := wk/βk+1 - - lmd[k] = alph; - lme[k] = beta; - - std::cout<<GridLogIRL << "linalg " <<std::endl; - - if (k>0 && k % orth_period == 0) { - orthogonalize(w,evec,k); // orthonormalise - std::cout<<GridLogIRL << "orthogonalised " <<std::endl; - } - - if(k < Nm-1) evec[k+1] = w; - - std::cout<<GridLogIRL << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl; - if ( beta < tiny ) - std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl; - } - - void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme, - int Nk, int Nm, - Eigen::MatrixXd & Qt, // Nm x Nm - GridBase *grid) - { - Eigen::MatrixXd TriDiag = Eigen::MatrixXd::Zero(Nk,Nk); - - for(int i=0;i<Nk;i++) TriDiag(i,i) = lmd[i]; - for(int i=0;i<Nk-1;i++) TriDiag(i,i+1) = lme[i]; - for(int i=0;i<Nk-1;i++) TriDiag(i+1,i) = lme[i]; - - Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> eigensolver(TriDiag); - - for (int i = 0; i < Nk; i++) { - lmd[Nk-1-i] = eigensolver.eigenvalues()(i); - } - for (int i = 0; i < Nk; i++) { - for (int j = 0; j < Nk; j++) { - Qt(Nk-1-i,j) = eigensolver.eigenvectors()(j,i); - } - } - } - - /////////////////////////////////////////////////////////////////////////// - // File could end here if settle on Eigen ??? 
- /////////////////////////////////////////////////////////////////////////// - - void QR_decomp(std::vector<RealD>& lmd, // Nm - std::vector<RealD>& lme, // Nm - int Nk, int Nm, // Nk, Nm - Eigen::MatrixXd& Qt, // Nm x Nm matrix - RealD Dsh, int kmin, int kmax) - { - int k = kmin-1; - RealD x; - - RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]); - RealD c = ( lmd[k] -Dsh) *Fden; - RealD s = -lme[k] *Fden; - - RealD tmpa1 = lmd[k]; - RealD tmpa2 = lmd[k+1]; - RealD tmpb = lme[k]; - - lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb; - lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb; - lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb; - x =-s*lme[k+1]; - lme[k+1] = c*lme[k+1]; - - for(int i=0; i<Nk; ++i){ - RealD Qtmp1 = Qt(k,i); - RealD Qtmp2 = Qt(k+1,i); - Qt(k,i) = c*Qtmp1 - s*Qtmp2; - Qt(k+1,i)= s*Qtmp1 + c*Qtmp2; - } - - // Givens transformations - for(int k = kmin; k < kmax-1; ++k){ - - RealD Fden = 1.0/hypot(x,lme[k-1]); - RealD c = lme[k-1]*Fden; - RealD s = - x*Fden; - - RealD tmpa1 = lmd[k]; - RealD tmpa2 = lmd[k+1]; - RealD tmpb = lme[k]; - - lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb; - lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb; - lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb; - lme[k-1] = c*lme[k-1] -s*x; - - if(k != kmax-2){ - x = -s*lme[k+1]; - lme[k+1] = c*lme[k+1]; - } - - for(int i=0; i<Nk; ++i){ - RealD Qtmp1 = Qt(k,i); - RealD Qtmp2 = Qt(k+1,i); - Qt(k,i) = c*Qtmp1 -s*Qtmp2; - Qt(k+1,i) = s*Qtmp1 +c*Qtmp2; - } - } - } - - void diagonalize(std::vector<RealD>& lmd, std::vector<RealD>& lme, - int Nk, int Nm, - Eigen::MatrixXd & Qt, - GridBase *grid) - { - Qt = Eigen::MatrixXd::Identity(Nm,Nm); - if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) { - diagonalize_lapack(lmd,lme,Nk,Nm,Qt,grid); - } else if ( diagonalisation == IRLdiagonaliseWithQR ) { - diagonalize_QR(lmd,lme,Nk,Nm,Qt,grid); - } else if ( diagonalisation == IRLdiagonaliseWithEigen ) { - diagonalize_Eigen(lmd,lme,Nk,Nm,Qt,grid); - } else { - assert(0); - } - } - -#ifdef USE_LAPACK -void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e, - double *vl, double *vu, int *il, int *iu, double *abstol, - int *m, double *w, double *z, int *ldz, int *isuppz, - double *work, int *lwork, int *iwork, int *liwork, - int *info); -#endif - -void diagonalize_lapack(std::vector<RealD>& lmd, - std::vector<RealD>& lme, - int Nk, int Nm, - Eigen::MatrixXd& Qt, - GridBase *grid) -{ -#ifdef USE_LAPACK - const int size = Nm; - int NN = Nk; - double evals_tmp[NN]; - double evec_tmp[NN][NN]; - memset(evec_tmp[0],0,sizeof(double)*NN*NN); - double DD[NN]; - double EE[NN]; - for (int i = 0; i< NN; i++) { - for (int j = i - 1; j <= i + 1; j++) { - if ( j < NN && j >= 0 ) { - if (i==j) DD[i] = lmd[i]; - if (i==j) evals_tmp[i] = lmd[i]; - if (j==(i-1)) EE[j] = lme[j]; - } - } - } - int evals_found; - int lwork = ( (18*NN) > (1+4*NN+NN*NN)? 
(18*NN):(1+4*NN+NN*NN)) ; - int liwork = 3+NN*10 ; - int iwork[liwork]; - double work[lwork]; - int isuppz[2*NN]; - char jobz = 'V'; // calculate evals & evecs - char range = 'I'; // calculate all evals - // char range = 'A'; // calculate all evals - char uplo = 'U'; // refer to upper half of original matrix - char compz = 'I'; // Compute eigenvectors of tridiagonal matrix - int ifail[NN]; - int info; - int total = grid->_Nprocessors; - int node = grid->_processor; - int interval = (NN/total)+1; - double vl = 0.0, vu = 0.0; - int il = interval*node+1 , iu = interval*(node+1); - if (iu > NN) iu=NN; - double tol = 0.0; - if (1) { - memset(evals_tmp,0,sizeof(double)*NN); - if ( il <= NN){ - LAPACK_dstegr(&jobz, &range, &NN, - (double*)DD, (double*)EE, - &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' - &tol, // tolerance - &evals_found, evals_tmp, (double*)evec_tmp, &NN, - isuppz, - work, &lwork, iwork, &liwork, - &info); - for (int i = iu-1; i>= il-1; i--){ - evals_tmp[i] = evals_tmp[i - (il-1)]; - if (il>1) evals_tmp[i-(il-1)]=0.; - for (int j = 0; j< NN; j++){ - evec_tmp[i][j] = evec_tmp[i - (il-1)][j]; - if (il>1) evec_tmp[i-(il-1)][j]=0.; - } - } - } - { - grid->GlobalSumVector(evals_tmp,NN); - grid->GlobalSumVector((double*)evec_tmp,NN*NN); - } - } - // Safer to sort instead of just reversing it, - // but the document of the routine says evals are sorted in increasing order. - // qr gives evals in decreasing order. - for(int i=0;i<NN;i++){ - lmd [NN-1-i]=evals_tmp[i]; - for(int j=0;j<NN;j++){ - Qt((NN-1-i),j)=evec_tmp[i][j]; - } - } -#else - assert(0); -#endif -} - - void diagonalize_QR(std::vector<RealD>& lmd, std::vector<RealD>& lme, - int Nk, int Nm, - Eigen::MatrixXd & Qt, - GridBase *grid) - { - int QRiter = 100*Nm; - int kmin = 1; - int kmax = Nk; - - // (this should be more sophisticated) - for(int iter=0; iter<QRiter; ++iter){ - - // determination of 2x2 leading submatrix - RealD dsub = lmd[kmax-1]-lmd[kmax-2]; - RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]); - RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub))); - // (Dsh: shift) - - // transformation - QR_decomp(lmd,lme,Nk,Nm,Qt,Dsh,kmin,kmax); // Nk, Nm - - // Convergence criterion (redef of kmin and kamx) - for(int j=kmax-1; j>= kmin; --j){ - RealD dds = fabs(lmd[j-1])+fabs(lmd[j]); - if(fabs(lme[j-1])+dds > dds){ - kmax = j+1; - goto continued; - } - } - QRiter = iter; - return; - - continued: - for(int j=0; j<kmax-1; ++j){ - RealD dds = fabs(lmd[j])+fabs(lmd[j+1]); - if(fabs(lme[j])+dds > dds){ - kmin = j+1; - break; - } - } - } - std::cout << GridLogError << "[QL method] Error - Too many iteration: "<<QRiter<<"\n"; - abort(); - } - - }; -} - -#endif diff --git a/lib/log/Log.cc b/lib/log/Log.cc index 6c7d9459..bc46893f 100644 --- a/lib/log/Log.cc +++ b/lib/log/Log.cc @@ -50,7 +50,7 @@ namespace Grid { return (status==0) ? 
res.get() : name ; } -GridStopWatch Logger::StopWatch; +GridStopWatch Logger::GlobalStopWatch; int Logger::timestamp; std::ostream Logger::devnull(0); diff --git a/lib/log/Log.h b/lib/log/Log.h index 8db83266..1b4732ab 100644 --- a/lib/log/Log.h +++ b/lib/log/Log.h @@ -91,7 +91,9 @@ protected: std::string COLOUR; public: - static GridStopWatch StopWatch; + static GridStopWatch GlobalStopWatch; + GridStopWatch LocalStopWatch; + GridStopWatch *StopWatch; static std::ostream devnull; std::string background() {return Painter.colour["NORMAL"];} @@ -103,13 +105,25 @@ public: topName(topNm), Painter(col_class), timing_mode(0), - COLOUR(col) {} ; + COLOUR(col) + { + StopWatch = & GlobalStopWatch; + }; void Active(int on) {active = on;}; int isActive(void) {return active;}; static void Timestamp(int on) {timestamp = on;}; - void Reset(void) { StopWatch.Reset(); } - void TimingMode(int on) { timing_mode = on; if(on) Reset(); } + void Reset(void) { + StopWatch->Reset(); + StopWatch->Start(); + } + void TimingMode(int on) { + timing_mode = on; + if(on) { + StopWatch = &LocalStopWatch; + Reset(); + } + } friend std::ostream& operator<< (std::ostream& stream, Logger& log){ @@ -117,10 +131,10 @@ public: stream << log.background()<< std::left << log.topName << log.background()<< " : "; stream << log.colour() << std::left << log.name << log.background() << " : "; if ( log.timestamp ) { - StopWatch.Stop(); - GridTime now = StopWatch.Elapsed(); - if ( log.timing_mode==1 ) StopWatch.Reset(); - StopWatch.Start(); + log.StopWatch->Stop(); + GridTime now = log.StopWatch->Elapsed(); + if ( log.timing_mode==1 ) log.StopWatch->Reset(); + log.StopWatch->Start(); stream << log.evidence()<< now << log.background() << " : " ; } stream << log.colour(); diff --git a/lib/util/Init.cc b/lib/util/Init.cc index 1266d34d..031f8f5a 100644 --- a/lib/util/Init.cc +++ b/lib/util/Init.cc @@ -208,7 +208,7 @@ static int Grid_is_initialised = 0; void Grid_init(int *argc,char ***argv) { - GridLogger::StopWatch.Start(); + GridLogger::GlobalStopWatch.Start(); std::string arg; diff --git a/tests/lanczos/Test_dwf_compressed_lanczos.cc b/tests/lanczos/Test_dwf_compressed_lanczos.cc index 7fe37387..544d0358 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos.cc @@ -21,7 +21,14 @@ (ortho krylov low poly); and then fix up lowest say 200 eigenvalues by 1 run with high-degree poly (600 could be enough) */ #include <Grid/Grid.h> -#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockImplicitlyRestartedLanczos.h> +#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h> +///////////////////////////////////////////////////////////////////////////// +// The following are now decoupled from the Lanczos and deal with grids. 
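// The Log.h change above replaces the single static StopWatch with a global
// clock plus an optional per-logger one: in timing mode a logger reports the
// interval since its own last message instead of time since program start.
// A stripped-down sketch of the pattern (simplified names and API -- an
// assumption for illustration, not the actual Grid classes):

#include <chrono>
#include <iostream>

struct StopWatch {
  std::chrono::steady_clock::time_point t0 = std::chrono::steady_clock::now();
  void Reset() { t0 = std::chrono::steady_clock::now(); }
  double Elapsed() const {
    return std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count();
  }
};

struct Logger {
  static StopWatch GlobalStopWatch;   // shared clock: time since program start
  StopWatch LocalStopWatch;           // private clock: time between messages
  StopWatch* Clock = &GlobalStopWatch;
  int timing_mode = 0;
  void TimingMode(int on) {
    timing_mode = on;
    if (on) { Clock = &LocalStopWatch; Clock->Reset(); }
  }
  void msg(const char* s) {
    std::cout << "[" << Clock->Elapsed() << " s] " << s << "\n";
    if (timing_mode == 1) Clock->Reset();  // per-interval deltas in timing mode
  }
};
StopWatch Logger::GlobalStopWatch;

int main() {
  Logger irl;
  irl.TimingMode(1);                  // like GridLogIRL.TimingMode(1) in calc()
  irl.msg("QR decompose done");
  irl.msg("QR rotation done");
}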
+// Safe to replace functionality +///////////////////////////////////////////////////////////////////////////// +#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockedGrid.h> +#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h> +#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockProjector.h> #include "FieldVectorIO.h" #include "Params.h" @@ -319,7 +326,7 @@ void CoarseGridLanczos(BlockProjector<Field>& pr,RealD alpha2,RealD beta,int Npo Op2 = &Op2plain; } ProjectedHermOp<CoarseLatticeFermion<Nstop1>,LatticeFermion> Op2nopoly(pr,HermOp); - BlockImplicitlyRestartedLanczos<CoarseLatticeFermion<Nstop1> > IRL2(*Op2,*Op2,Nstop2,Nk2,Nm2,resid2,betastp2,MaxIt,MinRes2); + ImplicitlyRestartedLanczos<CoarseLatticeFermion<Nstop1> > IRL2(*Op2,*Op2,Nstop2,Nk2,Nm2,resid2,betastp2,MaxIt,MinRes2); src_coarse = 1.0; @@ -350,7 +357,7 @@ void CoarseGridLanczos(BlockProjector<Field>& pr,RealD alpha2,RealD beta,int Npo ) { - IRL2.calc(eval2,coef,src_coarse,Nconv,true,SkipTest2); + IRL2.calc(eval2,coef._v,src_coarse,Nconv,true,SkipTest2); coef.resize(Nstop2); eval2.resize(Nstop2); @@ -641,7 +648,7 @@ int main (int argc, char ** argv) { } // First round of Lanczos to get low mode basis - BlockImplicitlyRestartedLanczos<LatticeFermion> IRL1(Op1,Op1test,Nstop1,Nk1,Nm1,resid1,betastp1,MaxIt,MinRes1); + ImplicitlyRestartedLanczos<LatticeFermion> IRL1(Op1,Op1test,Nstop1,Nk1,Nm1,resid1,betastp1,MaxIt,MinRes1); int Nconv; char tag[1024]; @@ -650,7 +657,7 @@ int main (int argc, char ** argv) { if (simple_krylov_basis) { quick_krylov_basis(evec,src,Op1,Nstop1); } else { - IRL1.calc(eval1,evec,src,Nconv,false,1); + IRL1.calc(eval1,evec._v,src,Nconv,false,1); } evec.resize(Nstop1); // and throw away superfluous eval1.resize(Nstop1); From e325929851aa0e26055875a22b39aee39ed186cd Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Fri, 13 Oct 2017 14:02:43 +0100 Subject: [PATCH 05/45] All codes compile against the new Lanczos call signature --- lib/algorithms/LinearOperator.h | 59 +++++++++++++++++++ lib/algorithms/approx/Chebyshev.h | 35 ----------- .../iterative/ImplicitlyRestartedLanczos.h | 6 +- tests/lanczos/Test_dwf_compressed_lanczos.cc | 30 +--------- tests/lanczos/Test_dwf_lanczos.cc | 11 ++-- tests/lanczos/Test_synthetic_lanczos.cc | 10 ++-- tests/lanczos/Test_wilson_lanczos.cc | 9 ++- 7 files changed, 82 insertions(+), 78 deletions(-) diff --git a/lib/algorithms/LinearOperator.h b/lib/algorithms/LinearOperator.h index f1b8820e..0d32cc15 100644 --- a/lib/algorithms/LinearOperator.h +++ b/lib/algorithms/LinearOperator.h @@ -346,6 +346,7 @@ namespace Grid { virtual void operator() (const Field &in, Field &out) = 0; }; + ///////////////////////////////////////////////////////////// // Base classes for Multishift solvers for operators ///////////////////////////////////////////////////////////// @@ -368,6 +369,64 @@ namespace Grid { }; */ + //////////////////////////////////////////////////////////////////////////////////////////// + // Hermitian operator Linear function and operator function + //////////////////////////////////////////////////////////////////////////////////////////// + template<class Field> + class HermOpOperatorFunction : public OperatorFunction<Field> { + void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { + Linop.HermOp(in,out); + }; + }; + + template<typename Field> + class PlainHermOp : public LinearFunction<Field> { + public: + LinearOperatorBase<Field> &_Linop; + +
PlainHermOp(LinearOperatorBase<Field>& linop) : _Linop(linop) + {} + + void operator()(const Field& in, Field& out) { + _Linop.HermOp(in,out); + } + }; + + template<typename Field> + class FunctionHermOp : public LinearFunction<Field> { + public: + OperatorFunction<Field> & _poly; + LinearOperatorBase<Field> &_Linop; + + FunctionHermOp(OperatorFunction<Field> & poly,LinearOperatorBase<Field>& linop) + : _poly(poly), _Linop(linop) {}; + + void operator()(const Field& in, Field& out) { + _poly(_Linop,in,out); + } + }; + + template<class Field> + class Polynomial : public OperatorFunction<Field> { + private: + std::vector<RealD> Coeffs; + public: + Polynomial(std::vector<RealD> &_Coeffs) : Coeffs(_Coeffs) { }; + + // Implement the required interface + void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { + + Field AtoN(in._grid); + Field Mtmp(in._grid); + AtoN = in; + out = AtoN*Coeffs[0]; + for(int n=1;n<Coeffs.size();n++){ + Mtmp = AtoN; + Linop.HermOp(Mtmp,AtoN); + out=out+AtoN*Coeffs[n]; + } + }; + }; } diff --git a/lib/algorithms/approx/Chebyshev.h b/lib/algorithms/approx/Chebyshev.h index 5088c51b..7a6e9a9b 100644 --- a/lib/algorithms/approx/Chebyshev.h +++ b/lib/algorithms/approx/Chebyshev.h @@ -34,41 +34,6 @@ Author: Christoph Lehner <clehner@bnl.gov> namespace Grid { - //////////////////////////////////////////////////////////////////////////////////////////// - // Simple general polynomial with user supplied coefficients - //////////////////////////////////////////////////////////////////////////////////////////// - template<class Field> - class HermOpOperatorFunction : public OperatorFunction<Field> { - void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { - Linop.HermOp(in,out); - }; - }; - - template<class Field> - class Polynomial : public OperatorFunction<Field> { - private: - std::vector<RealD> Coeffs; - public: - Polynomial(std::vector<RealD> &_Coeffs) : Coeffs(_Coeffs) { }; - - // Implement the required interface - void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { - - Field AtoN(in._grid); - Field Mtmp(in._grid); - AtoN = in; - out = AtoN*Coeffs[0]; -// std::cout <<"Poly in " <<norm2(in)<<" size "<< Coeffs.size()<<std::endl; -// std::cout <<"Coeffs[0]= "<<Coeffs[0]<< " 0 " <<norm2(out)<<std::endl; - for(int n=1;n<Coeffs.size();n++){ - Mtmp = AtoN; - Linop.HermOp(Mtmp,AtoN); - out=out+AtoN*Coeffs[n]; -// std::cout <<"Coeffs "<<n<<"= "<< Coeffs[n]<< " 0 " <<std::endl; -// std::cout << n<<" " <<norm2(out)<<std::endl; - } - }; - }; //////////////////////////////////////////////////////////////////////////////////////////// // Generic Chebyshev approximations diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index f32e4fa5..6d3e0755 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -186,9 +186,9 @@ public: int _Nk, // sought vecs int _Nm, // spare vecs RealD _eresid, // resid in lmdue deficit - RealD _betastp, // if beta(k) < betastp: converged int _MaxIter, // Max iterations - int _MinRestart, int _orth_period = 1, + RealD _betastp=0.0, // if beta(k) < betastp: converged + int _MinRestart=1, int _orth_period = 1, IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : _HermOp(HermOp), _HermOpTest(HermOpTest), Nstop(_Nstop) , Nk(_Nk), Nm(_Nm), @@ -232,7 +232,7 @@ repeat →AVK =VKHK +fKe†K † Extend to an M = K + P step factorization AVM 
= VMHM + fMeM until convergence */ - void calc(std::vector<RealD>& eval, std::vector<Field>& evec, const Field& src, int& Nconv, bool reverse, int SkipTest) + void calc(std::vector<RealD>& eval, std::vector<Field>& evec, const Field& src, int& Nconv, bool reverse=true, int SkipTest=0) { GridBase *grid = src._grid; assert(grid == evec[0]._grid); diff --git a/tests/lanczos/Test_dwf_compressed_lanczos.cc b/tests/lanczos/Test_dwf_compressed_lanczos.cc index 544d0358..10d6c3ae 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos.cc @@ -100,19 +100,6 @@ void write_history(char* fn, std::vector<RealD>& hist) { fclose(f); } -template<typename Field> -class FunctionHermOp : public LinearFunction<Field> { -public: - OperatorFunction<Field> & _poly; - LinearOperatorBase<Field> &_Linop; - - FunctionHermOp(OperatorFunction<Field> & poly,LinearOperatorBase<Field>& linop) : _poly(poly), _Linop(linop) { - } - - void operator()(const Field& in, Field& out) { - _poly(_Linop,in,out); - } -}; template<typename Field> class CheckpointedLinearFunction : public LinearFunction<Field> { @@ -268,19 +255,6 @@ public: } }; -template<typename Field> -class PlainHermOp : public LinearFunction<Field> { -public: - LinearOperatorBase<Field> &_Linop; - - PlainHermOp(LinearOperatorBase<Field>& linop) : _Linop(linop) { - } - - void operator()(const Field& in, Field& out) { - _Linop.HermOp(in,out); - } -}; - template<typename vtype, int N > using CoarseSiteFieldGeneral = iScalar< iVector<vtype, N> >; template<int N> using CoarseSiteFieldD = CoarseSiteFieldGeneral< vComplexD, N >; template<int N> using CoarseSiteFieldF = CoarseSiteFieldGeneral< vComplexF, N >; @@ -326,7 +300,7 @@ void CoarseGridLanczos(BlockProjector<Field>& pr,RealD alpha2,RealD beta,int Npo Op2 = &Op2plain; } ProjectedHermOp<CoarseLatticeFermion<Nstop1>,LatticeFermion> Op2nopoly(pr,HermOp); - ImplicitlyRestartedLanczos<CoarseLatticeFermion<Nstop1> > IRL2(*Op2,*Op2,Nstop2,Nk2,Nm2,resid2,betastp2,MaxIt,MinRes2); + ImplicitlyRestartedLanczos<CoarseLatticeFermion<Nstop1> > IRL2(*Op2,*Op2,Nstop2,Nk2,Nm2,resid2,MaxIt,betastp2,MinRes2); src_coarse = 1.0; @@ -648,7 +622,7 @@ int main (int argc, char ** argv) { } // First round of Lanczos to get low mode basis - ImplicitlyRestartedLanczos<LatticeFermion> IRL1(Op1,Op1test,Nstop1,Nk1,Nm1,resid1,betastp1,MaxIt,MinRes1); + ImplicitlyRestartedLanczos<LatticeFermion> IRL1(Op1,Op1test,Nstop1,Nk1,Nm1,resid1,MaxIt,betastp1,MinRes1); int Nconv; char tag[1024]; diff --git a/tests/lanczos/Test_dwf_lanczos.cc b/tests/lanczos/Test_dwf_lanczos.cc index 1dd5dae3..b1e205cf 100644 --- a/tests/lanczos/Test_dwf_lanczos.cc +++ b/tests/lanczos/Test_dwf_lanczos.cc @@ -84,11 +84,12 @@ int main (int argc, char ** argv) std::vector<double> Coeffs { 0.,-1.}; Polynomial<FermionField> PolyX(Coeffs); - Chebyshev<FermionField> Cheb(0.2,5.,11); -// ChebyshevLanczos<LatticeFermion> Cheb(9.,1.,0.,20); -// Cheb.csv(std::cout); -// exit(-24); - ImplicitlyRestartedLanczos<FermionField> IRL(HermOp,Cheb,Nstop,Nk,Nm,resid,MaxIt); + Chebyshev<FermionField> Cheby(0.2,5.,11); + + FunctionHermOp<FermionField> OpCheby(Cheby,HermOp); + PlainHermOp<FermionField> Op (HermOp); + + ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby,Op,Nstop,Nk,Nm,resid,MaxIt); std::vector<RealD> eval(Nm); diff --git a/tests/lanczos/Test_synthetic_lanczos.cc b/tests/lanczos/Test_synthetic_lanczos.cc index 32fd6f32..4be9ca31 100644 --- a/tests/lanczos/Test_synthetic_lanczos.cc +++ b/tests/lanczos/Test_synthetic_lanczos.cc @@ 
-119,12 +119,13 @@ int main (int argc, char ** argv)
   RealD beta = 0.1;
   RealD mu = 0.0;
   int order = 11;
-  ChebyshevLanczos<LatticeComplex> Cheby(alpha,beta,mu,order);
+  Chebyshev<LatticeComplex> Cheby(alpha,beta,order);
   std::ofstream file("cheby.dat");
   Cheby.csv(file);
 
-  HermOpOperatorFunction<LatticeComplex> X;
   DumbOperator<LatticeComplex> HermOp(grid);
+  FunctionHermOp<LatticeComplex> OpCheby(Cheby,HermOp);
+  PlainHermOp<LatticeComplex> Op(HermOp);
 
   const int Nk = 40;
   const int Nm = 80;
@@ -133,8 +134,9 @@ int main (int argc, char ** argv)
   int Nconv;
   RealD eresid = 1.0e-6;
 
-  ImplicitlyRestartedLanczos<LatticeComplex> IRL(HermOp,X,Nk,Nk,Nm,eresid,Nit);
-  ImplicitlyRestartedLanczos<LatticeComplex> ChebyIRL(HermOp,Cheby,Nk,Nk,Nm,eresid,Nit);
+
+  ImplicitlyRestartedLanczos<LatticeComplex> IRL(Op,Op,Nk,Nk,Nm,eresid,Nit);
+  ImplicitlyRestartedLanczos<LatticeComplex> ChebyIRL(OpCheby,Op,Nk,Nk,Nm,eresid,Nit);
 
   LatticeComplex src(grid); gaussian(RNG,src);
   {
diff --git a/tests/lanczos/Test_wilson_lanczos.cc b/tests/lanczos/Test_wilson_lanczos.cc
index e8549234..eabc86d7 100644
--- a/tests/lanczos/Test_wilson_lanczos.cc
+++ b/tests/lanczos/Test_wilson_lanczos.cc
@@ -86,9 +86,12 @@ int main(int argc, char** argv) {
 
   std::vector<double> Coeffs{0, 1.};
   Polynomial<FermionField> PolyX(Coeffs);
-  Chebyshev<FermionField> Cheb(0.0, 10., 12);
-  ImplicitlyRestartedLanczos<FermionField> IRL(HermOp, PolyX, Nstop, Nk, Nm,
-                                               resid, MaxIt);
+  Chebyshev<FermionField> Cheby(0.0, 10., 12);
+
+  FunctionHermOp<FermionField> OpCheby(Cheby,HermOp);
+  PlainHermOp<FermionField> Op (HermOp);
+
+  ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby, Op, Nstop, Nk, Nm, resid, MaxIt);
 
   std::vector<RealD> eval(Nm);
   FermionField src(FGrid);

From 28ba8a0f481f0451b5dc22691fe0ad35963af55a Mon Sep 17 00:00:00 2001
From: paboyle <paboyle@ph.ed.ac.uk>
Date: Wed, 25 Oct 2017 23:45:57 +0100
Subject: [PATCH 06/45] Force spacing more nicely

---
 lib/log/Log.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/log/Log.h b/lib/log/Log.h
index 1b4732ab..ddff4c1d 100644
--- a/lib/log/Log.h
+++ b/lib/log/Log.h
@@ -135,7 +135,7 @@ public:
       GridTime now = log.StopWatch->Elapsed();
       if ( log.timing_mode==1 ) log.StopWatch->Reset();
       log.StopWatch->Start();
-      stream << log.evidence()<< now << log.background() << " : " ;
+      stream << log.evidence()<< std::setw(6)<<now << log.background() << " : " ;
     }
     stream << log.colour();
     return stream;

From 303e0b927d20b0fd8c91d548f2642ee5b0a06d84 Mon Sep 17 00:00:00 2001
From: paboyle <paboyle@ph.ed.ac.uk>
Date: Wed, 25 Oct 2017 23:46:33 +0100
Subject: [PATCH 07/45] Improvements for coarse grid compressed lanczos

---
 lib/algorithms/CoarsenedMatrix.h | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h
index c2910151..8af8d7ac 100644
--- a/lib/algorithms/CoarsenedMatrix.h
+++ b/lib/algorithms/CoarsenedMatrix.h
@@ -103,29 +103,32 @@ namespace Grid {
     GridBase *CoarseGrid;
     GridBase *FineGrid;
     std::vector<Lattice<Fobj> > subspace;
+    int checkerboard;
 
-    Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid) :
-      CoarseGrid(_CoarseGrid),
+    Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) :
+      CoarseGrid(_CoarseGrid),
       FineGrid(_FineGrid),
-      subspace(nbasis,_FineGrid)
+      subspace(nbasis,_FineGrid),
+      checkerboard(_checkerboard)
     {
     };
 
     void Orthogonalise(void){
       CoarseScalar InnerProd(CoarseGrid);
+      std::cout << GridLogMessage <<" Gram-Schmidt pass 1"<<std::endl;
       blockOrthogonalise(InnerProd,subspace);
+      std::cout << GridLogMessage <<" Gram-Schmidt pass 2"<<std::endl;
+      blockOrthogonalise(InnerProd,subspace);
+      //      std::cout << GridLogMessage <<" Gram-Schmidt checking orthogonality"<<std::endl;
+      //      CheckOrthogonal();
     }
     void CheckOrthogonal(void){
       CoarseVector iProj(CoarseGrid);
       CoarseVector eProj(CoarseGrid);
-      Lattice<CComplex> pokey(CoarseGrid);
-
       for(int i=0;i<nbasis;i++){
	blockProject(iProj,subspace[i],subspace);
-	eProj=zero;
-	for(int ss=0;ss<CoarseGrid->oSites();ss++){
+	parallel_for(int ss=0;ss<CoarseGrid->oSites();ss++){
	  eProj._odata[ss](i)=CComplex(1.0);
	}
	eProj=eProj - iProj;
@@ -137,6 +140,7 @@ namespace Grid {
       blockProject(CoarseVec,FineVec,subspace);
     }
     void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){
+      FineVec.checkerboard = subspace[0].checkerboard;
       blockPromote(CoarseVec,FineVec,subspace);
     }
     void CreateSubspaceRandom(GridParallelRNG &RNG){
@@ -147,6 +151,7 @@ namespace Grid {
       Orthogonalise();
     }
 
+    /*
     virtual void CreateSubspaceLanczos(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis)
     {
       // Run a Lanczos with sloppy convergence
@@ -195,7 +200,7 @@ namespace Grid {
	std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
       }
     }
-
+    */
     virtual void CreateSubspace(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
       RealD scale;
 

From d83868fdbbc6a3e9f67c966a190d517a2fb7f9f7 Mon Sep 17 00:00:00 2001
From: paboyle <paboyle@ph.ed.ac.uk>
Date: Wed, 25 Oct 2017 23:47:10 +0100
Subject: [PATCH 08/45] Identity linear op added -- useful in circumstances
 where a linear op may or may not be needed. Supply a trivial one if not needed

---
 lib/algorithms/LinearOperator.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/lib/algorithms/LinearOperator.h b/lib/algorithms/LinearOperator.h
index 0d32cc15..2a68a7b9 100644
--- a/lib/algorithms/LinearOperator.h
+++ b/lib/algorithms/LinearOperator.h
@@ -346,6 +346,13 @@ namespace Grid {
       virtual void operator() (const Field &in, Field &out) = 0;
     };
 
+    template<class Field> class IdentityLinearFunction : public LinearFunction<Field> {
+    public:
+      void operator() (const Field &in, Field &out){
+	out = in;
+      };
+    };
+
 
     /////////////////////////////////////////////////////////////
     // Base classes for Multishift solvers for operators

From f6c3f6bf2d6ff210e25844b64f0d09fe5d074212 Mon Sep 17 00:00:00 2001
From: paboyle <paboyle@ph.ed.ac.uk>
Date: Wed, 25 Oct 2017 23:47:59 +0100
Subject: [PATCH 09/45] XML serialisation of parms and initialise from parms
 object

---
 lib/algorithms/approx/Chebyshev.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/lib/algorithms/approx/Chebyshev.h b/lib/algorithms/approx/Chebyshev.h
index 7a6e9a9b..b34fac7f 100644
--- a/lib/algorithms/approx/Chebyshev.h
+++ b/lib/algorithms/approx/Chebyshev.h
@@ -34,6 +34,12 @@ Author: Christoph Lehner <clehner@bnl.gov>
 
 namespace Grid {
 
+struct ChebyParams : Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(ChebyParams,
+				  RealD, alpha,
+				  RealD, beta,
+				  int, Npoly);
+};
 
   ////////////////////////////////////////////////////////////////////////////////////////////
   // Generic Chebyshev approximations
@@ -67,6 +73,7 @@ namespace Grid {
     };
 
     Chebyshev(){};
+    Chebyshev(ChebyParams p){ Init(p.alpha,p.beta,p.Npoly);};
     Chebyshev(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD) ) {Init(_lo,_hi,_order,func);};
     Chebyshev(RealD _lo,RealD _hi,int _order) {Init(_lo,_hi,_order);};
 

From a479325349d5eed9351abe5adf267311d8b6d34c Mon Sep 17 00:00:00 2001
From:
paboyle <paboyle@ph.ed.ac.uk> Date: Wed, 25 Oct 2017 23:48:47 +0100 Subject: [PATCH 10/45] Rewrite of local coherence lanczos --- .../Test_dwf_compressed_lanczos_reorg.cc | 518 ++++++++++++++++++ 1 file changed, 518 insertions(+) create mode 100644 tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc new file mode 100644 index 00000000..a0691116 --- /dev/null +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc @@ -0,0 +1,518 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_compressed_lanczos_reorg.cc + + Copyright (C) 2017 + +Author: Leans heavily on Christoph Lehner's code +Author: Peter Boyle <paboyle@ph.ed.ac.uk> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +/* + * Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features + * in Grid that were intended to be used to support blocked Aggregates, from + */ +#include <Grid/Grid.h> +#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h> + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +struct LanczosParams : Serializable { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams, + ChebyParams, Cheby,/*Chebyshev*/ + int, Nstop, /*Vecs in Lanczos must converge Nstop < Nk < Nm*/ + int, Nk, /*Vecs in Lanczos seek converge*/ + int, Nm, /*Total vecs in Lanczos include restart*/ + RealD, resid, /*residual*/ + int, MaxIt, + RealD, betastp, /* ? 
*/ + int, MinRes); // Must restart +}; + +struct CompressedLanczosParams : Serializable { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(CompressedLanczosParams, + LanczosParams, FineParams, + LanczosParams, CoarseParams, + ChebyParams, Smoother, + std::vector<int>, blockSize, + std::string, config, + std::vector < std::complex<double> >, omega, + RealD, mass, + RealD, M5 + ); +}; + +// Duplicate functionality; ProjectedFunctionHermOp could be used with the trivial function +template<class Fobj,class CComplex,int nbasis> +class ProjectedHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > { +public: + typedef iVector<CComplex,nbasis > CoarseSiteVector; + typedef Lattice<CoarseSiteVector> CoarseField; + typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field + typedef Lattice<Fobj> FineField; + + LinearOperatorBase<FineField> &_Linop; + Aggregation<Fobj,CComplex,nbasis> &_Aggregate; + + ProjectedHermOp(LinearOperatorBase<FineField>& linop, Aggregation<Fobj,CComplex,nbasis> &aggregate) : + _Linop(linop), + _Aggregate(aggregate) { }; + + void operator()(const CoarseField& in, CoarseField& out) { + + GridBase *FineGrid = _Aggregate.FineGrid; + FineField fin(FineGrid); + FineField fout(FineGrid); + + _Aggregate.PromoteFromSubspace(in,fin); std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl; + _Linop.HermOp(fin,fout); std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl; + _Aggregate.ProjectToSubspace(out,fout); std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl; + } +}; + +template<class Fobj,class CComplex,int nbasis> +class ProjectedFunctionHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > { +public: + typedef iVector<CComplex,nbasis > CoarseSiteVector; + typedef Lattice<CoarseSiteVector> CoarseField; + typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field + typedef Lattice<Fobj> FineField; + + + OperatorFunction<FineField> & _poly; + LinearOperatorBase<FineField> &_Linop; + Aggregation<Fobj,CComplex,nbasis> &_Aggregate; + + ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,LinearOperatorBase<FineField>& linop, + Aggregation<Fobj,CComplex,nbasis> &aggregate) : + _poly(poly), + _Linop(linop), + _Aggregate(aggregate) { }; + + void operator()(const CoarseField& in, CoarseField& out) { + + GridBase *FineGrid = _Aggregate.FineGrid; + + FineField fin(FineGrid) ;fin.checkerboard =_Aggregate.checkerboard; + FineField fout(FineGrid);fout.checkerboard =_Aggregate.checkerboard; + + _Aggregate.PromoteFromSubspace(in,fin); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl; + _poly(_Linop,fin,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Poly "<<std::endl; + _Aggregate.ProjectToSubspace(out,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl; + } +}; + +template<class Fobj,class CComplex,int nbasis> +class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanczosTester<Lattice<iVector<CComplex,nbasis > > > +{ + public: + typedef iVector<CComplex,nbasis > CoarseSiteVector; + typedef Lattice<CoarseSiteVector> CoarseField; + typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field + typedef Lattice<Fobj> FineField; + + LinearFunction<CoarseField> & _Poly; + OperatorFunction<FineField> & _smoother; + LinearOperatorBase<FineField> &_Linop; + Aggregation<Fobj,CComplex,nbasis> &_Aggregate; + + 
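  // In equations, the two-stage test implemented below: with P the coarse
  // polynomial operator _Poly, TestConvergence forms the Rayleigh quotient
  //    mu = Re<B, P B> / <B, B> ,   r = P B - mu B ,
  // and accepts B once |r|^2 / evalMaxApprox^2 < eresid^2.  ReconstructEval
  // then re-checks on the fine grid: promote B from the subspace, apply the
  // _smoother polynomial, and run the same test against _Linop itself, so the
  // eigenvalue finally reported belongs to the original fine operator.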
ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField> &Poly, + OperatorFunction<FineField> &smoother, + LinearOperatorBase<FineField> &Linop, + Aggregation<Fobj,CComplex,nbasis> &Aggregate) + : _smoother(smoother), _Linop(Linop),_Aggregate(Aggregate), _Poly(Poly) { }; + + int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) + { + CoarseField v(B); + RealD eval_poly = eval; + // Apply operator + _Poly(B,v); + + RealD vnum = real(innerProduct(B,v)); // HermOp. + RealD vden = norm2(B); + RealD vv0 = norm2(v); + eval = vnum/vden; + v -= eval*B; + + RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0); + + std::cout.precision(13); + std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] " + <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")" + <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv + <<std::endl; + + int conv=0; + if( (vv<eresid*eresid) ) conv = 1; + return conv; + } + int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) + { + GridBase *FineGrid = _Aggregate.FineGrid; + + int checkerboard = _Aggregate.checkerboard; + + FineField fB(FineGrid);fB.checkerboard =checkerboard; + FineField fv(FineGrid);fv.checkerboard =checkerboard; + + _Aggregate.PromoteFromSubspace(B,fv); + _smoother(_Linop,fv,fB); + + RealD eval_poly = eval; + _Linop.HermOp(fB,fv); + + RealD vnum = real(innerProduct(fB,fv)); // HermOp. + RealD vden = norm2(fB); + RealD vv0 = norm2(fv); + eval = vnum/vden; + fv -= eval*fB; + RealD vv = norm2(fv) / ::pow(evalMaxApprox,2.0); + + std::cout.precision(13); + std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] " + <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")" + <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv + <<std::endl; + + if( (vv<eresid*eresid) ) return 1; + return 0; + } +}; + + +//////////////////////////////////////////// +// Make serializable Lanczos params +//////////////////////////////////////////// +template<class Fobj,class CComplex,int nbasis> +class CoarseFineIRL +{ +public: + typedef iVector<CComplex,nbasis > CoarseSiteVector; + typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field + typedef Lattice<CoarseSiteVector> CoarseField; + typedef Lattice<Fobj> FineField; + +private: + GridBase *_CoarseGrid; + GridBase *_FineGrid; + int _checkerboard; + LinearOperatorBase<FineField> & _FineOp; + + // FIXME replace Aggregation with vector of fine; the code reuse is too small for + // the hassle and complexity of cross coupling. 
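  // For orientation, the sequence driven from main() below is:
  //   1. calcFine      -- IRL on the fine grid with the Chebyshev-preconditioned
  //                       operator; the leading nbasis eigenvectors populate
  //                       _Aggregate.subspace and evals_fine.
  //   2. Orthogonalise -- two-pass block Gram-Schmidt on that subspace.
  //   3. calcCoarse    -- IRL on the blocked coarse grid via
  //                       ProjectedFunctionHermOp, with the smoothed tester
  //                       above as the convergence check; the Chebyshev
  //                       eigenvalues are finally mapped back through
  //                       Cheby.approxInv to eigenvalues of the fine operator.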
+ Aggregation<Fobj,CComplex,nbasis> _Aggregate; + std::vector<RealD> evals_fine; + std::vector<RealD> evals_coarse; + std::vector<CoarseField> evec_coarse; +public: + CoarseFineIRL(GridBase *FineGrid, + GridBase *CoarseGrid, + LinearOperatorBase<FineField> &FineOp, + int checkerboard) : + _CoarseGrid(CoarseGrid), + _FineGrid(FineGrid), + _Aggregate(CoarseGrid,FineGrid,checkerboard), + _FineOp(FineOp), + _checkerboard(checkerboard) + { + evals_fine.resize(0); + evals_coarse.resize(0); + }; + void Orthogonalise(void ) { _Aggregate.Orthogonalise(); } + + template<typename T> static RealD normalise(T& v) + { + RealD nn = norm2(v); + nn = ::sqrt(nn); + v = v * (1.0/nn); + return nn; + } + + void testFine(void) + { + int Nk = nbasis; + _Aggregate.subspace.resize(Nk,_FineGrid); + _Aggregate.subspace[0]=1.0; + _Aggregate.subspace[0].checkerboard=_checkerboard; + normalise(_Aggregate.subspace[0]); + PlainHermOp<FineField> Op(_FineOp); + for(int k=1;k<Nk;k++){ + _Aggregate.subspace[k].checkerboard=_checkerboard; + Op(_Aggregate.subspace[k-1],_Aggregate.subspace[k]); + normalise(_Aggregate.subspace[k]); + } + } + + + void checkpointFine(std::string evecs_file,std::string evals_file) + { + assert(_Aggregate.subspace.size()==nbasis); + emptyUserRecord record; + { + ScidacWriter WR; + WR.open(evecs_file); + for(int k=0;k<nbasis;k++) { + WR.writeScidacFieldRecord(_Aggregate.subspace[k],record); + } + WR.close(); + } + { + XmlWriter WR(evals_file); + write(WR,"evals",evals_fine); + } + } + void checkpointCoarse(std::string evecs_file,std::string evals_file) + { + int n = evec_coarse.size(); + emptyUserRecord record; + { + ScidacWriter WR; + WR.open(evecs_file); + for(int k=0;k<n;k++) { + WR.writeScidacFieldRecord(evec_coarse[k],record); + } + WR.close(); + } + { + XmlWriter WR(evals_file); + write(WR,"evals",evals_coarse); + } + } + + void checkpointFineRestore(std::string evecs_file,std::string evals_file) + { + { + XmlReader RD(evals_file); + read(RD,"evals",evals_fine); + } + assert(evals_fine.size()==nbasis); + + emptyUserRecord record; + { + ScidacReader RD ; + RD.open(evecs_file); + for(int k=0;k<nbasis;k++) { + RD.readScidacFieldRecord(_Aggregate.subspace[k],record); + } + RD.close(); + } + } + + void calcFine(ChebyParams cheby_parms,int Nstop,int Nk,int Nm,RealD resid, + RealD MaxIt, RealD betastp, int MinRes) + { + assert(nbasis<=Nm); + Chebyshev<FineField> Cheby(cheby_parms); + FunctionHermOp<FineField> ChebyOp(Cheby,_FineOp); + PlainHermOp<FineField> Op(_FineOp); + + evals_fine.resize(Nm); + _Aggregate.subspace.resize(Nm,_FineGrid); + + ImplicitlyRestartedLanczos<FineField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); + + FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard; + + int Nconv; + IRL.calc(evals_fine,_Aggregate.subspace,src,Nconv,false,0); + + // Shrink down to number saved + assert(Nstop>=nbasis); + assert(Nconv>=nbasis); + evals_fine.resize(nbasis); + _Aggregate.subspace.resize(nbasis,_FineGrid); + } + void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth, + int Nstop, int Nk, int Nm,RealD resid, + RealD MaxIt, RealD betastp, int MinRes) + { + Chebyshev<FineField> Cheby(cheby_op); + ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,_Aggregate); + ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,_Aggregate); + ////////////////////////////////////////////////////////////////////////////////////////////////// + // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL + 
////////////////////////////////////////////////////////////////////////////////////////////////// + + Chebyshev<FineField> ChebySmooth(cheby_smooth); + ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate); + + + evals_coarse.resize(Nm); + evec_coarse.resize(Nm,_CoarseGrid); + + CoarseField src(_CoarseGrid); src=1.0; + + ImplicitlyRestartedLanczos<CoarseField> IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); + int Nconv=0; + IRL.calc(evals_coarse,evec_coarse,src,Nconv,false,1); + assert(Nconv>=Nstop); + + for (int i=0;i<Nstop;i++){ + std::cout << i << " Coarse eval = " << evals_coarse[i] << std::endl; + } + // We got the evalues of the Cheby operator; + // Reconstruct eigenvalues of original operator via Chebyshev inverse + for (int i=0;i<Nstop;i++){ + + RealD eval_guess; + if (i==0) eval_guess = 0; + else eval_guess = evals_coarse[i-1]; + + RealD eval_poly = evals_coarse[i]; + RealD eval_op = Cheby.approxInv(eval_poly,eval_guess,100,1e-10); + std::cout << i << " Reconstructed eval = " << eval_op << " from guess " <<eval_guess<< " Cheby poly " << eval_poly << std::endl; + evals_coarse[i] = eval_op; + } + + } +}; + + +int main (int argc, char ** argv) { + + Grid_init(&argc,&argv); + GridLogIRL.TimingMode(1); + + CompressedLanczosParams Params; + { + Params.omega.resize(10); + Params.blockSize.resize(5); + XmlWriter writer("Params_template.xml"); + write(writer,"Params",Params); + std::cout << GridLogMessage << " Written Params_template.xml" <<std::endl; + } + + { + XmlReader reader("./Params.xml"); + read(reader, "Params", Params); + } + + int Ls = (int)Params.omega.size(); + RealD mass = Params.mass; + RealD M5 = Params.M5; + std::vector<int> blockSize = Params.blockSize; + + // Grids + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + std::vector<int> fineLatt = GridDefaultLatt(); + int dims=fineLatt.size(); + assert(blockSize.size()==dims+1); + std::vector<int> coarseLatt(dims); + std::vector<int> coarseLatt5d ; + + for (int d=0;d<coarseLatt.size();d++){ + coarseLatt[d] = fineLatt[d]/blockSize[d]; assert(coarseLatt[d]*blockSize[d]==fineLatt[d]); + } + + std::cout << GridLogMessage<< " 5d coarse lattice is "; + for (int i=0;i<coarseLatt.size();i++){ + std::cout << coarseLatt[i]<<"x"; + } + int cLs = Ls/blockSize[dims]; assert(cLs*blockSize[dims]==Ls); + std::cout << cLs<<std::endl; + + GridCartesian * CoarseGrid4 = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * CoarseGrid4rb = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4); + GridCartesian * CoarseGrid5 = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4); + GridRedBlackCartesian * CoarseGrid5rb = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid5); + + // Gauge field + LatticeGaugeField Umu(UGrid); + FieldMetaData header; + NerscIO::readConfiguration(Umu,header,Params.config); + std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << " Ls: " << Ls << std::endl; + + // ZMobius EO Operator + ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.); + 
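  /* For orientation: the ./Params.xml read above must mirror the serialisable
     classes declared at the top of this file.  A skeleton (all values here are
     placeholders; the authoritative layout is the Params_template.xml this
     program writes on startup) would look like:

       <grid>
         <Params>
           <FineParams>
             <Cheby><alpha>..</alpha><beta>..</beta><Npoly>..</Npoly></Cheby>
             <Nstop>60</Nstop><Nk>..</Nk><Nm>..</Nm>
             <resid>..</resid><MaxIt>..</MaxIt><betastp>..</betastp><MinRes>..</MinRes>
           </FineParams>
           <CoarseParams> <!-- same fields as FineParams --> </CoarseParams>
           <Smoother><alpha>..</alpha><beta>..</beta><Npoly>..</Npoly></Smoother>
           <blockSize> <!-- five entries: one per 4d axis plus Ls --> </blockSize>
           <config>..</config>
           <omega> <!-- Ls complex entries --> </omega>
           <mass>..</mass>
           <M5>..</M5>
         </Params>
       </grid>

     Constraints checked at runtime: fine Nstop must equal the compiled-in
     nbasis (60 below), Nstop < Nk < Nm at each level, and every lattice extent
     must be divisible by the matching blockSize entry. */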
SchurDiagTwoOperator<ZMobiusFermionR,LatticeFermion> HermOp(Ddwf); + + // Eigenvector storage + LanczosParams fine =Params.FineParams; + LanczosParams coarse=Params.CoarseParams; + + const int Ns1 = fine.Nstop; const int Ns2 = coarse.Nstop; + const int Nk1 = fine.Nk; const int Nk2 = coarse.Nk; + const int Nm1 = fine.Nm; const int Nm2 = coarse.Nm; + + std::cout << GridLogMessage << "Keep " << fine.Nstop << " fine vectors" << std::endl; + std::cout << GridLogMessage << "Keep " << coarse.Nstop << " coarse vectors" << std::endl; + assert(Nm2 >= Nm1); + + const int nbasis= 60; + assert(nbasis==Ns1); + CoarseFineIRL<vSpinColourVector,vTComplex,nbasis> IRL(FrbGrid,CoarseGrid5rb,HermOp,Odd); + std::cout << GridLogMessage << "Constructed CoarseFine IRL" << std::endl; + + int do_fine = 1; + int do_coarse = 0; + int do_smooth = 0; + if ( do_fine ) { + std::cout << GridLogMessage << "Performing fine grid IRL Nstop "<< Ns1 << " Nk "<<Nk1<<" Nm "<<Nm1<< std::endl; + IRL.calcFine(fine.Cheby, + fine.Nstop,fine.Nk,fine.Nm, + fine.resid,fine.MaxIt, + fine.betastp,fine.MinRes); + + std::cout << GridLogIRL<<"checkpointing"<<std::endl; + IRL.checkpointFine(std::string("evecs.scidac"),std::string("evals.xml")); + std::cout << GridLogIRL<<"checkpoint written"<<std::endl; + } else { + // IRL.testFine(); + IRL.checkpointFineRestore(std::string("evecs.scidac"),std::string("evals.xml")); + } + + std::cout << GridLogMessage << "Orthogonalising " << nbasis<<" Nm "<<Nm2<< std::endl; + IRL.Orthogonalise(); + + std::cout << GridLogMessage << "Performing coarse grid IRL Nstop "<< Ns2<< " Nk "<<Nk2<<" Nm "<<Nm2<< std::endl; + IRL.calcCoarse(coarse.Cheby,Params.Smoother, + coarse.Nstop, coarse.Nk,coarse.Nm, + coarse.resid, coarse.MaxIt, + coarse.betastp,coarse.MinRes); + + IRL.checkpointCoarse(std::string("evecs.coarse.scidac"),std::string("evals.coarse.xml")); + + // IRL.smoothedCoarseEigenvalues(); + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Questions pending + // -- i) Mixed Precision sensitivity discussion. + // -- ii) Stopping condition and checks on the convergence of all evecs; ordering + // -- iii) Total matmul count compared to no compression. + // -- iv) Log tree walk back from maximal mode + // -- v) betastp? + // -- vi) eval2, eval2_copy annoying + // -- vii) Smoothing and checking. + // -- viii) Different poly in convergence check vs. IRL restart+ logging of which have converged; locking, assume no deconverge? + // -- xi) CG 10 iters inverse iteration 1 pass. vs. Chebyshev. vs. Result *after* convergence declaration for each, apply H. + // i.e. 
coarse2fine + /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + Grid_finalize(); +} + From b8654be0efb33979dff627893b10dc872627a910 Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Wed, 25 Oct 2017 23:49:23 +0100 Subject: [PATCH 11/45] 64 bit safe offsets --- lib/parallelIO/BinaryIO.h | 94 +++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 43 deletions(-) diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index d14f3fe2..a2abc9be 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -261,7 +261,7 @@ class BinaryIO { GridBase *grid, std::vector<fobj> &iodata, std::string file, - int offset, + Integer offset, const std::string &format, int control, uint32_t &nersc_csum, uint32_t &scidac_csuma, @@ -367,7 +367,7 @@ class BinaryIO { assert(0); #endif } else { - std::cout << GridLogMessage << "C++ read I/O " << file << " : " + std::cout << GridLogMessage << "C++ read I/O " << file << " : " << iodata.size() * sizeof(fobj) << " bytes" << std::endl; std::ifstream fin; fin.open(file, std::ios::binary | std::ios::in); @@ -444,48 +444,56 @@ class BinaryIO { assert(0); #endif } else { + + std::cout << GridLogMessage << "C++ write I/O " << file << " : " + << iodata.size() * sizeof(fobj) << " bytes" << std::endl; std::ofstream fout; - fout.exceptions ( std::fstream::failbit | std::fstream::badbit ); - try { - fout.open(file,std::ios::binary|std::ios::out|std::ios::in); - } catch (const std::fstream::failure& exc) { - std::cout << GridLogError << "Error in opening the file " << file << " for output" <<std::endl; - std::cout << GridLogError << "Exception description: " << exc.what() << std::endl; - std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl; - #ifdef USE_MPI_IO - MPI_Abort(MPI_COMM_WORLD,1); - #else - exit(1); - #endif - } - std::cout << GridLogMessage<< "C++ write I/O "<< file<<" : " - << iodata.size()*sizeof(fobj)<<" bytes"<<std::endl; - - if ( control & BINARYIO_MASTER_APPEND ) { - fout.seekp(0,fout.end); - } else { - fout.seekp(offset+myrank*lsites*sizeof(fobj)); + fout.exceptions ( std::fstream::failbit | std::fstream::badbit ); + try { + fout.open(file,std::ios::binary|std::ios::out|std::ios::in); + } catch (const std::fstream::failure& exc) { + std::cout << GridLogError << "Error in opening the file " << file << " for output" <<std::endl; + std::cout << GridLogError << "Exception description: " << exc.what() << std::endl; + std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl; +#ifdef USE_MPI_IO + MPI_Abort(MPI_COMM_WORLD,1); +#else + exit(1); +#endif + } + + if ( control & BINARYIO_MASTER_APPEND ) { + try { + fout.seekp(0,fout.end); + } catch (const std::fstream::failure& exc) { + std::cout << "Exception in seeking file end " << file << std::endl; + } + } else { + try { + fout.seekp(offset+myrank*lsites*sizeof(fobj)); + } catch (const std::fstream::failure& exc) { + std::cout << "Exception in seeking file " << file <<" offset "<< offset << std::endl; + } } - - try { - fout.write((char *)&iodata[0],iodata.size()*sizeof(fobj));//assert( fout.fail()==0); - } - catch (const std::fstream::failure& exc) { - std::cout << "Exception in writing file " << file << std::endl; - std::cout << GridLogError << "Exception description: "<< exc.what() << std::endl; - #ifdef USE_MPI_IO - MPI_Abort(MPI_COMM_WORLD,1); - #else - exit(1); - #endif - } + try { + fout.write((char 
*)&iodata[0],iodata.size()*sizeof(fobj));//assert( fout.fail()==0); + } + catch (const std::fstream::failure& exc) { + std::cout << "Exception in writing file " << file << std::endl; + std::cout << GridLogError << "Exception description: "<< exc.what() << std::endl; +#ifdef USE_MPI_IO + MPI_Abort(MPI_COMM_WORLD,1); +#else + exit(1); +#endif + } fout.close(); - } - timer.Stop(); - } - + } + timer.Stop(); + } + std::cout<<GridLogMessage<<"IOobject: "; if ( control & BINARYIO_READ) std::cout << " read "; else std::cout << " write "; @@ -515,7 +523,7 @@ class BinaryIO { static inline void readLatticeObject(Lattice<vobj> &Umu, std::string file, munger munge, - int offset, + Integer offset, const std::string &format, uint32_t &nersc_csum, uint32_t &scidac_csuma, @@ -552,7 +560,7 @@ class BinaryIO { static inline void writeLatticeObject(Lattice<vobj> &Umu, std::string file, munger munge, - int offset, + Integer offset, const std::string &format, uint32_t &nersc_csum, uint32_t &scidac_csuma, @@ -589,7 +597,7 @@ class BinaryIO { static inline void readRNG(GridSerialRNG &serial, GridParallelRNG ¶llel, std::string file, - int offset, + Integer offset, uint32_t &nersc_csum, uint32_t &scidac_csuma, uint32_t &scidac_csumb) @@ -651,7 +659,7 @@ class BinaryIO { static inline void writeRNG(GridSerialRNG &serial, GridParallelRNG ¶llel, std::string file, - int offset, + Integer offset, uint32_t &nersc_csum, uint32_t &scidac_csuma, uint32_t &scidac_csumb) From 66295b99aada692f68c6547ce7d435e8d7df9e66 Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Wed, 25 Oct 2017 23:50:05 +0100 Subject: [PATCH 12/45] Bit less verbose SciDAC IO --- lib/parallelIO/IldgIO.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h index ba71153d..1f2b7c90 100644 --- a/lib/parallelIO/IldgIO.h +++ b/lib/parallelIO/IldgIO.h @@ -147,7 +147,7 @@ namespace QCD { _scidacRecord = sr; - std::cout << GridLogMessage << "Build SciDAC datatype " <<sr.datatype<<std::endl; + // std::cout << GridLogMessage << "Build SciDAC datatype " <<sr.datatype<<std::endl; } /////////////////////////////////////////////////////// @@ -349,7 +349,6 @@ class GridLimeWriter : public BinaryIO { uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites; createLimeRecordHeader(record_name, 0, 0, PayloadSize); - // std::cout << "W sizeof(sobj)" <<sizeof(sobj)<<std::endl; // std::cout << "W Gsites " <<field._grid->_gsites<<std::endl; // std::cout << "W Payload expected " <<PayloadSize<<std::endl; @@ -382,7 +381,7 @@ class GridLimeWriter : public BinaryIO { std::stringstream streamb; streamb << std::hex << scidac_csumb; checksum.suma= streama.str(); checksum.sumb= streamb.str(); - std::cout << GridLogMessage<<" writing scidac checksums "<<std::hex<<scidac_csuma<<"/"<<scidac_csumb<<std::dec<<std::endl; + // std::cout << GridLogMessage<<" writing scidac checksums "<<std::hex<<scidac_csuma<<"/"<<scidac_csumb<<std::dec<<std::endl; writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM)); } }; @@ -642,7 +641,7 @@ class IldgReader : public GridLimeReader { // Copy out the string std::vector<char> xmlc(nbytes+1,'\0'); limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR); - std::cout << GridLogMessage<< "Non binary record :" <<limeReaderType(LimeR) <<std::endl; //<<"\n"<<(&xmlc[0])<<std::endl; + // std::cout << GridLogMessage<< "Non binary record :" <<limeReaderType(LimeR) <<std::endl; //<<"\n"<<(&xmlc[0])<<std::endl; ////////////////////////////////// // 
ILDG format record
@@ -686,7 +685,7 @@ class IldgReader : public GridLimeReader {
	  std::string xmls(&xmlc[0]);
	  // is it a USQCD info field
	  if ( xmls.find(std::string("usqcdInfo")) != std::string::npos ) {
-	    std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl;
+	    // std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl;
	    XmlReader RD(&xmlc[0],"");
	    read(RD,"usqcdInfo",usqcdInfo_);
	    found_usqcdInfo = 1;

From b395a312afa64798eb46a0751e21c45094b050f9 Mon Sep 17 00:00:00 2001
From: paboyle <paboyle@ph.ed.ac.uk>
Date: Wed, 25 Oct 2017 23:50:37 +0100
Subject: [PATCH 13/45] Better error messaging

---
 lib/serialisation/XmlIO.cc | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib/serialisation/XmlIO.cc b/lib/serialisation/XmlIO.cc
index a132a2f0..c0c45adc 100644
--- a/lib/serialisation/XmlIO.cc
+++ b/lib/serialisation/XmlIO.cc
@@ -68,10 +68,10 @@ std::string XmlWriter::XmlString(void)
 XmlReader::XmlReader(const char *xmlstring,string toplev) : fileName_("")
 {
   pugi::xml_parse_result result;
-  result = doc_.load_string(xmlstring);
+  result = doc_.load_file(xmlstring);
   if ( !result ) {
-    cerr << "XML error description: " << result.description() << "\n";
-    cerr << "XML error offset     : " << result.offset << "\n";
+    cerr << "XML error description: char * " << result.description() << " "<< xmlstring << "\n";
+    cerr << "XML error offset     : char * " << result.offset << " "<<xmlstring <<"\n";
     abort();
   }
   if ( toplev == std::string("") ) {
@@ -87,8 +87,8 @@ XmlReader::XmlReader(const string &fileName,string toplev) : fileName_(fileName)
   pugi::xml_parse_result result;
   result = doc_.load_file(fileName_.c_str());
   if ( !result ) {
-    cerr << "XML error description: " << result.description() << "\n";
-    cerr << "XML error offset     : " << result.offset << "\n";
+    cerr << "XML error description: " << result.description() <<" "<< fileName_ <<"\n";
+    cerr << "XML error offset     : " << result.offset <<" "<< fileName_ <<"\n";
     abort();
   }
   if ( toplev == std::string("") ) {

From 08583afaff9d87bcc5e10f2a13b32a4de64a4b12 Mon Sep 17 00:00:00 2001
From: paboyle <paboyle@ph.ed.ac.uk>
Date: Wed, 25 Oct 2017 23:51:18 +0100
Subject: [PATCH 14/45] Red black friendly coarsening

---
 lib/lattice/Lattice_transfer.h | 54 ++++++++++++++++++++--------------
 1 file changed, 32 insertions(+), 22 deletions(-)

diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h
index 713a8788..48688e43 100644
--- a/lib/lattice/Lattice_transfer.h
+++ b/lib/lattice/Lattice_transfer.h
@@ -109,8 +109,8 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
 
   coarseData=zero;
 
-  // Loop with a cache friendly loop ordering
-  for(int sf=0;sf<fine->oSites();sf++){
+  // Loop over coarse in parallel, and then loop over fine associated with coarse.
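  // Worked example of the index map used below: with local fine rdimensions
  // 16^4 and coarse 4^4, block_r[d] = 16/4 = 4 in each direction, so fine site
  // coor_f = (7,2,13,5) belongs to coarse site coor_c = (1,0,3,1).  Many fine
  // sites map to the same coarse site sc, which is why the accumulation runs
  // under PARALLEL_CRITICAL while the outer loop is parallel over fine sites sf.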
+ parallel_for(int sf=0;sf<fine->oSites();sf++){ int sc; std::vector<int> coor_c(_ndimension); @@ -119,8 +119,9 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData, for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); +PARALLEL_CRITICAL for(int i=0;i<nbasis;i++) { - + coarseData._odata[sc](i)=coarseData._odata[sc](i) + innerProduct(Basis[i]._odata[sf],fineData._odata[sf]); @@ -139,6 +140,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ, GridBase * coarse= coarseA._grid; fineZ.checkerboard=fineX.checkerboard; + assert(fineX.checkerboard==fineY.checkerboard); subdivides(coarse,fine); // require they map conformable(fineX,fineY); conformable(fineX,fineZ); @@ -180,9 +182,10 @@ template<class vobj,class CComplex> GridBase *coarse(CoarseInner._grid); GridBase *fine (fineX._grid); - Lattice<dotp> fine_inner(fine); + Lattice<dotp> fine_inner(fine); fine_inner.checkerboard = fineX.checkerboard; Lattice<dotp> coarse_inner(coarse); + // Precision promotion? fine_inner = localInnerProduct(fineX,fineY); blockSum(coarse_inner,fine_inner); parallel_for(int ss=0;ss<coarse->oSites();ss++){ @@ -193,7 +196,7 @@ template<class vobj,class CComplex> inline void blockNormalise(Lattice<CComplex> &ip,Lattice<vobj> &fineX) { GridBase *coarse = ip._grid; - Lattice<vobj> zz(fineX._grid); zz=zero; + Lattice<vobj> zz(fineX._grid); zz=zero; zz.checkerboard=fineX.checkerboard; blockInnerProduct(ip,fineX,fineX); ip = pow(ip,-0.5); blockZAXPY(fineX,ip,fineX,zz); @@ -216,19 +219,25 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData) block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; } + // Turn this around to loop threaded over sc and interior loop + // over sf would thread better coarseData=zero; - for(int sf=0;sf<fine->oSites();sf++){ - + parallel_region { + int sc; std::vector<int> coor_c(_ndimension); std::vector<int> coor_f(_ndimension); - Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); - for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; - Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); - - coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf]; + parallel_for_internal(int sf=0;sf<fine->oSites();sf++){ + + Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); + for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; + Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); + +PARALLEL_CRITICAL + coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf]; + } } return; } @@ -238,7 +247,7 @@ inline void blockPick(GridBase *coarse,const Lattice<vobj> &unpicked,Lattice<vob { GridBase * fine = unpicked._grid; - Lattice<vobj> zz(fine); + Lattice<vobj> zz(fine); zz.checkerboard = unpicked.checkerboard; Lattice<iScalar<vInteger> > fcoor(fine); zz = zero; @@ -303,20 +312,21 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData, } // Loop with a cache friendly loop ordering - for(int sf=0;sf<fine->oSites();sf++){ - + parallel_region { int sc; std::vector<int> coor_c(_ndimension); std::vector<int> coor_f(_ndimension); - Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); - for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; - Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); - - for(int i=0;i<nbasis;i++) { - if(i==0) fineData._odata[sf]=coarseData._odata[sc](i) * Basis[i]._odata[sf]; - else 
fineData._odata[sf]=fineData._odata[sf]+coarseData._odata[sc](i)*Basis[i]._odata[sf];
+      }
     }
   }
   return;
 }

From 3d63b4894e10c59b64c4ddc97c0ea35ffebb6d9d Mon Sep 17 00:00:00 2001
From: paboyle <paboyle@ph.ed.ac.uk>
Date: Wed, 25 Oct 2017 23:52:47 +0100
Subject: [PATCH 15/45] Use existing functionality where possible

---
 tests/lanczos/FieldBasisVector.h | 81 ++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 tests/lanczos/FieldBasisVector.h

diff --git a/tests/lanczos/FieldBasisVector.h b/tests/lanczos/FieldBasisVector.h
new file mode 100644
index 00000000..9a21aa46
--- /dev/null
+++ b/tests/lanczos/FieldBasisVector.h
@@ -0,0 +1,81 @@
+namespace Grid {
+
+template<class Field>
+class BasisFieldVector {
+ public:
+  int _Nm;
+
+  typedef typename Field::scalar_type Coeff_t;
+  typedef typename Field::vector_type vCoeff_t;
+  typedef typename Field::vector_object vobj;
+  typedef typename vobj::scalar_object sobj;
+
+  std::vector<Field> _v; // _Nfull vectors
+
+  void report(int n,GridBase* value) {
+
+    std::cout << GridLogMessage << "BasisFieldVector allocated:\n";
+    std::cout << GridLogMessage << " Delta N = " << n << "\n";
+    std::cout << GridLogMessage << " Size of full vectors (size) = " <<
+      ((double)n*sizeof(vobj)*value->oSites() / 1024./1024./1024.) << " GB\n";
+    std::cout << GridLogMessage << " Size = " << _v.size() << " Capacity = " << _v.capacity() << std::endl;
+
+    value->Barrier();
+
+#ifdef __linux
+    if (value->IsBoss()) {
+      system("cat /proc/meminfo");
+    }
+#endif
+
+    value->Barrier();
+
+  }
+
+  BasisFieldVector(int Nm,GridBase* value) : _Nm(Nm), _v(Nm,value) {
+    report(Nm,value);
+  }
+
+  ~BasisFieldVector() {
+  }
+
+  Field& operator[](int i) {
+    return _v[i];
+  }
+
+  void orthogonalize(Field& w, int k) {
+    basisOrthogonalize(_v,w,k);
+  }
+
+  void rotate(Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm) {
+    basisRotate(_v,Qt,j0,j1,k0,k1,Nm);
+  }
+
+  size_t size() const {
+    return _Nm;
+  }
+
+  void resize(int n) {
+    if (n > _Nm)
+      _v.reserve(n);
+
+    _v.resize(n,_v[0]._grid);
+
+    if (n < _Nm)
+      _v.shrink_to_fit();
+
+    report(n - _Nm,_v[0]._grid);
+
+    _Nm = n;
+  }
+
+  void sortInPlace(std::vector<RealD>& sort_vals, bool reverse) {
+    basisSortInPlace(_v,sort_vals,reverse);
+  }
+
+  void deflate(const std::vector<RealD>& eval,const Field& src_orig,Field& result) {
+    basisDeflate(_v,eval,src_orig,result);
+  }
+
+ };
+}

From e4d461cb03ee3b039345c3c4ec29704dec5c8d94 Mon Sep 17 00:00:00 2001
From: paboyle <paboyle@ph.ed.ac.uk>
Date: Wed, 25 Oct 2017 23:53:19 +0100
Subject: [PATCH 16/45] Messaging

---
 tests/lanczos/Test_dwf_compressed_lanczos.cc | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tests/lanczos/Test_dwf_compressed_lanczos.cc b/tests/lanczos/Test_dwf_compressed_lanczos.cc
index 10d6c3ae..a6eb95e9 100644
--- a/tests/lanczos/Test_dwf_compressed_lanczos.cc
+++ b/tests/lanczos/Test_dwf_compressed_lanczos.cc
@@ -26,9 +26,9 @@
 // The following are now decoupled from the Lanczos and deal with grids.
 // Safe to replace functionality
 /////////////////////////////////////////////////////////////////////////////
-#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockedGrid.h>
-#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h>
-#include <Grid/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockProjector.h>
+#include "BlockedGrid.h"
+#include "FieldBasisVector.h"
+#include "BlockProjector.h"
 #include "FieldVectorIO.h"
 #include "Params.h"
 
@@ -431,6 +431,7 @@ void CoarseGridLanczos(BlockProjector<Field>& pr,RealD alpha2,RealD beta,int Npo
   auto result = src_orig;
 
   // undeflated solve
+  std::cout << GridLogMessage << " Undeflated solve "<<std::endl;
   result = zero;
   CG(HermOp, src_orig, result);
   //  if (UCoarseGrid->IsBoss())
@@ -438,6 +439,7 @@ void CoarseGridLanczos(BlockProjector<Field>& pr,RealD alpha2,RealD beta,int Npo
   //  CG.ResHistory.clear();
 
   // deflated solve with all eigenvectors
+  std::cout << GridLogMessage << " Deflated solve with all evectors"<<std::endl;
   result = zero;
   pr.deflate(coef,eval2,Nstop2,src_orig,result);
   CG(HermOp, src_orig, result);
@@ -446,6 +448,7 @@ void CoarseGridLanczos(BlockProjector<Field>& pr,RealD alpha2,RealD beta,int Npo
   //  CG.ResHistory.clear();
 
   // deflated solve with non-blocked eigenvectors
+  std::cout << GridLogMessage << " Deflated solve with non-blocked evectors"<<std::endl;
   result = zero;
   pr.deflate(coef,eval1,Nstop1,src_orig,result);
   CG(HermOp, src_orig, result);
@@ -454,6 +457,7 @@ void CoarseGridLanczos(BlockProjector<Field>& pr,RealD alpha2,RealD beta,int Npo
   //  CG.ResHistory.clear();
 
   // deflated solve with all eigenvectors and original eigenvalues from proj
+  std::cout << GridLogMessage << " Deflated solve with all eigenvectors and original eigenvalues from proj"<<std::endl;
   result = zero;
   pr.deflate(coef,eval3,Nstop2,src_orig,result);
   CG(HermOp, src_orig, result);

From f4336e480a20a6c2935d5762c1e45b4383ee7232 Mon Sep 17 00:00:00 2001
From: paboyle <paboyle@ph.ed.ac.uk>
Date: Wed, 25 Oct 2017 23:53:44 +0100
Subject: [PATCH 17/45] Faster convergence time

---
 tests/solver/Test_dwf_mrhs_cg.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/solver/Test_dwf_mrhs_cg.cc b/tests/solver/Test_dwf_mrhs_cg.cc
index d9215db2..079fa85a 100644
--- a/tests/solver/Test_dwf_mrhs_cg.cc
+++ b/tests/solver/Test_dwf_mrhs_cg.cc
@@ -190,7 +190,7 @@ int main (int argc, char ** argv)
   MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf);
   MdagMLinearOperator<DomainWallFermionR,FermionField> HermOpCk(Dchk);
-  ConjugateGradient<FermionField> CG((1.0e-8/(me+1)),10000);
+  ConjugateGradient<FermionField> CG((1.0e-5/(me+1)),10000);
   s_res = zero;
   CG(HermOp,s_src,s_res);

From d577211cc376303d88355df5bb101ff8aaf6f9ab Mon Sep 17 00:00:00 2001
From: paboyle <paboyle@ph.ed.ac.uk>
Date: Wed, 25 Oct 2017 23:57:54 +0100
Subject: [PATCH 18/45] Relax stopping condition

---
 tests/solver/Test_dwf_mrhs_cg_mpi.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/solver/Test_dwf_mrhs_cg_mpi.cc b/tests/solver/Test_dwf_mrhs_cg_mpi.cc
index 90969b85..fbc6dd32 100644
--- a/tests/solver/Test_dwf_mrhs_cg_mpi.cc
+++ b/tests/solver/Test_dwf_mrhs_cg_mpi.cc
@@ -113,7 +113,7 @@ int main (int argc, char ** argv)
   MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf);
   MdagMLinearOperator<DomainWallFermionR,FermionField> HermOpCk(Dchk);
-  ConjugateGradient<FermionField> CG((1.0e-8/(me+1)),10000);
+  ConjugateGradient<FermionField> CG((1.0e-5/(me+1)),10000);
   s_res = zero;
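  // Note the per-rank tolerance 1.0e-5/(me+1): each split-grid sub-solve gets
  // a slightly different stopping target, and relaxing the base tolerance from
  // 1.0e-8 (here and in Test_dwf_mrhs_cg.cc above) simply shortens the test.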
CG(HermOp,s_src,s_res); From e9be293444039051630aca103ae861b51cf242a5 Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Thu, 26 Oct 2017 01:59:30 +0100 Subject: [PATCH 19/45] Better messaging --- lib/parallelIO/BinaryIO.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index a2abc9be..b40a75af 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -356,7 +356,7 @@ class BinaryIO { if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) { #ifdef USE_MPI_IO - std::cout<< GridLogMessage<< "MPI read I/O "<< file<< std::endl; + std::cout<< GridLogMessage<<"IOobject: MPI read I/O "<< file<< std::endl; ierr=MPI_File_open(grid->communicator,(char *) file.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &fh); assert(ierr==0); ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL); assert(ierr==0); ierr=MPI_File_read_all(fh, &iodata[0], 1, localArray, &status); assert(ierr==0); @@ -367,7 +367,7 @@ class BinaryIO { assert(0); #endif } else { - std::cout << GridLogMessage << "C++ read I/O " << file << " : " + std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : " << iodata.size() * sizeof(fobj) << " bytes" << std::endl; std::ifstream fin; fin.open(file, std::ios::binary | std::ios::in); @@ -413,9 +413,9 @@ class BinaryIO { timer.Start(); if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) { #ifdef USE_MPI_IO - std::cout << GridLogMessage << "MPI write I/O " << file << std::endl; + std::cout << GridLogMessage <<"IOobject: MPI write I/O " << file << std::endl; ierr = MPI_File_open(grid->communicator, (char *)file.c_str(), MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh); - std::cout << GridLogMessage << "Checking for errors" << std::endl; + // std::cout << GridLogMessage << "Checking for errors" << std::endl; if (ierr != MPI_SUCCESS) { char error_string[BUFSIZ]; @@ -445,7 +445,7 @@ class BinaryIO { #endif } else { - std::cout << GridLogMessage << "C++ write I/O " << file << " : " + std::cout << GridLogMessage << "IOobject: C++ write I/O " << file << " : " << iodata.size() * sizeof(fobj) << " bytes" << std::endl; std::ofstream fout; From ccd20df8276fa1951f7d6489bce95c3a65de57eb Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Thu, 26 Oct 2017 01:59:59 +0100 Subject: [PATCH 20/45] Better IRL interface --- tests/lanczos/BlockProjector.h | 143 +++++++ tests/lanczos/BlockedGrid.h | 401 ++++++++++++++++++ tests/lanczos/Test_dwf_compressed_lanczos.cc | 4 +- .../Test_dwf_compressed_lanczos_reorg.cc | 21 +- 4 files changed, 557 insertions(+), 12 deletions(-) create mode 100644 tests/lanczos/BlockProjector.h create mode 100644 tests/lanczos/BlockedGrid.h diff --git a/tests/lanczos/BlockProjector.h b/tests/lanczos/BlockProjector.h new file mode 100644 index 00000000..6becaa66 --- /dev/null +++ b/tests/lanczos/BlockProjector.h @@ -0,0 +1,143 @@ +namespace Grid { + +/* + BlockProjector + + If _HP_BLOCK_PROJECTORS_ is defined, we assume that _evec is a basis that is not + fully orthonormalized (to the precision of the coarse field) and we allow for higher-precision + coarse field than basis field. 
+ +*/ +//#define _HP_BLOCK_PROJECTORS_ + +template<typename Field> +class BlockProjector { +public: + + BasisFieldVector<Field>& _evec; + BlockedGrid<Field>& _bgrid; + + BlockProjector(BasisFieldVector<Field>& evec, BlockedGrid<Field>& bgrid) : _evec(evec), _bgrid(bgrid) { + } + + void createOrthonormalBasis(RealD thres = 0.0) { + + GridStopWatch sw; + sw.Start(); + + int cnt = 0; + +#pragma omp parallel shared(cnt) + { + int lcnt = 0; + +#pragma omp for + for (int b=0;b<_bgrid._o_blocks;b++) { + + for (int i=0;i<_evec._Nm;i++) { + + auto nrm0 = _bgrid.block_sp(b,_evec._v[i],_evec._v[i]); + + // |i> -= <j|i> |j> + for (int j=0;j<i;j++) { + _bgrid.block_caxpy(b,_evec._v[i],-_bgrid.block_sp(b,_evec._v[j],_evec._v[i]),_evec._v[j],_evec._v[i]); + } + + auto nrm = _bgrid.block_sp(b,_evec._v[i],_evec._v[i]); + + auto eps = nrm/nrm0; + if (Reduce(eps).real() < thres) { + lcnt++; + } + + // TODO: if norm is too small, remove this eigenvector/mark as not needed; in practice: set it to zero norm here and return a mask + // that is then used later to decide not to write certain eigenvectors to disk (add a norm calculation before subtraction step and look at nrm/nrm0 < eps to decide) + _bgrid.block_cscale(b,1.0 / sqrt(nrm),_evec._v[i]); + + } + + } + +#pragma omp critical + { + cnt += lcnt; + } + } + sw.Stop(); + std::cout << GridLogMessage << "Gram-Schmidt to create blocked basis took " << sw.Elapsed() << " (" << ((RealD)cnt / (RealD)_bgrid._o_blocks / (RealD)_evec._Nm) + << " below threshold)" << std::endl; + + } + + template<typename CoarseField> + void coarseToFine(const CoarseField& in, Field& out) { + + out = zero; + out.checkerboard = _evec._v[0].checkerboard; + + int Nbasis = sizeof(in._odata[0]._internal._internal) / sizeof(in._odata[0]._internal._internal[0]); + assert(Nbasis == _evec._Nm); + +#pragma omp parallel for + for (int b=0;b<_bgrid._o_blocks;b++) { + for (int j=0;j<_evec._Nm;j++) { + _bgrid.block_caxpy(b,out,in._odata[b]._internal._internal[j],_evec._v[j],out); + } + } + + } + + template<typename CoarseField> + void fineToCoarse(const Field& in, CoarseField& out) { + + out = zero; + + int Nbasis = sizeof(out._odata[0]._internal._internal) / sizeof(out._odata[0]._internal._internal[0]); + assert(Nbasis == _evec._Nm); + + + Field tmp(_bgrid._grid); + tmp = in; + +#pragma omp parallel for + for (int b=0;b<_bgrid._o_blocks;b++) { + for (int j=0;j<_evec._Nm;j++) { + // |rhs> -= <j|rhs> |j> + auto c = _bgrid.block_sp(b,_evec._v[j],tmp); + _bgrid.block_caxpy(b,tmp,-c,_evec._v[j],tmp); // may make this more numerically stable + out._odata[b]._internal._internal[j] = c; + } + } + + } + + template<typename CoarseField> + void deflateFine(BasisFieldVector<CoarseField>& _coef,const std::vector<RealD>& eval,int N,const Field& src_orig,Field& result) { + result = zero; + for (int i=0;i<N;i++) { + Field tmp(result._grid); + coarseToFine(_coef._v[i],tmp); + axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result); + } + } + + template<typename CoarseField> + void deflateCoarse(BasisFieldVector<CoarseField>& _coef,const std::vector<RealD>& eval,int N,const Field& src_orig,Field& result) { + CoarseField src_coarse(_coef._v[0]._grid); + CoarseField result_coarse = src_coarse; + result_coarse = zero; + fineToCoarse(src_orig,src_coarse); + for (int i=0;i<N;i++) { + axpy(result_coarse,TensorRemove(innerProduct(_coef._v[i],src_coarse)) / eval[i],_coef._v[i],result_coarse); + } + coarseToFine(result_coarse,result); + } + + template<typename CoarseField> + void 
deflate(BasisFieldVector<CoarseField>& _coef,const std::vector<RealD>& eval,int N,const Field& src_orig,Field& result) { + // Deflation on coarse Grid is much faster, so use it by default. Deflation on fine Grid is kept for legacy reasons for now. + deflateCoarse(_coef,eval,N,src_orig,result); + } + +}; +} diff --git a/tests/lanczos/BlockedGrid.h b/tests/lanczos/BlockedGrid.h new file mode 100644 index 00000000..821272de --- /dev/null +++ b/tests/lanczos/BlockedGrid.h @@ -0,0 +1,401 @@ +namespace Grid { + +template<typename Field> +class BlockedGrid { +public: + GridBase* _grid; + typedef typename Field::scalar_type Coeff_t; + typedef typename Field::vector_type vCoeff_t; + + std::vector<int> _bs; // block size + std::vector<int> _nb; // number of blocks + std::vector<int> _l; // local dimensions irrespective of cb + std::vector<int> _l_cb; // local dimensions of checkerboarded vector + std::vector<int> _l_cb_o; // local dimensions of inner checkerboarded vector + std::vector<int> _bs_cb; // block size in checkerboarded vector + std::vector<int> _nb_o; // number of blocks of simd o-sites + + int _nd, _blocks, _cf_size, _cf_block_size, _cf_o_block_size, _o_blocks, _block_sites; + + BlockedGrid(GridBase* grid, const std::vector<int>& block_size) : + _grid(grid), _bs(block_size), _nd((int)_bs.size()), + _nb(block_size), _l(block_size), _l_cb(block_size), _nb_o(block_size), + _l_cb_o(block_size), _bs_cb(block_size) { + + _blocks = 1; + _o_blocks = 1; + _l = grid->FullDimensions(); + _l_cb = grid->LocalDimensions(); + _l_cb_o = grid->_rdimensions; + + _cf_size = 1; + _block_sites = 1; + for (int i=0;i<_nd;i++) { + _l[i] /= grid->_processors[i]; + + assert(!(_l[i] % _bs[i])); // lattice must accommodate choice of blocksize + + int r = _l[i] / _l_cb[i]; + assert(!(_bs[i] % r)); // checkerboarding must accommodate choice of blocksize + _bs_cb[i] = _bs[i] / r; + _block_sites *= _bs_cb[i]; + _nb[i] = _l[i] / _bs[i]; + _nb_o[i] = _nb[i] / _grid->_simd_layout[i]; + if (_nb[i] % _grid->_simd_layout[i]) { // simd must accommodate choice of blocksize + std::cout << GridLogMessage << "Problem: _nb[" << i << "] = " << _nb[i] << " _grid->_simd_layout[" << i << "] = " << _grid->_simd_layout[i] << std::endl; + assert(0); + } + _blocks *= _nb[i]; + _o_blocks *= _nb_o[i]; + _cf_size *= _l[i]; + } + + _cf_size *= 12 / 2; + _cf_block_size = _cf_size / _blocks; + _cf_o_block_size = _cf_size / _o_blocks; + + std::cout << GridLogMessage << "BlockedGrid:" << std::endl; + std::cout << GridLogMessage << " _l = " << _l << std::endl; + std::cout << GridLogMessage << " _l_cb = " << _l_cb << std::endl; + std::cout << GridLogMessage << " _l_cb_o = " << _l_cb_o << std::endl; + std::cout << GridLogMessage << " _bs = " << _bs << std::endl; + std::cout << GridLogMessage << " _bs_cb = " << _bs_cb << std::endl; + + std::cout << GridLogMessage << " _nb = " << _nb << std::endl; + std::cout << GridLogMessage << " _nb_o = " << _nb_o << std::endl; + std::cout << GridLogMessage << " _blocks = " << _blocks << std::endl; + std::cout << GridLogMessage << " _o_blocks = " << _o_blocks << std::endl; + std::cout << GridLogMessage << " sizeof(vCoeff_t) = " << sizeof(vCoeff_t) << std::endl; + std::cout << GridLogMessage << " _cf_size = " << _cf_size << std::endl; + std::cout << GridLogMessage << " _cf_block_size = " << _cf_block_size << std::endl; + std::cout << GridLogMessage << " _block_sites = " << _block_sites << std::endl; + std::cout << GridLogMessage << " _grid->oSites() = " << _grid->oSites() << std::endl; + + // _grid->Barrier(); + 
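  // Worked example for one direction i: local extent _l[i]=8 and block size
  // _bs[i]=4 give _nb[i]=2 blocks; if direction i is checkerboarded then
  // _l_cb[i]=4, so r=2 and _bs_cb[i]=2; with _simd_layout[i]=2 the two blocks
  // pack into _nb_o[i]=1 block of simd o-sites.  The asserts above fail fast
  // whenever the requested blocking violates these divisibility relations.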
//abort(); + } + + void block_to_coor(int b, std::vector<int>& x0) { + + std::vector<int> bcoor; + bcoor.resize(_nd); + x0.resize(_nd); + assert(b < _o_blocks); + Lexicographic::CoorFromIndex(bcoor,b,_nb_o); + int i; + + for (i=0;i<_nd;i++) { + x0[i] = bcoor[i]*_bs_cb[i]; + } + + //std::cout << GridLogMessage << "Map block b -> " << x0 << std::endl; + + } + + void block_site_to_o_coor(const std::vector<int>& x0, std::vector<int>& coor, int i) { + Lexicographic::CoorFromIndex(coor,i,_bs_cb); + for (int j=0;j<_nd;j++) + coor[j] += x0[j]; + } + + int block_site_to_o_site(const std::vector<int>& x0, int i) { + std::vector<int> coor; coor.resize(_nd); + block_site_to_o_coor(x0,coor,i); + Lexicographic::IndexFromCoor(coor,i,_l_cb_o); + return i; + } + + vCoeff_t block_sp(int b, const Field& x, const Field& y) { + + std::vector<int> x0; + block_to_coor(b,x0); + + vCoeff_t ret = 0.0; + for (int i=0;i<_block_sites;i++) { // only odd sites + int ss = block_site_to_o_site(x0,i); + ret += TensorRemove(innerProduct(x._odata[ss],y._odata[ss])); + } + + return ret; + + } + + vCoeff_t block_sp(int b, const Field& x, const std::vector< ComplexD >& y) { + + std::vector<int> x0; + block_to_coor(b,x0); + + constexpr int nsimd = sizeof(vCoeff_t) / sizeof(Coeff_t); + int lsize = _cf_o_block_size / _block_sites; + + std::vector< ComplexD > ret(nsimd); + for (int i=0;i<nsimd;i++) + ret[i] = 0.0; + + for (int i=0;i<_block_sites;i++) { // only odd sites + int ss = block_site_to_o_site(x0,i); + + int n = lsize / nsimd; + for (int l=0;l<n;l++) { + for (int j=0;j<nsimd;j++) { + int t = lsize * i + l*nsimd + j; + + ret[j] += conjugate(((Coeff_t*)&x._odata[ss]._internal)[l*nsimd + j]) * y[t]; + } + } + } + + vCoeff_t vret; + for (int i=0;i<nsimd;i++) + ((Coeff_t*)&vret)[i] = (Coeff_t)ret[i]; + + return vret; + + } + + template<class T> + void vcaxpy(iScalar<T>& r,const vCoeff_t& a,const iScalar<T>& x,const iScalar<T>& y) { + vcaxpy(r._internal,a,x._internal,y._internal); + } + + template<class T,int N> + void vcaxpy(iVector<T,N>& r,const vCoeff_t& a,const iVector<T,N>& x,const iVector<T,N>& y) { + for (int i=0;i<N;i++) + vcaxpy(r._internal[i],a,x._internal[i],y._internal[i]); + } + + void vcaxpy(vCoeff_t& r,const vCoeff_t& a,const vCoeff_t& x,const vCoeff_t& y) { + r = a*x + y; + } + + void block_caxpy(int b, Field& ret, const vCoeff_t& a, const Field& x, const Field& y) { + + std::vector<int> x0; + block_to_coor(b,x0); + + for (int i=0;i<_block_sites;i++) { // only odd sites + int ss = block_site_to_o_site(x0,i); + vcaxpy(ret._odata[ss],a,x._odata[ss],y._odata[ss]); + } + + } + + void block_caxpy(int b, std::vector< ComplexD >& ret, const vCoeff_t& a, const Field& x, const std::vector< ComplexD >& y) { + std::vector<int> x0; + block_to_coor(b,x0); + + constexpr int nsimd = sizeof(vCoeff_t) / sizeof(Coeff_t); + int lsize = _cf_o_block_size / _block_sites; + + for (int i=0;i<_block_sites;i++) { // only odd sites + int ss = block_site_to_o_site(x0,i); + + int n = lsize / nsimd; + for (int l=0;l<n;l++) { + vCoeff_t r = a* ((vCoeff_t*)&x._odata[ss]._internal)[l]; + + for (int j=0;j<nsimd;j++) { + int t = lsize * i + l*nsimd + j; + ret[t] = y[t] + ((Coeff_t*)&r)[j]; + } + } + } + + } + + void block_set(int b, Field& ret, const std::vector< ComplexD >& x) { + std::vector<int> x0; + block_to_coor(b,x0); + + int lsize = _cf_o_block_size / _block_sites; + + for (int i=0;i<_block_sites;i++) { // only odd sites + int ss = block_site_to_o_site(x0,i); + + for (int l=0;l<lsize;l++) + ((Coeff_t*)&ret._odata[ss]._internal)[l] = 
(Coeff_t)x[lsize * i + l]; // convert precision + } + + } + + void block_get(int b, const Field& ret, std::vector< ComplexD >& x) { + std::vector<int> x0; + block_to_coor(b,x0); + + int lsize = _cf_o_block_size / _block_sites; + + for (int i=0;i<_block_sites;i++) { // only odd sites + int ss = block_site_to_o_site(x0,i); + + for (int l=0;l<lsize;l++) + x[lsize * i + l] = (ComplexD)((Coeff_t*)&ret._odata[ss]._internal)[l]; + } + + } + + template<class T> + void vcscale(iScalar<T>& r,const vCoeff_t& a,const iScalar<T>& x) { + vcscale(r._internal,a,x._internal); + } + + template<class T,int N> + void vcscale(iVector<T,N>& r,const vCoeff_t& a,const iVector<T,N>& x) { + for (int i=0;i<N;i++) + vcscale(r._internal[i],a,x._internal[i]); + } + + void vcscale(vCoeff_t& r,const vCoeff_t& a,const vCoeff_t& x) { + r = a*x; + } + + void block_cscale(int b, const vCoeff_t& a, Field& ret) { + + std::vector<int> x0; + block_to_coor(b,x0); + + for (int i=0;i<_block_sites;i++) { // only odd sites + int ss = block_site_to_o_site(x0,i); + vcscale(ret._odata[ss],a,ret._odata[ss]); + } + } + + void getCanonicalBlockOffset(int cb, std::vector<int>& x0) { + const int ndim = 5; + assert(_nb.size() == ndim); + std::vector<int> _nbc = { _nb[1], _nb[2], _nb[3], _nb[4], _nb[0] }; + std::vector<int> _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] }; + x0.resize(ndim); + + assert(cb >= 0); + assert(cb < _nbc[0]*_nbc[1]*_nbc[2]*_nbc[3]*_nbc[4]); + + Lexicographic::CoorFromIndex(x0,cb,_nbc); + int i; + + for (i=0;i<ndim;i++) { + x0[i] *= _bsc[i]; + } + + //if (cb < 2) + // std::cout << GridLogMessage << "Map: " << cb << " To: " << x0 << std::endl; + } + + void pokeBlockOfVectorCanonical(int cb,Field& v,const std::vector<float>& buf) { + std::vector<int> _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] }; + std::vector<int> ldim = v._grid->LocalDimensions(); + std::vector<int> cldim = { ldim[1], ldim[2], ldim[3], ldim[4], ldim[0] }; + const int _nbsc = _bs_cb[0]*_bs_cb[1]*_bs_cb[2]*_bs_cb[3]*_bs_cb[4]; + // take canonical block cb of v and put it in canonical ordering in buf + std::vector<int> cx0; + getCanonicalBlockOffset(cb,cx0); + +#pragma omp parallel + { + std::vector<int> co0,cl0; + co0=cx0; cl0=cx0; + +#pragma omp for + for (int i=0;i<_nbsc;i++) { + Lexicographic::CoorFromIndex(co0,2*i,_bsc); // 2* for eo + for (int j=0;j<(int)_bsc.size();j++) + cl0[j] = cx0[j] + co0[j]; + + std::vector<int> l0 = { cl0[4], cl0[0], cl0[1], cl0[2], cl0[3] }; + int oi = v._grid->oIndex(l0); + int ii = v._grid->iIndex(l0); + int lti = i; + + //if (cb < 2 && i<2) + // std::cout << GridLogMessage << "Map: " << cb << ", " << i << " To: " << cl0 << ", " << cx0 << ", " << oi << ", " << ii << std::endl; + + for (int s=0;s<4;s++) + for (int c=0;c<3;c++) { + Coeff_t& ld = ((Coeff_t*)&v._odata[oi]._internal._internal[s]._internal[c])[ii]; + int ti = 12*lti + 3*s + c; + ld = Coeff_t(buf[2*ti+0], buf[2*ti+1]); + } + } + } + } + + void peekBlockOfVectorCanonical(int cb,const Field& v,std::vector<float>& buf) { + std::vector<int> _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] }; + std::vector<int> ldim = v._grid->LocalDimensions(); + std::vector<int> cldim = { ldim[1], ldim[2], ldim[3], ldim[4], ldim[0] }; + const int _nbsc = _bs_cb[0]*_bs_cb[1]*_bs_cb[2]*_bs_cb[3]*_bs_cb[4]; + // take canonical block cb of v and put it in canonical ordering in buf + std::vector<int> cx0; + getCanonicalBlockOffset(cb,cx0); + + buf.resize(_cf_block_size * 2); + +#pragma omp parallel + { + std::vector<int> co0,cl0; + co0=cx0; cl0=cx0; + +#pragma omp for + for (int 
i=0;i<_nbsc;i++) { + Lexicographic::CoorFromIndex(co0,2*i,_bsc); // 2* for eo + for (int j=0;j<(int)_bsc.size();j++) + cl0[j] = cx0[j] + co0[j]; + + std::vector<int> l0 = { cl0[4], cl0[0], cl0[1], cl0[2], cl0[3] }; + int oi = v._grid->oIndex(l0); + int ii = v._grid->iIndex(l0); + int lti = i; + + //if (cb < 2 && i<2) + // std::cout << GridLogMessage << "Map: " << cb << ", " << i << " To: " << cl0 << ", " << cx0 << ", " << oi << ", " << ii << std::endl; + + for (int s=0;s<4;s++) + for (int c=0;c<3;c++) { + Coeff_t& ld = ((Coeff_t*)&v._odata[oi]._internal._internal[s]._internal[c])[ii]; + int ti = 12*lti + 3*s + c; + buf[2*ti+0] = ld.real(); + buf[2*ti+1] = ld.imag(); + } + } + } + } + + int globalToLocalCanonicalBlock(int slot,const std::vector<int>& src_nodes,int nb) { + // processor coordinate + int _nd = (int)src_nodes.size(); + std::vector<int> _src_nodes = src_nodes; + std::vector<int> pco(_nd); + Lexicographic::CoorFromIndex(pco,slot,_src_nodes); + std::vector<int> cpco = { pco[1], pco[2], pco[3], pco[4], pco[0] }; + + // get local block + std::vector<int> _nbc = { _nb[1], _nb[2], _nb[3], _nb[4], _nb[0] }; + assert(_nd == 5); + std::vector<int> c_src_local_blocks(_nd); + for (int i=0;i<_nd;i++) { + assert(_grid->_fdimensions[i] % (src_nodes[i] * _bs[i]) == 0); + c_src_local_blocks[(i+4) % 5] = _grid->_fdimensions[i] / src_nodes[i] / _bs[i]; + } + std::vector<int> cbcoor(_nd); // coordinate of block in slot in canonical form + Lexicographic::CoorFromIndex(cbcoor,nb,c_src_local_blocks); + + // cpco, cbcoor + std::vector<int> clbcoor(_nd); + for (int i=0;i<_nd;i++) { + int cgcoor = cpco[i] * c_src_local_blocks[i] + cbcoor[i]; // global block coordinate + int pcoor = cgcoor / _nbc[i]; // processor coordinate in my Grid + int tpcoor = _grid->_processor_coor[(i+1)%5]; + if (pcoor != tpcoor) + return -1; + clbcoor[i] = cgcoor - tpcoor * _nbc[i]; // canonical local block coordinate for canonical dimension i + } + + int lnb; + Lexicographic::IndexFromCoor(clbcoor,lnb,_nbc); + //std::cout << "Mapped slot = " << slot << " nb = " << nb << " to " << lnb << std::endl; + return lnb; + } + + + }; + +} diff --git a/tests/lanczos/Test_dwf_compressed_lanczos.cc b/tests/lanczos/Test_dwf_compressed_lanczos.cc index a6eb95e9..45690f05 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos.cc @@ -331,7 +331,7 @@ void CoarseGridLanczos(BlockProjector<Field>& pr,RealD alpha2,RealD beta,int Npo ) { - IRL2.calc(eval2,coef._v,src_coarse,Nconv,true,SkipTest2); + IRL2.calc(eval2,coef._v,src_coarse,Nconv,true); coef.resize(Nstop2); eval2.resize(Nstop2); @@ -635,7 +635,7 @@ int main (int argc, char ** argv) { if (simple_krylov_basis) { quick_krylov_basis(evec,src,Op1,Nstop1); } else { - IRL1.calc(eval1,evec._v,src,Nconv,false,1); + IRL1.calc(eval1,evec._v,src,Nconv,false); } evec.resize(Nstop1); // and throw away superfluous eval1.resize(Nstop1); diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc index a0691116..8fbbacbc 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc @@ -56,6 +56,7 @@ struct CompressedLanczosParams : Serializable { LanczosParams, FineParams, LanczosParams, CoarseParams, ChebyParams, Smoother, + RealD , coarse_relax_tol, std::vector<int>, blockSize, std::string, config, std::vector < std::complex<double> >, omega, @@ -137,12 +138,13 @@ class ImplicitlyRestartedLanczosSmoothedTester : public 
ImplicitlyRestartedLanc OperatorFunction<FineField> & _smoother; LinearOperatorBase<FineField> &_Linop; Aggregation<Fobj,CComplex,nbasis> &_Aggregate; - + RealD _coarse_relax_tol; ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField> &Poly, OperatorFunction<FineField> &smoother, LinearOperatorBase<FineField> &Linop, - Aggregation<Fobj,CComplex,nbasis> &Aggregate) - : _smoother(smoother), _Linop(Linop),_Aggregate(Aggregate), _Poly(Poly) { }; + Aggregation<Fobj,CComplex,nbasis> &Aggregate, + RealD coarse_relax_tol=5.0e3) + : _smoother(smoother), _Linop(Linop),_Aggregate(Aggregate), _Poly(Poly), _coarse_relax_tol(coarse_relax_tol) { }; int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) { @@ -196,7 +198,7 @@ class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanc <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")" <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv <<std::endl; - + if ( j > nbasis ) eresid = eresid*_coarse_relax_tol; if( (vv<eresid*eresid) ) return 1; return 0; } @@ -337,7 +339,7 @@ public: FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard; int Nconv; - IRL.calc(evals_fine,_Aggregate.subspace,src,Nconv,false,0); + IRL.calc(evals_fine,_Aggregate.subspace,src,Nconv,false); // Shrink down to number saved assert(Nstop>=nbasis); @@ -345,7 +347,7 @@ public: evals_fine.resize(nbasis); _Aggregate.subspace.resize(nbasis,_FineGrid); } - void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth, + void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax, int Nstop, int Nk, int Nm,RealD resid, RealD MaxIt, RealD betastp, int MinRes) { @@ -357,8 +359,7 @@ public: ////////////////////////////////////////////////////////////////////////////////////////////////// Chebyshev<FineField> ChebySmooth(cheby_smooth); - ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate); - + ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); evals_coarse.resize(Nm); evec_coarse.resize(Nm,_CoarseGrid); @@ -367,7 +368,7 @@ public: ImplicitlyRestartedLanczos<CoarseField> IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); int Nconv=0; - IRL.calc(evals_coarse,evec_coarse,src,Nconv,false,1); + IRL.calc(evals_coarse,evec_coarse,src,Nconv,false); assert(Nconv>=Nstop); for (int i=0;i<Nstop;i++){ @@ -492,7 +493,7 @@ int main (int argc, char ** argv) { IRL.Orthogonalise(); std::cout << GridLogMessage << "Performing coarse grid IRL Nstop "<< Ns2<< " Nk "<<Nk2<<" Nm "<<Nm2<< std::endl; - IRL.calcCoarse(coarse.Cheby,Params.Smoother, + IRL.calcCoarse(coarse.Cheby,Params.Smoother,Params.coarse_relax_tol, coarse.Nstop, coarse.Nk,coarse.Nm, coarse.resid, coarse.MaxIt, coarse.betastp,coarse.MinRes); From a34c8a2961f3d3f1d3389419ef9355ad976f0ab1 Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Thu, 26 Oct 2017 07:45:56 +0100 Subject: [PATCH 21/45] Update to IRL; getting close to the structure I would like. 
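Factor the convergence machinery out of calc(): convergence checks and the
final eigenvalue reconstruction now go through an
ImplicitlyRestartedLanczosTester interface, with
ImplicitlyRestartedLanczosHermOpTester supplied as the default so existing
callers keep the old Rayleigh-quotient test. A new basisRotateJ() extracts a
single rotated vector, so the restart loop can probe a power-of-two subset of
modes without rotating the whole basis, and the SkipTest argument is dropped
from calc(). After the restart loop an unconditional ReconstructEval() pass
is applied to every kept vector.

A minimal sketch (not part of this patch) of how a caller could plug in a
custom tester, assuming only the interface introduced below; the class name
and the index cut-off are hypothetical:

    // Hypothetical tester: delegate to the default Rayleigh-quotient test,
    // but never accept modes at or above a chosen index jmax.
    template<class Field>
    class TruncatingTester : public ImplicitlyRestartedLanczosTester<Field> {
    public:
      ImplicitlyRestartedLanczosHermOpTester<Field> base;
      int jmax;
      TruncatingTester(LinearFunction<Field> &HermOpTest,int _jmax)
        : base(HermOpTest), jmax(_jmax) {};
      int TestConvergence(int j,RealD resid,Field &B,RealD &eval,RealD evalMaxApprox) {
        if ( j >= jmax ) return 0;   // refuse high modes outright
        return base.TestConvergence(j,resid,B,eval,evalMaxApprox);
      }
      int ReconstructEval(int j,RealD resid,Field &B,RealD &eval,RealD evalMaxApprox) {
        return base.ReconstructEval(j,resid,B,eval,evalMaxApprox);
      }
    };
    // Usage: ImplicitlyRestartedLanczos<Field> IRL(HermOp,HermOpTest,tester,
    //                                              Nstop,Nk,Nm,resid,MaxIter);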
--- .../iterative/ImplicitlyRestartedLanczos.h | 234 +++++++++++------- 1 file changed, 142 insertions(+), 92 deletions(-) diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index 6d3e0755..4be2715a 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -71,6 +71,23 @@ void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, i } } +// Extract a single rotated vector +template<class Field> +void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm) +{ + typedef typename Field::vector_object vobj; + GridBase* grid = basis[0]._grid; + + result.checkerboard = basis[0].checkerboard; + parallel_for(int ss=0;ss < grid->oSites();ss++){ + vobj B = zero; + for(int k=k0; k<k1; ++k){ + B +=Qt(j,k) * basis[k]._odata[ss]; + } + result._odata[ss] = B; + } +} + template<class Field> void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, std::vector<int>& idx) { @@ -87,9 +104,7 @@ void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, s assert(idx[i] > i); ////////////////////////////////////// // idx[i] is a table of desired sources giving a permutation. - // // Swap v[i] with v[idx[i]]. - // // Find j>i for which _vnew[j] = _vold[i], // track the move idx[j] => idx[i] // track the move idx[i] => i @@ -155,6 +170,49 @@ enum IRLdiagonalisation { ///////////////////////////////////////////////////////////// // Implicitly restarted lanczos ///////////////////////////////////////////////////////////// +template<class Field> class ImplicitlyRestartedLanczosTester +{ + public: + virtual int TestConvergence(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox); + virtual int ReconstructEval(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox); +}; + +template<class Field> class ImplicitlyRestartedLanczosHermOpTester : public ImplicitlyRestartedLanczosTester<Field> +{ + public: + LinearFunction<Field> &_HermOpTest; + ImplicitlyRestartedLanczosHermOpTester(LinearFunction<Field> &HermOpTest) : _HermOpTest(HermOpTest) { }; + int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox) + { + return TestConvergence(j,resid,B,eval,evalMaxApprox); + } + int TestConvergence(int j,RealD eresid,Field &B, RealD &eval,RealD evalMaxApprox) + { + Field v(B); + RealD eval_poly = eval; + // Apply operator + _HermOpTest(B,v); + + RealD vnum = real(innerProduct(B,v)); // HermOp. 
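+    // vnum/vden below forms the Rayleigh quotient eval = <B|Op B> / <B|B>;
+    // the convergence measure vv is then |Op B - eval B|^2 / evalMaxApprox^2.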
+ RealD vden = norm2(B); + RealD vv0 = norm2(v); + eval = vnum/vden; + v -= eval*B; + + RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0); + + std::cout.precision(13); + std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] " + <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")" + <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv + <<std::endl; + + int conv=0; + if( (vv<eresid*eresid) ) conv = 1; + + return conv; + } +}; template<class Field> class ImplicitlyRestartedLanczos { @@ -174,14 +232,19 @@ class ImplicitlyRestartedLanczos { //////////////////////////////// // Embedded objects //////////////////////////////// - LinearFunction<Field> &_HermOp; - LinearFunction<Field> &_HermOpTest; + LinearFunction<Field> &_HermOp; + LinearFunction<Field> &_HermOpTest; + ImplicitlyRestartedLanczosTester<Field> &_Tester; + // Default tester provided (we need a ref to something in default case) + ImplicitlyRestartedLanczosHermOpTester<Field> SimpleTester; ///////////////////////// // Constructor ///////////////////////// + public: ImplicitlyRestartedLanczos(LinearFunction<Field> & HermOp, LinearFunction<Field> & HermOpTest, + ImplicitlyRestartedLanczosTester<Field> & Tester, int _Nstop, // sought vecs int _Nk, // sought vecs int _Nm, // spare vecs @@ -190,7 +253,23 @@ public: RealD _betastp=0.0, // if beta(k) < betastp: converged int _MinRestart=1, int _orth_period = 1, IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : - _HermOp(HermOp), _HermOpTest(HermOpTest), + SimpleTester(HermOpTest), _HermOp(HermOp), _HermOpTest(HermOpTest), _Tester(Tester), + Nstop(_Nstop) , Nk(_Nk), Nm(_Nm), + eresid(_eresid), betastp(_betastp), + MaxIter(_MaxIter) , MinRestart(_MinRestart), + orth_period(_orth_period), diagonalisation(_diagonalisation) { }; + + ImplicitlyRestartedLanczos(LinearFunction<Field> & HermOp, + LinearFunction<Field> & HermOpTest, + int _Nstop, // sought vecs + int _Nk, // sought vecs + int _Nm, // spare vecs + RealD _eresid, // resid in lmdue deficit + int _MaxIter, // Max iterations + RealD _betastp=0.0, // if beta(k) < betastp: converged + int _MinRestart=1, int _orth_period = 1, + IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : + SimpleTester(HermOpTest), _HermOp(HermOp), _HermOpTest(HermOpTest), _Tester(SimpleTester), Nstop(_Nstop) , Nk(_Nk), Nm(_Nm), eresid(_eresid), betastp(_betastp), MaxIter(_MaxIter) , MinRestart(_MinRestart), @@ -232,7 +311,7 @@ repeat →AVK =VKHK +fKe†K † Extend to an M = K + P step factorization AVM = VMHM + fMeM until convergence */ - void calc(std::vector<RealD>& eval, std::vector<Field>& evec, const Field& src, int& Nconv, bool reverse=true, int SkipTest=0) + void calc(std::vector<RealD>& eval, std::vector<Field>& evec, const Field& src, int& Nconv, bool reverse=true) { GridBase *grid = src._grid; assert(grid == evec[0]._grid); @@ -335,11 +414,18 @@ until convergence ////////////////////////////////// eval2_copy = eval2; - // _sort.push(eval2,Nm); - std::partial_sort(eval2.begin(),eval2.begin()+Nm,eval2.end()); + std::partial_sort(eval2.begin(),eval2.begin()+Nm,eval2.end(),std::greater<RealD>()); std::cout<<GridLogIRL <<" evals sorted "<<std::endl; - for(int ip=0; ip<k2; ++ip) std::cout<<GridLogIRL << "eval "<< ip << " "<< eval2[ip] << std::endl; + const int chunk=8; + for(int io=0; io<k2;io+=chunk){ + std::cout<<GridLogIRL << "eval "<< io ; + for(int ii=0;ii<chunk;ii++){ + if ( (io+ii)<k2 ) + std::cout<< " "<< std::setw(10)<< eval2[io+ii]; + } + std::cout << std::endl; + } ////////////////////////////////// // 
Implicitly shifted QR transformations @@ -351,11 +437,9 @@ until convergence } std::cout<<GridLogIRL <<"QR decompose done "<<std::endl; - assert(k2<Nm); - assert(k2<Nm); - assert(k1>0); - basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis + assert(k2<Nm); assert(k2<Nm); assert(k1>0); + basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis std::cout<<GridLogIRL <<"QR rotation done "<<std::endl; //////////////////////////////////////////////////// @@ -385,72 +469,37 @@ until convergence Nconv = 0; if (iter >= MinRestart) { - std::cout << GridLogIRL << "Rotation to test convergence " << std::endl; - - Field ev0_orig(grid); - ev0_orig = evec[0]; - - basisRotate(evec,Qt,0,Nk,0,Nk,Nm); - { - std::cout << GridLogIRL << "Test convergence" << std::endl; - Field B(grid); - - for(int j = 0; j<Nk; j+=SkipTest){ - B=evec[j]; + std::cout << GridLogIRL << "Test convergence: rotate subset of vectors to test convergence " << std::endl; - //std::cout << "Checkerboard: " << evec[j].checkerboard << std::endl; - B.checkerboard = evec[0].checkerboard; + Field B(grid); B.checkerboard = evec[0].checkerboard; - _HermOpTest(B,v); - - RealD vnum = real(innerProduct(B,v)); // HermOp. - RealD vden = norm2(B); - RealD vv0 = norm2(v); - eval2[j] = vnum/vden; - v -= eval2[j]*B; - RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0); - std::cout.precision(13); - std::cout<<GridLogIRL << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<j<<"] " - <<"eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[j] << " (" << eval2_copy[j] << ")" - <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv - <<" "<< vnum/(sqrt(vden)*sqrt(vv0)) - <<std::endl; - - // change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged - if((vv<eresid*eresid) && (j == Nconv) ){ - Nconv+=SkipTest; + // power of two search pattern; not every evalue in eval2 is assessed. 
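+      // jj doubles, so j = Nstop-jj visits Nstop-1, Nstop-2, Nstop-4, ... :
+      // only ~log2(Nstop) vectors are rotated and tested per restart. The first
+      // converged mode found (highest j) sets Nconv = j+1 and ends the scan.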
+ for(int jj = 1; jj<=Nstop; jj*=2){ + int j = Nstop-jj; + RealD e = eval2_copy[j]; // Discard the evalue + basisRotateJ(B,evec,Qt,j,0,Nk,Nm); + if( _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) { + if ( j > Nconv ) { + Nconv=j+1; + jj=Nstop; // Terminate the scan } } - - // test if we converged, if so, terminate - std::cout<<GridLogIRL<<" #modes converged: "<<Nconv<<std::endl; - if( Nconv>=Nstop || beta_k < betastp){ - goto converged; - } - - //B[j] +=Qt[k+_Nm*j] * _v[k]._odata[ss]; - { - Eigen::MatrixXd qm = Eigen::MatrixXd::Zero(Nk,Nk); // Restrict Qt to Nk x Nk - for (int k=0;k<Nk;k++) - for (int j=0;j<Nk;j++) - qm(j,k) = Qt(j,k); - - Eigen::MatrixXd qmI = qm.inverse(); - - RealD res_check_rotate_inverse = (qm*qmI - Eigen::MatrixXd::Identity(Nk,Nk)).norm(); // sqrt( |X|^2 ) - - std::cout << GridLogIRL << "\tInverted ("<<Nk<<"x"<<Nk<<") Qt matrix " << " error = " << res_check_rotate_inverse <<std::endl; - - assert(res_check_rotate_inverse < 1e-7); - - basisRotate(evec,qmI,0,Nk,0,Nk,Nm); - std::cout << GridLogIRL << "\t Basis rotation done "<<std::endl; - - axpy(ev0_orig,-1.0,evec[0],ev0_orig); - std::cout << GridLogIRL << " | evec[0] - evec[0]_orig | = " << ::sqrt(norm2(ev0_orig)) << std::endl; - } } + // Do evec[0] for good measure + { + int j=0; + RealD e = eval2_copy[0]; + basisRotateJ(B,evec,Qt,j,0,Nk,Nm); + _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox); + } + // test if we converged, if so, terminate + std::cout<<GridLogIRL<<" #modes converged: >= "<<Nconv<<"/"<<Nstop<<std::endl; + // if( Nconv>=Nstop || beta_k < betastp){ + if( Nconv>=Nstop){ + goto converged; + } + } else { std::cout << GridLogIRL << "iter < MinRestart: do not yet test for convergence\n"; } // end of iter loop @@ -461,24 +510,28 @@ until convergence converged: - if (SkipTest == 1) { - eval = eval2; - } else { - ////////////////////////////////////////////// - // test quickly - // PAB -- what precisely does this test? Don't like this eval2, eval2_copy etc... 
- ////////////////////////////////////////////// - for (int j=0;j<Nstop;j+=SkipTest) { - std::cout<<GridLogIRL << "Eigenvalue[" << j << "] = " << eval2[j] << " (" << eval2_copy[j] << ")" << std::endl; + { + Field B(grid); B.checkerboard = evec[0].checkerboard; + basisRotate(evec,Qt,0,Nk,0,Nk,Nm); + std::cout << GridLogIRL << " Rotated basis"<<std::endl; + Nconv=0; + ////////////////////////////////////////////////////////////////////// + // Full final convergence test; unconditionally applied + ////////////////////////////////////////////////////////////////////// + for(int j = 0; j<=Nk; j++){ + B=evec[j]; + if( _Tester.ReconstructEval(j,eresid,B,eval2[j],evalMaxApprox) ) { + Nconv++; + } } - eval2_copy.resize(eval2.size()); - eval = eval2_copy; - } - basisSortInPlace(evec,eval,reverse); + if ( Nconv < Nstop ) + std::cout << GridLogIRL << "Nconv ("<<Nconv<<") < Nstop ("<<Nstop<<")"<<std::endl; - for (int j=0;j<Nstop;j++) { - std::cout<<GridLogIRL << " |e[" << j << "]|^2 = " << norm2(evec[j]) << std::endl; + eval=eval2; + + basisSortInPlace(evec,eval,reverse); + } std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; @@ -513,8 +566,7 @@ until convergence Field& evec_k = evec[k]; - _HermOp(evec_k,w); - std::cout<<GridLogIRL << "_HermOp (poly)" <<std::endl; + _HermOp(evec_k,w); std::cout<<GridLogIRL << "Poly(HermOp)" <<std::endl; if(k>0) w -= lme[k-1] * evec[k-1]; @@ -529,8 +581,6 @@ until convergence lmd[k] = alph; lme[k] = beta; - std::cout<<GridLogIRL << "linalg " <<std::endl; - if (k>0 && k % orth_period == 0) { orthogonalize(w,evec,k); // orthonormalise std::cout<<GridLogIRL << "orthogonalised " <<std::endl; From 31f99574fa63e2efdce647989467d12db248be8e Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Thu, 26 Oct 2017 07:47:42 +0100 Subject: [PATCH 22/45] Moving these out of algorithms --- .../BlockProjector.h | 143 ------- .../BlockedGrid.h | 401 ------------------ .../FieldBasisVector.h | 162 ------- 3 files changed, 706 deletions(-) delete mode 100644 lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockProjector.h delete mode 100644 lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockedGrid.h delete mode 100644 lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h diff --git a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockProjector.h b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockProjector.h deleted file mode 100644 index 6becaa66..00000000 --- a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockProjector.h +++ /dev/null @@ -1,143 +0,0 @@ -namespace Grid { - -/* - BlockProjector - - If _HP_BLOCK_PROJECTORS_ is defined, we assume that _evec is a basis that is not - fully orthonormalized (to the precision of the coarse field) and we allow for higher-precision - coarse field than basis field. 
- -*/ -//#define _HP_BLOCK_PROJECTORS_ - -template<typename Field> -class BlockProjector { -public: - - BasisFieldVector<Field>& _evec; - BlockedGrid<Field>& _bgrid; - - BlockProjector(BasisFieldVector<Field>& evec, BlockedGrid<Field>& bgrid) : _evec(evec), _bgrid(bgrid) { - } - - void createOrthonormalBasis(RealD thres = 0.0) { - - GridStopWatch sw; - sw.Start(); - - int cnt = 0; - -#pragma omp parallel shared(cnt) - { - int lcnt = 0; - -#pragma omp for - for (int b=0;b<_bgrid._o_blocks;b++) { - - for (int i=0;i<_evec._Nm;i++) { - - auto nrm0 = _bgrid.block_sp(b,_evec._v[i],_evec._v[i]); - - // |i> -= <j|i> |j> - for (int j=0;j<i;j++) { - _bgrid.block_caxpy(b,_evec._v[i],-_bgrid.block_sp(b,_evec._v[j],_evec._v[i]),_evec._v[j],_evec._v[i]); - } - - auto nrm = _bgrid.block_sp(b,_evec._v[i],_evec._v[i]); - - auto eps = nrm/nrm0; - if (Reduce(eps).real() < thres) { - lcnt++; - } - - // TODO: if norm is too small, remove this eigenvector/mark as not needed; in practice: set it to zero norm here and return a mask - // that is then used later to decide not to write certain eigenvectors to disk (add a norm calculation before subtraction step and look at nrm/nrm0 < eps to decide) - _bgrid.block_cscale(b,1.0 / sqrt(nrm),_evec._v[i]); - - } - - } - -#pragma omp critical - { - cnt += lcnt; - } - } - sw.Stop(); - std::cout << GridLogMessage << "Gram-Schmidt to create blocked basis took " << sw.Elapsed() << " (" << ((RealD)cnt / (RealD)_bgrid._o_blocks / (RealD)_evec._Nm) - << " below threshold)" << std::endl; - - } - - template<typename CoarseField> - void coarseToFine(const CoarseField& in, Field& out) { - - out = zero; - out.checkerboard = _evec._v[0].checkerboard; - - int Nbasis = sizeof(in._odata[0]._internal._internal) / sizeof(in._odata[0]._internal._internal[0]); - assert(Nbasis == _evec._Nm); - -#pragma omp parallel for - for (int b=0;b<_bgrid._o_blocks;b++) { - for (int j=0;j<_evec._Nm;j++) { - _bgrid.block_caxpy(b,out,in._odata[b]._internal._internal[j],_evec._v[j],out); - } - } - - } - - template<typename CoarseField> - void fineToCoarse(const Field& in, CoarseField& out) { - - out = zero; - - int Nbasis = sizeof(out._odata[0]._internal._internal) / sizeof(out._odata[0]._internal._internal[0]); - assert(Nbasis == _evec._Nm); - - - Field tmp(_bgrid._grid); - tmp = in; - -#pragma omp parallel for - for (int b=0;b<_bgrid._o_blocks;b++) { - for (int j=0;j<_evec._Nm;j++) { - // |rhs> -= <j|rhs> |j> - auto c = _bgrid.block_sp(b,_evec._v[j],tmp); - _bgrid.block_caxpy(b,tmp,-c,_evec._v[j],tmp); // may make this more numerically stable - out._odata[b]._internal._internal[j] = c; - } - } - - } - - template<typename CoarseField> - void deflateFine(BasisFieldVector<CoarseField>& _coef,const std::vector<RealD>& eval,int N,const Field& src_orig,Field& result) { - result = zero; - for (int i=0;i<N;i++) { - Field tmp(result._grid); - coarseToFine(_coef._v[i],tmp); - axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result); - } - } - - template<typename CoarseField> - void deflateCoarse(BasisFieldVector<CoarseField>& _coef,const std::vector<RealD>& eval,int N,const Field& src_orig,Field& result) { - CoarseField src_coarse(_coef._v[0]._grid); - CoarseField result_coarse = src_coarse; - result_coarse = zero; - fineToCoarse(src_orig,src_coarse); - for (int i=0;i<N;i++) { - axpy(result_coarse,TensorRemove(innerProduct(_coef._v[i],src_coarse)) / eval[i],_coef._v[i],result_coarse); - } - coarseToFine(result_coarse,result); - } - - template<typename CoarseField> - void 
deflate(BasisFieldVector<CoarseField>& _coef,const std::vector<RealD>& eval,int N,const Field& src_orig,Field& result) { - // Deflation on coarse Grid is much faster, so use it by default. Deflation on fine Grid is kept for legacy reasons for now. - deflateCoarse(_coef,eval,N,src_orig,result); - } - -}; -} diff --git a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockedGrid.h b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockedGrid.h deleted file mode 100644 index 821272de..00000000 --- a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/BlockedGrid.h +++ /dev/null @@ -1,401 +0,0 @@ -namespace Grid { - -template<typename Field> -class BlockedGrid { -public: - GridBase* _grid; - typedef typename Field::scalar_type Coeff_t; - typedef typename Field::vector_type vCoeff_t; - - std::vector<int> _bs; // block size - std::vector<int> _nb; // number of blocks - std::vector<int> _l; // local dimensions irrespective of cb - std::vector<int> _l_cb; // local dimensions of checkerboarded vector - std::vector<int> _l_cb_o; // local dimensions of inner checkerboarded vector - std::vector<int> _bs_cb; // block size in checkerboarded vector - std::vector<int> _nb_o; // number of blocks of simd o-sites - - int _nd, _blocks, _cf_size, _cf_block_size, _cf_o_block_size, _o_blocks, _block_sites; - - BlockedGrid(GridBase* grid, const std::vector<int>& block_size) : - _grid(grid), _bs(block_size), _nd((int)_bs.size()), - _nb(block_size), _l(block_size), _l_cb(block_size), _nb_o(block_size), - _l_cb_o(block_size), _bs_cb(block_size) { - - _blocks = 1; - _o_blocks = 1; - _l = grid->FullDimensions(); - _l_cb = grid->LocalDimensions(); - _l_cb_o = grid->_rdimensions; - - _cf_size = 1; - _block_sites = 1; - for (int i=0;i<_nd;i++) { - _l[i] /= grid->_processors[i]; - - assert(!(_l[i] % _bs[i])); // lattice must accommodate choice of blocksize - - int r = _l[i] / _l_cb[i]; - assert(!(_bs[i] % r)); // checkerboarding must accommodate choice of blocksize - _bs_cb[i] = _bs[i] / r; - _block_sites *= _bs_cb[i]; - _nb[i] = _l[i] / _bs[i]; - _nb_o[i] = _nb[i] / _grid->_simd_layout[i]; - if (_nb[i] % _grid->_simd_layout[i]) { // simd must accommodate choice of blocksize - std::cout << GridLogMessage << "Problem: _nb[" << i << "] = " << _nb[i] << " _grid->_simd_layout[" << i << "] = " << _grid->_simd_layout[i] << std::endl; - assert(0); - } - _blocks *= _nb[i]; - _o_blocks *= _nb_o[i]; - _cf_size *= _l[i]; - } - - _cf_size *= 12 / 2; - _cf_block_size = _cf_size / _blocks; - _cf_o_block_size = _cf_size / _o_blocks; - - std::cout << GridLogMessage << "BlockedGrid:" << std::endl; - std::cout << GridLogMessage << " _l = " << _l << std::endl; - std::cout << GridLogMessage << " _l_cb = " << _l_cb << std::endl; - std::cout << GridLogMessage << " _l_cb_o = " << _l_cb_o << std::endl; - std::cout << GridLogMessage << " _bs = " << _bs << std::endl; - std::cout << GridLogMessage << " _bs_cb = " << _bs_cb << std::endl; - - std::cout << GridLogMessage << " _nb = " << _nb << std::endl; - std::cout << GridLogMessage << " _nb_o = " << _nb_o << std::endl; - std::cout << GridLogMessage << " _blocks = " << _blocks << std::endl; - std::cout << GridLogMessage << " _o_blocks = " << _o_blocks << std::endl; - std::cout << GridLogMessage << " sizeof(vCoeff_t) = " << sizeof(vCoeff_t) << std::endl; - std::cout << GridLogMessage << " _cf_size = " << _cf_size << std::endl; - std::cout << GridLogMessage << " _cf_block_size = " << _cf_block_size << std::endl; - std::cout << GridLogMessage << " _block_sites = " << _block_sites 
<< std::endl; - std::cout << GridLogMessage << " _grid->oSites() = " << _grid->oSites() << std::endl; - - // _grid->Barrier(); - //abort(); - } - - void block_to_coor(int b, std::vector<int>& x0) { - - std::vector<int> bcoor; - bcoor.resize(_nd); - x0.resize(_nd); - assert(b < _o_blocks); - Lexicographic::CoorFromIndex(bcoor,b,_nb_o); - int i; - - for (i=0;i<_nd;i++) { - x0[i] = bcoor[i]*_bs_cb[i]; - } - - //std::cout << GridLogMessage << "Map block b -> " << x0 << std::endl; - - } - - void block_site_to_o_coor(const std::vector<int>& x0, std::vector<int>& coor, int i) { - Lexicographic::CoorFromIndex(coor,i,_bs_cb); - for (int j=0;j<_nd;j++) - coor[j] += x0[j]; - } - - int block_site_to_o_site(const std::vector<int>& x0, int i) { - std::vector<int> coor; coor.resize(_nd); - block_site_to_o_coor(x0,coor,i); - Lexicographic::IndexFromCoor(coor,i,_l_cb_o); - return i; - } - - vCoeff_t block_sp(int b, const Field& x, const Field& y) { - - std::vector<int> x0; - block_to_coor(b,x0); - - vCoeff_t ret = 0.0; - for (int i=0;i<_block_sites;i++) { // only odd sites - int ss = block_site_to_o_site(x0,i); - ret += TensorRemove(innerProduct(x._odata[ss],y._odata[ss])); - } - - return ret; - - } - - vCoeff_t block_sp(int b, const Field& x, const std::vector< ComplexD >& y) { - - std::vector<int> x0; - block_to_coor(b,x0); - - constexpr int nsimd = sizeof(vCoeff_t) / sizeof(Coeff_t); - int lsize = _cf_o_block_size / _block_sites; - - std::vector< ComplexD > ret(nsimd); - for (int i=0;i<nsimd;i++) - ret[i] = 0.0; - - for (int i=0;i<_block_sites;i++) { // only odd sites - int ss = block_site_to_o_site(x0,i); - - int n = lsize / nsimd; - for (int l=0;l<n;l++) { - for (int j=0;j<nsimd;j++) { - int t = lsize * i + l*nsimd + j; - - ret[j] += conjugate(((Coeff_t*)&x._odata[ss]._internal)[l*nsimd + j]) * y[t]; - } - } - } - - vCoeff_t vret; - for (int i=0;i<nsimd;i++) - ((Coeff_t*)&vret)[i] = (Coeff_t)ret[i]; - - return vret; - - } - - template<class T> - void vcaxpy(iScalar<T>& r,const vCoeff_t& a,const iScalar<T>& x,const iScalar<T>& y) { - vcaxpy(r._internal,a,x._internal,y._internal); - } - - template<class T,int N> - void vcaxpy(iVector<T,N>& r,const vCoeff_t& a,const iVector<T,N>& x,const iVector<T,N>& y) { - for (int i=0;i<N;i++) - vcaxpy(r._internal[i],a,x._internal[i],y._internal[i]); - } - - void vcaxpy(vCoeff_t& r,const vCoeff_t& a,const vCoeff_t& x,const vCoeff_t& y) { - r = a*x + y; - } - - void block_caxpy(int b, Field& ret, const vCoeff_t& a, const Field& x, const Field& y) { - - std::vector<int> x0; - block_to_coor(b,x0); - - for (int i=0;i<_block_sites;i++) { // only odd sites - int ss = block_site_to_o_site(x0,i); - vcaxpy(ret._odata[ss],a,x._odata[ss],y._odata[ss]); - } - - } - - void block_caxpy(int b, std::vector< ComplexD >& ret, const vCoeff_t& a, const Field& x, const std::vector< ComplexD >& y) { - std::vector<int> x0; - block_to_coor(b,x0); - - constexpr int nsimd = sizeof(vCoeff_t) / sizeof(Coeff_t); - int lsize = _cf_o_block_size / _block_sites; - - for (int i=0;i<_block_sites;i++) { // only odd sites - int ss = block_site_to_o_site(x0,i); - - int n = lsize / nsimd; - for (int l=0;l<n;l++) { - vCoeff_t r = a* ((vCoeff_t*)&x._odata[ss]._internal)[l]; - - for (int j=0;j<nsimd;j++) { - int t = lsize * i + l*nsimd + j; - ret[t] = y[t] + ((Coeff_t*)&r)[j]; - } - } - } - - } - - void block_set(int b, Field& ret, const std::vector< ComplexD >& x) { - std::vector<int> x0; - block_to_coor(b,x0); - - int lsize = _cf_o_block_size / _block_sites; - - for (int i=0;i<_block_sites;i++) { // only 
odd sites - int ss = block_site_to_o_site(x0,i); - - for (int l=0;l<lsize;l++) - ((Coeff_t*)&ret._odata[ss]._internal)[l] = (Coeff_t)x[lsize * i + l]; // convert precision - } - - } - - void block_get(int b, const Field& ret, std::vector< ComplexD >& x) { - std::vector<int> x0; - block_to_coor(b,x0); - - int lsize = _cf_o_block_size / _block_sites; - - for (int i=0;i<_block_sites;i++) { // only odd sites - int ss = block_site_to_o_site(x0,i); - - for (int l=0;l<lsize;l++) - x[lsize * i + l] = (ComplexD)((Coeff_t*)&ret._odata[ss]._internal)[l]; - } - - } - - template<class T> - void vcscale(iScalar<T>& r,const vCoeff_t& a,const iScalar<T>& x) { - vcscale(r._internal,a,x._internal); - } - - template<class T,int N> - void vcscale(iVector<T,N>& r,const vCoeff_t& a,const iVector<T,N>& x) { - for (int i=0;i<N;i++) - vcscale(r._internal[i],a,x._internal[i]); - } - - void vcscale(vCoeff_t& r,const vCoeff_t& a,const vCoeff_t& x) { - r = a*x; - } - - void block_cscale(int b, const vCoeff_t& a, Field& ret) { - - std::vector<int> x0; - block_to_coor(b,x0); - - for (int i=0;i<_block_sites;i++) { // only odd sites - int ss = block_site_to_o_site(x0,i); - vcscale(ret._odata[ss],a,ret._odata[ss]); - } - } - - void getCanonicalBlockOffset(int cb, std::vector<int>& x0) { - const int ndim = 5; - assert(_nb.size() == ndim); - std::vector<int> _nbc = { _nb[1], _nb[2], _nb[3], _nb[4], _nb[0] }; - std::vector<int> _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] }; - x0.resize(ndim); - - assert(cb >= 0); - assert(cb < _nbc[0]*_nbc[1]*_nbc[2]*_nbc[3]*_nbc[4]); - - Lexicographic::CoorFromIndex(x0,cb,_nbc); - int i; - - for (i=0;i<ndim;i++) { - x0[i] *= _bsc[i]; - } - - //if (cb < 2) - // std::cout << GridLogMessage << "Map: " << cb << " To: " << x0 << std::endl; - } - - void pokeBlockOfVectorCanonical(int cb,Field& v,const std::vector<float>& buf) { - std::vector<int> _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] }; - std::vector<int> ldim = v._grid->LocalDimensions(); - std::vector<int> cldim = { ldim[1], ldim[2], ldim[3], ldim[4], ldim[0] }; - const int _nbsc = _bs_cb[0]*_bs_cb[1]*_bs_cb[2]*_bs_cb[3]*_bs_cb[4]; - // take canonical block cb of v and put it in canonical ordering in buf - std::vector<int> cx0; - getCanonicalBlockOffset(cb,cx0); - -#pragma omp parallel - { - std::vector<int> co0,cl0; - co0=cx0; cl0=cx0; - -#pragma omp for - for (int i=0;i<_nbsc;i++) { - Lexicographic::CoorFromIndex(co0,2*i,_bsc); // 2* for eo - for (int j=0;j<(int)_bsc.size();j++) - cl0[j] = cx0[j] + co0[j]; - - std::vector<int> l0 = { cl0[4], cl0[0], cl0[1], cl0[2], cl0[3] }; - int oi = v._grid->oIndex(l0); - int ii = v._grid->iIndex(l0); - int lti = i; - - //if (cb < 2 && i<2) - // std::cout << GridLogMessage << "Map: " << cb << ", " << i << " To: " << cl0 << ", " << cx0 << ", " << oi << ", " << ii << std::endl; - - for (int s=0;s<4;s++) - for (int c=0;c<3;c++) { - Coeff_t& ld = ((Coeff_t*)&v._odata[oi]._internal._internal[s]._internal[c])[ii]; - int ti = 12*lti + 3*s + c; - ld = Coeff_t(buf[2*ti+0], buf[2*ti+1]); - } - } - } - } - - void peekBlockOfVectorCanonical(int cb,const Field& v,std::vector<float>& buf) { - std::vector<int> _bsc = { _bs[1], _bs[2], _bs[3], _bs[4], _bs[0] }; - std::vector<int> ldim = v._grid->LocalDimensions(); - std::vector<int> cldim = { ldim[1], ldim[2], ldim[3], ldim[4], ldim[0] }; - const int _nbsc = _bs_cb[0]*_bs_cb[1]*_bs_cb[2]*_bs_cb[3]*_bs_cb[4]; - // take canonical block cb of v and put it in canonical ordering in buf - std::vector<int> cx0; - getCanonicalBlockOffset(cb,cx0); - - 
buf.resize(_cf_block_size * 2); - -#pragma omp parallel - { - std::vector<int> co0,cl0; - co0=cx0; cl0=cx0; - -#pragma omp for - for (int i=0;i<_nbsc;i++) { - Lexicographic::CoorFromIndex(co0,2*i,_bsc); // 2* for eo - for (int j=0;j<(int)_bsc.size();j++) - cl0[j] = cx0[j] + co0[j]; - - std::vector<int> l0 = { cl0[4], cl0[0], cl0[1], cl0[2], cl0[3] }; - int oi = v._grid->oIndex(l0); - int ii = v._grid->iIndex(l0); - int lti = i; - - //if (cb < 2 && i<2) - // std::cout << GridLogMessage << "Map: " << cb << ", " << i << " To: " << cl0 << ", " << cx0 << ", " << oi << ", " << ii << std::endl; - - for (int s=0;s<4;s++) - for (int c=0;c<3;c++) { - Coeff_t& ld = ((Coeff_t*)&v._odata[oi]._internal._internal[s]._internal[c])[ii]; - int ti = 12*lti + 3*s + c; - buf[2*ti+0] = ld.real(); - buf[2*ti+1] = ld.imag(); - } - } - } - } - - int globalToLocalCanonicalBlock(int slot,const std::vector<int>& src_nodes,int nb) { - // processor coordinate - int _nd = (int)src_nodes.size(); - std::vector<int> _src_nodes = src_nodes; - std::vector<int> pco(_nd); - Lexicographic::CoorFromIndex(pco,slot,_src_nodes); - std::vector<int> cpco = { pco[1], pco[2], pco[3], pco[4], pco[0] }; - - // get local block - std::vector<int> _nbc = { _nb[1], _nb[2], _nb[3], _nb[4], _nb[0] }; - assert(_nd == 5); - std::vector<int> c_src_local_blocks(_nd); - for (int i=0;i<_nd;i++) { - assert(_grid->_fdimensions[i] % (src_nodes[i] * _bs[i]) == 0); - c_src_local_blocks[(i+4) % 5] = _grid->_fdimensions[i] / src_nodes[i] / _bs[i]; - } - std::vector<int> cbcoor(_nd); // coordinate of block in slot in canonical form - Lexicographic::CoorFromIndex(cbcoor,nb,c_src_local_blocks); - - // cpco, cbcoor - std::vector<int> clbcoor(_nd); - for (int i=0;i<_nd;i++) { - int cgcoor = cpco[i] * c_src_local_blocks[i] + cbcoor[i]; // global block coordinate - int pcoor = cgcoor / _nbc[i]; // processor coordinate in my Grid - int tpcoor = _grid->_processor_coor[(i+1)%5]; - if (pcoor != tpcoor) - return -1; - clbcoor[i] = cgcoor - tpcoor * _nbc[i]; // canonical local block coordinate for canonical dimension i - } - - int lnb; - Lexicographic::IndexFromCoor(clbcoor,lnb,_nbc); - //std::cout << "Mapped slot = " << slot << " nb = " << nb << " to " << lnb << std::endl; - return lnb; - } - - - }; - -} diff --git a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h b/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h deleted file mode 100644 index 3ad516ef..00000000 --- a/lib/algorithms/iterative/BlockImplicitlyRestartedLanczos/FieldBasisVector.h +++ /dev/null @@ -1,162 +0,0 @@ -namespace Grid { - -template<class Field> -class BasisFieldVector { - public: - int _Nm; - - typedef typename Field::scalar_type Coeff_t; - typedef typename Field::vector_type vCoeff_t; - typedef typename Field::vector_object vobj; - typedef typename vobj::scalar_object sobj; - - std::vector<Field> _v; // _Nfull vectors - - void report(int n,GridBase* value) { - - std::cout << GridLogMessage << "BasisFieldVector allocated:\n"; - std::cout << GridLogMessage << " Delta N = " << n << "\n"; - std::cout << GridLogMessage << " Size of full vectors (size) = " << - ((double)n*sizeof(vobj)*value->oSites() / 1024./1024./1024.) 
<< " GB\n"; - std::cout << GridLogMessage << " Size = " << _v.size() << " Capacity = " << _v.capacity() << std::endl; - - value->Barrier(); - - if (value->IsBoss()) { - system("cat /proc/meminfo"); - } - - value->Barrier(); - - } - - BasisFieldVector(int Nm,GridBase* value) : _Nm(Nm), _v(Nm,value) { - report(Nm,value); - } - - ~BasisFieldVector() { - } - - Field& operator[](int i) { - return _v[i]; - } - - void orthogonalize(Field& w, int k) { - for(int j=0; j<k; ++j){ - Coeff_t ip = (Coeff_t)innerProduct(_v[j],w); - w = w - ip*_v[j]; - } - } - - void rotate(Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm) { - - GridBase* grid = _v[0]._grid; - -#pragma omp parallel - { - std::vector < vobj > B(Nm); - -#pragma omp for - for(int ss=0;ss < grid->oSites();ss++){ - for(int j=j0; j<j1; ++j) B[j]=0.; - - for(int j=j0; j<j1; ++j){ - for(int k=k0; k<k1; ++k){ - B[j] +=Qt(j,k) * _v[k]._odata[ss]; - } - } - for(int j=j0; j<j1; ++j){ - _v[j]._odata[ss] = B[j]; - } - } - } - } - - size_t size() const { - return _Nm; - } - - void resize(int n) { - if (n > _Nm) - _v.reserve(n); - - _v.resize(n,_v[0]._grid); - - if (n < _Nm) - _v.shrink_to_fit(); - - report(n - _Nm,_v[0]._grid); - - _Nm = n; - } - - std::vector<int> getIndex(std::vector<RealD>& sort_vals) { - - std::vector<int> idx(sort_vals.size()); - iota(idx.begin(), idx.end(), 0); - - // sort indexes based on comparing values in v - sort(idx.begin(), idx.end(), - [&sort_vals](int i1, int i2) {return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]);}); - - return idx; - } - - void reorderInPlace(std::vector<RealD>& sort_vals, std::vector<int>& idx) { - GridStopWatch gsw; - gsw.Start(); - - int nswaps = 0; - for (size_t i=0;i<idx.size();i++) { - if (idx[i] != i) { - - // find proper place (this could be done in logarithmic time, don't bother for now) - size_t j; - for (j=i;j<idx.size();j++) - if (idx[j]==i) - break; - assert(j!=idx.size()); - - Field _t(_v[0]._grid); - _t = _v[idx[j]]; - _v[idx[j]] = _v[idx[i]]; - _v[idx[i]] = _t; - - RealD _td = sort_vals[idx[j]]; - sort_vals[idx[j]] = sort_vals[idx[i]]; - sort_vals[idx[i]] = _td; - - int _tt = idx[i]; - idx[i] = idx[j]; - idx[j] = _tt; - - nswaps++; - } - } - - // sort values - gsw.Stop(); - std::cout << GridLogMessage << "Sorted eigenspace in place in " << gsw.Elapsed() << " using " << nswaps << " swaps" << std::endl; - } - - void sortInPlace(std::vector<RealD>& sort_vals, bool reverse) { - - std::vector<int> idx = getIndex(sort_vals); - if (reverse) - std::reverse(idx.begin(), idx.end()); - - reorderInPlace(sort_vals,idx); - - } - - void deflate(const std::vector<RealD>& eval,const Field& src_orig,Field& result) { - result = zero; - int N = (int)_v.size(); - for (int i=0;i<N;i++) { - Field& tmp = _v[i]; - axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result); - } - } - - }; -} From 2db05ac2141ef4beb3d25f4d20e43180d225be21 Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Thu, 26 Oct 2017 07:48:03 +0100 Subject: [PATCH 23/45] Test for split/unsplit in isolation --- tests/solver/Test_split_grid.cc | 144 ++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 tests/solver/Test_split_grid.cc diff --git a/tests/solver/Test_split_grid.cc b/tests/solver/Test_split_grid.cc new file mode 100644 index 00000000..90969b85 --- /dev/null +++ b/tests/solver/Test_split_grid.cc @@ -0,0 +1,144 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + 
Source file: ./tests/Test_dwf_mrhs_cg.cc + + Copyright (C) 2015 + +Author: Peter Boyle <paboyle@ph.ed.ac.uk> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include <Grid/Grid.h> +#include <Grid/algorithms/iterative/BlockConjugateGradient.h> + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +int main (int argc, char ** argv) +{ + typedef typename DomainWallFermionR::FermionField FermionField; + typedef typename DomainWallFermionR::ComplexField ComplexField; + typename DomainWallFermionR::ImplParams params; + + const int Ls=4; + + Grid_init(&argc,&argv); + + std::vector<int> latt_size = GridDefaultLatt(); + std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector<int> mpi_layout = GridDefaultMpi(); + std::vector<int> mpi_split (mpi_layout.size(),1); + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + int nrhs = UGrid->RankCount() ; + + ///////////////////////////////////////////// + // Split into 1^4 mpi communicators + ///////////////////////////////////////////// + GridCartesian * SGrid = new GridCartesian(GridDefaultLatt(), + GridDefaultSimd(Nd,vComplex::Nsimd()), + mpi_split, + *UGrid); + + GridCartesian * SFGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid); + GridRedBlackCartesian * SrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid); + GridRedBlackCartesian * SFrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,SGrid); + + /////////////////////////////////////////////// + // Set up the problem as a 4d spreadout job + /////////////////////////////////////////////// + std::vector<int> seeds({1,2,3,4}); + + GridParallelRNG pRNG(UGrid ); pRNG.SeedFixedIntegers(seeds); + GridParallelRNG pRNG5(FGrid); pRNG5.SeedFixedIntegers(seeds); + std::vector<FermionField> src(nrhs,FGrid); + std::vector<FermionField> src_chk(nrhs,FGrid); + std::vector<FermionField> result(nrhs,FGrid); + FermionField tmp(FGrid); + + for(int s=0;s<nrhs;s++) random(pRNG5,src[s]); + for(int s=0;s<nrhs;s++) result[s]=zero; + + LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu); + + ///////////////// + // MPI only sends + ///////////////// + int me = UGrid->ThisRank(); + + LatticeGaugeField s_Umu(SGrid); + FermionField s_src(SFGrid); + FermionField s_tmp(SFGrid); + FermionField s_res(SFGrid); + + /////////////////////////////////////////////////////////////// + // split the source out using MPI instead of 
I/O + /////////////////////////////////////////////////////////////// + Grid_split (Umu,s_Umu); + Grid_split (src,s_src); + + /////////////////////////////////////////////////////////////// + // Set up N-solvers as trivially parallel + /////////////////////////////////////////////////////////////// + RealD mass=0.01; + RealD M5=1.8; + DomainWallFermionR Dchk(Umu,*FGrid,*FrbGrid,*UGrid,*rbGrid,mass,M5); + DomainWallFermionR Ddwf(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5); + + std::cout << GridLogMessage << "****************************************************************** "<<std::endl; + std::cout << GridLogMessage << " Calling DWF CG "<<std::endl; + std::cout << GridLogMessage << "****************************************************************** "<<std::endl; + + MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf); + MdagMLinearOperator<DomainWallFermionR,FermionField> HermOpCk(Dchk); + ConjugateGradient<FermionField> CG((1.0e-8/(me+1)),10000); + s_res = zero; + CG(HermOp,s_src,s_res); + + ///////////////////////////////////////////////////////////// + // Report how long they all took + ///////////////////////////////////////////////////////////// + std::vector<uint32_t> iterations(nrhs,0); + iterations[me] = CG.IterationsToComplete; + + for(int n=0;n<nrhs;n++){ + UGrid->GlobalSum(iterations[n]); + std::cout << GridLogMessage<<" Rank "<<n<<" "<< iterations[n]<<" CG iterations"<<std::endl; + } + + ///////////////////////////////////////////////////////////// + // Gather and residual check on the results + ///////////////////////////////////////////////////////////// + std::cout << GridLogMessage<< "Unsplitting the result"<<std::endl; + Grid_unsplit(result,s_res); + + std::cout << GridLogMessage<< "Checking the residuals"<<std::endl; + for(int n=0;n<nrhs;n++){ + HermOpCk.HermOp(result[n],tmp); tmp = tmp - src[n]; + std::cout << GridLogMessage<<" resid["<<n<<"] "<< norm2(tmp)<<std::endl; + } + + Grid_finalize(); +} From 14507fd6e4c8807e752406ec61aff1b09434f71c Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Thu, 26 Oct 2017 16:25:01 +0100 Subject: [PATCH 24/45] Final? 
candidate for push back on the lanczos reorg feature --- .../Test_dwf_compressed_lanczos_reorg.cc | 33 ++----------------- 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc index 8fbbacbc..ad1aaa47 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc @@ -374,20 +374,6 @@ public: for (int i=0;i<Nstop;i++){ std::cout << i << " Coarse eval = " << evals_coarse[i] << std::endl; } - // We got the evalues of the Cheby operator; - // Reconstruct eigenvalues of original operator via Chebyshev inverse - for (int i=0;i<Nstop;i++){ - - RealD eval_guess; - if (i==0) eval_guess = 0; - else eval_guess = evals_coarse[i-1]; - - RealD eval_poly = evals_coarse[i]; - RealD eval_op = Cheby.approxInv(eval_poly,eval_guess,100,1e-10); - std::cout << i << " Reconstructed eval = " << eval_op << " from guess " <<eval_guess<< " Cheby poly " << eval_poly << std::endl; - evals_coarse[i] = eval_op; - } - } }; @@ -481,9 +467,8 @@ int main (int argc, char ** argv) { fine.resid,fine.MaxIt, fine.betastp,fine.MinRes); - std::cout << GridLogIRL<<"checkpointing"<<std::endl; + std::cout << GridLogIRL<<"Checkpointing Fine evecs"<<std::endl; IRL.checkpointFine(std::string("evecs.scidac"),std::string("evals.xml")); - std::cout << GridLogIRL<<"checkpoint written"<<std::endl; } else { // IRL.testFine(); IRL.checkpointFineRestore(std::string("evecs.scidac"),std::string("evals.xml")); @@ -498,22 +483,10 @@ int main (int argc, char ** argv) { coarse.resid, coarse.MaxIt, coarse.betastp,coarse.MinRes); + + std::cout << GridLogIRL<<"Checkpointing coarse evecs"<<std::endl; IRL.checkpointCoarse(std::string("evecs.coarse.scidac"),std::string("evals.coarse.xml")); - // IRL.smoothedCoarseEigenvalues(); - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // Questions pending - // -- i) Mixed Precision sensitivity discussion. - // -- ii) Stopping condition and checks on the convergence of all evecs; ordering - // -- iii) Total matmul count compared to no compression. - // -- iv) Log tree walk back from maximal mode - // -- v) betastp? - // -- vi) eval2, eval2_copy annoying - // -- vii) Smoothing and checking. - // -- viii) Different poly in convergence check vs. IRL restart+ logging of which have converged; locking, assume no deconverge? - // -- xi) CG 10 iters inverse iteration 1 pass. vs. Chebyshev. vs. Result *after* convergence declaration for each, apply H. - // i.e. coarse2fine - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// Grid_finalize(); } From 0f3e9ae57d4a0cc6f7f8ec1d0fa8e922335aab72 Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Thu, 26 Oct 2017 23:29:59 +0100 Subject: [PATCH 25/45] Gsites error. 
Only appeared (so far) in I/O code for even odd fields --- lib/cartesian/Cartesian_red_black.h | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/cartesian/Cartesian_red_black.h b/lib/cartesian/Cartesian_red_black.h index f89cacc5..5c50f062 100644 --- a/lib/cartesian/Cartesian_red_black.h +++ b/lib/cartesian/Cartesian_red_black.h @@ -205,6 +205,7 @@ public: { assert((_gdimensions[d] & 0x1) == 0); _gdimensions[d] = _gdimensions[d] / 2; // Remove a checkerboard + _gsites /= 2; } _ldimensions[d] = _gdimensions[d] / _processors[d]; assert(_ldimensions[d] * _processors[d] == _gdimensions[d]); From 00ebc150ad6a6db27000829c6830ea8b855bacfe Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Thu, 26 Oct 2017 23:30:37 +0100 Subject: [PATCH 26/45] Mistake in string parse; interface is ambiguous and must fix. Is char * a file, or a XML buffer ? --- lib/serialisation/XmlIO.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/serialisation/XmlIO.cc b/lib/serialisation/XmlIO.cc index c0c45adc..260611a5 100644 --- a/lib/serialisation/XmlIO.cc +++ b/lib/serialisation/XmlIO.cc @@ -68,10 +68,10 @@ std::string XmlWriter::XmlString(void) XmlReader::XmlReader(const char *xmlstring,string toplev) : fileName_("") { pugi::xml_parse_result result; - result = doc_.load_file(xmlstring); + result = doc_.load_string(xmlstring); if ( !result ) { - cerr << "XML error description: char * " << result.description() << " "<< xmlstring << "\n"; - cerr << "XML error offset : char * " << result.offset << " "<<xmlstring <<"\n"; + cerr << "XML error description (from char *): " << result.description() << "\nXML\n"<< xmlstring << "\n"; + cerr << "XML error offset (from char *) " << result.offset << "\nXML\n"<< xmlstring <<"\n"; abort(); } if ( toplev == std::string("") ) { From 0c4ddaea0b8ae49ff75a9c380b710ca225673a48 Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Thu, 26 Oct 2017 23:31:46 +0100 Subject: [PATCH 27/45] Cleaning up --- .../iterative/ImplicitlyRestartedLanczos.h | 48 +++++++++++-------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index 4be2715a..089e7ff3 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -37,6 +37,9 @@ Author: Christoph Lehner <clehner@bnl.gov> namespace Grid { + //////////////////////////////////////////////////////// + // Move following 100 LOC to lattice/Lattice_basis.h + //////////////////////////////////////////////////////// template<class Field> void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k) { @@ -101,7 +104,6 @@ void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, s if (idx[i] != i) { - assert(idx[i] > i); ////////////////////////////////////// // idx[i] is a table of desired sources giving a permutation. // Swap v[i] with v[idx[i]]. 
@@ -114,8 +116,7 @@ void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, s if (idx[j]==i) break; - assert(j!=idx.size()); - assert(idx[j]==i); + assert(idx[i] > i); assert(j!=idx.size()); assert(idx[j]==i); std::swap(_v[i]._odata,_v[idx[i]]._odata); // should use vector move constructor, no data copy std::swap(sort_vals[i],sort_vals[idx[i]]); @@ -161,12 +162,6 @@ void basisDeflate(const std::vector<Field> &_v,const std::vector<RealD>& eval,co } } -enum IRLdiagonalisation { - IRLdiagonaliseWithDSTEGR, - IRLdiagonaliseWithQR, - IRLdiagonaliseWithEigen -}; - ///////////////////////////////////////////////////////////// // Implicitly restarted lanczos ///////////////////////////////////////////////////////////// @@ -177,6 +172,12 @@ template<class Field> class ImplicitlyRestartedLanczosTester virtual int ReconstructEval(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox); }; +enum IRLdiagonalisation { + IRLdiagonaliseWithDSTEGR, + IRLdiagonaliseWithQR, + IRLdiagonaliseWithEigen +}; + template<class Field> class ImplicitlyRestartedLanczosHermOpTester : public ImplicitlyRestartedLanczosTester<Field> { public: @@ -242,6 +243,17 @@ class ImplicitlyRestartedLanczos { ///////////////////////// public: + ////////////////////////////////////////////////////////////////// + // PAB: + ////////////////////////////////////////////////////////////////// + // Too many options & knobs. Do we really need orth_period + // What is the theoretical basis & guarantees of betastp ? + // Nstop=Nk viable? + // MinRestart avoidable with new convergence test? + // Could cut to HermOp, HermOpTest, Tester, Nk, Nm, resid, maxiter (+diagonalisation) + // HermOpTest could be eliminated if we dropped the Power method for max eval. + // -- also: The eval, eval2, eval2_copy stuff is still unnecessarily unclear + ////////////////////////////////////////////////////////////////// ImplicitlyRestartedLanczos(LinearFunction<Field> & HermOp, LinearFunction<Field> & HermOpTest, ImplicitlyRestartedLanczosTester<Field> & Tester, @@ -413,16 +425,14 @@ until convergence // sorting ////////////////////////////////// eval2_copy = eval2; - std::partial_sort(eval2.begin(),eval2.begin()+Nm,eval2.end(),std::greater<RealD>()); - std::cout<<GridLogIRL <<" evals sorted "<<std::endl; const int chunk=8; for(int io=0; io<k2;io+=chunk){ - std::cout<<GridLogIRL << "eval "<< io ; + std::cout<<GridLogIRL << "eval "<< std::setw(3) << io ; for(int ii=0;ii<chunk;ii++){ if ( (io+ii)<k2 ) - std::cout<< " "<< std::setw(10)<< eval2[io+ii]; + std::cout<< " "<< std::setw(12)<< eval2[io+ii]; } std::cout << std::endl; } @@ -431,16 +441,15 @@ until convergence // Implicitly shifted QR transformations ////////////////////////////////// Qt = Eigen::MatrixXd::Identity(Nm,Nm); - std::cout<<GridLogIRL << "QR decompose " << std::endl; for(int ip=k2; ip<Nm; ++ip){ QR_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm); } - std::cout<<GridLogIRL <<"QR decompose done "<<std::endl; + std::cout<<GridLogIRL <<"QR decomposed "<<std::endl; assert(k2<Nm); assert(k2<Nm); assert(k1>0); basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis - std::cout<<GridLogIRL <<"QR rotation done "<<std::endl; + std::cout<<GridLogIRL <<"basisRotated by Qt"<<std::endl; //////////////////////////////////////////////////// // Compressed vector f and beta(k2) @@ -461,7 +470,6 @@ until convergence for(int k=0; k<Nm; ++k){ eval2[k] = eval[k]; lme2[k] = lme[k]; - // std::cout<<GridLogIRL << "eval2[" << k << "] = " << eval2[k] << std::endl; } Qt = 
Eigen::MatrixXd::Identity(Nm,Nm); diagonalize(eval2,lme2,Nk,Nm,Qt,grid); @@ -509,7 +517,6 @@ until convergence abort(); converged: - { Field B(grid); B.checkerboard = evec[0].checkerboard; basisRotate(evec,Qt,0,Nk,0,Nk,Nm); @@ -583,7 +590,7 @@ until convergence if (k>0 && k % orth_period == 0) { orthogonalize(w,evec,k); // orthonormalise - std::cout<<GridLogIRL << "orthogonalised " <<std::endl; + std::cout<<GridLogIRL << "Orthogonalised " <<std::endl; } if(k < Nm-1) evec[k+1] = w; @@ -617,9 +624,8 @@ until convergence } /////////////////////////////////////////////////////////////////////////// - // File could end here if settle on Eigen ??? + // File could end here if settle on Eigen ??? !!! /////////////////////////////////////////////////////////////////////////// - void QR_decomp(std::vector<RealD>& lmd, // Nm std::vector<RealD>& lme, // Nm int Nk, int Nm, // Nk, Nm From 9ec9850bdb49548238b1cb253c82bfeee3823683 Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Thu, 26 Oct 2017 23:34:31 +0100 Subject: [PATCH 28/45] 64bit ftello update --- lib/parallelIO/IldgIO.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h index 1f2b7c90..36ecbd1b 100644 --- a/lib/parallelIO/IldgIO.h +++ b/lib/parallelIO/IldgIO.h @@ -224,7 +224,7 @@ class GridLimeReader : public BinaryIO { assert(PayloadSize == file_bytes);// Must match or user error - off_t offset= ftell(File); + uint64_t offset= ftello(File); // std::cout << " ReadLatticeObject from offset "<<offset << std::endl; BinarySimpleMunger<sobj,sobj> munge; BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); @@ -253,16 +253,13 @@ class GridLimeReader : public BinaryIO { while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { // std::cout << GridLogMessage<< " readLimeObject seeking "<< record_name <<" found record :" <<limeReaderType(LimeR) <<std::endl; - uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration) if ( !strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) { // std::cout << GridLogMessage<< " readLimeObject matches ! " << record_name <<std::endl; - std::vector<char> xmlc(nbytes+1,'\0'); limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR); - // std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <<std::endl; XmlReader RD(&xmlc[0],""); @@ -332,7 +329,7 @@ class GridLimeWriter : public BinaryIO { err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0); err=limeWriterCloseRecord(LimeW); assert(err>=0); limeDestroyHeader(h); - // std::cout << " File offset is now"<<ftell(File) << std::endl; + // std::cout << " File offset is now"<<ftello(File) << std::endl; } //////////////////////////////////////////// // Write a generic lattice field and csum @@ -360,18 +357,20 @@ class GridLimeWriter : public BinaryIO { // These are both buffered, so why I think this code is right is as follows. // // i) write record header to FILE *File, telegraphing the size. - // ii) ftell reads the offset from FILE *File . + // ii) ftello reads the offset from FILE *File . // iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk. // Closes iostream and flushes. // iv) fseek on FILE * to end of this disjoint section. // v) Continue writing scidac record. 
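     // (An aside on ii): ftell() returns long, which truncates offsets past
     // 2GB wherever long is 32-bit, while ftello() returns off_t; copying the
     // result into uint64_t, as this patch does, keeps large records safe.
     // Hypothetical illustration only, not part of this write sequence:
     //   uint64_t offset = (uint64_t) ftello(File);
     //   int rc = fseeko(File,(off_t)offset,SEEK_SET);  assert(rc==0);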
 ////////////////////////////////////////////////////////////////////
-    off_t offset = ftell(File);
+    uint64_t offset = ftello(File);
     //    std::cout << " Writing to offset "<<offset << std::endl;
     std::string format = getFormatString<vobj>();
     BinarySimpleMunger<sobj,sobj> munge;
     BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
+    //    fseek(File,0,SEEK_END);    offset = ftello(File);std::cout << " offset now "<<offset << std::endl;
     err=limeWriterCloseRecord(LimeW);  assert(err>=0);
+
     ////////////////////////////////////////
     // Write checksum element, propagating forward from the BinaryIO
     // Always pair a checksum with a binary object, and close message
@@ -703,8 +702,7 @@ class IldgReader : public GridLimeReader {
 	  // Binary data
 	  /////////////////////////////////
 	  std::cout << GridLogMessage << "ILDG Binary record found : " ILDG_BINARY_DATA << std::endl;
-	  off_t offset= ftell(File);
-
+	  uint64_t offset= ftello(File);
 	  if ( format == std::string("IEEE64BIG") ) {
 	    GaugeSimpleMunger<dobj, sobj> munge;
 	    BinaryIO::readLatticeObject< vobj, dobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);

From 7fab183c0eebfd82e006eca2130d809131a36074 Mon Sep 17 00:00:00 2001
From: paboyle <paboyle@ph.ed.ac.uk>
Date: Fri, 27 Oct 2017 08:17:49 +0100
Subject: [PATCH 29/45] Better read test

---
 lib/parallelIO/IldgIO.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h
index 36ecbd1b..b86e250f 100644
--- a/lib/parallelIO/IldgIO.h
+++ b/lib/parallelIO/IldgIO.h
@@ -159,7 +159,7 @@ namespace QCD {
      uint32_t scidac_checksumb = stoull(scidacChecksum_.sumb,0,16);
      if ( scidac_csuma !=scidac_checksuma) return 0;
      if ( scidac_csumb !=scidac_checksumb) return 0;
-     return 1;
+    return 1;
   }
 
 ////////////////////////////////////////////////////////////////////////////////////
@@ -237,7 +237,7 @@ class GridLimeReader : public BinaryIO {
 	/////////////////////////////////////////////
 	// Verify checksums
 	/////////////////////////////////////////////
-	scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb);
+	assert(scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb)==1);
 	return;
       }
     }

From fa04b6d3c233d6057fb5133c8e5627bc2d941aba Mon Sep 17 00:00:00 2001
From: paboyle <paboyle@ph.ed.ac.uk>
Date: Fri, 27 Oct 2017 08:18:29 +0100
Subject: [PATCH 30/45] Finished ?
Verifying coarse evec restore --- .../Test_dwf_compressed_lanczos_reorg.cc | 145 +++++++++++++----- 1 file changed, 109 insertions(+), 36 deletions(-) diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc index ad1aaa47..42814e2f 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc @@ -50,9 +50,13 @@ struct LanczosParams : Serializable { int, MinRes); // Must restart }; -struct CompressedLanczosParams : Serializable { +struct LocalCoherenceLanczosParams : Serializable { public: - GRID_SERIALIZABLE_CLASS_MEMBERS(CompressedLanczosParams, + GRID_SERIALIZABLE_CLASS_MEMBERS(bool, doFine, + bool, doFineRead, + bool, doCoarse, + bool, doCoarseRead, + LocalCoherenceLanczosParams, LanczosParams, FineParams, LanczosParams, CoarseParams, ChebyParams, Smoother, @@ -61,8 +65,7 @@ struct CompressedLanczosParams : Serializable { std::string, config, std::vector < std::complex<double> >, omega, RealD, mass, - RealD, M5 - ); + RealD, M5); }; // Duplicate functionality; ProjectedFunctionHermOp could be used with the trivial function @@ -209,7 +212,7 @@ class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanc // Make serializable Lanczos params //////////////////////////////////////////// template<class Fobj,class CComplex,int nbasis> -class CoarseFineIRL +class LocalCoherenceLanczos { public: typedef iVector<CComplex,nbasis > CoarseSiteVector; @@ -230,7 +233,7 @@ private: std::vector<RealD> evals_coarse; std::vector<CoarseField> evec_coarse; public: - CoarseFineIRL(GridBase *FineGrid, + LocalCoherenceLanczos(GridBase *FineGrid, GridBase *CoarseGrid, LinearOperatorBase<FineField> &FineOp, int checkerboard) : @@ -253,7 +256,7 @@ public: return nn; } - void testFine(void) + void fakeFine(void) { int Nk = nbasis; _Aggregate.subspace.resize(Nk,_FineGrid); @@ -286,6 +289,42 @@ public: write(WR,"evals",evals_fine); } } + + void checkpointFineRestore(std::string evecs_file,std::string evals_file) + { + evals_fine.resize(nbasis); + _Aggregate.subspace.resize(nbasis,_FineGrid); + { + std::cout << GridLogIRL<< "checkpointFineRestore: Reading evals from "<<evals_file<<std::endl; + XmlReader RD(evals_file); + read(RD,"evals",evals_fine); + } + assert(evals_fine.size()==nbasis); + + emptyUserRecord record; + { + std::cout << GridLogIRL<< "checkpointFineRestore: Reading evecs from "<<evecs_file<<std::endl; + ScidacReader RD ; + RD.open(evecs_file); + for(int k=0;k<nbasis;k++) { + _Aggregate.subspace[k].checkerboard=_checkerboard; + RD.readScidacFieldRecord(_Aggregate.subspace[k],record); + + } + RD.close(); + } + } + void testFine(RealD resid) + { + assert(evals_fine.size() == nbasis); + assert(_Aggregate.subspace.size() == nbasis); + PlainHermOp<FineField> Op(_FineOp); + ImplicitlyRestartedLanczosHermOpTester<FineField> SimpleTester(Op); + for(int k=0;k<nbasis;k++){ + assert(SimpleTester.ReconstructEval(k,resid,_Aggregate.subspace[k],evals_fine[k],1.0)==1); + } + } + void checkpointCoarse(std::string evecs_file,std::string evals_file) { int n = evec_coarse.size(); @@ -303,26 +342,48 @@ public: write(WR,"evals",evals_coarse); } } - - void checkpointFineRestore(std::string evecs_file,std::string evals_file) + void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec) { + std::cout << " resizing to " << nvec<< std::endl; + evals_coarse.resize(nvec); + evec_coarse.resize(nvec,_CoarseGrid); { + std::cout << GridLogIRL<< "checkpointCoarseRestore: 
Reading evals from "<<evals_file<<std::endl; XmlReader RD(evals_file); - read(RD,"evals",evals_fine); + read(RD,"evals",evals_coarse); } - assert(evals_fine.size()==nbasis); + std::cout << " sizes are " << evals_coarse.size()<<" / " <<nvec<< std::endl; + assert(evals_coarse.size()==nvec); emptyUserRecord record; { + std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evecs from "<<evecs_file<<std::endl; ScidacReader RD ; RD.open(evecs_file); for(int k=0;k<nbasis;k++) { - RD.readScidacFieldRecord(_Aggregate.subspace[k],record); + // evec_coarse[k].checkerboard=_checkerboard; ??? + RD.readScidacFieldRecord(evec_coarse[k],record); } RD.close(); } } + void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax) + { + assert(evals_fine.size() == nbasis); + assert(_Aggregate.subspace.size() == nbasis); + ////////////////////////////////////////////////////////////////////////////////////////////////// + // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL + ////////////////////////////////////////////////////////////////////////////////////////////////// + Chebyshev<FineField> ChebySmooth(cheby_smooth); + ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,_Aggregate); + ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); + + for(int k=0;k<evec_coarse.size();k++){ + assert(ChebySmoothTester.ReconstructEval(k,resid,evec_coarse[k],evals_coarse[k],1.0)==1); + } + } + void calcFine(ChebyParams cheby_parms,int Nstop,int Nk,int Nm,RealD resid, RealD MaxIt, RealD betastp, int MinRes) { @@ -370,7 +431,8 @@ public: int Nconv=0; IRL.calc(evals_coarse,evec_coarse,src,Nconv,false); assert(Nconv>=Nstop); - + evals_coarse.resize(Nstop); + evec_coarse.resize (Nstop,_CoarseGrid); for (int i=0;i<Nstop;i++){ std::cout << i << " Coarse eval = " << evals_coarse[i] << std::endl; } @@ -383,7 +445,7 @@ int main (int argc, char ** argv) { Grid_init(&argc,&argv); GridLogIRL.TimingMode(1); - CompressedLanczosParams Params; + LocalCoherenceLanczosParams Params; { Params.omega.resize(10); Params.blockSize.resize(5); @@ -393,7 +455,7 @@ int main (int argc, char ** argv) { } { - XmlReader reader("./Params.xml"); + XmlReader reader(std::string("./Params.xml")); read(reader, "Params", Params); } @@ -454,39 +516,50 @@ int main (int argc, char ** argv) { const int nbasis= 60; assert(nbasis==Ns1); - CoarseFineIRL<vSpinColourVector,vTComplex,nbasis> IRL(FrbGrid,CoarseGrid5rb,HermOp,Odd); - std::cout << GridLogMessage << "Constructed CoarseFine IRL" << std::endl; + LocalCoherenceLanczos<vSpinColourVector,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5rb,HermOp,Odd); + std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl; - int do_fine = 1; - int do_coarse = 0; - int do_smooth = 0; - if ( do_fine ) { + if ( Params.doCoarse ) { + assert( (Params.doFine)||(Params.doFineRead)); + } + + if ( Params.doFine ) { std::cout << GridLogMessage << "Performing fine grid IRL Nstop "<< Ns1 << " Nk "<<Nk1<<" Nm "<<Nm1<< std::endl; - IRL.calcFine(fine.Cheby, + _LocalCoherenceLanczos.calcFine(fine.Cheby, fine.Nstop,fine.Nk,fine.Nm, fine.resid,fine.MaxIt, fine.betastp,fine.MinRes); std::cout << GridLogIRL<<"Checkpointing Fine evecs"<<std::endl; - IRL.checkpointFine(std::string("evecs.scidac"),std::string("evals.xml")); - } else { - // IRL.testFine(); - IRL.checkpointFineRestore(std::string("evecs.scidac"),std::string("evals.xml")); + 
_LocalCoherenceLanczos.checkpointFine(std::string("evecs.scidac"),std::string("evals.xml")); + _LocalCoherenceLanczos.testFine(fine.resid*100.0); // Coarse check } - - std::cout << GridLogMessage << "Orthogonalising " << nbasis<<" Nm "<<Nm2<< std::endl; - IRL.Orthogonalise(); - std::cout << GridLogMessage << "Performing coarse grid IRL Nstop "<< Ns2<< " Nk "<<Nk2<<" Nm "<<Nm2<< std::endl; - IRL.calcCoarse(coarse.Cheby,Params.Smoother,Params.coarse_relax_tol, - coarse.Nstop, coarse.Nk,coarse.Nm, - coarse.resid, coarse.MaxIt, - coarse.betastp,coarse.MinRes); + if ( Params.doFineRead ) { + _LocalCoherenceLanczos.checkpointFineRestore(std::string("evecs.scidac"),std::string("evals.xml")); + _LocalCoherenceLanczos.testFine(fine.resid*100.0); // Coarse check + } + + if ( Params.doCoarse ) { + std::cout << GridLogMessage << "Orthogonalising " << nbasis<<" Nm "<<Nm2<< std::endl; + _LocalCoherenceLanczos.Orthogonalise(); + + std::cout << GridLogMessage << "Performing coarse grid IRL Nstop "<< Ns2<< " Nk "<<Nk2<<" Nm "<<Nm2<< std::endl; + _LocalCoherenceLanczos.calcCoarse(coarse.Cheby,Params.Smoother,Params.coarse_relax_tol, + coarse.Nstop, coarse.Nk,coarse.Nm, + coarse.resid, coarse.MaxIt, + coarse.betastp,coarse.MinRes); - std::cout << GridLogIRL<<"Checkpointing coarse evecs"<<std::endl; - IRL.checkpointCoarse(std::string("evecs.coarse.scidac"),std::string("evals.coarse.xml")); + std::cout << GridLogIRL<<"Checkpointing coarse evecs"<<std::endl; + _LocalCoherenceLanczos.checkpointCoarse(std::string("evecs.coarse.scidac"),std::string("evals.coarse.xml")); + } + if ( Params.doCoarseRead ) { + // Verify we can reread ??? + _LocalCoherenceLanczos.checkpointCoarseRestore(std::string("evecs.coarse.scidac"),std::string("evals.coarse.xml"),coarse.Nstop); + _LocalCoherenceLanczos.testCoarse(coarse.resid*100.0,Params.Smoother,Params.coarse_relax_tol); // Coarse check + } Grid_finalize(); } From 32a52d7583a999b2d9924c4aabcc553934d5e89d Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Fri, 27 Oct 2017 09:04:31 +0100 Subject: [PATCH 31/45] Move the local coherence lanczos into algorithms. Keep the I/O in the tester. Other people can copy this method to write other I/O formats. --- .../iterative/LocalCoherenceLanczos.h | 348 ++++++++++++++ .../Test_dwf_compressed_lanczos_reorg.cc | 436 +++--------------- 2 files changed, 410 insertions(+), 374 deletions(-) create mode 100644 lib/algorithms/iterative/LocalCoherenceLanczos.h diff --git a/lib/algorithms/iterative/LocalCoherenceLanczos.h b/lib/algorithms/iterative/LocalCoherenceLanczos.h new file mode 100644 index 00000000..6b8fe62c --- /dev/null +++ b/lib/algorithms/iterative/LocalCoherenceLanczos.h @@ -0,0 +1,348 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/algorithms/iterative/LocalCoherenceLanczos.h + + Copyright (C) 2015 + +Author: Christoph Lehner <clehner@bnl.gov> +Author: paboyle <paboyle@ph.ed.ac.uk> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_LOCAL_COHERENCE_IRL_H +#define GRID_LOCAL_COHERENCE_IRL_H +namespace Grid { +struct LanczosParams : Serializable { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams, + ChebyParams, Cheby,/*Chebyshev*/ + int, Nstop, /*Vecs in Lanczos must converge Nstop < Nk < Nm*/ + int, Nk, /*Vecs in Lanczos seek converge*/ + int, Nm, /*Total vecs in Lanczos include restart*/ + RealD, resid, /*residual*/ + int, MaxIt, + RealD, betastp, /* ? */ + int, MinRes); // Must restart +}; + +struct LocalCoherenceLanczosParams : Serializable { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams, + bool, doFine, + bool, doFineRead, + bool, doCoarse, + bool, doCoarseRead, + LanczosParams, FineParams, + LanczosParams, CoarseParams, + ChebyParams, Smoother, + RealD , coarse_relax_tol, + std::vector<int>, blockSize, + std::string, config, + std::vector < std::complex<double> >, omega, + RealD, mass, + RealD, M5); +}; + +// Duplicate functionality; ProjectedFunctionHermOp could be used with the trivial function +template<class Fobj,class CComplex,int nbasis> +class ProjectedHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > { +public: + typedef iVector<CComplex,nbasis > CoarseSiteVector; + typedef Lattice<CoarseSiteVector> CoarseField; + typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field + typedef Lattice<Fobj> FineField; + + LinearOperatorBase<FineField> &_Linop; + Aggregation<Fobj,CComplex,nbasis> &_Aggregate; + + ProjectedHermOp(LinearOperatorBase<FineField>& linop, Aggregation<Fobj,CComplex,nbasis> &aggregate) : + _Linop(linop), + _Aggregate(aggregate) { }; + + void operator()(const CoarseField& in, CoarseField& out) { + + GridBase *FineGrid = _Aggregate.FineGrid; + FineField fin(FineGrid); + FineField fout(FineGrid); + + _Aggregate.PromoteFromSubspace(in,fin); std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl; + _Linop.HermOp(fin,fout); std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl; + _Aggregate.ProjectToSubspace(out,fout); std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl; + } +}; + +template<class Fobj,class CComplex,int nbasis> +class ProjectedFunctionHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > { +public: + typedef iVector<CComplex,nbasis > CoarseSiteVector; + typedef Lattice<CoarseSiteVector> CoarseField; + typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field + typedef Lattice<Fobj> FineField; + + + OperatorFunction<FineField> & _poly; + LinearOperatorBase<FineField> &_Linop; + Aggregation<Fobj,CComplex,nbasis> &_Aggregate; + + ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,LinearOperatorBase<FineField>& linop, + Aggregation<Fobj,CComplex,nbasis> &aggregate) : + _poly(poly), + _Linop(linop), + _Aggregate(aggregate) { }; + + void operator()(const CoarseField& in, CoarseField& out) { + + GridBase *FineGrid = _Aggregate.FineGrid; + + FineField fin(FineGrid) ;fin.checkerboard =_Aggregate.checkerboard; + FineField fout(FineGrid);fout.checkerboard =_Aggregate.checkerboard; + + 
_Aggregate.PromoteFromSubspace(in,fin); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl; + _poly(_Linop,fin,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Poly "<<std::endl; + _Aggregate.ProjectToSubspace(out,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl; + } +}; + +template<class Fobj,class CComplex,int nbasis> +class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanczosTester<Lattice<iVector<CComplex,nbasis > > > +{ + public: + typedef iVector<CComplex,nbasis > CoarseSiteVector; + typedef Lattice<CoarseSiteVector> CoarseField; + typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field + typedef Lattice<Fobj> FineField; + + LinearFunction<CoarseField> & _Poly; + OperatorFunction<FineField> & _smoother; + LinearOperatorBase<FineField> &_Linop; + Aggregation<Fobj,CComplex,nbasis> &_Aggregate; + RealD _coarse_relax_tol; + ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField> &Poly, + OperatorFunction<FineField> &smoother, + LinearOperatorBase<FineField> &Linop, + Aggregation<Fobj,CComplex,nbasis> &Aggregate, + RealD coarse_relax_tol=5.0e3) + : _smoother(smoother), _Linop(Linop),_Aggregate(Aggregate), _Poly(Poly), _coarse_relax_tol(coarse_relax_tol) { }; + + int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) + { + CoarseField v(B); + RealD eval_poly = eval; + // Apply operator + _Poly(B,v); + + RealD vnum = real(innerProduct(B,v)); // HermOp. + RealD vden = norm2(B); + RealD vv0 = norm2(v); + eval = vnum/vden; + v -= eval*B; + + RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0); + + std::cout.precision(13); + std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] " + <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")" + <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv + <<std::endl; + + int conv=0; + if( (vv<eresid*eresid) ) conv = 1; + return conv; + } + int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) + { + GridBase *FineGrid = _Aggregate.FineGrid; + + int checkerboard = _Aggregate.checkerboard; + + FineField fB(FineGrid);fB.checkerboard =checkerboard; + FineField fv(FineGrid);fv.checkerboard =checkerboard; + + _Aggregate.PromoteFromSubspace(B,fv); + _smoother(_Linop,fv,fB); + + RealD eval_poly = eval; + _Linop.HermOp(fB,fv); + + RealD vnum = real(innerProduct(fB,fv)); // HermOp. 
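+      // The lines below form the Rayleigh quotient eval = <fB, H fB> / <fB, fB>
+      // on the smoothed fine-grid vector; the residual |H fB - eval*fB|^2 is
+      // normalised by evalMaxApprox^2 and compared against eresid^2, with the
+      // tolerance relaxed by _coarse_relax_tol once j exceeds nbasis.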
+ RealD vden = norm2(fB); + RealD vv0 = norm2(fv); + eval = vnum/vden; + fv -= eval*fB; + RealD vv = norm2(fv) / ::pow(evalMaxApprox,2.0); + + std::cout.precision(13); + std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] " + <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")" + <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv + <<std::endl; + if ( j > nbasis ) eresid = eresid*_coarse_relax_tol; + if( (vv<eresid*eresid) ) return 1; + return 0; + } +}; + +//////////////////////////////////////////// +// Make serializable Lanczos params +//////////////////////////////////////////// +template<class Fobj,class CComplex,int nbasis> +class LocalCoherenceLanczos +{ +public: + typedef iVector<CComplex,nbasis > CoarseSiteVector; + typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field + typedef Lattice<CoarseSiteVector> CoarseField; + typedef Lattice<Fobj> FineField; + +protected: + GridBase *_CoarseGrid; + GridBase *_FineGrid; + int _checkerboard; + LinearOperatorBase<FineField> & _FineOp; + + // FIXME replace Aggregation with vector of fine; the code reuse is too small for + // the hassle and complexity of cross coupling. + Aggregation<Fobj,CComplex,nbasis> _Aggregate; + std::vector<RealD> evals_fine; + std::vector<RealD> evals_coarse; + std::vector<CoarseField> evec_coarse; +public: + LocalCoherenceLanczos(GridBase *FineGrid, + GridBase *CoarseGrid, + LinearOperatorBase<FineField> &FineOp, + int checkerboard) : + _CoarseGrid(CoarseGrid), + _FineGrid(FineGrid), + _Aggregate(CoarseGrid,FineGrid,checkerboard), + _FineOp(FineOp), + _checkerboard(checkerboard) + { + evals_fine.resize(0); + evals_coarse.resize(0); + }; + void Orthogonalise(void ) { _Aggregate.Orthogonalise(); } + + template<typename T> static RealD normalise(T& v) + { + RealD nn = norm2(v); + nn = ::sqrt(nn); + v = v * (1.0/nn); + return nn; + } + + void fakeFine(void) + { + int Nk = nbasis; + _Aggregate.subspace.resize(Nk,_FineGrid); + _Aggregate.subspace[0]=1.0; + _Aggregate.subspace[0].checkerboard=_checkerboard; + normalise(_Aggregate.subspace[0]); + PlainHermOp<FineField> Op(_FineOp); + for(int k=1;k<Nk;k++){ + _Aggregate.subspace[k].checkerboard=_checkerboard; + Op(_Aggregate.subspace[k-1],_Aggregate.subspace[k]); + normalise(_Aggregate.subspace[k]); + } + } + + void testFine(RealD resid) + { + assert(evals_fine.size() == nbasis); + assert(_Aggregate.subspace.size() == nbasis); + PlainHermOp<FineField> Op(_FineOp); + ImplicitlyRestartedLanczosHermOpTester<FineField> SimpleTester(Op); + for(int k=0;k<nbasis;k++){ + assert(SimpleTester.ReconstructEval(k,resid,_Aggregate.subspace[k],evals_fine[k],1.0)==1); + } + } + + void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax) + { + assert(evals_fine.size() == nbasis); + assert(_Aggregate.subspace.size() == nbasis); + ////////////////////////////////////////////////////////////////////////////////////////////////// + // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL + ////////////////////////////////////////////////////////////////////////////////////////////////// + Chebyshev<FineField> ChebySmooth(cheby_smooth); + ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,_Aggregate); + ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); + + for(int k=0;k<evec_coarse.size();k++){ + assert(ChebySmoothTester.ReconstructEval(k,resid,evec_coarse[k],evals_coarse[k],1.0)==1); + } + } + + 
void calcFine(ChebyParams cheby_parms,int Nstop,int Nk,int Nm,RealD resid, + RealD MaxIt, RealD betastp, int MinRes) + { + assert(nbasis<=Nm); + Chebyshev<FineField> Cheby(cheby_parms); + FunctionHermOp<FineField> ChebyOp(Cheby,_FineOp); + PlainHermOp<FineField> Op(_FineOp); + + evals_fine.resize(Nm); + _Aggregate.subspace.resize(Nm,_FineGrid); + + ImplicitlyRestartedLanczos<FineField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); + + FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard; + + int Nconv; + IRL.calc(evals_fine,_Aggregate.subspace,src,Nconv,false); + + // Shrink down to number saved + assert(Nstop>=nbasis); + assert(Nconv>=nbasis); + evals_fine.resize(nbasis); + _Aggregate.subspace.resize(nbasis,_FineGrid); + } + void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax, + int Nstop, int Nk, int Nm,RealD resid, + RealD MaxIt, RealD betastp, int MinRes) + { + Chebyshev<FineField> Cheby(cheby_op); + ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,_Aggregate); + ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,_Aggregate); + ////////////////////////////////////////////////////////////////////////////////////////////////// + // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL + ////////////////////////////////////////////////////////////////////////////////////////////////// + + Chebyshev<FineField> ChebySmooth(cheby_smooth); + ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); + + evals_coarse.resize(Nm); + evec_coarse.resize(Nm,_CoarseGrid); + + CoarseField src(_CoarseGrid); src=1.0; + + ImplicitlyRestartedLanczos<CoarseField> IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); + int Nconv=0; + IRL.calc(evals_coarse,evec_coarse,src,Nconv,false); + assert(Nconv>=Nstop); + evals_coarse.resize(Nstop); + evec_coarse.resize (Nstop,_CoarseGrid); + for (int i=0;i<Nstop;i++){ + std::cout << i << " Coarse eval = " << evals_coarse[i] << std::endl; + } + } +}; + +} +#endif diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc index 42814e2f..bb6441e8 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc @@ -32,414 +32,102 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> */ #include <Grid/Grid.h> #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h> +#include <Grid/algorithms/iterative/LocalCoherenceLanczos.h> using namespace std; using namespace Grid; using namespace Grid::QCD; -struct LanczosParams : Serializable { - public: - GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams, - ChebyParams, Cheby,/*Chebyshev*/ - int, Nstop, /*Vecs in Lanczos must converge Nstop < Nk < Nm*/ - int, Nk, /*Vecs in Lanczos seek converge*/ - int, Nm, /*Total vecs in Lanczos include restart*/ - RealD, resid, /*residual*/ - int, MaxIt, - RealD, betastp, /* ? 
*/ - int, MinRes); // Must restart -}; - -struct LocalCoherenceLanczosParams : Serializable { - public: - GRID_SERIALIZABLE_CLASS_MEMBERS(bool, doFine, - bool, doFineRead, - bool, doCoarse, - bool, doCoarseRead, - LocalCoherenceLanczosParams, - LanczosParams, FineParams, - LanczosParams, CoarseParams, - ChebyParams, Smoother, - RealD , coarse_relax_tol, - std::vector<int>, blockSize, - std::string, config, - std::vector < std::complex<double> >, omega, - RealD, mass, - RealD, M5); -}; - -// Duplicate functionality; ProjectedFunctionHermOp could be used with the trivial function template<class Fobj,class CComplex,int nbasis> -class ProjectedHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > { +class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos<Fobj,CComplex,nbasis> +{ public: typedef iVector<CComplex,nbasis > CoarseSiteVector; typedef Lattice<CoarseSiteVector> CoarseField; typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field typedef Lattice<Fobj> FineField; - LinearOperatorBase<FineField> &_Linop; - Aggregation<Fobj,CComplex,nbasis> &_Aggregate; - - ProjectedHermOp(LinearOperatorBase<FineField>& linop, Aggregation<Fobj,CComplex,nbasis> &aggregate) : - _Linop(linop), - _Aggregate(aggregate) { }; - - void operator()(const CoarseField& in, CoarseField& out) { - - GridBase *FineGrid = _Aggregate.FineGrid; - FineField fin(FineGrid); - FineField fout(FineGrid); - - _Aggregate.PromoteFromSubspace(in,fin); std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl; - _Linop.HermOp(fin,fout); std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl; - _Aggregate.ProjectToSubspace(out,fout); std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl; - } -}; - -template<class Fobj,class CComplex,int nbasis> -class ProjectedFunctionHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > { -public: - typedef iVector<CComplex,nbasis > CoarseSiteVector; - typedef Lattice<CoarseSiteVector> CoarseField; - typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field - typedef Lattice<Fobj> FineField; - - - OperatorFunction<FineField> & _poly; - LinearOperatorBase<FineField> &_Linop; - Aggregation<Fobj,CComplex,nbasis> &_Aggregate; - - ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,LinearOperatorBase<FineField>& linop, - Aggregation<Fobj,CComplex,nbasis> &aggregate) : - _poly(poly), - _Linop(linop), - _Aggregate(aggregate) { }; - - void operator()(const CoarseField& in, CoarseField& out) { - - GridBase *FineGrid = _Aggregate.FineGrid; - - FineField fin(FineGrid) ;fin.checkerboard =_Aggregate.checkerboard; - FineField fout(FineGrid);fout.checkerboard =_Aggregate.checkerboard; - - _Aggregate.PromoteFromSubspace(in,fin); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl; - _poly(_Linop,fin,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Poly "<<std::endl; - _Aggregate.ProjectToSubspace(out,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl; - } -}; - -template<class Fobj,class CComplex,int nbasis> -class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanczosTester<Lattice<iVector<CComplex,nbasis > > > -{ - public: - typedef iVector<CComplex,nbasis > CoarseSiteVector; - typedef Lattice<CoarseSiteVector> CoarseField; - typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field - typedef Lattice<Fobj> FineField; - - LinearFunction<CoarseField> & _Poly; - 
OperatorFunction<FineField> & _smoother; - LinearOperatorBase<FineField> &_Linop; - Aggregation<Fobj,CComplex,nbasis> &_Aggregate; - RealD _coarse_relax_tol; - ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField> &Poly, - OperatorFunction<FineField> &smoother, - LinearOperatorBase<FineField> &Linop, - Aggregation<Fobj,CComplex,nbasis> &Aggregate, - RealD coarse_relax_tol=5.0e3) - : _smoother(smoother), _Linop(Linop),_Aggregate(Aggregate), _Poly(Poly), _coarse_relax_tol(coarse_relax_tol) { }; - - int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) - { - CoarseField v(B); - RealD eval_poly = eval; - // Apply operator - _Poly(B,v); - - RealD vnum = real(innerProduct(B,v)); // HermOp. - RealD vden = norm2(B); - RealD vv0 = norm2(v); - eval = vnum/vden; - v -= eval*B; - - RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0); - - std::cout.precision(13); - std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] " - <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")" - <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv - <<std::endl; - - int conv=0; - if( (vv<eresid*eresid) ) conv = 1; - return conv; - } - int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) - { - GridBase *FineGrid = _Aggregate.FineGrid; - - int checkerboard = _Aggregate.checkerboard; - - FineField fB(FineGrid);fB.checkerboard =checkerboard; - FineField fv(FineGrid);fv.checkerboard =checkerboard; - - _Aggregate.PromoteFromSubspace(B,fv); - _smoother(_Linop,fv,fB); - - RealD eval_poly = eval; - _Linop.HermOp(fB,fv); - - RealD vnum = real(innerProduct(fB,fv)); // HermOp. - RealD vden = norm2(fB); - RealD vv0 = norm2(fv); - eval = vnum/vden; - fv -= eval*fB; - RealD vv = norm2(fv) / ::pow(evalMaxApprox,2.0); - - std::cout.precision(13); - std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] " - <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")" - <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv - <<std::endl; - if ( j > nbasis ) eresid = eresid*_coarse_relax_tol; - if( (vv<eresid*eresid) ) return 1; - return 0; - } -}; - - -//////////////////////////////////////////// -// Make serializable Lanczos params -//////////////////////////////////////////// -template<class Fobj,class CComplex,int nbasis> -class LocalCoherenceLanczos -{ -public: - typedef iVector<CComplex,nbasis > CoarseSiteVector; - typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field - typedef Lattice<CoarseSiteVector> CoarseField; - typedef Lattice<Fobj> FineField; - -private: - GridBase *_CoarseGrid; - GridBase *_FineGrid; - int _checkerboard; - LinearOperatorBase<FineField> & _FineOp; - - // FIXME replace Aggregation with vector of fine; the code reuse is too small for - // the hassle and complexity of cross coupling. 
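The stated aim of this move (algorithm into lib/, serialisation kept in the tester)
is that other I/O formats can subclass in the same way as LocalCoherenceLanczosScidac
above. In miniature, with toy types standing in for the Grid machinery, the shape is:

    #include <iostream>
    #include <string>
    #include <vector>

    // Toy model of the split: algorithm state is protected in the base class,
    // and a derived class supplies the checkpoint format, as the Scidac tester does.
    class AlgorithmBase {
    protected:
      std::vector<double> evals_fine;       // stands in for the eigen-data
    public:
      void calc() { evals_fine = {0.1, 0.2, 0.3}; }
    };

    class AlgorithmWithCheckpoint : public AlgorithmBase {
    public:
      void checkpoint(const std::string &file) {
        std::cout << "writing " << evals_fine.size() << " evals to " << file << "\n";
        // ... any format can be emitted here from the protected state ...
      }
    };

    int main()
    {
      AlgorithmWithCheckpoint alg;
      alg.calc();
      alg.checkpoint("evals.xml");
      return 0;
    }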
- Aggregation<Fobj,CComplex,nbasis> _Aggregate; - std::vector<RealD> evals_fine; - std::vector<RealD> evals_coarse; - std::vector<CoarseField> evec_coarse; -public: - LocalCoherenceLanczos(GridBase *FineGrid, - GridBase *CoarseGrid, - LinearOperatorBase<FineField> &FineOp, - int checkerboard) : - _CoarseGrid(CoarseGrid), - _FineGrid(FineGrid), - _Aggregate(CoarseGrid,FineGrid,checkerboard), - _FineOp(FineOp), - _checkerboard(checkerboard) - { - evals_fine.resize(0); - evals_coarse.resize(0); - }; - void Orthogonalise(void ) { _Aggregate.Orthogonalise(); } - - template<typename T> static RealD normalise(T& v) - { - RealD nn = norm2(v); - nn = ::sqrt(nn); - v = v * (1.0/nn); - return nn; - } - - void fakeFine(void) - { - int Nk = nbasis; - _Aggregate.subspace.resize(Nk,_FineGrid); - _Aggregate.subspace[0]=1.0; - _Aggregate.subspace[0].checkerboard=_checkerboard; - normalise(_Aggregate.subspace[0]); - PlainHermOp<FineField> Op(_FineOp); - for(int k=1;k<Nk;k++){ - _Aggregate.subspace[k].checkerboard=_checkerboard; - Op(_Aggregate.subspace[k-1],_Aggregate.subspace[k]); - normalise(_Aggregate.subspace[k]); - } - } - + LocalCoherenceLanczosScidac(GridBase *FineGrid,GridBase *CoarseGrid, + LinearOperatorBase<FineField> &FineOp, + int checkerboard) + // Base constructor + : LocalCoherenceLanczos<Fobj,CComplex,nbasis>(FineGrid,CoarseGrid,FineOp,checkerboard) + {}; void checkpointFine(std::string evecs_file,std::string evals_file) { - assert(_Aggregate.subspace.size()==nbasis); + assert(this->_Aggregate.subspace.size()==nbasis); emptyUserRecord record; - { - ScidacWriter WR; - WR.open(evecs_file); - for(int k=0;k<nbasis;k++) { - WR.writeScidacFieldRecord(_Aggregate.subspace[k],record); - } - WR.close(); - } - { - XmlWriter WR(evals_file); - write(WR,"evals",evals_fine); + Grid::QCD::ScidacWriter WR; + WR.open(evecs_file); + for(int k=0;k<nbasis;k++) { + WR.writeScidacFieldRecord(this->_Aggregate.subspace[k],record); } + WR.close(); + + XmlWriter WRx(evals_file); + write(WRx,"evals",this->evals_fine); } void checkpointFineRestore(std::string evecs_file,std::string evals_file) { - evals_fine.resize(nbasis); - _Aggregate.subspace.resize(nbasis,_FineGrid); - { - std::cout << GridLogIRL<< "checkpointFineRestore: Reading evals from "<<evals_file<<std::endl; - XmlReader RD(evals_file); - read(RD,"evals",evals_fine); - } - assert(evals_fine.size()==nbasis); - + this->evals_fine.resize(nbasis); + this->_Aggregate.subspace.resize(nbasis,this->_FineGrid); + + std::cout << GridLogIRL<< "checkpointFineRestore: Reading evals from "<<evals_file<<std::endl; + XmlReader RDx(evals_file); + read(RDx,"evals",this->evals_fine); + + assert(this->evals_fine.size()==nbasis); + + std::cout << GridLogIRL<< "checkpointFineRestore: Reading evecs from "<<evecs_file<<std::endl; emptyUserRecord record; - { - std::cout << GridLogIRL<< "checkpointFineRestore: Reading evecs from "<<evecs_file<<std::endl; - ScidacReader RD ; - RD.open(evecs_file); - for(int k=0;k<nbasis;k++) { - _Aggregate.subspace[k].checkerboard=_checkerboard; - RD.readScidacFieldRecord(_Aggregate.subspace[k],record); - - } - RD.close(); - } - } - void testFine(RealD resid) - { - assert(evals_fine.size() == nbasis); - assert(_Aggregate.subspace.size() == nbasis); - PlainHermOp<FineField> Op(_FineOp); - ImplicitlyRestartedLanczosHermOpTester<FineField> SimpleTester(Op); - for(int k=0;k<nbasis;k++){ - assert(SimpleTester.ReconstructEval(k,resid,_Aggregate.subspace[k],evals_fine[k],1.0)==1); + Grid::QCD::ScidacReader RD ; + RD.open(evecs_file); + for(int 
k=0;k<nbasis;k++) { + this->_Aggregate.subspace[k].checkerboard=this->_checkerboard; + RD.readScidacFieldRecord(this->_Aggregate.subspace[k],record); + } + RD.close(); } void checkpointCoarse(std::string evecs_file,std::string evals_file) { - int n = evec_coarse.size(); + int n = this->evec_coarse.size(); emptyUserRecord record; - { - ScidacWriter WR; - WR.open(evecs_file); - for(int k=0;k<n;k++) { - WR.writeScidacFieldRecord(evec_coarse[k],record); - } - WR.close(); - } - { - XmlWriter WR(evals_file); - write(WR,"evals",evals_coarse); + Grid::QCD::ScidacWriter WR; + WR.open(evecs_file); + for(int k=0;k<n;k++) { + WR.writeScidacFieldRecord(this->evec_coarse[k],record); } + WR.close(); + + XmlWriter WRx(evals_file); + write(WRx,"evals",this->evals_coarse); } + void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec) { std::cout << " resizing to " << nvec<< std::endl; - evals_coarse.resize(nvec); - evec_coarse.resize(nvec,_CoarseGrid); - { - std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evals from "<<evals_file<<std::endl; - XmlReader RD(evals_file); - read(RD,"evals",evals_coarse); - } - std::cout << " sizes are " << evals_coarse.size()<<" / " <<nvec<< std::endl; - assert(evals_coarse.size()==nvec); + this->evals_coarse.resize(nvec); + this->evec_coarse.resize(nvec,this->_CoarseGrid); + std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evals from "<<evals_file<<std::endl; + XmlReader RDx(evals_file); + read(RDx,"evals",this->evals_coarse); + assert(this->evals_coarse.size()==nvec); emptyUserRecord record; - { - std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evecs from "<<evecs_file<<std::endl; - ScidacReader RD ; - RD.open(evecs_file); - for(int k=0;k<nbasis;k++) { - // evec_coarse[k].checkerboard=_checkerboard; ??? 
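The restore discipline being rewritten here is resize-first, then one record per
field; note the read loop must run over the number of vectors actually saved
(nvec for the coarse space, not nbasis), which patch 33 below corrects. A toy
sketch of the pattern (hypothetical reader type; only the shape matches
ScidacReader's record-per-field interface):

    #include <cassert>
    #include <vector>

    // One-record-per-field reader, in miniature.
    struct ToyReader {
      int records = 0;
      void readRecord(double &f) { f = records++; }
    };

    void restore(ToyReader &RD, std::vector<double> &evec, int nvec)
    {
      evec.resize(nvec);                 // size the target before reading
      for (int k = 0; k < nvec; k++) {   // loop bound = number saved, not nbasis
        RD.readRecord(evec[k]);
      }
    }

    int main()
    {
      ToyReader rd;
      std::vector<double> v;
      restore(rd, v, 3);
      assert(v.size() == 3 && v[2] == 2.0);
      return 0;
    }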
- RD.readScidacFieldRecord(evec_coarse[k],record); - } - RD.close(); - } - } - - void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax) - { - assert(evals_fine.size() == nbasis); - assert(_Aggregate.subspace.size() == nbasis); - ////////////////////////////////////////////////////////////////////////////////////////////////// - // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL - ////////////////////////////////////////////////////////////////////////////////////////////////// - Chebyshev<FineField> ChebySmooth(cheby_smooth); - ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,_Aggregate); - ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); - - for(int k=0;k<evec_coarse.size();k++){ - assert(ChebySmoothTester.ReconstructEval(k,resid,evec_coarse[k],evals_coarse[k],1.0)==1); - } - } - - void calcFine(ChebyParams cheby_parms,int Nstop,int Nk,int Nm,RealD resid, - RealD MaxIt, RealD betastp, int MinRes) - { - assert(nbasis<=Nm); - Chebyshev<FineField> Cheby(cheby_parms); - FunctionHermOp<FineField> ChebyOp(Cheby,_FineOp); - PlainHermOp<FineField> Op(_FineOp); - - evals_fine.resize(Nm); - _Aggregate.subspace.resize(Nm,_FineGrid); - - ImplicitlyRestartedLanczos<FineField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); - - FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard; - - int Nconv; - IRL.calc(evals_fine,_Aggregate.subspace,src,Nconv,false); - - // Shrink down to number saved - assert(Nstop>=nbasis); - assert(Nconv>=nbasis); - evals_fine.resize(nbasis); - _Aggregate.subspace.resize(nbasis,_FineGrid); - } - void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax, - int Nstop, int Nk, int Nm,RealD resid, - RealD MaxIt, RealD betastp, int MinRes) - { - Chebyshev<FineField> Cheby(cheby_op); - ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,_Aggregate); - ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,_Aggregate); - ////////////////////////////////////////////////////////////////////////////////////////////////// - // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL - ////////////////////////////////////////////////////////////////////////////////////////////////// - - Chebyshev<FineField> ChebySmooth(cheby_smooth); - ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); - - evals_coarse.resize(Nm); - evec_coarse.resize(Nm,_CoarseGrid); - - CoarseField src(_CoarseGrid); src=1.0; - - ImplicitlyRestartedLanczos<CoarseField> IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); - int Nconv=0; - IRL.calc(evals_coarse,evec_coarse,src,Nconv,false); - assert(Nconv>=Nstop); - evals_coarse.resize(Nstop); - evec_coarse.resize (Nstop,_CoarseGrid); - for (int i=0;i<Nstop;i++){ - std::cout << i << " Coarse eval = " << evals_coarse[i] << std::endl; + std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evecs from "<<evecs_file<<std::endl; + Grid::QCD::ScidacReader RD ; + RD.open(evecs_file); + for(int k=0;k<nbasis;k++) { + RD.readScidacFieldRecord(this->evec_coarse[k],record); } + RD.close(); } }; - int main (int argc, char ** argv) { Grid_init(&argc,&argv); @@ -465,7 +153,9 @@ int main (int argc, char ** argv) { std::vector<int> blockSize = Params.blockSize; // Grids - GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), 
GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), + GridDefaultSimd(Nd,vComplex::Nsimd()), + GridDefaultMpi()); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); @@ -516,12 +206,10 @@ int main (int argc, char ** argv) { const int nbasis= 60; assert(nbasis==Ns1); - LocalCoherenceLanczos<vSpinColourVector,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5rb,HermOp,Odd); + LocalCoherenceLanczosScidac<vSpinColourVector,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5rb,HermOp,Odd); std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl; - if ( Params.doCoarse ) { - assert( (Params.doFine)||(Params.doFineRead)); - } + assert( (Params.doFine)||(Params.doFineRead)); if ( Params.doFine ) { std::cout << GridLogMessage << "Performing fine grid IRL Nstop "<< Ns1 << " Nk "<<Nk1<<" Nm "<<Nm1<< std::endl; From f96c800d256e90421d8572cc03ef6cf919312531 Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Fri, 27 Oct 2017 09:43:22 +0100 Subject: [PATCH 32/45] Passes reload of coarse basis --- lib/algorithms/iterative/LocalCoherenceLanczos.h | 6 +++++- tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/algorithms/iterative/LocalCoherenceLanczos.h b/lib/algorithms/iterative/LocalCoherenceLanczos.h index 6b8fe62c..d5d1bbc2 100644 --- a/lib/algorithms/iterative/LocalCoherenceLanczos.h +++ b/lib/algorithms/iterative/LocalCoherenceLanczos.h @@ -285,7 +285,11 @@ public: ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax); for(int k=0;k<evec_coarse.size();k++){ - assert(ChebySmoothTester.ReconstructEval(k,resid,evec_coarse[k],evals_coarse[k],1.0)==1); + if ( k < nbasis ) { + assert(ChebySmoothTester.ReconstructEval(k,resid,evec_coarse[k],evals_coarse[k],1.0)==1); + } else { + assert(ChebySmoothTester.ReconstructEval(k,resid*relax,evec_coarse[k],evals_coarse[k],1.0)==1); + } } } diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc index bb6441e8..0824cfa4 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc @@ -221,16 +221,17 @@ int main (int argc, char ** argv) { std::cout << GridLogIRL<<"Checkpointing Fine evecs"<<std::endl; _LocalCoherenceLanczos.checkpointFine(std::string("evecs.scidac"),std::string("evals.xml")); _LocalCoherenceLanczos.testFine(fine.resid*100.0); // Coarse check + _LocalCoherenceLanczos.Orthogonalise(); } if ( Params.doFineRead ) { _LocalCoherenceLanczos.checkpointFineRestore(std::string("evecs.scidac"),std::string("evals.xml")); _LocalCoherenceLanczos.testFine(fine.resid*100.0); // Coarse check + _LocalCoherenceLanczos.Orthogonalise(); } if ( Params.doCoarse ) { std::cout << GridLogMessage << "Orthogonalising " << nbasis<<" Nm "<<Nm2<< std::endl; - _LocalCoherenceLanczos.Orthogonalise(); std::cout << GridLogMessage << "Performing coarse grid IRL Nstop "<< Ns2<< " Nk "<<Nk2<<" Nm "<<Nm2<< std::endl; _LocalCoherenceLanczos.calcCoarse(coarse.Cheby,Params.Smoother,Params.coarse_relax_tol, From aa66f41c69595df50d74dca2b52930c24d96f47e Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: 
Fri, 27 Oct 2017 10:29:34 +0100 Subject: [PATCH 33/45] Bug fix in the coarse restore... Think this is nearly there --- tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc index 0824cfa4..4c702a33 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc @@ -109,7 +109,7 @@ public: void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec) { - std::cout << " resizing to " << nvec<< std::endl; + std::cout << "resizing coarse vecs to " << nvec<< std::endl; this->evals_coarse.resize(nvec); this->evec_coarse.resize(nvec,this->_CoarseGrid); std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evals from "<<evals_file<<std::endl; @@ -121,7 +121,7 @@ public: std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evecs from "<<evecs_file<<std::endl; Grid::QCD::ScidacReader RD ; RD.open(evecs_file); - for(int k=0;k<nbasis;k++) { + for(int k=0;k<nvec;k++) { RD.readScidacFieldRecord(this->evec_coarse[k],record); } RD.close(); From 689323f4eec85b159d82fe4b2b7097ff4312c70c Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Mon, 30 Oct 2017 00:03:15 +0000 Subject: [PATCH 34/45] Reverse dim ordering lexico support --- lib/util/Lexicographic.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/lib/util/Lexicographic.h b/lib/util/Lexicographic.h index b922dba5..f5c55b74 100644 --- a/lib/util/Lexicographic.h +++ b/lib/util/Lexicographic.h @@ -26,6 +26,25 @@ namespace Grid{ } } + static inline void IndexFromCoorReversed (const std::vector<int>& coor,int &index,const std::vector<int> &dims){ + int nd=dims.size(); + int stride=1; + index=0; + for(int d=nd-1;d>=0;d--){ + index = index+stride*coor[d]; + stride=stride*dims[d]; + } + } + static inline void CoorFromIndexReversed (std::vector<int>& coor,int index,const std::vector<int> &dims){ + int nd= dims.size(); + coor.resize(nd); + for(int d=nd-1;d>=0;d--){ + coor[d] = index % dims[d]; + index = index / dims[d]; + } + } + + }; } From 4a699b4da340280d0502fcaab6d31b598e924f93 Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Mon, 30 Oct 2017 00:04:14 +0000 Subject: [PATCH 35/45] New rank can be found out --- lib/cartesian/Cartesian_base.h | 9 +++++++-- lib/cartesian/Cartesian_full.h | 11 +++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/lib/cartesian/Cartesian_base.h b/lib/cartesian/Cartesian_base.h index 6aa0e3c7..acc870de 100644 --- a/lib/cartesian/Cartesian_base.h +++ b/lib/cartesian/Cartesian_base.h @@ -44,13 +44,18 @@ namespace Grid{ class GridBase : public CartesianCommunicator , public GridThread { public: - + int dummy; // Give Lattice access template<class object> friend class Lattice; GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {}; GridBase(const std::vector<int> & processor_grid, - const CartesianCommunicator &parent) : CartesianCommunicator(processor_grid,parent) {}; + const CartesianCommunicator &parent, + int &split_rank) + : CartesianCommunicator(processor_grid,parent,split_rank) {}; + GridBase(const std::vector<int> & processor_grid, + const CartesianCommunicator &parent) + : CartesianCommunicator(processor_grid,parent,dummy) {}; virtual ~GridBase() = default; diff --git a/lib/cartesian/Cartesian_full.h b/lib/cartesian/Cartesian_full.h index 
c7ea68c9..9273abf3 100644 --- a/lib/cartesian/Cartesian_full.h +++ b/lib/cartesian/Cartesian_full.h @@ -38,7 +38,7 @@ namespace Grid{ class GridCartesian: public GridBase { public: - + int dummy; virtual int CheckerBoardFromOindexTable (int Oindex) { return 0; } @@ -67,7 +67,14 @@ public: GridCartesian(const std::vector<int> &dimensions, const std::vector<int> &simd_layout, const std::vector<int> &processor_grid, - const GridCartesian &parent) : GridBase(processor_grid,parent) + const GridCartesian &parent) : GridBase(processor_grid,parent,dummy) + { + Init(dimensions,simd_layout,processor_grid); + } + GridCartesian(const std::vector<int> &dimensions, + const std::vector<int> &simd_layout, + const std::vector<int> &processor_grid, + const GridCartesian &parent,int &split_rank) : GridBase(processor_grid,parent,split_rank) { Init(dimensions,simd_layout,processor_grid); } From fe4d9b003ca9c38ff6ec15e7445c22b0f4a72ade Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Mon, 30 Oct 2017 00:04:47 +0000 Subject: [PATCH 36/45] More digits --- lib/algorithms/iterative/ConjugateGradient.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/algorithms/iterative/ConjugateGradient.h b/lib/algorithms/iterative/ConjugateGradient.h index 5c968e04..0d4e51c7 100644 --- a/lib/algorithms/iterative/ConjugateGradient.h +++ b/lib/algorithms/iterative/ConjugateGradient.h @@ -78,12 +78,12 @@ class ConjugateGradient : public OperatorFunction<Field> { cp = a; ssq = norm2(src); - std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: guess " << guess << std::endl; - std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: src " << ssq << std::endl; - std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: mp " << d << std::endl; - std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: mmp " << b << std::endl; - std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: cp,r " << cp << std::endl; - std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: p " << a << std::endl; + std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: guess " << guess << std::endl; + std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: src " << ssq << std::endl; + std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: mp " << d << std::endl; + std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: mmp " << b << std::endl; + std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: cp,r " << cp << std::endl; + std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: p " << a << std::endl; RealD rsq = Tolerance * Tolerance * ssq; @@ -92,7 +92,7 @@ class ConjugateGradient : public OperatorFunction<Field> { return; } - std::cout << GridLogIterative << std::setprecision(4) + std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl; GridStopWatch LinalgTimer; From 5bf42e1e150cb0e9116e427653955cb4398b1326 Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Mon, 30 Oct 2017 00:05:21 +0000 Subject: [PATCH 37/45] Update --- tests/solver/Test_dwf_hdcr.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/solver/Test_dwf_hdcr.cc b/tests/solver/Test_dwf_hdcr.cc index c553ba0a..b3373238 100644 --- a/tests/solver/Test_dwf_hdcr.cc +++ 
b/tests/solver/Test_dwf_hdcr.cc @@ -555,13 +555,13 @@ int main (int argc, char ** argv) std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl; std::cout<<GridLogMessage << "**************************************************"<< std::endl; MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf); - Subspace Aggregates(Coarse5d,FGrid); + Subspace Aggregates(Coarse5d,FGrid,0); // Aggregates.CreateSubspace(RNG5,HermDefOp,nbasis); assert ( (nbasis & 0x1)==0); int nb=nbasis/2; std::cout<<GridLogMessage << " nbasis/2 = "<<nb<<std::endl; - // Aggregates.CreateSubspace(RNG5,HermDefOp,nb); - Aggregates.CreateSubspaceLanczos(RNG5,HermDefOp,nb); + Aggregates.CreateSubspace(RNG5,HermDefOp,nb); + // Aggregates.CreateSubspaceLanczos(RNG5,HermDefOp,nb); for(int n=0;n<nb;n++){ G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]); std::cout<<GridLogMessage<<n<<" subspace "<<norm2(Aggregates.subspace[n+nb])<<" "<<norm2(Aggregates.subspace[n]) <<std::endl; From 501fa1614a0de6a6410c606142bfc458d13b488f Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Mon, 30 Oct 2017 00:16:12 +0000 Subject: [PATCH 38/45] Communicator updates for split grid --- lib/communicator/Communicator_base.cc | 63 +++++++++++++++++++-------- lib/communicator/Communicator_base.h | 2 +- lib/communicator/Communicator_mpi.cc | 3 +- 3 files changed, 47 insertions(+), 21 deletions(-) diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc index ce9a3cf0..a72c75fe 100644 --- a/lib/communicator/Communicator_base.cc +++ b/lib/communicator/Communicator_base.cc @@ -97,9 +97,9 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) } -#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) +#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3) -CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent) +CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank) { _ndimension = processors.size(); assert(_ndimension = parent._ndimension); @@ -124,33 +124,51 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors, for(int d=0;d<_ndimension;d++){ ccoor[d] = parent._processor_coor[d] % processors[d]; scoor[d] = parent._processor_coor[d] / processors[d]; - ssize[d] = parent._processors[d]/ processors[d]; + ssize[d] = parent._processors[d] / processors[d]; } - int crank,srank; // rank within subcomm ; rank of subcomm within blocks of subcomms - Lexicographic::IndexFromCoor(ccoor,crank,processors); - Lexicographic::IndexFromCoor(scoor,srank,ssize); + int crank; // rank within subcomm ; srank is rank of subcomm within blocks of subcomms + // Mpi uses the reverse Lexico convention to us + Lexicographic::IndexFromCoorReversed(ccoor,crank,processors); + Lexicographic::IndexFromCoorReversed(scoor,srank,ssize); MPI_Comm comm_split; if ( Nchild > 1 ) { - // std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl; - // std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"] "; - // for(int d=0;d<parent._processors.size();d++) std::cout << parent._processors[d] << " "; - // std::cout<<std::endl; + /* + std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl; + std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"] "; + for(int 
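
Note on the comm-split hunk above: the parent communicator is divided with colour srank (which block of sub-communicators this rank belongs to) and key crank (its desired rank within the block). A runnable MPI sketch of the same colour/key pattern, flattened to one dimension for brevity; the real code derives both indices from multi-dimensional coordinates via IndexFromCoorReversed:

#include <mpi.h>
#include <cstdio>

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);
  int wrank, wsize;
  MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
  MPI_Comm_size(MPI_COMM_WORLD, &wsize);

  const int block = 2;            // ranks per sub-communicator (assumed)
  int srank = wrank / block;      // which sub-communicator: the colour
  int crank = wrank % block;      // desired rank inside it: the key

  MPI_Comm sub;
  MPI_Comm_split(MPI_COMM_WORLD, srank, crank, &sub);

  int nrank;
  MPI_Comm_rank(sub, &nrank);
  std::printf("world %d -> block %d rank %d (asked for %d)\n",
              wrank, srank, nrank, crank);

  MPI_Comm_free(&sub);
  MPI_Finalize();
}
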
d=0;d<parent._processors.size();d++) std::cout << parent._processors[d] << " "; + std::cout<<std::endl; - // std::cout << GridLogMessage<<" child grid["<< _ndimension <<"] "; - // for(int d=0;d<processors.size();d++) std::cout << processors[d] << " "; - // std::cout<<std::endl; + std::cout << GridLogMessage<<" child grid["<< _ndimension <<"] "; + for(int d=0;d<processors.size();d++) std::cout << processors[d] << " "; + std::cout<<std::endl; + + std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< _ndimension <<"] "; + for(int d=0;d<processors.size();d++) std::cout << parent._processor_coor[d] << " "; + std::cout<<std::endl; + + std::cout << GridLogMessage<<" new rank "<< crank<<" coor ["<< _ndimension <<"] "; + for(int d=0;d<processors.size();d++) std::cout << ccoor[d] << " "; + std::cout<<std::endl; + + std::cout << GridLogMessage<<" new coor ["<< _ndimension <<"] "; + for(int d=0;d<processors.size();d++) std::cout << parent._processor_coor[d] << " "; + std::cout<<std::endl; + */ int ierr= MPI_Comm_split(parent.communicator,srank,crank,&comm_split); assert(ierr==0); ////////////////////////////////////////////////////////////////////////////////////////////////////// // Declare victory ////////////////////////////////////////////////////////////////////////////////////////////////////// - // std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into " - // << Nchild <<" communicators with " << childsize << " ranks"<<std::endl; + /* + std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into " + << Nchild <<" communicators with " << childsize << " ranks"<<std::endl; + */ } else { comm_split=parent.communicator; + srank = 0; } ////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -163,9 +181,6 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors, ////////////////////////////////////////////////////////////////////////////////////////////////////// void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base) { - // if ( communicator_base != communicator_world ) { - // std::cout << "Cartesian communicator created with a non-world communicator"<<std::endl; - // } _ndimension = processors.size(); _processor_coor.resize(_ndimension); @@ -179,10 +194,20 @@ void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &proc } std::vector<int> periodic(_ndimension,1); - MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],1,&communicator); + MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],0,&communicator); MPI_Comm_rank(communicator,&_processor); MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); + if ( communicator_base != communicator_world ) { + std::cout << "Cartesian communicator created with a non-world communicator"<<std::endl; + + std::cout << " new communicator rank "<<_processor<< " coor ["<<_ndimension<<"] "; + for(int d=0;d<_processors.size();d++){ + std::cout << _processor_coor[d]<<" "; + } + std::cout << std::endl; + } + int Size; MPI_Comm_size(communicator,&Size); diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index ff054497..4374ac93 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -153,7 +153,7 @@ class CartesianCommunicator { // Constructors to sub-divide a parent communicator // and default to comm 
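
Note on the InitFromMPICommunicator hunk above: the reorder argument of MPI_Cart_create changes from 1 to 0, so MPI may no longer renumber ranks away from the ordering the split logic just established. A minimal sketch of the reorder-pinned Cartesian creation; the 2x2 layout is an assumption, run with exactly 4 ranks:

#include <mpi.h>
#include <cstdio>

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);
  int dims[2] = {2, 2}, periodic[2] = {1, 1};

  MPI_Comm cart;
  // reorder=0: Cartesian ranks keep the parent communicator's order
  MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periodic, /*reorder=*/0, &cart);
  if (cart == MPI_COMM_NULL) { MPI_Finalize(); return 0; }  // surplus ranks

  int rank, coor[2];
  MPI_Comm_rank(cart, &rank);
  MPI_Cart_coords(cart, rank, 2, coor);
  std::printf("rank %d at (%d,%d)\n", rank, coor[0], coor[1]);

  MPI_Comm_free(&cart);
  MPI_Finalize();
}
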
world //////////////////////////////////////////////// - CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent); + CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank); CartesianCommunicator(const std::vector<int> &pdimensions_in); virtual ~CartesianCommunicator(); diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index ef612f98..5593aa8b 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -205,7 +205,8 @@ void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words, // Split the communicator row[dim] = _processors[dim]; - CartesianCommunicator Comm(row,*this); + int me; + CartesianCommunicator Comm(row,*this,me); Comm.AllToAll(in,out,words,bytes); } void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes) From a7f72eb9946d782e48fe315be066ca95b5c097b6 Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Mon, 30 Oct 2017 00:22:06 +0000 Subject: [PATCH 39/45] SHaking out --- lib/lattice/Lattice_transfer.h | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index 962cdeb1..1b09217b 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -757,6 +757,7 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){ // NB: Easiest to programme if keep in lex order. // ///////////////////////////////////////////////////////// + template<class Vobj> void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split) { @@ -805,6 +806,7 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split) std::vector<Sobj> tmpdata(sz); std::vector<Sobj> alldata(sz); std::vector<Sobj> scalardata(lsites); + for(int v=0;v<nvector;v++){ unvectorizeToLexOrdArray(scalardata,full[v]); parallel_for(int site=0;site<lsites;site++){ @@ -816,18 +818,23 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split) std::vector<int> ldims = full_grid->_ldimensions; std::vector<int> lcoor(ndim); - for(int d=0;d<ndim;d++){ + for(int d=ndim-1;d>=0;d--){ if ( ratio[d] != 1 ) { full_grid ->AllToAll(d,alldata,tmpdata); - + // std::cout << GridLogMessage << "Grid_split: dim " <<d<<" ratio "<<ratio[d]<<" nvec "<<nvec<<" procs "<<split_grid->_processors[d]<<std::endl; + // for(int v=0;v<nvec;v++){ + // std::cout << "Grid_split: alldata["<<v<<"] " << alldata[v] <<std::endl; + // std::cout << "Grid_split: tmpdata["<<v<<"] " << tmpdata[v] <<std::endl; + // } ////////////////////////////////////////// //Local volume for this dimension is expanded by ratio of processor extents // Number of vectors is decreased by same factor // Rearrange to lexico for bigger volume ////////////////////////////////////////// nvec /= ratio[d]; + auto rdims = ldims; rdims[d] *= ratio[d]; auto rsites= lsites*ratio[d]; for(int v=0;v<nvec;v++){ @@ -847,7 +854,9 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split) int rmul=nvec*lsites; int vmul= lsites; alldata[rsite] = tmpdata[lsite+r*rmul+v*vmul]; - + // if ( lsite==0 ) { + // std::cout << "Grid_split: grow alldata["<<rsite<<"] " << alldata[rsite] << " <- tmpdata["<< lsite+r*rmul+v*vmul<<"] "<<tmpdata[lsite+r*rmul+v*vmul] <<std::endl; + // } } } } @@ -860,7 +869,6 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split) } } } - 
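
Note on the AllToAll(dim,...) change above: a one-dimensional sub-communicator is built along dim and the exchange runs within it. A self-contained sketch of the underlying MPI_Alltoall semantics, one int per peer; after the call the data is transposed across ranks:

#include <mpi.h>
#include <cstdio>
#include <vector>

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);
  int rank, size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  std::vector<int> sendbuf(size), recvbuf(size);
  for (int r = 0; r < size; r++) sendbuf[r] = 100 * rank + r;  // tagged payload

  MPI_Alltoall(sendbuf.data(), 1, MPI_INT,
               recvbuf.data(), 1, MPI_INT, MPI_COMM_WORLD);

  // After the exchange recvbuf[r] == 100*r + rank: the send matrix transposed.
  for (int r = 0; r < size; r++)
    std::printf("rank %d got %d from rank %d\n", rank, recvbuf[r], r);

  MPI_Finalize();
}
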
vectorizeFromLexOrdArray(alldata,split); } @@ -936,10 +944,12 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split) lsites = split_grid->lSites(); std::vector<int> ldims = split_grid->_ldimensions; - for(int d=ndim-1;d>=0;d--){ + // for(int d=ndim-1;d>=0;d--){ + for(int d=0;d<ndim;d++){ if ( ratio[d] != 1 ) { + if ( split_grid->_processors[d] > 1 ) { tmpdata = alldata; split_grid->AllToAll(d,tmpdata,alldata); @@ -985,13 +995,11 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split) lsites = full_grid->lSites(); for(int v=0;v<nvector;v++){ + assert(v<full.size()); parallel_for(int site=0;site<lsites;site++){ scalardata[site] = alldata[v*lsites+site]; } - assert(v<full.size()); - vectorizeFromLexOrdArray(scalardata,full[v]); - } } From 00164f5ce5cdc3bab11bfc2b4c0299062a1b0a52 Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Mon, 30 Oct 2017 00:22:52 +0000 Subject: [PATCH 40/45] : --- tests/solver/Test_dwf_mrhs_cg.cc | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/tests/solver/Test_dwf_mrhs_cg.cc b/tests/solver/Test_dwf_mrhs_cg.cc index 079fa85a..207e1331 100644 --- a/tests/solver/Test_dwf_mrhs_cg.cc +++ b/tests/solver/Test_dwf_mrhs_cg.cc @@ -52,15 +52,28 @@ int main (int argc, char ** argv) GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); - int nrhs = UGrid->RankCount() ; - ///////////////////////////////////////////// // Split into 1^4 mpi communicators ///////////////////////////////////////////// + for(int i=0;i<argc;i++){ + if(std::string(argv[i]) == "--split"){ + for(int k=0;k<mpi_layout.size();k++){ + std::stringstream ss; + ss << argv[i+1+k]; + ss >> mpi_split[k]; + } + break; + } + } + + int nrhs = 1; + int me; + for(int i=0;i<mpi_layout.size();i++) nrhs *= (mpi_layout[i]/mpi_split[i]); + GridCartesian * SGrid = new GridCartesian(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), mpi_split, - *UGrid); + *UGrid,me); GridCartesian * SFGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid); GridRedBlackCartesian * SrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid); @@ -70,7 +83,6 @@ int main (int argc, char ** argv) // Set up the problem as a 4d spreadout job /////////////////////////////////////////////// std::vector<int> seeds({1,2,3,4}); - GridParallelRNG pRNG(UGrid ); pRNG.SeedFixedIntegers(seeds); GridParallelRNG pRNG5(FGrid); pRNG5.SeedFixedIntegers(seeds); std::vector<FermionField> src(nrhs,FGrid); @@ -93,7 +105,7 @@ int main (int argc, char ** argv) emptyUserRecord record; std::string file("./scratch.scidac"); std::string filef("./scratch.scidac.ferm"); - int me = UGrid->ThisRank(); + LatticeGaugeField s_Umu(SGrid); FermionField s_src(SFGrid); FermionField s_src_split(SFGrid); @@ -169,7 +181,7 @@ int main (int argc, char ** argv) for(int n=0;n<nrhs;n++){ FGrid->Barrier(); if ( n==me ) { - std::cerr << GridLogMessage<<"Split "<< me << " " << norm2(s_src_split) << " " << norm2(s_src)<< " diff " << norm2(s_tmp)<<std::endl; + std::cout << GridLogMessage<<"Split "<< me << " " << norm2(s_src_split) << " " << norm2(s_src)<< " diff " << norm2(s_tmp)<<std::endl; } FGrid->Barrier(); } @@ -218,7 +230,6 @@ int main (int argc, char ** argv) std::cout << " diff " <<tmp<<std::endl; } */ - std::cout << GridLogMessage<< "Checking the residuals"<<std::endl; for(int n=0;n<nrhs;n++){ HermOpCk.HermOp(result[n],tmp); tmp = tmp - src[n]; From 
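
Note on the --split option added to the mrhs tests above: one integer is read per MPI dimension and the number of right-hand sides is the product of the layout/split ratios. A standalone sketch of the same parsing; the default layout and the bounds check are ours:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main(int argc, char** argv) {
  std::vector<int> mpi_layout = {2, 2, 2, 2};        // assumed default layout
  std::vector<int> mpi_split(mpi_layout.size(), 1);  // default: no split

  for (int i = 0; i < argc; i++) {
    if (std::string(argv[i]) == "--split") {
      for (size_t k = 0; k < mpi_layout.size(); k++) {
        if (i + 1 + (int)k >= argc) {
          std::cerr << "--split needs " << mpi_layout.size() << " ints\n";
          return 1;
        }
        std::stringstream ss(argv[i + 1 + k]);
        ss >> mpi_split[k];
      }
      break;
    }
  }

  int nrhs = 1;  // one solve per sub-grid the layout decomposes into
  for (size_t i = 0; i < mpi_layout.size(); i++)
    nrhs *= mpi_layout[i] / mpi_split[i];

  std::cout << "nrhs = " << nrhs << "\n";  // e.g. --split 1 1 1 1 gives 16
}
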
615a9448b9a70e0e44ef28ce1be80f9fce8fd298 Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Mon, 30 Oct 2017 00:23:34 +0000 Subject: [PATCH 41/45] Extended sub comm supported --- tests/solver/Test_split_grid.cc | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tests/solver/Test_split_grid.cc b/tests/solver/Test_split_grid.cc index 90969b85..2b6a4bf7 100644 --- a/tests/solver/Test_split_grid.cc +++ b/tests/solver/Test_split_grid.cc @@ -52,11 +52,24 @@ int main (int argc, char ** argv) GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); - int nrhs = UGrid->RankCount() ; - ///////////////////////////////////////////// // Split into 1^4 mpi communicators ///////////////////////////////////////////// + + for(int i=0;i<argc;i++){ + if(std::string(argv[i]) == "--split"){ + for(int k=0;k<mpi_layout.size();k++){ + std::stringstream ss; + ss << argv[i+1+k]; + ss >> mpi_split[k]; + } + break; + } + } + + int nrhs = 1; + for(int i=0;i<mpi_layout.size();i++) nrhs *= (mpi_layout[i]/mpi_split[i]); + GridCartesian * SGrid = new GridCartesian(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), mpi_split, From 67131d82f2561c490b2fb5ce4a8e7566882b0be9 Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Mon, 30 Oct 2017 00:24:11 +0000 Subject: [PATCH 42/45] Get subrank info from communicator constructor --- tests/solver/Test_dwf_mrhs_cg_mpieo.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/solver/Test_dwf_mrhs_cg_mpieo.cc b/tests/solver/Test_dwf_mrhs_cg_mpieo.cc index 14115b59..a6dfcd57 100644 --- a/tests/solver/Test_dwf_mrhs_cg_mpieo.cc +++ b/tests/solver/Test_dwf_mrhs_cg_mpieo.cc @@ -47,7 +47,9 @@ int main (int argc, char ** argv) std::vector<int> mpi_layout = GridDefaultMpi(); std::vector<int> mpi_split (mpi_layout.size(),1); - GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), + GridDefaultSimd(Nd,vComplex::Nsimd()), + GridDefaultMpi()); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); @@ -57,10 +59,11 @@ int main (int argc, char ** argv) ///////////////////////////////////////////// // Split into 1^4 mpi communicators ///////////////////////////////////////////// + int me; GridCartesian * SGrid = new GridCartesian(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), mpi_split, - *UGrid); + *UGrid,me); GridCartesian * SFGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid); GridRedBlackCartesian * SrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid); @@ -89,8 +92,6 @@ int main (int argc, char ** argv) ///////////////// // MPI only sends ///////////////// - int me = UGrid->ThisRank(); - LatticeGaugeField s_Umu(SGrid); FermionField s_src(SFGrid); FermionField s_src_e(SFrbGrid); From 78e8704eacb41fae706e50c24ae0baa6b17b9481 Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Mon, 30 Oct 2017 00:25:31 +0000 Subject: [PATCH 43/45] Shaking out --- tests/solver/Test_dwf_mrhs_cg_mpi.cc | 99 +++++++++++++++++++++++++--- 1 file changed, 89 insertions(+), 10 deletions(-) diff --git a/tests/solver/Test_dwf_mrhs_cg_mpi.cc 
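
Note on patch 42: the sub-grid index now comes out of the constructor rather than being recomputed from UGrid->ThisRank(), which was wrong once rank orderings diverged. A usage sketch of the extended signature; this requires Grid itself, and the 1^4 split is an assumption:

#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;

int main(int argc, char** argv) {
  Grid_init(&argc, &argv);
  std::vector<int> mpi_split(GridDefaultMpi().size(), 1);  // 1^4 split

  GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(
      GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()),
      GridDefaultMpi());

  int me;  // filled by the constructor: index of this rank's sub-grid
  GridCartesian* SGrid = new GridCartesian(
      GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()),
      mpi_split, *UGrid, me);

  std::cout << GridLogMessage << "split rank " << me << std::endl;
  Grid_finalize();
}
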
b/tests/solver/Test_dwf_mrhs_cg_mpi.cc index fbc6dd32..f640edff 100644 --- a/tests/solver/Test_dwf_mrhs_cg_mpi.cc +++ b/tests/solver/Test_dwf_mrhs_cg_mpi.cc @@ -1,4 +1,4 @@ - /************************************************************************************* + /************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -47,20 +47,36 @@ int main (int argc, char ** argv) std::vector<int> mpi_layout = GridDefaultMpi(); std::vector<int> mpi_split (mpi_layout.size(),1); - GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), + GridDefaultSimd(Nd,vComplex::Nsimd()), + GridDefaultMpi()); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); - int nrhs = UGrid->RankCount() ; - ///////////////////////////////////////////// // Split into 1^4 mpi communicators ///////////////////////////////////////////// + + for(int i=0;i<argc;i++){ + if(std::string(argv[i]) == "--split"){ + for(int k=0;k<mpi_layout.size();k++){ + std::stringstream ss; + ss << argv[i+1+k]; + ss >> mpi_split[k]; + } + break; + } + } + + int nrhs = 1; + int me; + for(int i=0;i<mpi_layout.size();i++) nrhs *= (mpi_layout[i]/mpi_split[i]); + GridCartesian * SGrid = new GridCartesian(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), mpi_split, - *UGrid); + *UGrid,me); GridCartesian * SFGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,SGrid); GridRedBlackCartesian * SrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid); @@ -78,16 +94,46 @@ int main (int argc, char ** argv) std::vector<FermionField> result(nrhs,FGrid); FermionField tmp(FGrid); - for(int s=0;s<nrhs;s++) random(pRNG5,src[s]); for(int s=0;s<nrhs;s++) result[s]=zero; +#undef LEXICO_TEST +#ifdef LEXICO_TEST + { + LatticeFermion lex(FGrid); lex = zero; + LatticeFermion ftmp(FGrid); + Integer stride =10000; + double nrm; + LatticeComplex coor(FGrid); + for(int d=0;d<5;d++){ + LatticeCoordinate(coor,d); + ftmp = stride; + ftmp = ftmp * coor; + lex = lex + ftmp; + stride=stride/10; + } + for(int s=0;s<nrhs;s++) { + src[s]=lex; + ftmp = 1000*1000*s; + src[s] = src[s] + ftmp; + } + } +#else + for(int s=0;s<nrhs;s++) { + random(pRNG5,src[s]); + tmp = 100.0*s; + src[s] = (src[s] * 0.1) + tmp; + std::cout << " src ]"<<s<<"] "<<norm2(src[s])<<std::endl; + } +#endif + + for(int n =0 ; n< nrhs ; n++) { + std::cout << " src"<<n<<"\n"<< src[n] <<std::endl; + } LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu); ///////////////// // MPI only sends ///////////////// - int me = UGrid->ThisRank(); - LatticeGaugeField s_Umu(SGrid); FermionField s_src(SFGrid); FermionField s_tmp(SFGrid); @@ -98,6 +144,36 @@ int main (int argc, char ** argv) /////////////////////////////////////////////////////////////// Grid_split (Umu,s_Umu); Grid_split (src,s_src); + std::cout << " split rank " <<me << " s_src "<<norm2(s_src)<<std::endl; + std::cout << " s_src\n "<< s_src <<std::endl; + +#ifdef LEXICO_TEST + FermionField s_src_tmp(SFGrid); + FermionField s_src_diff(SFGrid); + { + LatticeFermion lex(SFGrid); lex = zero; + LatticeFermion ftmp(SFGrid); + Integer stride =10000; + double nrm; + LatticeComplex coor(SFGrid); + for(int d=0;d<5;d++){ + LatticeCoordinate(coor,d); + ftmp = stride; + ftmp = 
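
Note on the LEXICO_TEST block above: every site is stamped with a decimal encoding of its five coordinates (strides 10000 down to 1) plus 1000000 times the right-hand-side index, so any misrouted data after Grid_split can be read off by eye. A scalar sketch of the encode/decode, valid while every extent is at most 9:

#include <cstdio>

int main() {
  int coor[5] = {3, 1, 4, 1, 5};   // an example site coordinate
  int s = 2;                       // right-hand-side index

  long stamp = 1000000L * s;
  long stride = 10000;
  for (int d = 0; d < 5; d++) { stamp += stride * coor[d]; stride /= 10; }
  std::printf("stamp = %ld\n", stamp);   // 2031415: rhs 2, coor 3,1,4,1,5

  // Decode: the decimal digits recover the coordinate directly.
  long rest = stamp % 1000000;           // strip the vector index
  std::printf("rhs=%ld coor=%ld%ld%ld%ld%ld\n", stamp / 1000000,
              rest/10000, rest/1000%10, rest/100%10, rest/10%10, rest%10);
}
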
ftmp * coor; + lex = lex + ftmp; + stride=stride/10; + } + s_src_tmp=lex; + ftmp = 1000*1000*me; + s_src_tmp = s_src_tmp + ftmp; + } + s_src_diff = s_src_tmp - s_src; + std::cout << " s_src_diff " << norm2(s_src_diff)<<std::endl; + + std::cout << " s_src \n" << s_src << std::endl; + std::cout << " s_src_tmp \n" << s_src_tmp << std::endl; + std::cout << " s_src_diff \n" << s_src_diff << std::endl; +#endif /////////////////////////////////////////////////////////////// // Set up N-solvers as trivially parallel @@ -113,10 +189,11 @@ int main (int argc, char ** argv) MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf); MdagMLinearOperator<DomainWallFermionR,FermionField> HermOpCk(Dchk); - ConjugateGradient<FermionField> CG((1.0e-5/(me+1)),10000); + ConjugateGradient<FermionField> CG((1.0e-5),10000); s_res = zero; CG(HermOp,s_src,s_res); + std::cout << " s_res norm "<<norm2(s_res)<<std::endl; ///////////////////////////////////////////////////////////// // Report how long they all took ///////////////////////////////////////////////////////////// @@ -134,10 +211,12 @@ int main (int argc, char ** argv) std::cout << GridLogMessage<< "Unsplitting the result"<<std::endl; Grid_unsplit(result,s_res); + std::cout << GridLogMessage<< "Checking the residuals"<<std::endl; for(int n=0;n<nrhs;n++){ + std::cout << " res["<<n<<"] norm "<<norm2(result[n])<<std::endl; HermOpCk.HermOp(result[n],tmp); tmp = tmp - src[n]; - std::cout << GridLogMessage<<" resid["<<n<<"] "<< norm2(tmp)<<std::endl; + std::cout << GridLogMessage<<" resid["<<n<<"] "<< norm2(tmp)/norm2(src[n])<<std::endl; } Grid_finalize(); From 27ea2afe8681d0094b78396f7a8dbdd8f6c52dec Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Mon, 30 Oct 2017 01:14:11 +0000 Subject: [PATCH 44/45] No compile on comms == none fix --- lib/communicator/Communicator_none.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/communicator/Communicator_none.cc b/lib/communicator/Communicator_none.cc index a862d52a..26b330a7 100644 --- a/lib/communicator/Communicator_none.cc +++ b/lib/communicator/Communicator_none.cc @@ -38,8 +38,8 @@ void CartesianCommunicator::Init(int *argc, char *** arv) ShmInitGeneric(); } -CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent) - : CartesianCommunicator(processors) {} +CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank) + : CartesianCommunicator(processors) { srank=0;} CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) { From 360efd0088847727e2fabe034dc5f18a09430cff Mon Sep 17 00:00:00 2001 From: paboyle <paboyle@ph.ed.ac.uk> Date: Thu, 2 Nov 2017 22:05:31 +0000 Subject: [PATCH 45/45] Improved treatment of reverse asked for by chris. Truncate the basis. 
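
Note on the residual check above: it now divides by norm2(src[n]), and since Grid's norm2 is a squared norm, the printout is the square of the conventional relative residual. A scalar illustration of why normalising by the source makes differently scaled runs comparable:

#include <cmath>
#include <cstdio>

int main() {
  double resid2 = 1.0e-8;                      // ||Ax-b||^2, same in both runs
  double src2_small = 1.0, src2_big = 1.0e12;  // ||b||^2 for two sources

  std::printf("relative^2: %g vs %g\n",
              resid2 / src2_small, resid2 / src2_big);
  std::printf("relative  : %g vs %g\n",        // 1e-4 poor, 1e-10 excellent
              std::sqrt(resid2 / src2_small), std::sqrt(resid2 / src2_big));
}
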
Power method renormalises --- .../iterative/ImplicitlyRestartedLanczos.h | 44 ++++++++++++------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index 089e7ff3..7a0760c9 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -181,8 +181,8 @@ enum IRLdiagonalisation { template<class Field> class ImplicitlyRestartedLanczosHermOpTester : public ImplicitlyRestartedLanczosTester<Field> { public: - LinearFunction<Field> &_HermOpTest; - ImplicitlyRestartedLanczosHermOpTester(LinearFunction<Field> &HermOpTest) : _HermOpTest(HermOpTest) { }; + LinearFunction<Field> &_HermOp; + ImplicitlyRestartedLanczosHermOpTester(LinearFunction<Field> &HermOp) : _HermOp(HermOp) { }; int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox) { return TestConvergence(j,resid,B,eval,evalMaxApprox); @@ -192,7 +192,7 @@ template<class Field> class ImplicitlyRestartedLanczosHermOpTester : public Imp Field v(B); RealD eval_poly = eval; // Apply operator - _HermOpTest(B,v); + _HermOp(B,v); RealD vnum = real(innerProduct(B,v)); // HermOp. RealD vden = norm2(B); @@ -233,8 +233,8 @@ class ImplicitlyRestartedLanczos { //////////////////////////////// // Embedded objects //////////////////////////////// + LinearFunction<Field> &_PolyOp; LinearFunction<Field> &_HermOp; - LinearFunction<Field> &_HermOpTest; ImplicitlyRestartedLanczosTester<Field> &_Tester; // Default tester provided (we need a ref to something in default case) ImplicitlyRestartedLanczosHermOpTester<Field> SimpleTester; @@ -246,16 +246,22 @@ public: ////////////////////////////////////////////////////////////////// // PAB: ////////////////////////////////////////////////////////////////// - // Too many options & knobs. Do we really need orth_period + // Too many options & knobs. + // Eliminate: + // orth_period + // betastp + // MinRestart + // + // Do we really need orth_period // What is the theoretical basis & guarantees of betastp ? // Nstop=Nk viable? // MinRestart avoidable with new convergence test? - // Could cut to HermOp, HermOpTest, Tester, Nk, Nm, resid, maxiter (+diagonalisation) - // HermOpTest could be eliminated if we dropped the Power method for max eval. + // Could cut to PolyOp, HermOp, Tester, Nk, Nm, resid, maxiter (+diagonalisation) + // HermOp could be eliminated if we dropped the Power method for max eval. 
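
Note on the tester rename above: convergence is measured against the underlying operator (_HermOp) even when the Lanczos iteration itself ran on a polynomial of it (_PolyOp), using the Rayleigh quotient vnum/vden. A plain-vector sketch of that check, with a 2x2 symmetric matrix standing in for HermOp:

#include <cmath>
#include <cstdio>

static void HermOp(const double x[2], double y[2]) {
  y[0] = 2.0 * x[0] + 1.0 * x[1];   // symmetric stand-in operator
  y[1] = 1.0 * x[0] + 2.0 * x[1];
}

int main() {
  double B[2] = {1.0, 1.0};         // candidate eigenvector (exact here)
  double v[2];
  HermOp(B, v);

  double vnum = B[0]*v[0] + B[1]*v[1];   // <B, HermOp B>
  double vden = B[0]*B[0] + B[1]*B[1];   // <B, B>
  double eval = vnum / vden;             // Rayleigh quotient: 3 for this B

  double r0 = v[0] - eval * B[0], r1 = v[1] - eval * B[1];
  double resid = std::sqrt(r0*r0 + r1*r1) / std::sqrt(vden);
  std::printf("eval %g resid %g\n", eval, resid);  // resid 0: converged
}
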
// -- also: The eval, eval2, eval2_copy stuff is still unnecessarily unclear ////////////////////////////////////////////////////////////////// - ImplicitlyRestartedLanczos(LinearFunction<Field> & HermOp, - LinearFunction<Field> & HermOpTest, + ImplicitlyRestartedLanczos(LinearFunction<Field> & PolyOp, + LinearFunction<Field> & HermOp, ImplicitlyRestartedLanczosTester<Field> & Tester, int _Nstop, // sought vecs int _Nk, // sought vecs @@ -265,14 +271,14 @@ public: RealD _betastp=0.0, // if beta(k) < betastp: converged int _MinRestart=1, int _orth_period = 1, IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : - SimpleTester(HermOpTest), _HermOp(HermOp), _HermOpTest(HermOpTest), _Tester(Tester), + SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(Tester), Nstop(_Nstop) , Nk(_Nk), Nm(_Nm), eresid(_eresid), betastp(_betastp), MaxIter(_MaxIter) , MinRestart(_MinRestart), orth_period(_orth_period), diagonalisation(_diagonalisation) { }; - ImplicitlyRestartedLanczos(LinearFunction<Field> & HermOp, - LinearFunction<Field> & HermOpTest, + ImplicitlyRestartedLanczos(LinearFunction<Field> & PolyOp, + LinearFunction<Field> & HermOp, int _Nstop, // sought vecs int _Nk, // sought vecs int _Nm, // spare vecs @@ -281,7 +287,7 @@ public: RealD _betastp=0.0, // if beta(k) < betastp: converged int _MinRestart=1, int _orth_period = 1, IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : - SimpleTester(HermOpTest), _HermOp(HermOp), _HermOpTest(HermOpTest), _Tester(SimpleTester), + SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(SimpleTester), Nstop(_Nstop) , Nk(_Nk), Nm(_Nm), eresid(_eresid), betastp(_betastp), MaxIter(_MaxIter) , MinRestart(_MinRestart), @@ -323,7 +329,7 @@ repeat →AVK =VKHK +fKe†K † Extend to an M = K + P step factorization AVM = VMHM + fMeM until convergence */ - void calc(std::vector<RealD>& eval, std::vector<Field>& evec, const Field& src, int& Nconv, bool reverse=true) + void calc(std::vector<RealD>& eval, std::vector<Field>& evec, const Field& src, int& Nconv, bool reverse=false) { GridBase *grid = src._grid; assert(grid == evec[0]._grid); @@ -355,7 +361,8 @@ until convergence auto tmp = src; const int _MAX_ITER_IRL_MEVAPP_ = 50; for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) { - _HermOpTest(src_n,tmp); + normalise(src_n); + _HermOp(src_n,tmp); RealD vnum = real(innerProduct(src_n,tmp)); // HermOp. RealD vden = norm2(src_n); RealD na = vnum/vden; @@ -536,7 +543,10 @@ until convergence std::cout << GridLogIRL << "Nconv ("<<Nconv<<") < Nstop ("<<Nstop<<")"<<std::endl; eval=eval2; - + + //Keep only converged + eval.resize(Nconv);// Nstop? + evec.resize(Nconv,grid);// Nstop? basisSortInPlace(evec,eval,reverse); } @@ -573,7 +583,7 @@ until convergence Field& evec_k = evec[k]; - _HermOp(evec_k,w); std::cout<<GridLogIRL << "Poly(HermOp)" <<std::endl; + _PolyOp(evec_k,w); std::cout<<GridLogIRL << "PolyOp" <<std::endl; if(k>0) w -= lme[k-1] * evec[k-1];
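
Note on the calc() changes above: the power-method estimate of the top eigenvalue now calls normalise(src_n) every iteration; without it the iterate's norm grows like lambda_max^k and loses accuracy or overflows for large operators. A standalone version of the renormalised iteration on a 2x2 symmetric matrix:

#include <cmath>
#include <cstdio>

int main() {
  const double A[2][2] = {{4.0, 1.0}, {1.0, 3.0}};  // lambda_max ~ 4.618
  double v[2] = {1.0, 0.0}, w[2];

  for (int k = 0; k < 50; k++) {
    double n = std::sqrt(v[0]*v[0] + v[1]*v[1]);    // normalise first,
    v[0] /= n; v[1] /= n;                           // as the patch now does
    w[0] = A[0][0]*v[0] + A[0][1]*v[1];             // then apply the operator
    w[1] = A[1][0]*v[0] + A[1][1]*v[1];
    v[0] = w[0]; v[1] = w[1];
  }
  double num = v[0]*(A[0][0]*v[0] + A[0][1]*v[1])
             + v[1]*(A[1][0]*v[0] + A[1][1]*v[1]);
  double den = v[0]*v[0] + v[1]*v[1];
  std::printf("lambda_max ~= %g\n", num / den);     // Rayleigh quotient
}

The other change in the same hunk, resizing eval and evec to Nconv before basisSortInPlace, means callers only ever see converged pairs, which is what the commit subject's "Truncate the basis" refers to.
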