From 82c1ecf60f8128c6399fcc86217e086aba934146 Mon Sep 17 00:00:00 2001 From: Chulwoo Jung Date: Wed, 30 Nov 2022 16:08:40 -0500 Subject: [PATCH 1/2] Block lanczos added --- .../ImplicitlyRestartedBlockLanczos.h | 1555 +++++++++++++++++ Grid/lattice/Lattice_rng.h | 10 + Grid/util/Init.cc | 3 +- Grid/util/Init.h | 2 +- tests/lanczos/Test_dwf_block_lanczos.README | 73 + tests/lanczos/Test_dwf_block_lanczos.cc | 408 +++++ .../lanczos/Test_dwf_block_lanczos.cc.double | 401 +++++ .../lanczos/Test_dwf_block_lanczos.cc.single | 408 +++++ tests/lanczos/Test_dwf_lanczos.cc | 93 +- 9 files changed, 2924 insertions(+), 29 deletions(-) create mode 100644 Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h create mode 100644 tests/lanczos/Test_dwf_block_lanczos.README create mode 100644 tests/lanczos/Test_dwf_block_lanczos.cc create mode 100644 tests/lanczos/Test_dwf_block_lanczos.cc.double create mode 100644 tests/lanczos/Test_dwf_block_lanczos.cc.single diff --git a/Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h b/Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h new file mode 100644 index 00000000..fe388c19 --- /dev/null +++ b/Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h @@ -0,0 +1,1555 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Chulwoo Jung +Author: Yong-Chull Jang +Author: Guido Cossu + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_IRBL_H +#define GRID_IRBL_H + +#include //memset +#ifdef USE_LAPACK +#include +#endif + +#undef USE_LAPACK +#define Glog std::cout << GridLogMessage + +#ifdef GRID_CUDA +#include "cublas_v2.h" +#endif + +#if 0 +#define CUDA_COMPLEX cuDoubleComplex +#define CUDA_FLOAT double +#define MAKE_CUDA_COMPLEX make_cuDoubleComplex +#define CUDA_GEMM cublasZgemm +#else +#define CUDA_COMPLEX cuComplex +#define CUDA_FLOAT float +#define MAKE_CUDA_COMPLEX make_cuComplex +#define CUDA_GEMM cublasCgemm +#endif + +namespace Grid { + +//////////////////////////////////////////////////////////////////////////////// +// Helper class for sorting the evalues AND evectors by Field +// Use pointer swizzle on vectors SHOULD GET RID OF IT SOON! 
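+// Note: less_lmd/less_pair below compare with ">", so push() orders the
+// spectrum in descending order (largest eigenvalue first).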
+////////////////////////////////////////////////////////////////////////////////
+template<class Field>
+class SortEigen {
+ private:
+  static bool less_lmd(RealD left,RealD right){
+    return left > right;
+  }
+  static bool less_pair(std::pair<RealD,Field const*>& left,
+                        std::pair<RealD,Field const*>& right){
+    return left.first > (right.first);
+  }
+
+ public:
+  void push(std::vector<RealD>& lmd,std::vector<Field>& evec,int N) {
+
+    ////////////////////////////////////////////////////////////////////////
+    // PAB: FIXME: VERY VERY VERY wasteful: takes a copy of the entire vector set.
+    //    : The vector reorder should be done by pointer swizzle somehow
+    ////////////////////////////////////////////////////////////////////////
+    std::vector<Field> cpy(lmd.size(),evec[0].Grid());
+    for(int i=0;i<lmd.size();i++) cpy[i] = evec[i];
+
+    std::vector<std::pair<RealD, Field const*> > emod(lmd.size());
+
+    for(int i=0;i<lmd.size();++i) emod[i] = std::pair<RealD,Field const*>(lmd[i],&cpy[i]);
+
+    partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair);
+
+    typename std::vector<std::pair<RealD, Field const*> >::iterator it = emod.begin();
+    for(int i=0;i<N;++i){
+      lmd[i]=it->first;
+      evec[i]=*(it->second);
+      ++it;
+    }
+  }
+  void push(std::vector<RealD>& lmd,int N) {
+    std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd);
+  }
+  bool saturated(RealD lmd, RealD thrs) {
+    return fabs(lmd) > fabs(thrs);
+  }
+};
+
+enum class LanczosType { irbl, rbl };
+
+enum IRBLdiagonalisation {
+  IRBLdiagonaliseWithDSTEGR,
+  IRBLdiagonaliseWithQR,
+  IRBLdiagonaliseWithEigen
+};
+
+/////////////////////////////////////////////////////////////
+// Implicitly restarted block lanczos
+/////////////////////////////////////////////////////////////
+template<class Field>
+class ImplicitlyRestartedBlockLanczos {
+
+private:
+
+  std::string cname = std::string("ImplicitlyRestartedBlockLanczos");
+  int MaxIter;   // Max iterations
+  int Nstop;     // Number of evecs checked for convergence
+  int Nu;        // Number of vecs in the unit block
+  int Nk;        // Number of converged sought
+  int Nm;        // total number of vectors
+  int Nblock_k;  // Nk/Nu
+  int Nblock_m;  // Nm/Nu
+  int Nconv_test_interval; // Number of skipped vectors when checking a convergence
+  RealD eresid;
+  IRBLdiagonalisation diagonalisation;
+  ////////////////////////////////////
+  // Embedded objects
+  ////////////////////////////////////
+  SortEigen<Field> _sort;
+  LinearOperatorBase<Field> &_Linop;
+  LinearOperatorBase<Field> &_SLinop; //for split
+  OperatorFunction<Field> &_poly;
+  GridRedBlackCartesian * f_grid;
+  GridRedBlackCartesian * sf_grid;
+  int mrhs;
+  /////////////////////////
+  // BLAS objects
+  /////////////////////////
+#ifdef GRID_CUDA
+  cudaError_t cudaStat;
+  CUDA_COMPLEX *w_acc, *evec_acc, *c_acc;
+#endif
+  int Nevec_acc; // Number of eigenvectors stored in the buffer evec_acc
+
+  /////////////////////////
+  // Constructor
+  /////////////////////////
+public:
+  int split_test; //test split in the first iteration
+  ImplicitlyRestartedBlockLanczos(LinearOperatorBase<Field> &Linop,  // op
+                                  LinearOperatorBase<Field> &SLinop, // op
+                                  GridRedBlackCartesian * FrbGrid,
+                                  GridRedBlackCartesian * SFrbGrid,
+                                  int _mrhs,
+                                  OperatorFunction<Field> & poly, // polynomial
+                                  int _Nstop, // really sought vecs
+                                  int _Nconv_test_interval, // conv check interval
+                                  int _Nu,    // vecs in the unit block
+                                  int _Nk,    // sought vecs
+                                  int _Nm,    // total vecs
+                                  RealD _eresid, // resid in lmd deficit
+                                  int _MaxIter,  // Max iterations
+                                  IRBLdiagonalisation _diagonalisation = IRBLdiagonaliseWithEigen)
+    : _Linop(Linop), _SLinop(SLinop), _poly(poly),sf_grid(SFrbGrid),f_grid(FrbGrid),
+      Nstop(_Nstop), Nconv_test_interval(_Nconv_test_interval), mrhs(_mrhs),
+      Nu(_Nu), Nk(_Nk), Nm(_Nm),
+      Nblock_m(_Nm/_Nu), Nblock_k(_Nk/_Nu),
+      //eresid(_eresid), MaxIter(10),
+      eresid(_eresid), MaxIter(_MaxIter),
+
diagonalisation(_diagonalisation),split_test(0), + Nevec_acc(_Nu) + { assert( (Nk%Nu==0) && (Nm%Nu==0) ); }; + + //////////////////////////////// + // Helpers + //////////////////////////////// + static RealD normalize(Field& v, int if_print=0) + { + RealD nn = norm2(v); + nn = sqrt(nn); +#if 0 + if(if_print && nn < 1e20) + Glog<<"normalize: "<& evec, int k, int if_print=0) + { + typedef typename Field::scalar_type MyComplex; +// MyComplex ip; + ComplexD ip; + + for(int j=0; j 1e-14) + Glog<<"orthogonalize before: "< 1e-14) + Glog<<"orthogonalize after: "<& evec, int k) + { + orthogonalize(w, evec, k,1); + } + + void orthogonalize(std::vector& w, int _Nu, std::vector& evec, int k, int if_print=0) + { + typedef typename Field::scalar_type MyComplex; + MyComplex ip; +// ComplexD ip; + + for(int j=0; j 1e-14) + Glog<<"orthogonalize before: "< 1e-14) + Glog<<"orthogonalize after: "<& w, std::vector& evec, int R, int do_print=0) + { +#ifdef GRID_CUDA + Glog << "cuBLAS orthogonalize" << std::endl; + + typedef typename Field::vector_object vobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + typedef typename Field::scalar_type MyComplex; + + GridBase *grid = w[0].Grid(); + const uint64_t sites = grid->lSites(); + + int Nbatch = R/Nevec_acc; + assert( R%Nevec_acc == 0 ); +// Glog << "nBatch, Nevec_acc, R, Nu = " +// << Nbatch << "," << Nevec_acc << "," << R << "," << Nu << std::endl; + +#if 0 // a trivial test + for (int col=0; col(&w_v[0]); +// Glog << "col= "<(&evec_v[0]); +// Glog << "col= "<[" << j << "," << i << "] = " + << z.x << " + i " << z.y << std::endl; + } + w[i] = w[i] - ip * evec[b*Nevec_acc+j]; + } + } +#else + alpha = MAKE_CUDA_COMPLEX(-1.0,0.0); + beta = MAKE_CUDA_COMPLEX(1.0,0.0); + stat = CUDA_GEMM(handle, CUBLAS_OP_N, CUBLAS_OP_N, 12*sites, Nu, Nevec_acc, + &alpha, + evec_acc, 12*sites, c_acc, Nevec_acc, + &beta, + w_acc, 12*sites); + //Glog << stat << std::endl; +#endif + } +#if 1 + for (int col=0; col(&w_v[0]); + for (size_t row=0; row &inner, std::vector& lhs, int llhs, std::vector& rhs, int lrhs) +{ + typedef typename Field:vector_object vobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_typeD vector_type; + GridBase *grid = lhs[0]._grid; + assert(grid == rhs[0]._grid; + const int pad = 8; + int total = llhs*lrhs; + assert(inner.size()==total); + int sum_size=grid->SumArraySize(); + +// std::vector inner(total); + Vector sumarray(sum_size*pad*total); + + parallel_for(int thr=0;throSites(),thr,mywork,myoff); + + std::vector< decltype(innerProductD(lhs[0]._odata[0],rhs[0]._odata[0])) > vinner(total,zero); // private to thread; sub summation + for(int ss=myoff;ssGlobalSum(tmp); + inner[i]=tmp; + } +// return inner; +} +#endif + + + void orthogonalize_blockhead(Field& w, std::vector& evec, int k, int Nu) + { + typedef typename Field::scalar_type MyComplex; + MyComplex ip; + + for(int j=0; j& eval, + std::vector& evec, + const std::vector& src, int& Nconv, LanczosType Impl) + { +#ifdef GRID_CUDA + GridBase *grid = src[0].Grid(); + grid->show_decomposition(); + +// printf("GRID_CUDA\n"); + + // set eigenvector buffers for the cuBLAS calls + //const uint64_t nsimd = grid->Nsimd(); + const uint64_t sites = grid->lSites(); + + cudaStat = cudaMallocManaged((void **)&w_acc, Nu*sites*12*sizeof(CUDA_COMPLEX)); +// Glog << "w_acc= "<& eval, + std::vector& evec, + const std::vector& src, int& Nconv) + { + std::string fname = std::string(cname+"::calc_irbl()"); + GridBase *grid = evec[0].Grid(); + 
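+    // calc_irbl(): the implicitly restarted variant. Eigenvectors and
+    // sources must live on one grid, and src supplies the Nu starting
+    // vectors that seed the first block.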
+    assert(grid == src[0].Grid());
+    assert( Nu == src.size() );
+
+    Glog << std::string(74,'*') << std::endl;
+    Glog << fname + " starting iteration 0 / "<< MaxIter<< std::endl;
+    Glog << std::string(74,'*') << std::endl;
+    Glog <<" -- seek Nk = "<< Nk <<" vectors"<< std::endl;
+    Glog <<" -- accept Nstop = "<< Nstop <<" vectors"<< std::endl;
+    Glog <<" -- total Nm = "<< Nm <<" vectors"<< std::endl;
+    Glog <<" -- size of eval = "<< eval.size() << std::endl;
+    Glog <<" -- size of evec = "<< evec.size() << std::endl;
+    if ( diagonalisation == IRBLdiagonaliseWithEigen ) {
+      Glog << "Diagonalisation is Eigen "<< std::endl;
+#ifdef USE_LAPACK
+    } else if ( diagonalisation == IRBLdiagonaliseWithLAPACK ) {
+      Glog << "Diagonalisation is LAPACK "<< std::endl;
+#endif
+    } else {
+      abort();
+    }
+    Glog << std::string(74,'*') << std::endl;
+
+    assert(Nm == evec.size() && Nm == eval.size());
+
+    std::vector<std::vector<ComplexD>> lmd(Nu,std::vector<ComplexD>(Nm,0.0));
+    std::vector<std::vector<ComplexD>> lme(Nu,std::vector<ComplexD>(Nm,0.0));
+    std::vector<std::vector<ComplexD>> lmd2(Nu,std::vector<ComplexD>(Nm,0.0));
+    std::vector<std::vector<ComplexD>> lme2(Nu,std::vector<ComplexD>(Nm,0.0));
+    std::vector<RealD> eval2(Nm);
+    std::vector<RealD> resid(Nk);
+
+    Eigen::MatrixXcd Qt = Eigen::MatrixXcd::Zero(Nm,Nm);
+    Eigen::MatrixXcd Q = Eigen::MatrixXcd::Zero(Nm,Nm);
+
+    std::vector<int> Iconv(Nm);
+    std::vector<Field> B(Nm,grid); // waste of space replicating
+
+    std::vector<Field> f(Nu,grid);
+    std::vector<Field> f_copy(Nu,grid);
+    Field v(grid);
+
+    Nconv = 0;
+
+    RealD beta_k;
+
+    // set initial vector
+    for (int i=0; i
+
+  void calc_rbl(std::vector<RealD>& eval,
+                std::vector<Field>& evec,
+                const std::vector<Field>& src, int& Nconv)
+  {
+    std::string fname = std::string(cname+"::calc_rbl()");
+    GridBase *grid = evec[0].Grid();
+    assert(grid == src[0].Grid());
+    assert( Nu == src.size() );
+
+    int Np = (Nm-Nk);
+    if (Np > 0 && MaxIter > 1) Np /= MaxIter;
+    int Nblock_p = Np/Nu;
+    for(int i=0;i< evec.size();i++) evec[i].Advise()=AdviseInfrequentUse;
+
+    Glog << std::string(74,'*') << std::endl;
+    Glog << fname + " starting iteration 0 / "<< MaxIter<< std::endl;
+    Glog << std::string(74,'*') << std::endl;
+    Glog <<" -- seek (min) Nk = "<< Nk <<" vectors"<< std::endl;
+    Glog <<" -- seek (inc) Np = "<< Np <<" vectors"<< std::endl;
+    Glog <<" -- seek (max) Nm = "<< Nm <<" vectors"<< std::endl;
+    Glog <<" -- accept Nstop = "<< Nstop <<" vectors"<< std::endl;
+    Glog <<" -- size of eval = "<< eval.size() << std::endl;
+    Glog <<" -- size of evec = "<< evec.size() << std::endl;
+    if ( diagonalisation == IRBLdiagonaliseWithEigen ) {
+      Glog << "Diagonalisation is Eigen "<< std::endl;
+#ifdef USE_LAPACK
+    } else if ( diagonalisation == IRBLdiagonaliseWithLAPACK ) {
+      Glog << "Diagonalisation is LAPACK "<< std::endl;
+#endif
+    } else {
+      abort();
+    }
+    Glog << std::string(74,'*') << std::endl;
+
+    assert(Nm == evec.size() && Nm == eval.size());
+
+    std::vector<std::vector<ComplexD>> lmd(Nu,std::vector<ComplexD>(Nm,0.0));
+    std::vector<std::vector<ComplexD>> lme(Nu,std::vector<ComplexD>(Nm,0.0));
+    std::vector<std::vector<ComplexD>> lmd2(Nu,std::vector<ComplexD>(Nm,0.0));
+    std::vector<std::vector<ComplexD>> lme2(Nu,std::vector<ComplexD>(Nm,0.0));
+    std::vector<RealD> eval2(Nk);
+    std::vector<RealD> resid(Nm);
+
+    Eigen::MatrixXcd Qt = Eigen::MatrixXcd::Zero(Nm,Nm);
+    Eigen::MatrixXcd Q = Eigen::MatrixXcd::Zero(Nm,Nm);
+
+    std::vector<int> Iconv(Nm);
+//  int Ntest=Nu;
+//  std::vector<Field> B(Nm,grid); // waste of space replicating
+    std::vector<Field> B(1,grid); // waste of space replicating
+
+    std::vector<Field> f(Nu,grid);
+    std::vector<Field> f_copy(Nu,grid);
+    Field v(grid);
+
+    Nconv = 0;
+
+//  RealD beta_k;
+
+    // set initial vector
+    for (int i=0; i
+    std::vector<Field> Btmp(Nstop,grid); // waste of space replicating
+#if 0
+    for(int i=0; i
+
+  void blockwiseStep(std::vector<std::vector<ComplexD>>& lmd,
+                     std::vector<std::vector<ComplexD>>& lme,
+                     std::vector<Field>& evec,
+                     std::vector<Field>& w,
+
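+                     // w holds the Nu-vector block being extended this step;
+                     // w_copy (next argument) appears to be scratch storage for
+                     // the operator application (cf. the f_copy buffers above)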
std::vector& w_copy, + int b) + { + const RealD tiny = 1.0e-20; + + int Nu = w.size(); + int Nm = evec.size(); + assert( b < Nm/Nu ); +// GridCartesian *grid = evec[0]._grid; + + // converts block index to full indicies for an interval [L,R) + int L = Nu*b; + int R = Nu*(b+1); + + Real beta; + + Glog << "Using split grid"<< std::endl; +// LatticeGaugeField s_Umu(SGrid); + assert((Nu%mrhs)==0); + std::vector in(mrhs,f_grid); + + Field s_in(sf_grid); + Field s_out(sf_grid); + // unnecessary copy. Can or should it be avoided? + int k_start = 0; +while ( k_start < Nu) { + Glog << "k_start= "<0) { + for (int u=0; u0) { + for (int u=0; u0) { + // orthogonalize_blockhead(w[0],evec,b,Nu); + // for (int u=1; u& eval, + std::vector>& lmd, + std::vector>& lme, + int Nu, int Nk, int Nm, + Eigen::MatrixXcd & Qt, // Nm x Nm + GridBase *grid) + { + assert( Nk%Nu == 0 && Nm%Nu == 0 ); + assert( Nk <= Nm ); + Eigen::MatrixXcd BlockTriDiag = Eigen::MatrixXcd::Zero(Nk,Nk); + + for ( int u=0; u eigensolver(BlockTriDiag); + + for (int i = 0; i < Nk; i++) { + eval[Nk-1-i] = eigensolver.eigenvalues()(i); + } + for (int i = 0; i < Nk; i++) { + for (int j = 0; j < Nk; j++) { + Qt(j,Nk-1-i) = eigensolver.eigenvectors()(j,i); + //Qt(Nk-1-i,j) = eigensolver.eigenvectors()(i,j); + //Qt(i,j) = eigensolver.eigenvectors()(i,j); + } + } + } + +#ifdef USE_LAPACK + void diagonalize_lapack(std::vector& eval, + std::vector>& lmd, + std::vector>& lme, + int Nu, int Nk, int Nm, + Eigen::MatrixXcd & Qt, // Nm x Nm + GridBase *grid) + { + Glog << "diagonalize_lapack: Nu= "<_Nprocessors; + int node = grid->_processor; + int interval = (NN/total)+1; + double vl = 0.0, vu = 0.0; + MKL_INT il = interval*node+1 , iu = interval*(node+1); + if (iu > NN) iu=NN; + Glog << "node "<= il-1; i--){ + evals_tmp[i] = evals_tmp[i - (il-1)]; + if (il>1) evals_tmp[i-(il-1)]=0.; + for (int j = 0; j< NN; j++){ + evec_tmp[i*NN+j] = evec_tmp[(i - (il-1))*NN+j]; + if (il>1) { + evec_tmp[(i-(il-1))*NN+j].imag=0.; + evec_tmp[(i-(il-1))*NN+j].real=0.; + } + } + } + } + { + grid->GlobalSumVector(evals_tmp,NN); + grid->GlobalSumVector((double*)evec_tmp,2*NN*NN); + } + } + // Safer to sort instead of just reversing it, + // but the document of the routine says evals are sorted in increasing order. + // qr gives evals in decreasing order. 
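+    // Here the increasing LAPACK ordering is kept (the commented-out
+    // reversal below is the alternative); the spectrum is reordered
+    // downstream (cf. the SortEigen helper / _sort member).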
+// for(int i=0;i + ( evec_tmp[i*Nk+j].real, + evec_tmp[i*Nk+j].imag); +// ( evec_tmp[(Nk-1-j)*Nk+Nk-1-i].real, +// evec_tmp[(Nk-1-j)*Nk+Nk-1-i].imag); + + } + } + +if (1){ + Eigen::SelfAdjointEigenSolver eigensolver(BlockTriDiag); + + for (int i = 0; i < Nk; i++) { + Glog << "eval = "<& eval, + std::vector>& lmd, + std::vector>& lme, + int Nu, int Nk, int Nm, + Eigen::MatrixXcd & Qt, + GridBase *grid) + { + Qt = Eigen::MatrixXcd::Identity(Nm,Nm); + if ( diagonalisation == IRBLdiagonaliseWithEigen ) { + diagonalize_Eigen(eval,lmd,lme,Nu,Nk,Nm,Qt,grid); +#ifdef USE_LAPACK + } else if ( diagonalisation == IRBLdiagonaliseWithLAPACK ) { + diagonalize_lapack(eval,lmd,lme,Nu,Nk,Nm,Qt,grid); +#endif + } else { + assert(0); + } + } + + + void unpackHermitBlockTriDiagMatToEigen( + std::vector>& lmd, + std::vector>& lme, + int Nu, int Nb, int Nk, int Nm, + Eigen::MatrixXcd& M) + { + //Glog << "unpackHermitBlockTriDiagMatToEigen() begin" << '\n'; + assert( Nk%Nu == 0 && Nm%Nu == 0 ); + assert( Nk <= Nm ); + M = Eigen::MatrixXcd::Zero(Nk,Nk); + + // rearrange + for ( int u=0; u>& lmd, + std::vector>& lme, + int Nu, int Nb, int Nk, int Nm, + Eigen::MatrixXcd& M) + { + //Glog << "packHermitBlockTriDiagMatfromEigen() begin" << '\n'; + assert( Nk%Nu == 0 && Nm%Nu == 0 ); + assert( Nk <= Nm ); + + // rearrange + for ( int u=0; u QRD(Mtmp); + Q = QRD.householderQ(); + R = QRD.matrixQR(); // upper triangular part is the R matrix. + // lower triangular part used to represent series + // of Q sequence. + + // equivalent operation of Qprod *= Q + //M = Eigen::MatrixXcd::Zero(Nm,Nm); + + //for (int i=0; i Nm) kmax = Nm; + for (int k=i; ki) M(i,j) = conj(M(j,i)); + // if (i-j > Nu || j-i > Nu) M(i,j) = 0.; + // } + //} + + //Glog << "shiftedQRDecompEigen() end" << endl; + } + + void exampleQRDecompEigen(void) + { + Eigen::MatrixXd A = Eigen::MatrixXd::Zero(3,3); + Eigen::MatrixXd Q = Eigen::MatrixXd::Zero(3,3); + Eigen::MatrixXd R = Eigen::MatrixXd::Zero(3,3); + Eigen::MatrixXd P = Eigen::MatrixXd::Zero(3,3); + + A(0,0) = 12.0; + A(0,1) = -51.0; + A(0,2) = 4.0; + A(1,0) = 6.0; + A(1,1) = 167.0; + A(1,2) = -68.0; + A(2,0) = -4.0; + A(2,1) = 24.0; + A(2,2) = -41.0; + + Glog << "matrix A before ColPivHouseholder" << std::endl; + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "A[" << i << "," << j << "] = " << A(i,j) << '\n'; + } + } + Glog << std::endl; + + Eigen::ColPivHouseholderQR QRD(A); + + Glog << "matrix A after ColPivHouseholder" << std::endl; + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "A[" << i << "," << j << "] = " << A(i,j) << '\n'; + } + } + Glog << std::endl; + + Glog << "HouseholderQ with sequence lenth = nonzeroPiviots" << std::endl; + Q = QRD.householderQ().setLength(QRD.nonzeroPivots()); + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "Q[" << i << "," << j << "] = " << Q(i,j) << '\n'; + } + } + Glog << std::endl; + + Glog << "HouseholderQ with sequence lenth = 1" << std::endl; + Q = QRD.householderQ().setLength(1); + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "Q[" << i << "," << j << "] = " << Q(i,j) << '\n'; + } + } + Glog << std::endl; + + Glog << "HouseholderQ with sequence lenth = 2" << std::endl; + Q = QRD.householderQ().setLength(2); + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "Q[" << i << "," << j << "] = " << Q(i,j) << '\n'; + } + } + Glog << std::endl; + + Glog << "matrixR" << std::endl; + R = QRD.matrixR(); + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + 
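+        // only the upper triangle of matrixR() is meaningful as R; the
+        // lower part carries Householder data (cf. the matrixQR() note above)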
Glog << "R[" << i << "," << j << "] = " << R(i,j) << '\n'; + } + } + Glog << std::endl; + + Glog << "rank = " << QRD.rank() << std::endl; + Glog << "threshold = " << QRD.threshold() << std::endl; + + Glog << "matrixP" << std::endl; + P = QRD.colsPermutation(); + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "P[" << i << "," << j << "] = " << P(i,j) << '\n'; + } + } + Glog << std::endl; + + + Glog << "QR decomposition without column pivoting" << std::endl; + + A(0,0) = 12.0; + A(0,1) = -51.0; + A(0,2) = 4.0; + A(1,0) = 6.0; + A(1,1) = 167.0; + A(1,2) = -68.0; + A(2,0) = -4.0; + A(2,1) = 24.0; + A(2,2) = -41.0; + + Glog << "matrix A before Householder" << std::endl; + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "A[" << i << "," << j << "] = " << A(i,j) << '\n'; + } + } + Glog << std::endl; + + Eigen::HouseholderQR QRDplain(A); + + Glog << "HouseholderQ" << std::endl; + Q = QRDplain.householderQ(); + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "Q[" << i << "," << j << "] = " << Q(i,j) << '\n'; + } + } + Glog << std::endl; + + Glog << "matrix A after Householder" << std::endl; + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "A[" << i << "," << j << "] = " << A(i,j) << '\n'; + } + } + Glog << std::endl; + } + + }; +} +#undef Glog +#undef USE_LAPACK +#undef CUDA_COMPLEX +#undef CUDA_FLOAT +#undef MAKE_CUDA_COMPLEX +#undef CUDA_GEMM +#endif diff --git a/Grid/lattice/Lattice_rng.h b/Grid/lattice/Lattice_rng.h index 6857dc84..180b8437 100644 --- a/Grid/lattice/Lattice_rng.h +++ b/Grid/lattice/Lattice_rng.h @@ -440,7 +440,17 @@ public: _grid->GlobalCoorToGlobalIndex(gcoor,gidx); _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); +#if 1 assert(rank == _grid->ThisRank() ); +#else +// + if (rank != _grid->ThisRank() ){ + std::cout <<"rank "<ThisRank() "<<_grid->ThisRank()<< std::endl; +// exit(-42); +// assert(0); + } +#endif + int l_idx=generator_idx(o_idx,i_idx); _generators[l_idx] = master_engine; diff --git a/Grid/util/Init.cc b/Grid/util/Init.cc index c3ac2424..d013763a 100644 --- a/Grid/util/Init.cc +++ b/Grid/util/Init.cc @@ -167,14 +167,13 @@ void GridCmdOptionInt(std::string &str,int & val) return; } -void GridCmdOptionFloat(std::string &str,float & val) +void GridCmdOptionFloat(std::string &str,double & val) { std::stringstream ss(str); ss>>val; return; } - void GridParseLayout(char **argv,int argc, Coordinate &latt_c, Coordinate &mpi_c) diff --git a/Grid/util/Init.h b/Grid/util/Init.h index 585660a1..bdf0bcac 100644 --- a/Grid/util/Init.h +++ b/Grid/util/Init.h @@ -57,7 +57,7 @@ void GridCmdOptionCSL(std::string str,std::vector & vec); template void GridCmdOptionIntVector(const std::string &str,VectorInt & vec); void GridCmdOptionInt(std::string &str,int & val); -void GridCmdOptionFloat(std::string &str,float & val); +void GridCmdOptionFloat(std::string &str,double & val); void GridParseLayout(char **argv,int argc, diff --git a/tests/lanczos/Test_dwf_block_lanczos.README b/tests/lanczos/Test_dwf_block_lanczos.README new file mode 100644 index 00000000..179f9037 --- /dev/null +++ b/tests/lanczos/Test_dwf_block_lanczos.README @@ -0,0 +1,73 @@ +#Example script +DIR=/gpfs/alpine/phy157/proj-shared/phy157dwf/chulwoo/Grid/BL/build/tests/lanczos +BIN=${DIR}/Test_dwf_block_lanczos + +VOL='--grid 16.16.16.32 ' +GRID='--mpi 1.1.1.4 ' +CONF='--gconf ckpoint_lat.IEEE64BIG.2000 ' +OPT='--mass 0.01 --M5 1.8 --phase in.params --omega in.params --shm 4096' +#BL='--rbl 16.1024.128.1000.10 --split 1.1.4.4 
--check_int 100 --resid 1.0e-5 --cheby_l 0.007 --cheby_u 7 --cheby_n 51' +BL='--rbl 4.128.16.100.10 --split 1.1.1.4 --check_int 25 --resid 1.0e-5 --cheby_l 0.007 --cheby_u 7 --cheby_n 51' + +ARGS=${CONF}" "${OPT}" "${BL}" "${VOL}" "${GRID} +export APP="${BIN} ${ARGS}" +echo APP=${APP} +#export JS="jsrun --nrs 32 -a4 -g4 -c42 -dpacked -b packed:7 --smpiargs="-gpu" " +export JS="jsrun --nrs 1 -a4 -g4 -c42 -dpacked -b packed:10 --smpiargs="-gpu" " +$JS $APP + +#sample in.param + +boundary_phase 0 1 0 +boundary_phase 1 1 0 +boundary_phase 2 1 0 +boundary_phase 3 -1 0 + +omega 0 0.5 0 +omega 1 0.5 0 +omega 2 0.5 0 +omega 3 0.5 0 +omega 4 0.5 0 +omega 5 0.5 0 +omega 6 0.5 0 +omega 7 0.5 0 +omega 8 0.5 0 +omega 9 0.5 0 +omega 10 0.5 0 +omega 11 0.5 0 + + +#output + +Grid : Message : 1.717474 s : Gauge Configuration ckpoint_lat.IEEE64BIG.2000 +Grid : Message : 1.717478 s : boundary_phase[0] = (1,0) +Grid : Message : 1.717497 s : boundary_phase[1] = (1,0) +Grid : Message : 1.717500 s : boundary_phase[2] = (1,0) +Grid : Message : 1.717503 s : boundary_phase[3] = (-1,0) +Grid : Message : 1.717506 s : Ls 12 +Grid : Message : 1.717507 s : mass 0.01 +Grid : Message : 1.717510 s : M5 1.8 +Grid : Message : 1.717512 s : mob_b 1.5 +Grid : Message : 1.717514 s : omega[0] = (0.5,0) +Grid : Message : 1.717517 s : omega[1] = (0.5,0) +Grid : Message : 1.717520 s : omega[2] = (0.5,0) +Grid : Message : 1.717523 s : omega[3] = (0.5,0) +Grid : Message : 1.717526 s : omega[4] = (0.5,0) +Grid : Message : 1.717529 s : omega[5] = (0.5,0) +Grid : Message : 1.717532 s : omega[6] = (0.5,0) +Grid : Message : 1.717535 s : omega[7] = (0.5,0) +Grid : Message : 1.717538 s : omega[8] = (0.5,0) +Grid : Message : 1.717541 s : omega[9] = (0.5,0) +Grid : Message : 1.717544 s : omega[10] = (0.5,0) +Grid : Message : 1.717547 s : omega[11] = (0.5,0) +Grid : Message : 1.717550 s : Nu 4 +Grid : Message : 1.717551 s : Nk 128 +Grid : Message : 1.717552 s : Np 16 +Grid : Message : 1.717553 s : Nm 288 +Grid : Message : 1.717554 s : Nstop 100 +Grid : Message : 1.717555 s : Ntest 25 +Grid : Message : 1.717557 s : MaxIter 10 +Grid : Message : 1.717558 s : resid 1e-05 +Grid : Message : 1.717560 s : Cheby Poly 0.007,7,51 + + diff --git a/tests/lanczos/Test_dwf_block_lanczos.cc b/tests/lanczos/Test_dwf_block_lanczos.cc new file mode 100644 index 00000000..7449e32a --- /dev/null +++ b/tests/lanczos/Test_dwf_block_lanczos.cc @@ -0,0 +1,408 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_block_lanczos.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include +#include + +using namespace std; +using namespace Grid; +//using namespace Grid::QCD; + +//typedef typename GparityDomainWallFermionR::FermionField FermionField; +typedef typename ZMobiusFermionF::FermionField FermionField; + +RealD AllZero(RealD x){ return 0.;} + +class CmdJobParams +{ + public: + std::string gaugefile; + + int Ls; + double mass; + double M5; + double mob_b; + std::vector omega; + std::vector boundary_phase; + std::vector mpi_split; + + LanczosType Impl; + int Nu; + int Nk; + int Np; + int Nm; + int Nstop; + int Ntest; + int MaxIter; + double resid; + + double low; + double high; + int order; + + CmdJobParams() + : gaugefile("Hot"), + Ls(8), mass(0.01), M5(1.8), mob_b(1.5), + Impl(LanczosType::irbl),mpi_split(4,1), + Nu(4), Nk(200), Np(200), Nstop(100), Ntest(1), MaxIter(10), resid(1.0e-8), + low(0.2), high(5.5), order(11) + {Nm=Nk+Np;}; + + void Parse(char **argv, int argc); +}; + + +void CmdJobParams::Parse(char **argv,int argc) +{ + std::string arg; + std::vector vi; + double re,im; + int expect, idx; + std::string vstr; + std::ifstream pfile; + + if( GridCmdOptionExists(argv,argv+argc,"--gconf") ){ + gaugefile = GridCmdOptionPayload(argv,argv+argc,"--gconf"); + } + + if( GridCmdOptionExists(argv,argv+argc,"--phase") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--phase"); + pfile.open(arg); + assert(pfile); + expect = 0; + while( pfile >> vstr ) { + if ( vstr.compare("boundary_phase") == 0 ) { + pfile >> vstr; + GridCmdOptionInt(vstr,idx); + assert(expect==idx); + pfile >> vstr; + GridCmdOptionFloat(vstr,re); + pfile >> vstr; + GridCmdOptionFloat(vstr,im); + boundary_phase.push_back({re,im}); + expect++; + } + } + pfile.close(); + } else { + for (int i=0; i<4; ++i) boundary_phase.push_back({1.,0.}); + } + + if( GridCmdOptionExists(argv,argv+argc,"--omega") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--omega"); + pfile.open(arg); + assert(pfile); + Ls = 0; + while( pfile >> vstr ) { + if ( vstr.compare("omega") == 0 ) { + pfile >> vstr; + GridCmdOptionInt(vstr,idx); + assert(Ls==idx); + pfile >> vstr; + GridCmdOptionFloat(vstr,re); + pfile >> vstr; + GridCmdOptionFloat(vstr,im); + omega.push_back({re,im}); + Ls++; + } + } + pfile.close(); + } else { + if( GridCmdOptionExists(argv,argv+argc,"--Ls") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--Ls"); + GridCmdOptionInt(arg,Ls); + } + } + + if( GridCmdOptionExists(argv,argv+argc,"--mass") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--mass"); + GridCmdOptionFloat(arg,mass); + } + + if( GridCmdOptionExists(argv,argv+argc,"--M5") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--M5"); + GridCmdOptionFloat(arg,M5); + } + + if( GridCmdOptionExists(argv,argv+argc,"--mob_b") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--mob_b"); + GridCmdOptionFloat(arg,mob_b); + } + + if( GridCmdOptionExists(argv,argv+argc,"--irbl") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--irbl"); + GridCmdOptionIntVector(arg,vi); + Nu = vi[0]; + Nk = vi[1]; + Np = vi[2]; + Nstop = vi[3]; + MaxIter = vi[4]; + // ypj[fixme] mode overriding message is needed. 
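+    // dot-separated payload: --irbl Nu.Nk.Np.Nstop.MaxIter
+    // (the README's "--rbl 4.128.16.100.10" example maps the same way:
+    //  Nu=4, Nk=128, Np=16, Nstop=100, MaxIter=10)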
+ Impl = LanczosType::irbl; + Nm = Nk+Np; + } + + // block Lanczos with explicit extension of its dimensions + if( GridCmdOptionExists(argv,argv+argc,"--rbl") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--rbl"); + GridCmdOptionIntVector(arg,vi); + Nu = vi[0]; + Nk = vi[1]; + Np = vi[2]; // vector space is enlarged by adding Np vectors + Nstop = vi[3]; + MaxIter = vi[4]; + // ypj[fixme] mode overriding message is needed. + Impl = LanczosType::rbl; + Nm = Nk+Np*MaxIter; + } + +#if 1 + // block Lanczos with explicit extension of its dimensions + if( GridCmdOptionExists(argv,argv+argc,"--split") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--split"); + GridCmdOptionIntVector(arg,vi); + for(int i=0;i seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGridF); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + // ypj [note] why seed RNG5 again? bug? In this case, run with a default seed(). + GridParallelRNG RNG5rb(FrbGridF); RNG5rb.SeedFixedIntegers(seeds5); + + LatticeGaugeField Umu(UGrid); + std::vector U(4,UGrid); + LatticeGaugeFieldF UmuF(UGridF); + std::vector UF(4,UGridF); + + if ( JP.gaugefile.compare("Hot") == 0 ) { + SU3::HotConfiguration(RNG4, Umu); + } else { + FieldMetaData header; + NerscIO::readConfiguration(Umu,header,JP.gaugefile); + // ypj [fixme] additional checks for the loaded configuration? + } + precisionChange (UmuF,Umu); + + for(int mu=0;mu(Umu,mu); + } + + RealD mass = JP.mass; + RealD M5 = JP.M5; + +// ypj [fixme] flexible support for a various Fermions +// RealD mob_b = JP.mob_b; // Gparity +// std::vector omega; // ZMobius + +// GparityMobiusFermionD ::ImplParams params; +// std::vector twists({1,1,1,0}); +// params.twists = twists; +// GparityMobiusFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params); +// SchurDiagTwoOperator HermOp(Ddwf); + + +// int mrhs = JP.Nu; + int Ndir=4; + auto mpi_layout = GridDefaultMpi(); + std::vector mpi_split (Ndir,1); +#if 0 + int tmp=mrhs, dir=0; + std::cout << GridLogMessage << "dir= "<_processor,re); + src_tmp=re; + pickCheckerboard(Odd,src[i],src_tmp); + } + RNG5.Report(); +} else { + std::cout << GridLogMessage << "Using RNG5rb"< evec(JP.Nm,FrbGridF); + for(int i=0;i<1;++i){ + std::cout << GridLogMessage << i <<" / "<< JP.Nm <<" grid pointer "<< evec[i].Grid() << std::endl; + }; + + int Nconv; + IRBL.calc(eval,evec,src,Nconv,JP.Impl); + + + Grid_finalize(); +} diff --git a/tests/lanczos/Test_dwf_block_lanczos.cc.double b/tests/lanczos/Test_dwf_block_lanczos.cc.double new file mode 100644 index 00000000..c71b80ec --- /dev/null +++ b/tests/lanczos/Test_dwf_block_lanczos.cc.double @@ -0,0 +1,401 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_block_lanczos.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include +#include + +using namespace std; +using namespace Grid; +//using namespace Grid::QCD; + +//typedef typename GparityDomainWallFermionR::FermionField FermionField; +typedef typename ZMobiusFermionR::FermionField FermionField; + +RealD AllZero(RealD x){ return 0.;} + +class CmdJobParams +{ + public: + std::string gaugefile; + + int Ls; + double mass; + double M5; + double mob_b; + std::vector omega; + std::vector boundary_phase; + std::vector mpi_split; + + LanczosType Impl; + int Nu; + int Nk; + int Np; + int Nm; + int Nstop; + int Ntest; + int MaxIter; + double resid; + + double low; + double high; + int order; + + CmdJobParams() + : gaugefile("Hot"), + Ls(8), mass(0.01), M5(1.8), mob_b(1.5), + Impl(LanczosType::irbl),mpi_split(4,1), + Nu(4), Nk(200), Np(200), Nstop(100), Ntest(1), MaxIter(10), resid(1.0e-8), + low(0.2), high(5.5), order(11) + {Nm=Nk+Np;}; + + void Parse(char **argv, int argc); +}; + + +void CmdJobParams::Parse(char **argv,int argc) +{ + std::string arg; + std::vector vi; + double re,im; + int expect, idx; + std::string vstr; + std::ifstream pfile; + + if( GridCmdOptionExists(argv,argv+argc,"--gconf") ){ + gaugefile = GridCmdOptionPayload(argv,argv+argc,"--gconf"); + } + + if( GridCmdOptionExists(argv,argv+argc,"--phase") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--phase"); + pfile.open(arg); + assert(pfile); + expect = 0; + while( pfile >> vstr ) { + if ( vstr.compare("boundary_phase") == 0 ) { + pfile >> vstr; + GridCmdOptionInt(vstr,idx); + assert(expect==idx); + pfile >> vstr; + GridCmdOptionFloat(vstr,re); + pfile >> vstr; + GridCmdOptionFloat(vstr,im); + boundary_phase.push_back({re,im}); + expect++; + } + } + pfile.close(); + } else { + for (int i=0; i<4; ++i) boundary_phase.push_back({1.,0.}); + } + + if( GridCmdOptionExists(argv,argv+argc,"--omega") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--omega"); + pfile.open(arg); + assert(pfile); + Ls = 0; + while( pfile >> vstr ) { + if ( vstr.compare("omega") == 0 ) { + pfile >> vstr; + GridCmdOptionInt(vstr,idx); + assert(Ls==idx); + pfile >> vstr; + GridCmdOptionFloat(vstr,re); + pfile >> vstr; + GridCmdOptionFloat(vstr,im); + omega.push_back({re,im}); + Ls++; + } + } + pfile.close(); + } else { + if( GridCmdOptionExists(argv,argv+argc,"--Ls") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--Ls"); + GridCmdOptionInt(arg,Ls); + } + } + + if( GridCmdOptionExists(argv,argv+argc,"--mass") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--mass"); + GridCmdOptionFloat(arg,mass); + } + + if( GridCmdOptionExists(argv,argv+argc,"--M5") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--M5"); + GridCmdOptionFloat(arg,M5); + } + + if( GridCmdOptionExists(argv,argv+argc,"--mob_b") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--mob_b"); + GridCmdOptionFloat(arg,mob_b); + } + + if( GridCmdOptionExists(argv,argv+argc,"--irbl") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--irbl"); + GridCmdOptionIntVector(arg,vi); + Nu = vi[0]; + Nk = vi[1]; + Np = vi[2]; + Nstop = vi[3]; + MaxIter = vi[4]; + // ypj[fixme] mode overriding message is needed. 
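+    // dot-separated payload: --irbl Nu.Nk.Np.Nstop.MaxIter (see the README)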
+ Impl = LanczosType::irbl; + Nm = Nk+Np; + } + + // block Lanczos with explicit extension of its dimensions + if( GridCmdOptionExists(argv,argv+argc,"--rbl") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--rbl"); + GridCmdOptionIntVector(arg,vi); + Nu = vi[0]; + Nk = vi[1]; + Np = vi[2]; // vector space is enlarged by adding Np vectors + Nstop = vi[3]; + MaxIter = vi[4]; + // ypj[fixme] mode overriding message is needed. + Impl = LanczosType::rbl; + Nm = Nk+Np*MaxIter; + } + +#if 1 + // block Lanczos with explicit extension of its dimensions + if( GridCmdOptionExists(argv,argv+argc,"--split") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--split"); + GridCmdOptionIntVector(arg,vi); + for(int i=0;i seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + // ypj [note] why seed RNG5 again? bug? In this case, run with a default seed(). + GridParallelRNG RNG5rb(FrbGrid); RNG5rb.SeedFixedIntegers(seeds5); + + LatticeGaugeField Umu(UGrid); + std::vector U(4,UGrid); + + if ( JP.gaugefile.compare("Hot") == 0 ) { + SU3::HotConfiguration(RNG4, Umu); + } else { + FieldMetaData header; + NerscIO::readConfiguration(Umu,header,JP.gaugefile); + // ypj [fixme] additional checks for the loaded configuration? + } + + for(int mu=0;mu(Umu,mu); + } + + RealD mass = JP.mass; + RealD M5 = JP.M5; + +// ypj [fixme] flexible support for a various Fermions +// RealD mob_b = JP.mob_b; // Gparity +// std::vector omega; // ZMobius + +// GparityMobiusFermionD ::ImplParams params; +// std::vector twists({1,1,1,0}); +// params.twists = twists; +// GparityMobiusFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params); +// SchurDiagTwoOperator HermOp(Ddwf); + + +// int mrhs = JP.Nu; + int Ndir=4; + auto mpi_layout = GridDefaultMpi(); + std::vector mpi_split (Ndir,1); +#if 0 + int tmp=mrhs, dir=0; + std::cout << GridLogMessage << "dir= "<_processor,re); + src_tmp=re; + pickCheckerboard(Odd,src[i],src_tmp); + } + RNG5.Report(); +} else { + std::cout << GridLogMessage << "Using RNG5rb"< evec(JP.Nm,FrbGrid); + for(int i=0;i<1;++i){ + std::cout << GridLogMessage << i <<" / "<< JP.Nm <<" grid pointer "<< evec[i].Grid() << std::endl; + }; + + int Nconv; + IRBL.calc(eval,evec,src,Nconv,JP.Impl); + + + Grid_finalize(); +} diff --git a/tests/lanczos/Test_dwf_block_lanczos.cc.single b/tests/lanczos/Test_dwf_block_lanczos.cc.single new file mode 100644 index 00000000..7449e32a --- /dev/null +++ b/tests/lanczos/Test_dwf_block_lanczos.cc.single @@ -0,0 +1,408 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_block_lanczos.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include +#include + +using namespace std; +using namespace Grid; +//using namespace Grid::QCD; + +//typedef typename GparityDomainWallFermionR::FermionField FermionField; +typedef typename ZMobiusFermionF::FermionField FermionField; + +RealD AllZero(RealD x){ return 0.;} + +class CmdJobParams +{ + public: + std::string gaugefile; + + int Ls; + double mass; + double M5; + double mob_b; + std::vector omega; + std::vector boundary_phase; + std::vector mpi_split; + + LanczosType Impl; + int Nu; + int Nk; + int Np; + int Nm; + int Nstop; + int Ntest; + int MaxIter; + double resid; + + double low; + double high; + int order; + + CmdJobParams() + : gaugefile("Hot"), + Ls(8), mass(0.01), M5(1.8), mob_b(1.5), + Impl(LanczosType::irbl),mpi_split(4,1), + Nu(4), Nk(200), Np(200), Nstop(100), Ntest(1), MaxIter(10), resid(1.0e-8), + low(0.2), high(5.5), order(11) + {Nm=Nk+Np;}; + + void Parse(char **argv, int argc); +}; + + +void CmdJobParams::Parse(char **argv,int argc) +{ + std::string arg; + std::vector vi; + double re,im; + int expect, idx; + std::string vstr; + std::ifstream pfile; + + if( GridCmdOptionExists(argv,argv+argc,"--gconf") ){ + gaugefile = GridCmdOptionPayload(argv,argv+argc,"--gconf"); + } + + if( GridCmdOptionExists(argv,argv+argc,"--phase") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--phase"); + pfile.open(arg); + assert(pfile); + expect = 0; + while( pfile >> vstr ) { + if ( vstr.compare("boundary_phase") == 0 ) { + pfile >> vstr; + GridCmdOptionInt(vstr,idx); + assert(expect==idx); + pfile >> vstr; + GridCmdOptionFloat(vstr,re); + pfile >> vstr; + GridCmdOptionFloat(vstr,im); + boundary_phase.push_back({re,im}); + expect++; + } + } + pfile.close(); + } else { + for (int i=0; i<4; ++i) boundary_phase.push_back({1.,0.}); + } + + if( GridCmdOptionExists(argv,argv+argc,"--omega") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--omega"); + pfile.open(arg); + assert(pfile); + Ls = 0; + while( pfile >> vstr ) { + if ( vstr.compare("omega") == 0 ) { + pfile >> vstr; + GridCmdOptionInt(vstr,idx); + assert(Ls==idx); + pfile >> vstr; + GridCmdOptionFloat(vstr,re); + pfile >> vstr; + GridCmdOptionFloat(vstr,im); + omega.push_back({re,im}); + Ls++; + } + } + pfile.close(); + } else { + if( GridCmdOptionExists(argv,argv+argc,"--Ls") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--Ls"); + GridCmdOptionInt(arg,Ls); + } + } + + if( GridCmdOptionExists(argv,argv+argc,"--mass") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--mass"); + GridCmdOptionFloat(arg,mass); + } + + if( GridCmdOptionExists(argv,argv+argc,"--M5") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--M5"); + GridCmdOptionFloat(arg,M5); + } + + if( GridCmdOptionExists(argv,argv+argc,"--mob_b") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--mob_b"); + GridCmdOptionFloat(arg,mob_b); + } + + if( GridCmdOptionExists(argv,argv+argc,"--irbl") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--irbl"); + GridCmdOptionIntVector(arg,vi); + Nu = vi[0]; + Nk = vi[1]; + Np = vi[2]; + Nstop = vi[3]; + MaxIter = vi[4]; + // ypj[fixme] mode overriding message is needed. 
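+    // dot-separated payload: --irbl Nu.Nk.Np.Nstop.MaxIter (see the README)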
+ Impl = LanczosType::irbl; + Nm = Nk+Np; + } + + // block Lanczos with explicit extension of its dimensions + if( GridCmdOptionExists(argv,argv+argc,"--rbl") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--rbl"); + GridCmdOptionIntVector(arg,vi); + Nu = vi[0]; + Nk = vi[1]; + Np = vi[2]; // vector space is enlarged by adding Np vectors + Nstop = vi[3]; + MaxIter = vi[4]; + // ypj[fixme] mode overriding message is needed. + Impl = LanczosType::rbl; + Nm = Nk+Np*MaxIter; + } + +#if 1 + // block Lanczos with explicit extension of its dimensions + if( GridCmdOptionExists(argv,argv+argc,"--split") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--split"); + GridCmdOptionIntVector(arg,vi); + for(int i=0;i seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGridF); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + // ypj [note] why seed RNG5 again? bug? In this case, run with a default seed(). + GridParallelRNG RNG5rb(FrbGridF); RNG5rb.SeedFixedIntegers(seeds5); + + LatticeGaugeField Umu(UGrid); + std::vector U(4,UGrid); + LatticeGaugeFieldF UmuF(UGridF); + std::vector UF(4,UGridF); + + if ( JP.gaugefile.compare("Hot") == 0 ) { + SU3::HotConfiguration(RNG4, Umu); + } else { + FieldMetaData header; + NerscIO::readConfiguration(Umu,header,JP.gaugefile); + // ypj [fixme] additional checks for the loaded configuration? + } + precisionChange (UmuF,Umu); + + for(int mu=0;mu(Umu,mu); + } + + RealD mass = JP.mass; + RealD M5 = JP.M5; + +// ypj [fixme] flexible support for a various Fermions +// RealD mob_b = JP.mob_b; // Gparity +// std::vector omega; // ZMobius + +// GparityMobiusFermionD ::ImplParams params; +// std::vector twists({1,1,1,0}); +// params.twists = twists; +// GparityMobiusFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params); +// SchurDiagTwoOperator HermOp(Ddwf); + + +// int mrhs = JP.Nu; + int Ndir=4; + auto mpi_layout = GridDefaultMpi(); + std::vector mpi_split (Ndir,1); +#if 0 + int tmp=mrhs, dir=0; + std::cout << GridLogMessage << "dir= "<_processor,re); + src_tmp=re; + pickCheckerboard(Odd,src[i],src_tmp); + } + RNG5.Report(); +} else { + std::cout << GridLogMessage << "Using RNG5rb"< evec(JP.Nm,FrbGridF); + for(int i=0;i<1;++i){ + std::cout << GridLogMessage << i <<" / "<< JP.Nm <<" grid pointer "<< evec[i].Grid() << std::endl; + }; + + int Nconv; + IRBL.calc(eval,evec,src,Nconv,JP.Impl); + + + Grid_finalize(); +} diff --git a/tests/lanczos/Test_dwf_lanczos.cc b/tests/lanczos/Test_dwf_lanczos.cc index 1fe29bb2..2ee5299b 100644 --- a/tests/lanczos/Test_dwf_lanczos.cc +++ b/tests/lanczos/Test_dwf_lanczos.cc @@ -35,26 +35,43 @@ template struct Setup{}; template<> -struct Setup{ - static GparityMobiusFermionR* getAction(LatticeGaugeField &Umu, +struct Setup{ + static GparityMobiusFermionF* getAction(LatticeGaugeFieldF &Umu, GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){ - RealD mass=0.01; + RealD mass=0.00054; RealD M5=1.8; RealD mob_b=1.5; GparityMobiusFermionD ::ImplParams params; std::vector twists({1,1,1,0}); params.twists = twists; - return new GparityMobiusFermionR(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params); + return new GparityMobiusFermionF(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params); } }; template<> -struct Setup{ - static DomainWallFermionR* getAction(LatticeGaugeField &Umu, +struct Setup{ + static DomainWallFermionF* getAction(LatticeGaugeFieldF &Umu, GridCartesian* 
FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){ - RealD mass=0.01; + RealD mass=0.00054; RealD M5=1.8; - return new DomainWallFermionR(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + return new DomainWallFermionF(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + } +}; + +template<> +struct Setup{ + static MobiusFermionF* getAction(LatticeGaugeFieldF &Umu, + GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){ + RealD mass=0.00054; + RealD M5=1.8; + RealD mob_b=1.5; + std::vector boundary = {1,1,1,-1}; + MobiusFermionF::ImplParams Params(boundary); + + std::cout << GridLogMessage << "mass "<{ template void run(){ typedef typename Action::FermionField FermionField; - const int Ls=8; + const int Ls=12; GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); - printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n",UGrid,UrbGrid,FGrid,FrbGrid); +// printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n",UGrid,UrbGrid,FGrid,FrbGrid); + + GridCartesian* UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi()); + GridRedBlackCartesian* UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF); + GridCartesian* FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls, UGridF); + GridRedBlackCartesian* FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGridF); + std::vector seeds4({1,2,3,4}); std::vector seeds5({5,6,7,8}); - GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); - GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); - GridParallelRNG RNG5rb(FrbGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG5(FGridF); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGridF); RNG4.SeedFixedIntegers(seeds4); + GridParallelRNG RNG5rb(FrbGridF); RNG5.SeedFixedIntegers(seeds5); LatticeGaugeField Umu(UGrid); - SU::HotConfiguration(RNG4, Umu); +// SU::HotConfiguration(RNG4, Umu); + FieldMetaData header; + std::string file("./config"); - Action *action = Setup::getAction(Umu,FGrid,FrbGrid,UGrid,UrbGrid); +// int precision32 = 0; +// int tworow = 0; +// NerscIO::writeConfiguration(Umu,file,tworow,precision32); + NerscIO::readConfiguration(Umu,header,file); + + LatticeGaugeFieldF UmuF(UGridF); + precisionChange(UmuF, Umu); + + Action *action = Setup::getAction(UmuF,FGridF,FrbGridF,UGridF,UrbGridF); //MdagMLinearOperator HermOp(Ddwf); - SchurDiagTwoOperator HermOp(*action); +// SchurDiagTwoOperator HermOp(*action); + SchurDiagOneOperator HermOp(*action); - const int Nstop = 30; - const int Nk = 40; + const int Nstop = 150; + const int Nk = 160; const int Np = 40; const int Nm = Nk+Np; const int MaxIt= 10000; - RealD resid = 1.0e-8; + RealD resid = 1.0e-6; + std::cout << GridLogMessage << "Nstop "< Coeffs { 0.,-1.}; Polynomial PolyX(Coeffs); - Chebyshev Cheby(0.2,5.,11); + Chebyshev Cheby(0.0000006,5.5,4001); + std::cout << GridLogMessage << "Cheby(0.0000006,5.5,4001) "< OpCheby(Cheby,HermOp); - PlainHermOp Op (HermOp); + PlainHermOp Op (HermOp); ImplicitlyRestartedLanczos IRL(OpCheby,Op,Nstop,Nk,Nm,resid,MaxIt); std::vector eval(Nm); - FermionField src(FrbGrid); + FermionField src(FrbGridF); gaussian(RNG5rb,src); - std::vector 
evec(Nm,FrbGrid); + std::vector evec(Nm,FrbGridF); for(int i=0;i<1;i++){ std::cout << GridLogMessage <(); + run(); }else if(action == "DWF"){ - run(); + run(); + }else if(action == "Mobius"){ + run(); }else{ std::cout << "Unknown action" << std::endl; exit(1); From dc6a38f17739ab60a0ceb5d422e18b18ba05efb5 Mon Sep 17 00:00:00 2001 From: Chulwoo Jung Date: Wed, 30 Nov 2022 17:13:12 -0500 Subject: [PATCH 2/2] Minor cleanup --- .../ImplicitlyRestartedBlockLanczos.h | 145 +----------------- tests/lanczos/Test_dwf_block_lanczos.cc | 4 +- 2 files changed, 4 insertions(+), 145 deletions(-) diff --git a/Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h b/Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h index fe388c19..c5d00722 100644 --- a/Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h +++ b/Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h @@ -7,9 +7,8 @@ Copyright (C) 2015 Author: Peter Boyle -Author: Chulwoo Jung Author: Yong-Chull Jang -Author: Guido Cossu +Author: Chulwoo Jung This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -185,10 +184,6 @@ public: { RealD nn = norm2(v); nn = sqrt(nn); -#if 0 - if(if_print && nn < 1e20) - Glog<<"normalize: "< 1e-14) - Glog<<"orthogonalize before: "< 1e-14) - Glog<<"orthogonalize after: "<GlobalSumVector((CUDA_FLOAT*)c_acc,2*Nu*Nevec_acc); -#if 0 - for (int i=0; i[" << j << "," << i << "] = " - << z.x << " + i " << z.y << std::endl; - } - w[i] = w[i] - ip * evec[b*Nevec_acc+j]; - } - } -#else alpha = MAKE_CUDA_COMPLEX(-1.0,0.0); beta = MAKE_CUDA_COMPLEX(1.0,0.0); stat = CUDA_GEMM(handle, CUBLAS_OP_N, CUBLAS_OP_N, 12*sites, Nu, Nevec_acc, @@ -340,9 +292,7 @@ public: &beta, w_acc, 12*sites); //Glog << stat << std::endl; -#endif } -#if 1 for (int col=0; col &inner, std::vector& lhs, int llhs, std::vector& rhs, int lrhs) -{ - typedef typename Field:vector_object vobj; - typedef typename vobj::scalar_type scalar_type; - typedef typename vobj::vector_typeD vector_type; - GridBase *grid = lhs[0]._grid; - assert(grid == rhs[0]._grid; - const int pad = 8; - int total = llhs*lrhs; - assert(inner.size()==total); - int sum_size=grid->SumArraySize(); - -// std::vector inner(total); - Vector sumarray(sum_size*pad*total); - - parallel_for(int thr=0;throSites(),thr,mywork,myoff); - - std::vector< decltype(innerProductD(lhs[0]._odata[0],rhs[0]._odata[0])) > vinner(total,zero); // private to thread; sub summation - for(int ss=myoff;ssGlobalSum(tmp); - inner[i]=tmp; - } -// return inner; -} -#endif - void orthogonalize_blockhead(Field& w, std::vector& evec, int k, int Nu) { @@ -839,14 +738,6 @@ cudaStat = cudaMallocManaged((void **)&evec_acc, Nevec_acc*sites*12*sizeof(CUDA_ Glog << fname + " CONVERGED ; Summary :\n"; // Sort convered eigenpairs. std::vector Btmp(Nstop,grid); // waste of space replicating -#if 0 - for(int i=0; i0) { - for (int u=0; u0) { - // orthogonalize_blockhead(w[0],evec,b,Nu); - // for (int u=1; uGlobalSumVector((double*)evec_tmp,2*NN*NN); } } - // Safer to sort instead of just reversing it, - // but the document of the routine says evals are sorted in increasing order. - // qr gives evals in decreasing order. -// for(int i=0;i +Author: Yong-Chull Jang +Author: Chulwoo Jung This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by