diff --git a/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h b/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h index 31ac55e0..27fee791 100644 --- a/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h +++ b/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h @@ -108,7 +108,10 @@ NAMESPACE_BEGIN(Grid); GridStopWatch PrecChangeTimer; Integer &outer_iter = TotalOuterIterations; //so it will be equal to the final iteration count - + + precisionChangeWorkspace pc_wk_sp_to_dp(DoublePrecGrid, SinglePrecGrid); + precisionChangeWorkspace pc_wk_dp_to_sp(SinglePrecGrid, DoublePrecGrid); + for(outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){ //Compute double precision rsd and also new RHS vector. Linop_d.HermOp(sol_d, tmp_d); @@ -123,7 +126,7 @@ NAMESPACE_BEGIN(Grid); while(norm * inner_tol * inner_tol < stop) inner_tol *= 2; // inner_tol = sqrt(stop/norm) ?? PrecChangeTimer.Start(); - precisionChange(src_f, src_d); + precisionChange(src_f, src_d, pc_wk_dp_to_sp); PrecChangeTimer.Stop(); sol_f = Zero(); @@ -142,7 +145,7 @@ NAMESPACE_BEGIN(Grid); //Convert sol back to double and add to double prec solution PrecChangeTimer.Start(); - precisionChange(tmp_d, sol_f); + precisionChange(tmp_d, sol_f, pc_wk_sp_to_dp); PrecChangeTimer.Stop(); axpy(sol_d, 1.0, tmp_d, sol_d); diff --git a/Grid/algorithms/iterative/ConjugateGradientMultiShiftCleanup.h b/Grid/algorithms/iterative/ConjugateGradientMultiShiftCleanup.h new file mode 100644 index 00000000..17714f09 --- /dev/null +++ b/Grid/algorithms/iterative/ConjugateGradientMultiShiftCleanup.h @@ -0,0 +1,373 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/algorithms/iterative/ConjugateGradientMultiShift.h + + Copyright (C) 2015 + +Author: Azusa Yamaguchi +Author: Peter Boyle +Author: Christopher Kelly + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +NAMESPACE_BEGIN(Grid); + +//CK 2020: A variant of the multi-shift conjugate gradient with the matrix multiplication in single precision. +//The residual is stored in single precision, but the search directions and solution are stored in double precision. +//Every update_freq iterations the residual is corrected in double precision. 
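+//
+//Illustrative pseudocode of the reliable-update scheme just described (a sketch
+//only; the names here are paraphrases rather than the actual class members):
+//  r = single(b - A*x)                  // residual carried in single precision
+//  each iteration: update the shifted search directions and solutions in single precision
+//  every update_freq iterations:
+//    r = single(b_d - A_d*x_d)          // recompute the true residual in double precision
+//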
+//For safety the a final regular CG is applied to clean up if necessary + +//PB Pure single, then double fixup + +template::value == 2, int>::type = 0, + typename std::enable_if< getPrecision::value == 1, int>::type = 0> +class ConjugateGradientMultiShiftMixedPrecCleanup : public OperatorMultiFunction, + public OperatorFunction +{ +public: + + using OperatorFunction::operator(); + + RealD Tolerance; + Integer MaxIterationsMshift; + Integer MaxIterations; + Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion + std::vector IterationsToCompleteShift; // Iterations for this shift + int verbose; + MultiShiftFunction shifts; + std::vector TrueResidualShift; + + int ReliableUpdateFreq; //number of iterations between reliable updates + + GridBase* SinglePrecGrid; //Grid for single-precision fields + LinearOperatorBase &Linop_f; //single precision + + ConjugateGradientMultiShiftMixedPrecCleanup(Integer maxit, const MultiShiftFunction &_shifts, + GridBase* _SinglePrecGrid, LinearOperatorBase &_Linop_f, + int _ReliableUpdateFreq) : + MaxIterationsMshift(maxit), shifts(_shifts), SinglePrecGrid(_SinglePrecGrid), Linop_f(_Linop_f), ReliableUpdateFreq(_ReliableUpdateFreq), + MaxIterations(20000) + { + verbose=1; + IterationsToCompleteShift.resize(_shifts.order); + TrueResidualShift.resize(_shifts.order); + } + + void operator() (LinearOperatorBase &Linop, const FieldD &src, FieldD &psi) + { + GridBase *grid = src.Grid(); + int nshift = shifts.order; + std::vector results(nshift,grid); + (*this)(Linop,src,results,psi); + } + void operator() (LinearOperatorBase &Linop, const FieldD &src, std::vector &results, FieldD &psi) + { + int nshift = shifts.order; + + (*this)(Linop,src,results); + + psi = shifts.norm*src; + for(int i=0;i &Linop_d, const FieldD &src_d, std::vector &psi_d) + { + GRID_TRACE("ConjugateGradientMultiShiftMixedPrecCleanup"); + GridBase *DoublePrecGrid = src_d.Grid(); + + //////////////////////////////////////////////////////////////////////// + // Convenience references to the info stored in "MultiShiftFunction" + //////////////////////////////////////////////////////////////////////// + int nshift = shifts.order; + + std::vector &mass(shifts.poles); // Make references to array in "shifts" + std::vector &mresidual(shifts.tolerances); + std::vector alpha(nshift,1.0); + + //Double precision search directions + FieldD p_d(DoublePrecGrid); + std::vector ps_f (nshift, SinglePrecGrid);// Search directions (single precision) + std::vector psi_f(nshift, SinglePrecGrid);// solutions (single precision) + + FieldD tmp_d(DoublePrecGrid); + FieldD r_d(DoublePrecGrid); + FieldF r_f(SinglePrecGrid); + FieldD mmp_d(DoublePrecGrid); + + assert(psi_d.size()==nshift); + assert(mass.size()==nshift); + assert(mresidual.size()==nshift); + + // dynamic sized arrays on stack; 2d is a pain with vector + RealD bs[nshift]; + RealD rsq[nshift]; + RealD rsqf[nshift]; + RealD z[nshift][2]; + int converged[nshift]; + + const int primary =0; + + //Primary shift fields CG iteration + RealD a,b,c,d; + RealD cp,bp,qq; //prev + + // Matrix mult fields + FieldF p_f(SinglePrecGrid); + FieldF mmp_f(SinglePrecGrid); + + // Check lightest mass + for(int s=0;s= mass[primary] ); + converged[s]=0; + } + + // Wire guess to zero + // Residuals "r" are src + // First search direction "p" is also src + cp = norm2(src_d); + + // Handle trivial case of zero src. + if( cp == 0. 
){ + for(int s=0;s= rsq[s]){ + CleanupTimer.Start(); + std::cout< Linop_shift_d(Linop_d, mass[s]); + ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop Linop_shift_f(Linop_f, mass[s]); + + MixedPrecisionConjugateGradient cg(mresidual[s], MaxIterations, MaxIterations, SinglePrecGrid, Linop_shift_f, Linop_shift_d); + cg(src_d, psi_d[s]); + + TrueResidualShift[s] = cg.TrueResidual; + CleanupTimer.Stop(); + } + } + + std::cout << GridLogMessage << "ConjugateGradientMultiShiftMixedPrecCleanup: Time Breakdown for body"<::operator(); RealD Tolerance; + Integer MaxIterationsMshift; Integer MaxIterations; Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion std::vector IterationsToCompleteShift; // Iterations for this shift @@ -95,9 +96,9 @@ public: ConjugateGradientMultiShiftMixedPrec(Integer maxit, const MultiShiftFunction &_shifts, GridBase* _SinglePrecGrid, LinearOperatorBase &_Linop_f, - int _ReliableUpdateFreq - ) : - MaxIterations(maxit), shifts(_shifts), SinglePrecGrid(_SinglePrecGrid), Linop_f(_Linop_f), ReliableUpdateFreq(_ReliableUpdateFreq) + int _ReliableUpdateFreq) : + MaxIterationsMshift(maxit), shifts(_shifts), SinglePrecGrid(_SinglePrecGrid), Linop_f(_Linop_f), ReliableUpdateFreq(_ReliableUpdateFreq), + MaxIterations(20000) { verbose=1; IterationsToCompleteShift.resize(_shifts.order); @@ -130,6 +131,9 @@ public: GRID_TRACE("ConjugateGradientMultiShiftMixedPrec"); GridBase *DoublePrecGrid = src_d.Grid(); + precisionChangeWorkspace pc_wk_s_to_d(DoublePrecGrid,SinglePrecGrid); + precisionChangeWorkspace pc_wk_d_to_s(SinglePrecGrid,DoublePrecGrid); + //////////////////////////////////////////////////////////////////////// // Convenience references to the info stored in "MultiShiftFunction" //////////////////////////////////////////////////////////////////////// @@ -200,10 +204,10 @@ public: r_d = p_d; //MdagM+m[0] - precisionChangeFast(p_f,p_d); + precisionChange(p_f, p_d, pc_wk_d_to_s); Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp) - precisionChangeFast(tmp_d,mmp_f); + precisionChange(tmp_d, mmp_f, pc_wk_s_to_d); Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp) tmp_d = tmp_d - mmp_d; std::cout << " Testing operators match "< &Linop_f; LinearOperatorBase &Linop_d; GridBase* SinglePrecGrid; - RealD Delta; //reliable update parameter + RealD Delta; //reliable update parameter. A reliable update is performed when the residual drops by a factor of Delta relative to its value at the last update //Optional ability to switch to a different linear operator once the tolerance reaches a certain point. Useful for single/half -> single/single LinearOperatorBase *Linop_fallback; @@ -65,7 +65,9 @@ public: ErrorOnNoConverge(err_on_no_conv), DoFinalCleanup(true), Linop_fallback(NULL) - {}; + { + assert(Delta > 0. && Delta < 1. 
&& "Expect 0 < Delta < 1"); + }; void setFallbackLinop(LinearOperatorBase &_Linop_fallback, const RealD _fallback_transition_tol){ Linop_fallback = &_Linop_fallback; @@ -116,9 +118,12 @@ public: } //Single prec initialization + precisionChangeWorkspace pc_wk_sp_to_dp(src.Grid(), SinglePrecGrid); + precisionChangeWorkspace pc_wk_dp_to_sp(SinglePrecGrid, src.Grid()); + FieldF r_f(SinglePrecGrid); r_f.Checkerboard() = r.Checkerboard(); - precisionChange(r_f, r); + precisionChange(r_f, r, pc_wk_dp_to_sp); FieldF psi_f(r_f); psi_f = Zero(); @@ -134,7 +139,8 @@ public: GridStopWatch LinalgTimer; GridStopWatch MatrixTimer; GridStopWatch SolverTimer; - + GridStopWatch PrecChangeTimer; + SolverTimer.Start(); int k = 0; int l = 0; @@ -173,7 +179,9 @@ public: // Stopping condition if (cp <= rsq) { //Although not written in the paper, I assume that I have to add on the final solution - precisionChange(mmp, psi_f); + PrecChangeTimer.Start(); + precisionChange(mmp, psi_f, pc_wk_sp_to_dp); + PrecChangeTimer.Stop(); psi = psi + mmp; @@ -194,7 +202,10 @@ public: std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() < +Author: Yong-Chull Jang +Author: Chulwoo Jung + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_IRBL_H +#define GRID_IRBL_H + +#include //memset +#ifdef USE_LAPACK +#include +#endif + +#undef USE_LAPACK +#define Glog std::cout << GridLogMessage + +#ifdef GRID_CUDA +#include "cublas_v2.h" +#endif + +#if 0 +#define CUDA_COMPLEX cuDoubleComplex +#define CUDA_FLOAT double +#define MAKE_CUDA_COMPLEX make_cuDoubleComplex +#define CUDA_GEMM cublasZgemm +#else +#define CUDA_COMPLEX cuComplex +#define CUDA_FLOAT float +#define MAKE_CUDA_COMPLEX make_cuComplex +#define CUDA_GEMM cublasCgemm +#endif + +namespace Grid { + +//////////////////////////////////////////////////////////////////////////////// +// Helper class for sorting the evalues AND evectors by Field +// Use pointer swizzle on vectors SHOULD GET RID OF IT SOON! +//////////////////////////////////////////////////////////////////////////////// +template +class SortEigen { + private: + static bool less_lmd(RealD left,RealD right){ + return left > right; + } + static bool less_pair(std::pair& left, + std::pair& right){ + return left.first > (right.first); + } + + public: + void push(std::vector& lmd,std::vector& evec,int N) { + + //////////////////////////////////////////////////////////////////////// + // PAB: FIXME: VERY VERY VERY wasteful: takes a copy of the entire vector set. 
+ // : The vector reorder should be done by pointer swizzle somehow + //////////////////////////////////////////////////////////////////////// + std::vector cpy(lmd.size(),evec[0].Grid()); + for(int i=0;i > emod(lmd.size()); + + for(int i=0;i(lmd[i],&cpy[i]); + + partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair); + + typename std::vector >::iterator it = emod.begin(); + for(int i=0;ifirst; + evec[i]=*(it->second); + ++it; + } + } + void push(std::vector& lmd,int N) { + std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd); + } + bool saturated(RealD lmd, RealD thrs) { + return fabs(lmd) > fabs(thrs); + } +}; + +enum class LanczosType { irbl, rbl }; + +enum IRBLdiagonalisation { + IRBLdiagonaliseWithDSTEGR, + IRBLdiagonaliseWithQR, + IRBLdiagonaliseWithEigen +}; + +///////////////////////////////////////////////////////////// +// Implicitly restarted block lanczos +///////////////////////////////////////////////////////////// +template +class ImplicitlyRestartedBlockLanczos { + +private: + + std::string cname = std::string("ImplicitlyRestartedBlockLanczos"); + int MaxIter; // Max iterations + int Nstop; // Number of evecs checked for convergence + int Nu; // Number of vecs in the unit block + int Nk; // Number of converged sought + int Nm; // total number of vectors + int Nblock_k; // Nk/Nu + int Nblock_m; // Nm/Nu + int Nconv_test_interval; // Number of skipped vectors when checking a convergence + RealD eresid; + IRBLdiagonalisation diagonalisation; + //////////////////////////////////// + // Embedded objects + //////////////////////////////////// + SortEigen _sort; + LinearOperatorBase &_Linop; + LinearOperatorBase &_SLinop;//for split + OperatorFunction &_poly; + GridRedBlackCartesian * f_grid; + GridRedBlackCartesian * sf_grid; + int mrhs; + ///////////////////////// + // BLAS objects + ///////////////////////// +#ifdef GRID_CUDA + cudaError_t cudaStat; + CUDA_COMPLEX *w_acc, *evec_acc, *c_acc; +#endif + int Nevec_acc; // Number of eigenvectors stored in the buffer evec_acc + + ///////////////////////// + // Constructor + ///////////////////////// +public: + int split_test; //test split in the first iteration + ImplicitlyRestartedBlockLanczos(LinearOperatorBase &Linop, // op + LinearOperatorBase &SLinop, // op + GridRedBlackCartesian * FrbGrid, + GridRedBlackCartesian * SFrbGrid, + int _mrhs, + OperatorFunction & poly, // polynomial + int _Nstop, // really sought vecs + int _Nconv_test_interval, // conv check interval + int _Nu, // vecs in the unit block + int _Nk, // sought vecs + int _Nm, // total vecs + RealD _eresid, // resid in lmd deficit + int _MaxIter, // Max iterations + IRBLdiagonalisation _diagonalisation = IRBLdiagonaliseWithEigen) + : _Linop(Linop), _SLinop(SLinop), _poly(poly),sf_grid(SFrbGrid),f_grid(FrbGrid), + Nstop(_Nstop), Nconv_test_interval(_Nconv_test_interval), mrhs(_mrhs), + Nu(_Nu), Nk(_Nk), Nm(_Nm), + Nblock_m(_Nm/_Nu), Nblock_k(_Nk/_Nu), + //eresid(_eresid), MaxIter(10), + eresid(_eresid), MaxIter(_MaxIter), + diagonalisation(_diagonalisation),split_test(0), + Nevec_acc(_Nu) + { assert( (Nk%Nu==0) && (Nm%Nu==0) ); }; + + //////////////////////////////// + // Helpers + //////////////////////////////// + static RealD normalize(Field& v, int if_print=0) + { + RealD nn = norm2(v); + nn = sqrt(nn); + v = v * (1.0/nn); + return nn; + } + + void orthogonalize(Field& w, std::vector& evec, int k, int if_print=0) + { + typedef typename Field::scalar_type MyComplex; +// MyComplex ip; + ComplexD ip; + + for(int j=0; j 1e-14) + Glog<<"orthogonalize 
before: "< 1e-14) + Glog<<"orthogonalize after: "<& evec, int k) + { + orthogonalize(w, evec, k,1); + } + + void orthogonalize(std::vector& w, int _Nu, std::vector& evec, int k, int if_print=0) + { + typedef typename Field::scalar_type MyComplex; + MyComplex ip; +// ComplexD ip; + + for(int j=0; j& w, std::vector& evec, int R, int do_print=0) + { +#ifdef GRID_CUDA + Glog << "cuBLAS orthogonalize" << std::endl; + + typedef typename Field::vector_object vobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + typedef typename Field::scalar_type MyComplex; + + GridBase *grid = w[0].Grid(); + const uint64_t sites = grid->lSites(); + + int Nbatch = R/Nevec_acc; + assert( R%Nevec_acc == 0 ); +// Glog << "nBatch, Nevec_acc, R, Nu = " +// << Nbatch << "," << Nevec_acc << "," << R << "," << Nu << std::endl; + + for (int col=0; col(&w_v[0]); +// Glog << "col= "<(&evec_v[0]); +// Glog << "col= "<& evec, int k, int Nu) + { + typedef typename Field::scalar_type MyComplex; + MyComplex ip; + + for(int j=0; j& eval, + std::vector& evec, + const std::vector& src, int& Nconv, LanczosType Impl) + { +#ifdef GRID_CUDA + GridBase *grid = src[0].Grid(); + grid->show_decomposition(); + +// printf("GRID_CUDA\n"); + + // set eigenvector buffers for the cuBLAS calls + //const uint64_t nsimd = grid->Nsimd(); + const uint64_t sites = grid->lSites(); + + cudaStat = cudaMallocManaged((void **)&w_acc, Nu*sites*12*sizeof(CUDA_COMPLEX)); +// Glog << "w_acc= "<& eval, + std::vector& evec, + const std::vector& src, int& Nconv) + { + std::string fname = std::string(cname+"::calc_irbl()"); + GridBase *grid = evec[0].Grid(); + assert(grid == src[0].Grid()); + assert( Nu = src.size() ); + + Glog << std::string(74,'*') << std::endl; + Glog << fname + " starting iteration 0 / "<< MaxIter<< std::endl; + Glog << std::string(74,'*') << std::endl; + Glog <<" -- seek Nk = "<< Nk <<" vectors"<< std::endl; + Glog <<" -- accept Nstop = "<< Nstop <<" vectors"<< std::endl; + Glog <<" -- total Nm = "<< Nm <<" vectors"<< std::endl; + Glog <<" -- size of eval = "<< eval.size() << std::endl; + Glog <<" -- size of evec = "<< evec.size() << std::endl; + if ( diagonalisation == IRBLdiagonaliseWithEigen ) { + Glog << "Diagonalisation is Eigen "<< std::endl; +#ifdef USE_LAPACK + } else if ( diagonalisation == IRBLdiagonaliseWithLAPACK ) { + Glog << "Diagonalisation is LAPACK "<< std::endl; +#endif + } else { + abort(); + } + Glog << std::string(74,'*') << std::endl; + + assert(Nm == evec.size() && Nm == eval.size()); + + std::vector> lmd(Nu,std::vector(Nm,0.0)); + std::vector> lme(Nu,std::vector(Nm,0.0)); + std::vector> lmd2(Nu,std::vector(Nm,0.0)); + std::vector> lme2(Nu,std::vector(Nm,0.0)); + std::vector eval2(Nm); + std::vector resid(Nk); + + Eigen::MatrixXcd Qt = Eigen::MatrixXcd::Zero(Nm,Nm); + Eigen::MatrixXcd Q = Eigen::MatrixXcd::Zero(Nm,Nm); + + std::vector Iconv(Nm); + std::vector B(Nm,grid); // waste of space replicating + + std::vector f(Nu,grid); + std::vector f_copy(Nu,grid); + Field v(grid); + + Nconv = 0; + + RealD beta_k; + + // set initial vector + for (int i=0; i& eval, + std::vector& evec, + const std::vector& src, int& Nconv) + { + std::string fname = std::string(cname+"::calc_rbl()"); + GridBase *grid = evec[0].Grid(); + assert(grid == src[0].Grid()); + assert( Nu = src.size() ); + + int Np = (Nm-Nk); + if (Np > 0 && MaxIter > 1) Np /= MaxIter; + int Nblock_p = Np/Nu; + for(int i=0;i< evec.size();i++) evec[0].Advise()=AdviseInfrequentUse; + + Glog << std::string(74,'*') << 
std::endl; + Glog << fname + " starting iteration 0 / "<< MaxIter<< std::endl; + Glog << std::string(74,'*') << std::endl; + Glog <<" -- seek (min) Nk = "<< Nk <<" vectors"<< std::endl; + Glog <<" -- seek (inc) Np = "<< Np <<" vectors"<< std::endl; + Glog <<" -- seek (max) Nm = "<< Nm <<" vectors"<< std::endl; + Glog <<" -- accept Nstop = "<< Nstop <<" vectors"<< std::endl; + Glog <<" -- size of eval = "<< eval.size() << std::endl; + Glog <<" -- size of evec = "<< evec.size() << std::endl; + if ( diagonalisation == IRBLdiagonaliseWithEigen ) { + Glog << "Diagonalisation is Eigen "<< std::endl; +#ifdef USE_LAPACK + } else if ( diagonalisation == IRBLdiagonaliseWithLAPACK ) { + Glog << "Diagonalisation is LAPACK "<< std::endl; +#endif + } else { + abort(); + } + Glog << std::string(74,'*') << std::endl; + + assert(Nm == evec.size() && Nm == eval.size()); + + std::vector> lmd(Nu,std::vector(Nm,0.0)); + std::vector> lme(Nu,std::vector(Nm,0.0)); + std::vector> lmd2(Nu,std::vector(Nm,0.0)); + std::vector> lme2(Nu,std::vector(Nm,0.0)); + std::vector eval2(Nk); + std::vector resid(Nm); + + Eigen::MatrixXcd Qt = Eigen::MatrixXcd::Zero(Nm,Nm); + Eigen::MatrixXcd Q = Eigen::MatrixXcd::Zero(Nm,Nm); + + std::vector Iconv(Nm); +// int Ntest=Nu; +// std::vector B(Nm,grid); // waste of space replicating + std::vector B(1,grid); // waste of space replicating + + std::vector f(Nu,grid); + std::vector f_copy(Nu,grid); + Field v(grid); + + Nconv = 0; + +// RealD beta_k; + + // set initial vector + for (int i=0; i Btmp(Nstop,grid); // waste of space replicating + + for(int i=0; i>& lmd, + std::vector>& lme, + std::vector& evec, + std::vector& w, + std::vector& w_copy, + int b) + { + const RealD tiny = 1.0e-20; + + int Nu = w.size(); + int Nm = evec.size(); + assert( b < Nm/Nu ); +// GridCartesian *grid = evec[0]._grid; + + // converts block index to full indicies for an interval [L,R) + int L = Nu*b; + int R = Nu*(b+1); + + Real beta; + + Glog << "Using split grid"<< std::endl; +// LatticeGaugeField s_Umu(SGrid); + assert((Nu%mrhs)==0); + std::vector in(mrhs,f_grid); + + Field s_in(sf_grid); + Field s_out(sf_grid); + // unnecessary copy. Can or should it be avoided? 
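+    // The loop below appears to batch the Nu block vectors through the split
+    // grid in groups of mrhs: pack mrhs fields into a single split-grid field,
+    // apply the single-precision operator once for all right-hand sides, then
+    // unpack the results. That pack/unpack is the copy queried above.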
+ int k_start = 0; +while ( k_start < Nu) { + Glog << "k_start= "<0) { + for (int u=0; u& eval, + std::vector>& lmd, + std::vector>& lme, + int Nu, int Nk, int Nm, + Eigen::MatrixXcd & Qt, // Nm x Nm + GridBase *grid) + { + assert( Nk%Nu == 0 && Nm%Nu == 0 ); + assert( Nk <= Nm ); + Eigen::MatrixXcd BlockTriDiag = Eigen::MatrixXcd::Zero(Nk,Nk); + + for ( int u=0; u eigensolver(BlockTriDiag); + + for (int i = 0; i < Nk; i++) { + eval[Nk-1-i] = eigensolver.eigenvalues()(i); + } + for (int i = 0; i < Nk; i++) { + for (int j = 0; j < Nk; j++) { + Qt(j,Nk-1-i) = eigensolver.eigenvectors()(j,i); + //Qt(Nk-1-i,j) = eigensolver.eigenvectors()(i,j); + //Qt(i,j) = eigensolver.eigenvectors()(i,j); + } + } + } + +#ifdef USE_LAPACK + void diagonalize_lapack(std::vector& eval, + std::vector>& lmd, + std::vector>& lme, + int Nu, int Nk, int Nm, + Eigen::MatrixXcd & Qt, // Nm x Nm + GridBase *grid) + { + Glog << "diagonalize_lapack: Nu= "<_Nprocessors; + int node = grid->_processor; + int interval = (NN/total)+1; + double vl = 0.0, vu = 0.0; + MKL_INT il = interval*node+1 , iu = interval*(node+1); + if (iu > NN) iu=NN; + Glog << "node "<= il-1; i--){ + evals_tmp[i] = evals_tmp[i - (il-1)]; + if (il>1) evals_tmp[i-(il-1)]=0.; + for (int j = 0; j< NN; j++){ + evec_tmp[i*NN+j] = evec_tmp[(i - (il-1))*NN+j]; + if (il>1) { + evec_tmp[(i-(il-1))*NN+j].imag=0.; + evec_tmp[(i-(il-1))*NN+j].real=0.; + } + } + } + } + { + grid->GlobalSumVector(evals_tmp,NN); + grid->GlobalSumVector((double*)evec_tmp,2*NN*NN); + } + } + for (int i = 0; i < Nk; i++) + eval[Nk-1-i] = evals_tmp[i]; + for (int i = 0; i < Nk; i++) { + for (int j = 0; j < Nk; j++) { +// Qt(j,Nk-1-i) = eigensolver.eigenvectors()(j,i); + Qt(j,Nk-1-i)=std::complex + ( evec_tmp[i*Nk+j].real, + evec_tmp[i*Nk+j].imag); +// ( evec_tmp[(Nk-1-j)*Nk+Nk-1-i].real, +// evec_tmp[(Nk-1-j)*Nk+Nk-1-i].imag); + + } + } + +if (1){ + Eigen::SelfAdjointEigenSolver eigensolver(BlockTriDiag); + + for (int i = 0; i < Nk; i++) { + Glog << "eval = "<& eval, + std::vector>& lmd, + std::vector>& lme, + int Nu, int Nk, int Nm, + Eigen::MatrixXcd & Qt, + GridBase *grid) + { + Qt = Eigen::MatrixXcd::Identity(Nm,Nm); + if ( diagonalisation == IRBLdiagonaliseWithEigen ) { + diagonalize_Eigen(eval,lmd,lme,Nu,Nk,Nm,Qt,grid); +#ifdef USE_LAPACK + } else if ( diagonalisation == IRBLdiagonaliseWithLAPACK ) { + diagonalize_lapack(eval,lmd,lme,Nu,Nk,Nm,Qt,grid); +#endif + } else { + assert(0); + } + } + + + void unpackHermitBlockTriDiagMatToEigen( + std::vector>& lmd, + std::vector>& lme, + int Nu, int Nb, int Nk, int Nm, + Eigen::MatrixXcd& M) + { + //Glog << "unpackHermitBlockTriDiagMatToEigen() begin" << '\n'; + assert( Nk%Nu == 0 && Nm%Nu == 0 ); + assert( Nk <= Nm ); + M = Eigen::MatrixXcd::Zero(Nk,Nk); + + // rearrange + for ( int u=0; u>& lmd, + std::vector>& lme, + int Nu, int Nb, int Nk, int Nm, + Eigen::MatrixXcd& M) + { + //Glog << "packHermitBlockTriDiagMatfromEigen() begin" << '\n'; + assert( Nk%Nu == 0 && Nm%Nu == 0 ); + assert( Nk <= Nm ); + + // rearrange + for ( int u=0; u QRD(Mtmp); + Q = QRD.householderQ(); + R = QRD.matrixQR(); // upper triangular part is the R matrix. + // lower triangular part used to represent series + // of Q sequence. 
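+    // (For reference: Eigen's householderQ() returns Q implicitly as a
+    // HouseholderSequence encoded by the strict lower triangle of matrixQR()
+    // together with hCoeffs(); assigning it to a dense matrix, as done above,
+    // expands the product of reflectors.)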
+ + // equivalent operation of Qprod *= Q + //M = Eigen::MatrixXcd::Zero(Nm,Nm); + + //for (int i=0; i Nm) kmax = Nm; + for (int k=i; ki) M(i,j) = conj(M(j,i)); + // if (i-j > Nu || j-i > Nu) M(i,j) = 0.; + // } + //} + + //Glog << "shiftedQRDecompEigen() end" << endl; + } + + void exampleQRDecompEigen(void) + { + Eigen::MatrixXd A = Eigen::MatrixXd::Zero(3,3); + Eigen::MatrixXd Q = Eigen::MatrixXd::Zero(3,3); + Eigen::MatrixXd R = Eigen::MatrixXd::Zero(3,3); + Eigen::MatrixXd P = Eigen::MatrixXd::Zero(3,3); + + A(0,0) = 12.0; + A(0,1) = -51.0; + A(0,2) = 4.0; + A(1,0) = 6.0; + A(1,1) = 167.0; + A(1,2) = -68.0; + A(2,0) = -4.0; + A(2,1) = 24.0; + A(2,2) = -41.0; + + Glog << "matrix A before ColPivHouseholder" << std::endl; + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "A[" << i << "," << j << "] = " << A(i,j) << '\n'; + } + } + Glog << std::endl; + + Eigen::ColPivHouseholderQR QRD(A); + + Glog << "matrix A after ColPivHouseholder" << std::endl; + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "A[" << i << "," << j << "] = " << A(i,j) << '\n'; + } + } + Glog << std::endl; + + Glog << "HouseholderQ with sequence lenth = nonzeroPiviots" << std::endl; + Q = QRD.householderQ().setLength(QRD.nonzeroPivots()); + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "Q[" << i << "," << j << "] = " << Q(i,j) << '\n'; + } + } + Glog << std::endl; + + Glog << "HouseholderQ with sequence lenth = 1" << std::endl; + Q = QRD.householderQ().setLength(1); + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "Q[" << i << "," << j << "] = " << Q(i,j) << '\n'; + } + } + Glog << std::endl; + + Glog << "HouseholderQ with sequence lenth = 2" << std::endl; + Q = QRD.householderQ().setLength(2); + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "Q[" << i << "," << j << "] = " << Q(i,j) << '\n'; + } + } + Glog << std::endl; + + Glog << "matrixR" << std::endl; + R = QRD.matrixR(); + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "R[" << i << "," << j << "] = " << R(i,j) << '\n'; + } + } + Glog << std::endl; + + Glog << "rank = " << QRD.rank() << std::endl; + Glog << "threshold = " << QRD.threshold() << std::endl; + + Glog << "matrixP" << std::endl; + P = QRD.colsPermutation(); + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "P[" << i << "," << j << "] = " << P(i,j) << '\n'; + } + } + Glog << std::endl; + + + Glog << "QR decomposition without column pivoting" << std::endl; + + A(0,0) = 12.0; + A(0,1) = -51.0; + A(0,2) = 4.0; + A(1,0) = 6.0; + A(1,1) = 167.0; + A(1,2) = -68.0; + A(2,0) = -4.0; + A(2,1) = 24.0; + A(2,2) = -41.0; + + Glog << "matrix A before Householder" << std::endl; + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "A[" << i << "," << j << "] = " << A(i,j) << '\n'; + } + } + Glog << std::endl; + + Eigen::HouseholderQR QRDplain(A); + + Glog << "HouseholderQ" << std::endl; + Q = QRDplain.householderQ(); + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "Q[" << i << "," << j << "] = " << Q(i,j) << '\n'; + } + } + Glog << std::endl; + + Glog << "matrix A after Householder" << std::endl; + for ( int i=0; i<3; i++ ) { + for ( int j=0; j<3; j++ ) { + Glog << "A[" << i << "," << j << "] = " << A(i,j) << '\n'; + } + } + Glog << std::endl; + } + + }; +} +#undef Glog +#undef USE_LAPACK +#undef CUDA_COMPLEX +#undef CUDA_FLOAT +#undef MAKE_CUDA_COMPLEX +#undef CUDA_GEMM +#endif diff --git a/Grid/lattice/Lattice_base.h 
b/Grid/lattice/Lattice_base.h index dc860d8e..b0b759b5 100644 --- a/Grid/lattice/Lattice_base.h +++ b/Grid/lattice/Lattice_base.h @@ -248,7 +248,7 @@ public: /////////////////////////////////////////// // user defined constructor /////////////////////////////////////////// - Lattice(GridBase *grid,ViewMode mode=AcceleratorWrite) { + Lattice(GridBase *grid,ViewMode mode=AcceleratorWriteDiscard) { this->_grid = grid; resize(this->_grid->oSites()); assert((((uint64_t)&this->_odata[0])&0xF) ==0); diff --git a/Grid/lattice/Lattice_rng.h b/Grid/lattice/Lattice_rng.h index 6857dc84..180b8437 100644 --- a/Grid/lattice/Lattice_rng.h +++ b/Grid/lattice/Lattice_rng.h @@ -440,7 +440,17 @@ public: _grid->GlobalCoorToGlobalIndex(gcoor,gidx); _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); +#if 1 assert(rank == _grid->ThisRank() ); +#else +// + if (rank != _grid->ThisRank() ){ + std::cout <<"rank "<ThisRank() "<<_grid->ThisRank()<< std::endl; +// exit(-42); +// assert(0); + } +#endif + int l_idx=generator_idx(o_idx,i_idx); _generators[l_idx] = master_engine; diff --git a/Grid/lattice/Lattice_transfer.h b/Grid/lattice/Lattice_transfer.h index 3cdb75d1..6f9fc480 100644 --- a/Grid/lattice/Lattice_transfer.h +++ b/Grid/lattice/Lattice_transfer.h @@ -1080,6 +1080,7 @@ vectorizeFromRevLexOrdArray( std::vector &in, Lattice &out) }); } +//Very fast precision change. Requires in/out objects to reside on same Grid (e.g. by using double2 for the double-precision field) template void precisionChangeFast(Lattice &out, const Lattice &in) { @@ -1097,9 +1098,9 @@ void precisionChangeFast(Lattice &out, const Lattice &in) precisionChange(vout,vin,N); }); } -//Convert a Lattice from one precision to another +//Convert a Lattice from one precision to another (original, slow implementation) template -void precisionChange(Lattice &out, const Lattice &in) +void precisionChangeOrig(Lattice &out, const Lattice &in) { assert(out.Grid()->Nd() == in.Grid()->Nd()); for(int d=0;dNd();d++){ @@ -1145,6 +1146,128 @@ void precisionChange(Lattice &out, const Lattice &in) }); } +//The workspace for a precision change operation allowing for the reuse of the mapping to save time on subsequent calls +class precisionChangeWorkspace{ + std::pair* fmap_device; //device pointer + //maintain grids for checking + GridBase* _out_grid; + GridBase* _in_grid; +public: + precisionChangeWorkspace(GridBase *out_grid, GridBase *in_grid): _out_grid(out_grid), _in_grid(in_grid){ + //Build a map between the sites and lanes of the output field and the input field as we cannot use the Grids on the device + assert(out_grid->Nd() == in_grid->Nd()); + for(int d=0;dNd();d++){ + assert(out_grid->FullDimensions()[d] == in_grid->FullDimensions()[d]); + } + int Nsimd_out = out_grid->Nsimd(); + + std::vector out_icorrs(out_grid->Nsimd()); //reuse these + for(int lane=0; lane < out_grid->Nsimd(); lane++) + out_grid->iCoorFromIindex(out_icorrs[lane], lane); + + std::vector > fmap_host(out_grid->lSites()); //lsites = osites*Nsimd + thread_for(out_oidx,out_grid->oSites(),{ + Coordinate out_ocorr; + out_grid->oCoorFromOindex(out_ocorr, out_oidx); + + Coordinate lcorr; //the local coordinate (common to both in and out as full coordinate) + for(int out_lane=0; out_lane < Nsimd_out; out_lane++){ + out_grid->InOutCoorToLocalCoor(out_ocorr, out_icorrs[out_lane], lcorr); + + //int in_oidx = in_grid->oIndex(lcorr), in_lane = in_grid->iIndex(lcorr); + //Note oIndex and OcorrFromOindex (and same for iIndex) are not inverse for checkerboarded lattice, the former coordinates 
being defined on the full lattice and the latter on the reduced lattice + //Until this is fixed we need to circumvent the problem locally. Here I will use the coordinates defined on the reduced lattice for simplicity + int in_oidx = 0, in_lane = 0; + for(int d=0;d_ndimension;d++){ + in_oidx += in_grid->_ostride[d] * ( lcorr[d] % in_grid->_rdimensions[d] ); + in_lane += in_grid->_istride[d] * ( lcorr[d] / in_grid->_rdimensions[d] ); + } + fmap_host[out_lane + Nsimd_out*out_oidx] = std::pair( in_oidx, in_lane ); + } + }); + + //Copy the map to the device (if we had a way to tell if an accelerator is in use we could avoid this copy for CPU-only machines) + size_t fmap_bytes = out_grid->lSites() * sizeof(std::pair); + fmap_device = (std::pair*)acceleratorAllocDevice(fmap_bytes); + acceleratorCopyToDevice(fmap_host.data(), fmap_device, fmap_bytes); + } + + //Prevent moving or copying + precisionChangeWorkspace(const precisionChangeWorkspace &r) = delete; + precisionChangeWorkspace(precisionChangeWorkspace &&r) = delete; + precisionChangeWorkspace &operator=(const precisionChangeWorkspace &r) = delete; + precisionChangeWorkspace &operator=(precisionChangeWorkspace &&r) = delete; + + std::pair const* getMap() const{ return fmap_device; } + + void checkGrids(GridBase* out, GridBase* in) const{ + conformable(out, _out_grid); + conformable(in, _in_grid); + } + + ~precisionChangeWorkspace(){ + acceleratorFreeDevice(fmap_device); + } +}; + + +//We would like to use precisionChangeFast when possible. However usage of this requires the Grids to be the same (runtime check) +//*and* the precisionChange(VobjOut::vector_type, VobjIn, int) function to be defined for the types; this requires an extra compile-time check which we do using some SFINAE trickery +template +auto _precisionChangeFastWrap(Lattice &out, const Lattice &in, int dummy)->decltype( precisionChange( ((typename VobjOut::vector_type*)0), ((typename VobjIn::vector_type*)0), 1), int()){ + if(out.Grid() == in.Grid()){ + precisionChangeFast(out,in); + return 1; + }else{ + return 0; + } +} +template +int _precisionChangeFastWrap(Lattice &out, const Lattice &in, long dummy){ //note long here is intentional; it means the above is preferred if available + return 0; +} + + +//Convert a lattice of one precision to another. Much faster than original implementation but requires a pregenerated workspace +//which contains the mapping data. +template +void precisionChange(Lattice &out, const Lattice &in, const precisionChangeWorkspace &workspace){ + if(_precisionChangeFastWrap(out,in,0)) return; + + static_assert( std::is_same::value == 1, "precisionChange: tensor types must be the same" ); //if tensor types are same the DoublePrecision type must be the same + + out.Checkerboard() = in.Checkerboard(); + constexpr int Nsimd_out = VobjOut::Nsimd(); + + workspace.checkGrids(out.Grid(),in.Grid()); + std::pair const* fmap_device = workspace.getMap(); + + //Do the copy/precision change + autoView( out_v , out, AcceleratorWrite); + autoView( in_v , in, AcceleratorRead); + + accelerator_for(out_oidx, out.Grid()->oSites(), 1,{ + std::pair const* fmap_osite = fmap_device + out_oidx*Nsimd_out; + for(int out_lane=0; out_lane < Nsimd_out; out_lane++){ + int in_oidx = fmap_osite[out_lane].first; + int in_lane = fmap_osite[out_lane].second; + copyLane(out_v[out_oidx], out_lane, in_v[in_oidx], in_lane); + } + }); +} + +//Convert a Lattice from one precision to another. 
Much faster than original implementation but slower than precisionChangeFast +//or precisionChange called with pregenerated workspace, as it needs to internally generate the workspace on the host and copy to device +template +void precisionChange(Lattice &out, const Lattice &in){ + if(_precisionChangeFastWrap(out,in,0)) return; + precisionChangeWorkspace workspace(out.Grid(), in.Grid()); + precisionChange(out, in, workspace); +} + + + + //////////////////////////////////////////////////////////////////////////////// // Communicate between grids //////////////////////////////////////////////////////////////////////////////// diff --git a/Grid/qcd/action/fermion/WilsonCompressor.h b/Grid/qcd/action/fermion/WilsonCompressor.h index b2c07d18..72f8b810 100644 --- a/Grid/qcd/action/fermion/WilsonCompressor.h +++ b/Grid/qcd/action/fermion/WilsonCompressor.h @@ -36,7 +36,7 @@ NAMESPACE_BEGIN(Grid); // Wilson compressor will need FaceGather policies for: // Periodic, Dirichlet, and partial Dirichlet for DWF /////////////////////////////////////////////////////////////// -const int dwf_compressor_depth=1; +const int dwf_compressor_depth=2; #define DWF_COMPRESS class FaceGatherPartialDWF { diff --git a/Grid/qcd/action/pseudofermion/GeneralEvenOddRationalRatio.h b/Grid/qcd/action/pseudofermion/GeneralEvenOddRationalRatio.h index cb680f2f..f237dee4 100644 --- a/Grid/qcd/action/pseudofermion/GeneralEvenOddRationalRatio.h +++ b/Grid/qcd/action/pseudofermion/GeneralEvenOddRationalRatio.h @@ -127,6 +127,8 @@ NAMESPACE_BEGIN(Grid); ApproxNegPowerAction.tolerances[i] = action_tolerance[i]; ApproxHalfPowerAction.tolerances[i] = action_tolerance[i]; ApproxNegHalfPowerAction.tolerances[i]= action_tolerance[i]; + } + for(int i=0;i + NAMESPACE_BEGIN(Grid); ///////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -58,7 +60,7 @@ NAMESPACE_BEGIN(Grid); //Allow derived classes to override the multishift CG virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, FermionFieldD &out){ #if 0 - SchurDifferentiableOperator schurOp(numerator ? NumOp : DenOp); + SchurDifferentiableOperator schurOp(numerator ? NumOpD : DenOpD); ConjugateGradientMultiShift msCG(MaxIter, approx); msCG(schurOp,in, out); #else @@ -66,7 +68,8 @@ NAMESPACE_BEGIN(Grid); SchurDifferentiableOperator schurOpF(numerator ? NumOpF : DenOpF); FermionFieldD2 inD2(NumOpD2.FermionRedBlackGrid()); FermionFieldD2 outD2(NumOpD2.FermionRedBlackGrid()); - + + // Action better with higher precision? ConjugateGradientMultiShiftMixedPrec msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq); precisionChange(inD2,in); std::cout << "msCG single solve "< &out_elems, FermionFieldD &out){ SchurDifferentiableOperator schurOpD2(numerator ? NumOpD2 : DenOpD2); - SchurDifferentiableOperator schurOpF(numerator ? NumOpF : DenOpF); + SchurDifferentiableOperator schurOpF (numerator ? 
NumOpF : DenOpF); FermionFieldD2 inD2(NumOpD2.FermionRedBlackGrid()); FermionFieldD2 outD2(NumOpD2.FermionRedBlackGrid()); std::vector out_elemsD2(out_elems.size(),NumOpD2.FermionRedBlackGrid()); - ConjugateGradientMultiShiftMixedPrec msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq); + ConjugateGradientMultiShiftMixedPrecCleanup msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq); precisionChange(inD2,in); std::cout << "msCG in "<deriv_us*1.0e-6<<" s"<< std::endl; } } + std::cout << GridLogMessage << "--------------------------- "< > & table) { diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index 2b43e71f..f052cb25 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -120,6 +120,12 @@ void Gather_plane_exchange_table(commVector >& table, } */ +void DslashResetCounts(void); +void DslashGetCounts(uint64_t &dirichlet,uint64_t &partial,uint64_t &full); +void DslashLogFull(void); +void DslashLogPartial(void); +void DslashLogDirichlet(void); + struct StencilEntry { #ifdef GRID_CUDA uint64_t _byte_offset; // 8 bytes @@ -312,6 +318,7 @@ public: int face_table_computed; int partialDirichlet; + int fullDirichlet; std::vector > > face_table ; Vector surface_list; @@ -442,6 +449,9 @@ public: void CommunicateComplete(std::vector > &reqs) { _grid->StencilSendToRecvFromComplete(MpiReqs,0); + if ( this->partialDirichlet ) DslashLogPartial(); + else if ( this->fullDirichlet ) DslashLogDirichlet(); + else DslashLogFull(); } //////////////////////////////////////////////////////////////////////// // Blocking send and receive. Either sequential or parallel. @@ -770,6 +780,10 @@ public: if ( p.dirichlet.size() ==0 ) p.dirichlet.resize(grid->Nd(),0); partialDirichlet = p.partialDirichlet; DirichletBlock(p.dirichlet); // comms send/recv set up + fullDirichlet=0; + for(int d=0;d accelerator_inline void copyLane(vobjOut & __restrict__ vecOut, int lane_out, const vobjIn & __restrict__ vecIn, int lane_in) { - static_assert( std::is_same::value == 1, "copyLane: tensor types must be the same" ); //if tensor types are same the DoublePrecision type must be the same + static_assert( std::is_same::value == 1, "copyLane: tensor types must be the same" ); //if tensor types are same the DoublePrecision type must be the same typedef typename vobjOut::vector_type ovector_type; typedef typename vobjIn::vector_type ivector_type; @@ -251,9 +251,9 @@ void copyLane(vobjOut & __restrict__ vecOut, int lane_out, const vobjIn & __rest ovector_type * __restrict__ op = (ovector_type *)&vecOut; ivector_type * __restrict__ ip = (ivector_type *)&vecIn; for(int w=0;w>val; return; } - void GridParseLayout(char **argv,int argc, Coordinate &latt_c, Coordinate &mpi_c) diff --git a/Grid/util/Init.h b/Grid/util/Init.h index 585660a1..bdf0bcac 100644 --- a/Grid/util/Init.h +++ b/Grid/util/Init.h @@ -57,7 +57,7 @@ void GridCmdOptionCSL(std::string str,std::vector & vec); template void GridCmdOptionIntVector(const std::string &str,VectorInt & vec); void GridCmdOptionInt(std::string &str,int & val); -void GridCmdOptionFloat(std::string &str,float & val); +void GridCmdOptionFloat(std::string &str,double & val); void GridParseLayout(char **argv,int argc, diff --git a/HMC/Mobius2p1f_DD_EOFA_96I_mshift.cc b/HMC/Mobius2p1f_DD_EOFA_96I_mshift.cc index 0a924486..5572d11f 100644 --- a/HMC/Mobius2p1f_DD_EOFA_96I_mshift.cc +++ b/HMC/Mobius2p1f_DD_EOFA_96I_mshift.cc @@ -164,11 +164,6 @@ int main(int argc, char **argv) { typedef MobiusEOFAFermionF FermionEOFAActionF; 
typedef typename FermionActionF::FermionField FermionFieldF; - typedef WilsonImplD2 FermionImplPolicyD2; - typedef MobiusFermionD2 FermionActionD2; - typedef MobiusEOFAFermionD2 FermionEOFAActionD2; - typedef typename FermionActionD2::FermionField FermionFieldD2; - typedef Grid::XmlReader Serialiser; //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: @@ -232,31 +227,34 @@ int main(int argc, char **argv) { // std::vector hasenbusch({ light_mass, 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated // std::vector hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass }); - OneFlavourRationalParams OFRp; // Up/down - OFRp.lo = 4.0e-5; + int SP_iters=10000; + + RationalActionParams OFRp; // Up/down + OFRp.lo = 6.0e-5; OFRp.hi = 90.0; - OFRp.MaxIter = 60000; - OFRp.tolerance= 1.0e-5; - OFRp.mdtolerance= 1.0e-3; + OFRp.inv_pow = 2; + OFRp.MaxIter = SP_iters; // get most shifts by 2000, stop sharing space + OFRp.action_tolerance= 1.0e-8; + OFRp.action_degree = 18; + OFRp.md_tolerance= 1.0e-5; + OFRp.md_degree = 14; // OFRp.degree = 20; converges // OFRp.degree = 16; - OFRp.degree = 18; OFRp.precision= 80; OFRp.BoundsCheckFreq=0; std::vector ActionTolByPole({ - 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-7,1.0e-8,1.0e-8,1.0e-8, 1.0e-8,1.0e-8,1.0e-8,1.0e-8, 1.0e-8,1.0e-8,1.0e-8,1.0e-8, 1.0e-8,1.0e-8,1.0e-8,1.0e-8, 1.0e-8,1.0e-8 }); std::vector MDTolByPole({ - 1.0e-5,5.0e-6,1.0e-6,1.0e-7, // soften convergence more more + 1.6e-5,5.0e-6,1.0e-6,3.0e-7, // soften convergence more more // 1.0e-6,3.0e-7,1.0e-7,1.0e-7, // 3.0e-6,1.0e-6,1.0e-7,1.0e-7, // soften convergence 1.0e-8,1.0e-8,1.0e-8,1.0e-8, 1.0e-8,1.0e-8,1.0e-8,1.0e-8, - 1.0e-8,1.0e-8,1.0e-8,1.0e-8, 1.0e-8,1.0e-8 }); @@ -265,10 +263,8 @@ int main(int argc, char **argv) { typedef SchurDiagMooeeOperator LinearOperatorF; typedef SchurDiagMooeeOperator LinearOperatorD; - typedef SchurDiagMooeeOperator LinearOperatorD2; typedef SchurDiagMooeeOperator LinearOperatorEOFAF; typedef SchurDiagMooeeOperator LinearOperatorEOFAD; - typedef SchurDiagMooeeOperator LinearOperatorEOFAD2; typedef MixedPrecisionConjugateGradientOperatorFunction MxPCG; typedef MixedPrecisionConjugateGradientOperatorFunction MxPCG_EOFA; @@ -321,7 +317,6 @@ int main(int argc, char **argv) { // temporarily need a gauge field LatticeGaugeFieldD U(GridPtr); U=Zero(); LatticeGaugeFieldF UF(GridPtrF); UF=Zero(); - LatticeGaugeFieldD2 UD2(GridPtrF); UD2=Zero(); std::cout << GridLogMessage << " Running the HMC "<< std::endl; TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file @@ -340,6 +335,7 @@ int main(int argc, char **argv) { ParamsDirF.dirichlet=Dirichlet; ParamsDir.partialDirichlet=1; ParamsDirF.partialDirichlet=1; + std::cout << GridLogMessage<< "Partial Dirichlet depth is "< Denominators; std::vector NumeratorsF; std::vector DenominatorsF; - std::vector NumeratorsD2; - std::vector DenominatorsD2; std::vector *> Quotients; std::vector ActionMPCG; std::vector MPCG; #define MIXED_PRECISION #ifdef MIXED_PRECISION - std::vector *> Bdys; + std::vector *> Bdys; #else - std::vector *> Bdys; + std::vector *> Bdys; #endif typedef SchurDiagMooeeOperator LinearOperatorF; @@ -532,31 +526,19 @@ int main(int argc, char **argv) { Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],CG)); } else { #ifdef MIXED_PRECISION - // Use the D2 data types and make them use same grid as single - FermionActionD2::ImplParams ParamsDenD2(boundary); - FermionActionD2::ImplParams 
ParamsNumD2(boundary); - - ParamsDenD2.dirichlet = ParamsDen.dirichlet; - ParamsDenD2.partialDirichlet = ParamsDen.partialDirichlet; - DenominatorsD2.push_back(new FermionActionD2(UD2,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsDenD2)); - - ParamsNumD2.dirichlet = ParamsNum.dirichlet; - ParamsNumD2.partialDirichlet = ParamsNum.partialDirichlet; - NumeratorsD2.push_back (new FermionActionD2(UD2,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_num[h],M5,b,c, ParamsNumD2)); - - Bdys.push_back( new OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction( + Bdys.push_back( new GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction( *Numerators[h],*Denominators[h], *NumeratorsF[h],*DenominatorsF[h], - *NumeratorsD2[h],*DenominatorsD2[h], - OFRp, 400) ); - Bdys.push_back( new OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction( + *Numerators[h],*Denominators[h], + OFRp, SP_iters) ); + Bdys.push_back( new GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction( *Numerators[h],*Denominators[h], *NumeratorsF[h],*DenominatorsF[h], - *NumeratorsD2[h],*DenominatorsD2[h], - OFRp, 400) ); + *Numerators[h],*Denominators[h], + OFRp, SP_iters) ); #else - Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); - Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); + Bdys.push_back( new GeneralEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); + Bdys.push_back( new GeneralEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); #endif } } diff --git a/HMC/Mobius2p1f_EOFA_96I_hmc.cc b/HMC/Mobius2p1f_EOFA_96I_hmc.cc index d27d558e..6e7fb3cd 100644 --- a/HMC/Mobius2p1f_EOFA_96I_hmc.cc +++ b/HMC/Mobius2p1f_EOFA_96I_hmc.cc @@ -183,7 +183,7 @@ int main(int argc, char **argv) { // 4/2 => 0.6 dH // 3/3 => 0.8 dH .. 
depth 3, slower //MD.MDsteps = 4; - MD.MDsteps = 3; + MD.MDsteps = 12; MD.trajL = 0.5; HMCparameters HMCparams; @@ -200,8 +200,8 @@ int main(int argc, char **argv) { TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition CheckpointerParameters CPparams; - CPparams.config_prefix = "ckpoint_DDHMC_lat"; - CPparams.rng_prefix = "ckpoint_DDHMC_rng"; + CPparams.config_prefix = "ckpoint_HMC_lat"; + CPparams.rng_prefix = "ckpoint_HMC_rng"; CPparams.saveInterval = 1; CPparams.format = "IEEE64BIG"; TheHMC.Resources.LoadNerscCheckpointer(CPparams); @@ -228,7 +228,7 @@ int main(int argc, char **argv) { Real pv_mass = 1.0; // std::vector hasenbusch({ 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // std::vector hasenbusch({ light_mass, 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass }); - std::vector hasenbusch({ light_mass, 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated + std::vector hasenbusch({ 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated // std::vector hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass }); auto GridPtr = TheHMC.Resources.GetCartesian(); @@ -299,8 +299,8 @@ int main(int argc, char **argv) { //////////////////////////////////// // Collect actions //////////////////////////////////// - ActionLevel Level1(1); - ActionLevel Level2(3); + // ActionLevel Level1(1); + ActionLevel Level2(1); ActionLevel Level3(15); //////////////////////////////////// @@ -369,7 +369,7 @@ int main(int argc, char **argv) { ActionCGL, ActionCGR, DerivativeCGL, DerivativeCGR, SFRp, true); - // Level2.push_back(&EOFA); + Level2.push_back(&EOFA); //////////////////////////////////// // up down action @@ -477,7 +477,7 @@ int main(int argc, char **argv) { // Gauge action ///////////////////////////////////////////////////////////// Level3.push_back(&GaugeAction); - TheHMC.TheAction.push_back(Level1); + // TheHMC.TheAction.push_back(Level1); TheHMC.TheAction.push_back(Level2); TheHMC.TheAction.push_back(Level3); std::cout << GridLogMessage << " Action complete "<< std::endl; diff --git a/TODO b/TODO index 380af2b7..750deb55 100644 --- a/TODO +++ b/TODO @@ -1,3 +1,12 @@ +- - Slice sum optimisation & A2A - atomic addition +- - Also faster non-atomic reduction +- - Remaining PRs +- - DDHMC + - - MixedPrec is the action eval, high precision + - - MixedPrecCleanup is the force eval, low precision + +================= +================= Lattice_basis.h -- > HIP and SYCL GPU code @@ -8,6 +17,7 @@ DDHMC -- Multishift Mixed Precision - DONE -- Pole dependent residual - DONE + ======= -- comms threads issue?? -- Part done: Staggered kernel performance on GPU diff --git a/benchmarks/Benchmark_prec_change.cc b/benchmarks/Benchmark_prec_change.cc new file mode 100644 index 00000000..ba34f87e --- /dev/null +++ b/benchmarks/Benchmark_prec_change.cc @@ -0,0 +1,189 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./benchmarks/Benchmark_prec_change.cc + + Copyright (C) 2015 + +Author: Christopher Kelly +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + int Ls = 12; + Coordinate latt4 = GridDefaultLatt(); + + GridCartesian * UGridD = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGridD = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridD); + GridCartesian * FGridD = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridD); + GridRedBlackCartesian * FrbGridD = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridD); + + GridCartesian * UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF); + GridCartesian * FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF); + GridRedBlackCartesian * FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF); + + + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + + std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl; + GridParallelRNG RNG4(UGridD); RNG4.SeedFixedIntegers(seeds4); + std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl; + GridParallelRNG RNG5(FGridD); RNG5.SeedFixedIntegers(seeds5); + std::cout << GridLogMessage << "Initialised RNGs" << std::endl; + + LatticeFermionD field_d(FGridD), tmp_d(FGridD); + random(RNG5,field_d); tmp_d = field_d; + + LatticeFermionD2 field_d2(FGridF), tmp_d2(FGridF); + precisionChange(field_d2, field_d); tmp_d2 = field_d2; + + LatticeFermionF field_f(FGridF), tmp_f(FGridF); + precisionChange(field_f, field_d); tmp_f = field_f; + + int N = 500; + + double time_ds = 0, time_sd = 0; + + std::cout<double original implementation (fields initially device-resident)" << std::endl; + for(int i=0;is " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl; + + + precisionChangeWorkspace wk_sp_to_dp(field_d.Grid(),field_f.Grid()); + precisionChangeWorkspace wk_dp_to_sp(field_f.Grid(),field_d.Grid()); + + std::cout<double with pregenerated workspace(fields initially device-resident)" << std::endl; + time_sd = time_ds = 0; + for(int i=0;is " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl; + + std::cout<double with workspace generated on-the-fly (fields initially device-resident)" << std::endl; + time_sd = time_ds = 0; + for(int i=0;is " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl; + + + std::cout<double2 (fields initially device-resident)" << std::endl; + time_sd = time_ds = 0; + for(int i=0;is " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl; + + + std::cout<double2 through standard precisionChange call(fields initially device-resident) [NB: perf should be the same as the previous test!]" << std::endl; + time_sd = time_ds = 0; + for(int i=0;is " << time_ds/N << "us" << " s->d " << time_sd/N 
<< "us" << std::endl; + + Grid_finalize(); +} diff --git a/systems/Crusher/config-command b/systems/Crusher/config-command index 3965767f..d310ff55 100644 --- a/systems/Crusher/config-command +++ b/systems/Crusher/config-command @@ -1,12 +1,13 @@ CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-` ../../configure --enable-comms=mpi-auto \ --with-lime=$CLIME \ ---enable-unified=no \ +--enable-unified=yes \ --enable-shm=nvlink \ --enable-tracing=timer \ --enable-accelerator=hip \ --enable-gen-simd-width=64 \ --enable-simd=GPU \ +--disable-accelerator-cshift \ --with-gmp=$OLCF_GMP_ROOT \ --with-fftw=$FFTW_DIR/.. \ --with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \ diff --git a/systems/PVC/benchmarks/run-2tile-mpi.sh b/systems/PVC/benchmarks/run-2tile-mpi.sh index cefab776..decbb6cd 100755 --- a/systems/PVC/benchmarks/run-2tile-mpi.sh +++ b/systems/PVC/benchmarks/run-2tile-mpi.sh @@ -23,12 +23,7 @@ export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=1 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0 -for i in 0 -do -mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads $NT --shm-mpi 1 --device-mem 32768 -mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --shm-mpi 1 --device-mem 32768 -done -#mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_halo --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads $NT --shm-mpi 1 > halo.2tile.1x2.log -#mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_halo --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --shm-mpi 1 > halo.2tile.2x1.log +#mpiexec -launcher ssh -n 1 -host localhost ./wrap.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 32.32.32.32 --accelerator-threads $NT --comms-sequential --shm-mpi 0 > 1tile.log +mpiexec -launcher ssh -n 2 -host localhost ./wrap.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --comms-sequential --shm-mpi 0 diff --git a/systems/PVC/config-command b/systems/PVC/config-command index f6d83f6d..c3523c2d 100644 --- a/systems/PVC/config-command +++ b/systems/PVC/config-command @@ -14,4 +14,3 @@ INSTALL=/nfs/site/home/paboylx/prereqs/ LDFLAGS="-fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -L$INSTALL/lib" \ CXXFLAGS="-fsycl-unnamed-lambda -fsycl -no-fma -I$INSTALL/include -Wno-tautological-compare" - diff --git a/tests/core/Test_prec_change.cc b/tests/core/Test_prec_change.cc new file mode 100644 index 00000000..06b9ae5c --- /dev/null +++ b/tests/core/Test_prec_change.cc @@ -0,0 +1,124 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/core/Test_prec_change.cc + + Copyright (C) 2015 + +Author: Christopher Kelly +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+    /* END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace std;
+using namespace Grid;
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  int Ls = 12;
+  Coordinate latt4 = GridDefaultLatt();
+
+  GridCartesian * UGridD = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * UrbGridD = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridD);
+  GridCartesian * FGridD = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridD);
+  GridRedBlackCartesian * FrbGridD = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridD);
+
+  GridCartesian * UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF);
+  GridCartesian * FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF);
+  GridRedBlackCartesian * FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF);
+
+
+  std::vector<int> seeds4({1,2,3,4});
+  std::vector<int> seeds5({5,6,7,8});
+
+  std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
+  GridParallelRNG RNG5(FGridD);  RNG5.SeedFixedIntegers(seeds5);
+  GridParallelRNG RNG5F(FGridF); RNG5F.SeedFixedIntegers(seeds5);
+  std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
+
+  LatticeFermionD field_d(FGridD), tmp_d(FGridD);
+  random(RNG5,field_d);
+  RealD norm2_field_d = norm2(field_d);
+
+  LatticeFermionD2 field_d2(FGridF), tmp_d2(FGridF);
+  random(RNG5F,field_d2);
+  RealD norm2_field_d2 = norm2(field_d2);
+
+  LatticeFermionF field_f(FGridF);
+
+  //Test original implementation
+  {
+    std::cout << GridLogMessage << "Testing original implementation" << std::endl;
+    field_f = Zero();
+    precisionChangeOrig(field_f,field_d);
+    RealD Ndiff = (norm2_field_d - norm2(field_f))/norm2_field_d;
+    std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of single and double prec fields differs by " << Ndiff << std::endl;
+    tmp_d = Zero();
+    precisionChangeOrig(tmp_d, field_f);
+    Ndiff = norm2( LatticeFermionD(tmp_d-field_d) ) / norm2_field_d;
+    std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of back-converted and original double prec fields differs by " << Ndiff << std::endl;
+  }
+  //Test new implementation with pregenerated workspace
+  {
+    std::cout << GridLogMessage << "Testing new implementation with pregenerated workspace" << std::endl;
+    precisionChangeWorkspace wk_sp_to_dp(field_d.Grid(),field_f.Grid());
+    precisionChangeWorkspace wk_dp_to_sp(field_f.Grid(),field_d.Grid());
+
+    field_f = Zero();
+    precisionChange(field_f,field_d,wk_dp_to_sp);
+    RealD Ndiff = (norm2_field_d - norm2(field_f))/norm2_field_d;
+    std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of single and double prec fields differs by " << Ndiff << std::endl;
+    tmp_d = Zero();
+    precisionChange(tmp_d, field_f,wk_sp_to_dp);
+    Ndiff = norm2( LatticeFermionD(tmp_d-field_d) ) / norm2_field_d;
+    std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ?
"!!FAIL" : "Pass") << ": relative norm2 of back-converted and original double prec fields differs by " << Ndiff << std::endl; + } + //Test new implementation without pregenerated workspace + { + std::cout << GridLogMessage << "Testing new implementation without pregenerated workspace" << std::endl; + field_f = Zero(); + precisionChange(field_f,field_d); + RealD Ndiff = (norm2_field_d - norm2(field_f))/norm2_field_d; + std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of single and double prec fields differs by " << Ndiff << std::endl; + tmp_d = Zero(); + precisionChange(tmp_d, field_f); + Ndiff = norm2( LatticeFermionD(tmp_d-field_d) ) / norm2_field_d; + std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of back-converted and original double prec fields differs by " << Ndiff << std::endl; + } + //Test fast implementation + { + std::cout << GridLogMessage << "Testing fast (double2) implementation" << std::endl; + field_f = Zero(); + precisionChangeFast(field_f,field_d2); + RealD Ndiff = (norm2_field_d2 - norm2(field_f))/norm2_field_d2; + std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of single and double prec fields differs by " << Ndiff << std::endl; + tmp_d2 = Zero(); + precisionChangeFast(tmp_d2, field_f); + Ndiff = norm2( LatticeFermionD2(tmp_d2-field_d2) ) / norm2_field_d2; + std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of back-converted and original double prec fields differs by " << Ndiff << std::endl; + } + std::cout << "Done" << std::endl; + + Grid_finalize(); +} diff --git a/tests/forces/Test_bdy.cc b/tests/forces/Test_bdy.cc new file mode 100644 index 00000000..c2c97d0d --- /dev/null +++ b/tests/forces/Test_bdy.cc @@ -0,0 +1,305 @@ +/* + + 2f Full det MdagM 10^6 force ~ 1.3e7 +rid : Message : 1767.283471 s : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Grid : Message : 1767.283476 s : S1 : 1.52885e+09 +Grid : Message : 1767.283480 s : S2 : 1.52886e+09 +Grid : Message : 1767.283482 s : dS : 8877.34 +Grid : Message : 1767.283483 s : dSpred : 8877.7 +Grid : Message : 1767.283484 s : diff : -0.360484 +Grid : Message : 1767.283485 s : ********************************************************* + + 2f Full det MpcdagMpc 10^6 force ~ 1.8e6 +Grid : Message : 2399.576962 s : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Grid : Message : 2399.576968 s : S1 : 1.52885e+09 +Grid : Message : 2399.576972 s : S2 : 1.52886e+09 +Grid : Message : 2399.576974 s : dS : 9728.49 +Grid : Message : 2399.576975 s : dSpred : 9726.58 +Grid : Message : 2399.576976 s : diff : 1.90683 +Grid : Message : 2399.576977 s : ********************************************************* + + 2f bdy MdagM 1500 force Force ~ 2800 +Grid : Message : 4622.385061 s : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Grid : Message : 4622.385067 s : S1 : 1.52885e+09 +Grid : Message : 4622.385071 s : S2 : 1.52885e+09 +Grid : Message : 4622.385072 s : dS : 25.4944 +Grid : Message : 4622.385073 s : dSpred : 25.4672 +Grid : Message : 4622.385074 s : diff : 0.0271414 +Grid : Message : 4622.385075 s : ********************************************************* + + 2f bdy MpcdagMpc 10^6 force Force ~ 2200 +Grid : Message : 4622.385061 s : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Grid : Message : 4622.385067 s : S1 : 1.52885e+09 +Grid : Message : 4622.385071 s : S2 : 1.52885e+09 +Grid : Message : 4622.385072 
s : dS : 25.4944
+Grid : Message : 4622.385073 s : dSpred : 25.4672
+Grid : Message : 4622.385074 s : diff : 0.0271414
+Grid : Message : 4622.385075 s : *********************************************************
+
+ 1f Bdy Det
+
+ Optimisation log: looser rational AND MD tolerances sloppy
+MobiusForce.221179 -- same as HMC. dS is mispredicted. Force ~2.8
+Grid : Message : 6582.258991 s : dS : 0.024478
+Grid : Message : 6582.258992 s : dSpred : 0.00791876
+Grid : Message : 6582.258994 s : diff : 0.0165592
+
+MobiusForce.221193 -- tight rational AND MD tolerances to 1e-8 ~ 2.8 same
+Grid : Message : 1964.939209 s : S1 : 7.64404e+08
+Grid : Message : 1964.939213 s : S2 : 7.64404e+08
+Grid : Message : 1964.939215 s : dS : -0.00775838  <--- too loose even on action
+Grid : Message : 1964.939216 s : dSpred : -0.00416793
+Grid : Message : 1964.939217 s : diff : -0.00359045
+
+MobiusForce.221394 -- looser rational, MD tol 1e-8 ~ 2.8 same
+Grid : Message : 1198.346720 s : S1 : 764404649.48886
+Grid : Message : 1198.346760 s : S2 : 764404649.5133
+Grid : Message : 1198.346780 s : dS : 0.024440884590149
+Grid : Message : 1198.346800 s : dSpred : 0.0079145154465184
+Grid : Message : 1198.346810 s : diff : 0.016526369143631
+
+MobiusForce.221394 -- tight rational, MD tol sloppy Force ~ 2.8
+Grid : Message : 2376.921950 s : S1 : 764404436.44069
+Grid : Message : 2376.921954 s : S2 : 764404436.43299
+Grid : Message : 2376.921956 s : dS : -0.0076971054077148
+Grid : Message : 2376.921958 s : dSpred : -0.0041610472282526
+Grid : Message : 2376.921959 s : diff : -0.0035360581794623
+
+*/
+
+//
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid
+
+    Source file: ./tests/forces/Test_bdy.cc
+
+    Copyright (C) 2022
+
+Author: Peter Boyle
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
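The dS/dSpred/diff numbers quoted in the log above come from the finite-step force check implemented in ForceTest below: the action is evaluated before and after a small momentum-driven update of the gauge links, and the measured change is compared with the prediction built from the analytic derivative. Schematically, with step size eps,

    dS     = S2 - S1,  where S2 is the action after moving each link along eps*P_mu
    dSpred = sum_mu Re tr( P_mu * (U dS/dU)_mu ) * eps * 2 * 2

so a small diff = dS - dSpred signals that the force routine is consistent with the action; the two factors of 2 match the normalisation used in the trace accumulation in the code below.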
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; + +typedef MobiusFermionD FermionAction; +typedef WilsonImplD FimplD; +typedef WilsonImplD FermionImplPolicy; + +template +void ForceTest(Action &action,LatticeGaugeField & U,MomentumFilterBase &Filter) +{ + GridBase *UGrid = U.Grid(); + + std::vector seeds({1,2,3,5}); + GridSerialRNG sRNG; sRNG.SeedFixedIntegers(seeds); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds); + + LatticeColourMatrix Pmu(UGrid); + LatticeGaugeField P(UGrid); + LatticeGaugeField UdSdU(UGrid); + + std::cout << GridLogMessage << "*********************************************************"<(UdSdU,mu); + Pmu= PeekIndex(P,mu); + dS = dS - trace(Pmu*UdSdUmu)*eps*2.0*2.0; + } + ComplexD dSpred = sum(dS); + RealD diff = S2-S1-dSpred.real(); + + std::cout<< GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<1 ? 1 : 0; + + Coordinate NonDirichlet(Nd+1,0); + Coordinate Dirichlet(Nd+1,0); + Dirichlet[1] = CommDim[0]*latt_size[0]/mpi_layout[0] * shm[0]; + Dirichlet[2] = CommDim[1]*latt_size[1]/mpi_layout[1] * shm[1]; + Dirichlet[3] = CommDim[2]*latt_size[2]/mpi_layout[2] * shm[2]; + Dirichlet[4] = CommDim[3]*latt_size[3]/mpi_layout[3] * shm[3]; + + Coordinate Block4(Nd); + Block4[0] = Dirichlet[1]; + Block4[1] = Dirichlet[2]; + Block4[2] = Dirichlet[3]; + Block4[3] = Dirichlet[4]; + + std::vector boundary = {1,1,1,-1}; + FermionAction::ImplParams Params(boundary); + FermionAction::ImplParams ParamsDir(boundary); + Params.dirichlet=NonDirichlet; + ParamsDir.dirichlet=Dirichlet; + ParamsDir.partialDirichlet=1; + + ///////////////////// Gauge Field and Gauge Forces //////////////////////////// + LatticeGaugeField U(UGrid); + + RealD beta=6.0; + WilsonGaugeActionR PlaqAction(beta); + IwasakiGaugeActionR RectAction(beta); + + MomentumFilterNone FilterNone; + ForceTest(PlaqAction,U,FilterNone); + ForceTest(RectAction,U,FilterNone); + + //////////////////////////////////// + // Action + //////////////////////////////////// + RealD mass=0.00078; + RealD pvmass=1.0; + RealD M5=1.8; + RealD b=1.5; + RealD c=0.5; + + // Double versions + FermionAction DdwfPeriodic(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,Params); + FermionAction PVPeriodic (U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,pvmass,M5,b,c,Params); + FermionAction DdwfDirichlet(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,ParamsDir); + + double StoppingCondition = 1.0e-8; + double MaxCGIterations = 50000; + ConjugateGradient CG(StoppingCondition,MaxCGIterations); + + //////////////////// Two Flavour Determinant Ratio /////////////////////////////// + TwoFlavourRatioPseudoFermionAction Nf2(PVPeriodic, DdwfPeriodic,CG,CG); + // ForceTest(Nf2,U,FilterNone); + + //////////////////// Two Flavour Determinant force test Even Odd /////////////////////////////// + TwoFlavourEvenOddRatioPseudoFermionAction Nf2eo(PVPeriodic, DdwfPeriodic,CG,CG); + // ForceTest(Nf2eo,U,FilterNone); + + //////////////////// Domain forces //////////////////// + int Width=4; + DDHMCFilter DDHMCFilter(Block4,Width); + + //////////////////// Two flavour boundary det //////////////////// + TwoFlavourRatioPseudoFermionAction BdyNf2(DdwfDirichlet, DdwfPeriodic,CG,CG); + // ForceTest(BdyNf2,U,DDHMCFilter); + + //////////////////// Two flavour eo boundary det //////////////////// + TwoFlavourEvenOddRatioPseudoFermionAction 
BdyNf2eo(DdwfDirichlet, DdwfPeriodic,CG,CG); + // ForceTest(BdyNf2eo,U,DDHMCFilter); + + //////////////////// One flavour boundary det //////////////////// + OneFlavourRationalParams OFRp; // Up/down + OFRp.lo = 4.0e-5; + OFRp.hi = 90.0; + OFRp.MaxIter = 60000; + OFRp.tolerance= 1.0e-8; + OFRp.mdtolerance= 1.0e-6; + OFRp.degree = 18; + OFRp.precision= 80; + OFRp.BoundsCheckFreq=0; + std::vector ActionTolByPole({ + 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8,1.0e-8,1.0e-8 + }); + std::vector MDTolByPole({ + 1.0e-6,3.0e-7,1.0e-7,1.0e-7, // Orig sloppy + // 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8,1.0e-8,1.0e-8 + }); + OneFlavourEvenOddRatioRationalPseudoFermionAction BdySqrt(DdwfDirichlet,DdwfPeriodic,OFRp); + ForceTest(BdySqrt,U,DDHMCFilter); + + Grid_finalize(); +} diff --git a/tests/forces/Test_double_ratio.cc b/tests/forces/Test_double_ratio.cc index 0a350692..dc6713f9 100644 --- a/tests/forces/Test_double_ratio.cc +++ b/tests/forces/Test_double_ratio.cc @@ -476,6 +476,20 @@ int main (int argc, char ** argv) // ForceTest(BdyNf2eo,U,DDHMCFilter); //////////////////// One flavour boundary det //////////////////// + RationalActionParams OFRp; // Up/down + OFRp.lo = 6.0e-5; + OFRp.hi = 90.0; + OFRp.inv_pow = 2; + OFRp.MaxIter = SP_iters; // get most shifts by 2000, stop sharing space + OFRp.action_tolerance= 1.0e-8; + OFRp.action_degree = 18; + OFRp.md_tolerance= 1.0e-5; + OFRp.md_degree = 14; + // OFRp.degree = 20; converges + // OFRp.degree = 16; + OFRp.precision= 80; + OFRp.BoundsCheckFreq=0; + /* OneFlavourRationalParams OFRp; // Up/down OFRp.lo = 4.0e-5; OFRp.hi = 90.0; @@ -485,6 +499,23 @@ int main (int argc, char ** argv) OFRp.degree = 18; OFRp.precision= 80; OFRp.BoundsCheckFreq=0; + */ + std::vector ActionTolByPole({ + 1.0e-7,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8 + }); + std::vector MDTolByPole({ + 1.6e-5,5.0e-6,1.0e-6,3.0e-7, // soften convergence more more + // 1.0e-6,3.0e-7,1.0e-7,1.0e-7, + // 3.0e-6,1.0e-6,1.0e-7,1.0e-7, // soften convergence + 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8,1.0e-8,1.0e-8, + 1.0e-8,1.0e-8 + }); + /* std::vector ActionTolByPole({ 1.0e-8,1.0e-8,1.0e-8,1.0e-8, 1.0e-8,1.0e-8,1.0e-8,1.0e-8, @@ -499,9 +530,9 @@ int main (int argc, char ** argv) // 1.0e-8,1.0e-8,1.0e-8,1.0e-8, 1.0e-8,1.0e-8,1.0e-8,1.0e-8, 1.0e-8,1.0e-8,1.0e-8,1.0e-8, - 1.0e-8,1.0e-8,1.0e-8,1.0e-8, 1.0e-8,1.0e-8 }); + */ OneFlavourEvenOddRatioRationalPseudoFermionAction BdySqrt(DdwfDirichlet,DdwfPeriodic,OFRp); BdySqrt.SetTolerances(ActionTolByPole,MDTolByPole); ForceTest(BdySqrt,U,DDHMCFilter); diff --git a/tests/lanczos/Test_dwf_block_lanczos.README b/tests/lanczos/Test_dwf_block_lanczos.README new file mode 100644 index 00000000..179f9037 --- /dev/null +++ b/tests/lanczos/Test_dwf_block_lanczos.README @@ -0,0 +1,73 @@ +#Example script +DIR=/gpfs/alpine/phy157/proj-shared/phy157dwf/chulwoo/Grid/BL/build/tests/lanczos +BIN=${DIR}/Test_dwf_block_lanczos + +VOL='--grid 16.16.16.32 ' +GRID='--mpi 1.1.1.4 ' +CONF='--gconf ckpoint_lat.IEEE64BIG.2000 ' +OPT='--mass 0.01 --M5 1.8 --phase in.params --omega in.params --shm 4096' +#BL='--rbl 16.1024.128.1000.10 --split 1.1.4.4 --check_int 100 --resid 1.0e-5 --cheby_l 0.007 --cheby_u 7 --cheby_n 51' +BL='--rbl 4.128.16.100.10 --split 1.1.1.4 --check_int 25 --resid 1.0e-5 --cheby_l 0.007 --cheby_u 7 --cheby_n 51' + +ARGS=${CONF}" "${OPT}" "${BL}" "${VOL}" "${GRID} +export APP="${BIN} ${ARGS}" 
+echo APP=${APP} +#export JS="jsrun --nrs 32 -a4 -g4 -c42 -dpacked -b packed:7 --smpiargs="-gpu" " +export JS="jsrun --nrs 1 -a4 -g4 -c42 -dpacked -b packed:10 --smpiargs="-gpu" " +$JS $APP + +#sample in.param + +boundary_phase 0 1 0 +boundary_phase 1 1 0 +boundary_phase 2 1 0 +boundary_phase 3 -1 0 + +omega 0 0.5 0 +omega 1 0.5 0 +omega 2 0.5 0 +omega 3 0.5 0 +omega 4 0.5 0 +omega 5 0.5 0 +omega 6 0.5 0 +omega 7 0.5 0 +omega 8 0.5 0 +omega 9 0.5 0 +omega 10 0.5 0 +omega 11 0.5 0 + + +#output + +Grid : Message : 1.717474 s : Gauge Configuration ckpoint_lat.IEEE64BIG.2000 +Grid : Message : 1.717478 s : boundary_phase[0] = (1,0) +Grid : Message : 1.717497 s : boundary_phase[1] = (1,0) +Grid : Message : 1.717500 s : boundary_phase[2] = (1,0) +Grid : Message : 1.717503 s : boundary_phase[3] = (-1,0) +Grid : Message : 1.717506 s : Ls 12 +Grid : Message : 1.717507 s : mass 0.01 +Grid : Message : 1.717510 s : M5 1.8 +Grid : Message : 1.717512 s : mob_b 1.5 +Grid : Message : 1.717514 s : omega[0] = (0.5,0) +Grid : Message : 1.717517 s : omega[1] = (0.5,0) +Grid : Message : 1.717520 s : omega[2] = (0.5,0) +Grid : Message : 1.717523 s : omega[3] = (0.5,0) +Grid : Message : 1.717526 s : omega[4] = (0.5,0) +Grid : Message : 1.717529 s : omega[5] = (0.5,0) +Grid : Message : 1.717532 s : omega[6] = (0.5,0) +Grid : Message : 1.717535 s : omega[7] = (0.5,0) +Grid : Message : 1.717538 s : omega[8] = (0.5,0) +Grid : Message : 1.717541 s : omega[9] = (0.5,0) +Grid : Message : 1.717544 s : omega[10] = (0.5,0) +Grid : Message : 1.717547 s : omega[11] = (0.5,0) +Grid : Message : 1.717550 s : Nu 4 +Grid : Message : 1.717551 s : Nk 128 +Grid : Message : 1.717552 s : Np 16 +Grid : Message : 1.717553 s : Nm 288 +Grid : Message : 1.717554 s : Nstop 100 +Grid : Message : 1.717555 s : Ntest 25 +Grid : Message : 1.717557 s : MaxIter 10 +Grid : Message : 1.717558 s : resid 1e-05 +Grid : Message : 1.717560 s : Cheby Poly 0.007,7,51 + + diff --git a/tests/lanczos/Test_dwf_block_lanczos.cc b/tests/lanczos/Test_dwf_block_lanczos.cc new file mode 100644 index 00000000..671f2fa6 --- /dev/null +++ b/tests/lanczos/Test_dwf_block_lanczos.cc @@ -0,0 +1,410 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_block_lanczos.cc + + Copyright (C) 2022 + +Author: Peter Boyle +Author: Yong-Chull Jang +Author: Chulwoo Jung + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
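A note on the block-Lanczos geometry options documented in the README above: the --rbl payload packs Nu.Nk.Np.Nstop.MaxIter, and in this mode the Krylov space is enlarged by Np vectors on every restart, so the total basis size is Nm = Nk + Np*MaxIter (the --irbl mode instead keeps a fixed window, Nm = Nk + Np; see the parser further below). For the sample invocation --rbl 4.128.16.100.10 this gives

    Nm = 128 + 16*10 = 288

which matches the "Nm 288" line in the sample output.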
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include +#include + +using namespace std; +using namespace Grid; +//using namespace Grid::QCD; + +//typedef typename GparityDomainWallFermionR::FermionField FermionField; +typedef typename ZMobiusFermionF::FermionField FermionField; + +RealD AllZero(RealD x){ return 0.;} + +class CmdJobParams +{ + public: + std::string gaugefile; + + int Ls; + double mass; + double M5; + double mob_b; + std::vector omega; + std::vector boundary_phase; + std::vector mpi_split; + + LanczosType Impl; + int Nu; + int Nk; + int Np; + int Nm; + int Nstop; + int Ntest; + int MaxIter; + double resid; + + double low; + double high; + int order; + + CmdJobParams() + : gaugefile("Hot"), + Ls(8), mass(0.01), M5(1.8), mob_b(1.5), + Impl(LanczosType::irbl),mpi_split(4,1), + Nu(4), Nk(200), Np(200), Nstop(100), Ntest(1), MaxIter(10), resid(1.0e-8), + low(0.2), high(5.5), order(11) + {Nm=Nk+Np;}; + + void Parse(char **argv, int argc); +}; + + +void CmdJobParams::Parse(char **argv,int argc) +{ + std::string arg; + std::vector vi; + double re,im; + int expect, idx; + std::string vstr; + std::ifstream pfile; + + if( GridCmdOptionExists(argv,argv+argc,"--gconf") ){ + gaugefile = GridCmdOptionPayload(argv,argv+argc,"--gconf"); + } + + if( GridCmdOptionExists(argv,argv+argc,"--phase") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--phase"); + pfile.open(arg); + assert(pfile); + expect = 0; + while( pfile >> vstr ) { + if ( vstr.compare("boundary_phase") == 0 ) { + pfile >> vstr; + GridCmdOptionInt(vstr,idx); + assert(expect==idx); + pfile >> vstr; + GridCmdOptionFloat(vstr,re); + pfile >> vstr; + GridCmdOptionFloat(vstr,im); + boundary_phase.push_back({re,im}); + expect++; + } + } + pfile.close(); + } else { + for (int i=0; i<4; ++i) boundary_phase.push_back({1.,0.}); + } + + if( GridCmdOptionExists(argv,argv+argc,"--omega") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--omega"); + pfile.open(arg); + assert(pfile); + Ls = 0; + while( pfile >> vstr ) { + if ( vstr.compare("omega") == 0 ) { + pfile >> vstr; + GridCmdOptionInt(vstr,idx); + assert(Ls==idx); + pfile >> vstr; + GridCmdOptionFloat(vstr,re); + pfile >> vstr; + GridCmdOptionFloat(vstr,im); + omega.push_back({re,im}); + Ls++; + } + } + pfile.close(); + } else { + if( GridCmdOptionExists(argv,argv+argc,"--Ls") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--Ls"); + GridCmdOptionInt(arg,Ls); + } + } + + if( GridCmdOptionExists(argv,argv+argc,"--mass") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--mass"); + GridCmdOptionFloat(arg,mass); + } + + if( GridCmdOptionExists(argv,argv+argc,"--M5") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--M5"); + GridCmdOptionFloat(arg,M5); + } + + if( GridCmdOptionExists(argv,argv+argc,"--mob_b") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--mob_b"); + GridCmdOptionFloat(arg,mob_b); + } + + if( GridCmdOptionExists(argv,argv+argc,"--irbl") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--irbl"); + GridCmdOptionIntVector(arg,vi); + Nu = vi[0]; + Nk = vi[1]; + Np = vi[2]; + Nstop = vi[3]; + MaxIter = vi[4]; + // ypj[fixme] mode overriding message is needed. 
+ Impl = LanczosType::irbl; + Nm = Nk+Np; + } + + // block Lanczos with explicit extension of its dimensions + if( GridCmdOptionExists(argv,argv+argc,"--rbl") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--rbl"); + GridCmdOptionIntVector(arg,vi); + Nu = vi[0]; + Nk = vi[1]; + Np = vi[2]; // vector space is enlarged by adding Np vectors + Nstop = vi[3]; + MaxIter = vi[4]; + // ypj[fixme] mode overriding message is needed. + Impl = LanczosType::rbl; + Nm = Nk+Np*MaxIter; + } + +#if 1 + // block Lanczos with explicit extension of its dimensions + if( GridCmdOptionExists(argv,argv+argc,"--split") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--split"); + GridCmdOptionIntVector(arg,vi); + for(int i=0;i seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGridF); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + // ypj [note] why seed RNG5 again? bug? In this case, run with a default seed(). + GridParallelRNG RNG5rb(FrbGridF); RNG5rb.SeedFixedIntegers(seeds5); + + LatticeGaugeField Umu(UGrid); + std::vector U(4,UGrid); + LatticeGaugeFieldF UmuF(UGridF); + std::vector UF(4,UGridF); + + if ( JP.gaugefile.compare("Hot") == 0 ) { + SU3::HotConfiguration(RNG4, Umu); + } else { + FieldMetaData header; + NerscIO::readConfiguration(Umu,header,JP.gaugefile); + // ypj [fixme] additional checks for the loaded configuration? + } + precisionChange (UmuF,Umu); + + for(int mu=0;mu(Umu,mu); + } + + RealD mass = JP.mass; + RealD M5 = JP.M5; + +// ypj [fixme] flexible support for a various Fermions +// RealD mob_b = JP.mob_b; // Gparity +// std::vector omega; // ZMobius + +// GparityMobiusFermionD ::ImplParams params; +// std::vector twists({1,1,1,0}); +// params.twists = twists; +// GparityMobiusFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params); +// SchurDiagTwoOperator HermOp(Ddwf); + + +// int mrhs = JP.Nu; + int Ndir=4; + auto mpi_layout = GridDefaultMpi(); + std::vector mpi_split (Ndir,1); +#if 0 + int tmp=mrhs, dir=0; + std::cout << GridLogMessage << "dir= "<_processor,re); + src_tmp=re; + pickCheckerboard(Odd,src[i],src_tmp); + } + RNG5.Report(); +} else { + std::cout << GridLogMessage << "Using RNG5rb"< evec(JP.Nm,FrbGridF); + for(int i=0;i<1;++i){ + std::cout << GridLogMessage << i <<" / "<< JP.Nm <<" grid pointer "<< evec[i].Grid() << std::endl; + }; + + int Nconv; + IRBL.calc(eval,evec,src,Nconv,JP.Impl); + + + Grid_finalize(); +} diff --git a/tests/lanczos/Test_dwf_block_lanczos.cc.double b/tests/lanczos/Test_dwf_block_lanczos.cc.double new file mode 100644 index 00000000..c71b80ec --- /dev/null +++ b/tests/lanczos/Test_dwf_block_lanczos.cc.double @@ -0,0 +1,401 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_block_lanczos.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
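Worth noting for the memory budget of the runs set up above: the eigenvector array holds Nm checkerboarded single-precision 5d fermion fields. Taking the README example (--grid 16.16.16.32, Ls = 12, Nm = 288) and the usual 4 spin x 3 colour = 12 complex numbers per site, each field costs roughly

    (16^3 * 32 * 12)/2 sites * 12 complex * 8 bytes ~ 75 MB

so the full basis is of order 288 * 75 MB ~ 22 GB across the job, a scale consistent with the sample script distributing it over four ranks.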
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include +#include + +using namespace std; +using namespace Grid; +//using namespace Grid::QCD; + +//typedef typename GparityDomainWallFermionR::FermionField FermionField; +typedef typename ZMobiusFermionR::FermionField FermionField; + +RealD AllZero(RealD x){ return 0.;} + +class CmdJobParams +{ + public: + std::string gaugefile; + + int Ls; + double mass; + double M5; + double mob_b; + std::vector omega; + std::vector boundary_phase; + std::vector mpi_split; + + LanczosType Impl; + int Nu; + int Nk; + int Np; + int Nm; + int Nstop; + int Ntest; + int MaxIter; + double resid; + + double low; + double high; + int order; + + CmdJobParams() + : gaugefile("Hot"), + Ls(8), mass(0.01), M5(1.8), mob_b(1.5), + Impl(LanczosType::irbl),mpi_split(4,1), + Nu(4), Nk(200), Np(200), Nstop(100), Ntest(1), MaxIter(10), resid(1.0e-8), + low(0.2), high(5.5), order(11) + {Nm=Nk+Np;}; + + void Parse(char **argv, int argc); +}; + + +void CmdJobParams::Parse(char **argv,int argc) +{ + std::string arg; + std::vector vi; + double re,im; + int expect, idx; + std::string vstr; + std::ifstream pfile; + + if( GridCmdOptionExists(argv,argv+argc,"--gconf") ){ + gaugefile = GridCmdOptionPayload(argv,argv+argc,"--gconf"); + } + + if( GridCmdOptionExists(argv,argv+argc,"--phase") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--phase"); + pfile.open(arg); + assert(pfile); + expect = 0; + while( pfile >> vstr ) { + if ( vstr.compare("boundary_phase") == 0 ) { + pfile >> vstr; + GridCmdOptionInt(vstr,idx); + assert(expect==idx); + pfile >> vstr; + GridCmdOptionFloat(vstr,re); + pfile >> vstr; + GridCmdOptionFloat(vstr,im); + boundary_phase.push_back({re,im}); + expect++; + } + } + pfile.close(); + } else { + for (int i=0; i<4; ++i) boundary_phase.push_back({1.,0.}); + } + + if( GridCmdOptionExists(argv,argv+argc,"--omega") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--omega"); + pfile.open(arg); + assert(pfile); + Ls = 0; + while( pfile >> vstr ) { + if ( vstr.compare("omega") == 0 ) { + pfile >> vstr; + GridCmdOptionInt(vstr,idx); + assert(Ls==idx); + pfile >> vstr; + GridCmdOptionFloat(vstr,re); + pfile >> vstr; + GridCmdOptionFloat(vstr,im); + omega.push_back({re,im}); + Ls++; + } + } + pfile.close(); + } else { + if( GridCmdOptionExists(argv,argv+argc,"--Ls") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--Ls"); + GridCmdOptionInt(arg,Ls); + } + } + + if( GridCmdOptionExists(argv,argv+argc,"--mass") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--mass"); + GridCmdOptionFloat(arg,mass); + } + + if( GridCmdOptionExists(argv,argv+argc,"--M5") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--M5"); + GridCmdOptionFloat(arg,M5); + } + + if( GridCmdOptionExists(argv,argv+argc,"--mob_b") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--mob_b"); + GridCmdOptionFloat(arg,mob_b); + } + + if( GridCmdOptionExists(argv,argv+argc,"--irbl") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--irbl"); + GridCmdOptionIntVector(arg,vi); + Nu = vi[0]; + Nk = vi[1]; + Np = vi[2]; + Nstop = vi[3]; + MaxIter = vi[4]; + // ypj[fixme] mode overriding message is needed. 
+ Impl = LanczosType::irbl; + Nm = Nk+Np; + } + + // block Lanczos with explicit extension of its dimensions + if( GridCmdOptionExists(argv,argv+argc,"--rbl") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--rbl"); + GridCmdOptionIntVector(arg,vi); + Nu = vi[0]; + Nk = vi[1]; + Np = vi[2]; // vector space is enlarged by adding Np vectors + Nstop = vi[3]; + MaxIter = vi[4]; + // ypj[fixme] mode overriding message is needed. + Impl = LanczosType::rbl; + Nm = Nk+Np*MaxIter; + } + +#if 1 + // block Lanczos with explicit extension of its dimensions + if( GridCmdOptionExists(argv,argv+argc,"--split") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--split"); + GridCmdOptionIntVector(arg,vi); + for(int i=0;i seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + // ypj [note] why seed RNG5 again? bug? In this case, run with a default seed(). + GridParallelRNG RNG5rb(FrbGrid); RNG5rb.SeedFixedIntegers(seeds5); + + LatticeGaugeField Umu(UGrid); + std::vector U(4,UGrid); + + if ( JP.gaugefile.compare("Hot") == 0 ) { + SU3::HotConfiguration(RNG4, Umu); + } else { + FieldMetaData header; + NerscIO::readConfiguration(Umu,header,JP.gaugefile); + // ypj [fixme] additional checks for the loaded configuration? + } + + for(int mu=0;mu(Umu,mu); + } + + RealD mass = JP.mass; + RealD M5 = JP.M5; + +// ypj [fixme] flexible support for a various Fermions +// RealD mob_b = JP.mob_b; // Gparity +// std::vector omega; // ZMobius + +// GparityMobiusFermionD ::ImplParams params; +// std::vector twists({1,1,1,0}); +// params.twists = twists; +// GparityMobiusFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params); +// SchurDiagTwoOperator HermOp(Ddwf); + + +// int mrhs = JP.Nu; + int Ndir=4; + auto mpi_layout = GridDefaultMpi(); + std::vector mpi_split (Ndir,1); +#if 0 + int tmp=mrhs, dir=0; + std::cout << GridLogMessage << "dir= "<_processor,re); + src_tmp=re; + pickCheckerboard(Odd,src[i],src_tmp); + } + RNG5.Report(); +} else { + std::cout << GridLogMessage << "Using RNG5rb"< evec(JP.Nm,FrbGrid); + for(int i=0;i<1;++i){ + std::cout << GridLogMessage << i <<" / "<< JP.Nm <<" grid pointer "<< evec[i].Grid() << std::endl; + }; + + int Nconv; + IRBL.calc(eval,evec,src,Nconv,JP.Impl); + + + Grid_finalize(); +} diff --git a/tests/lanczos/Test_dwf_block_lanczos.cc.single b/tests/lanczos/Test_dwf_block_lanczos.cc.single new file mode 100644 index 00000000..7449e32a --- /dev/null +++ b/tests/lanczos/Test_dwf_block_lanczos.cc.single @@ -0,0 +1,408 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_block_lanczos.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
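The three Test_dwf_block_lanczos variants collected here differ mainly in where the matrix arithmetic runs: the .cc.double listing keeps everything in double precision, while the main .cc and the .cc.single listings read the gauge configuration in double for I/O fidelity and convert once to single precision for the operator. A sketch of that pattern, with names following the code below:

    LatticeGaugeField  Umu (UGrid);    // double precision, used for NERSC I/O
    LatticeGaugeFieldF UmuF(UGridF);   // single precision, consumed by the Lanczos operator
    FieldMetaData header;
    NerscIO::readConfiguration(Umu,header,JP.gaugefile);
    precisionChange(UmuF,Umu);         // one-off conversion, cheap next to the eigensolve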
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include +#include + +using namespace std; +using namespace Grid; +//using namespace Grid::QCD; + +//typedef typename GparityDomainWallFermionR::FermionField FermionField; +typedef typename ZMobiusFermionF::FermionField FermionField; + +RealD AllZero(RealD x){ return 0.;} + +class CmdJobParams +{ + public: + std::string gaugefile; + + int Ls; + double mass; + double M5; + double mob_b; + std::vector omega; + std::vector boundary_phase; + std::vector mpi_split; + + LanczosType Impl; + int Nu; + int Nk; + int Np; + int Nm; + int Nstop; + int Ntest; + int MaxIter; + double resid; + + double low; + double high; + int order; + + CmdJobParams() + : gaugefile("Hot"), + Ls(8), mass(0.01), M5(1.8), mob_b(1.5), + Impl(LanczosType::irbl),mpi_split(4,1), + Nu(4), Nk(200), Np(200), Nstop(100), Ntest(1), MaxIter(10), resid(1.0e-8), + low(0.2), high(5.5), order(11) + {Nm=Nk+Np;}; + + void Parse(char **argv, int argc); +}; + + +void CmdJobParams::Parse(char **argv,int argc) +{ + std::string arg; + std::vector vi; + double re,im; + int expect, idx; + std::string vstr; + std::ifstream pfile; + + if( GridCmdOptionExists(argv,argv+argc,"--gconf") ){ + gaugefile = GridCmdOptionPayload(argv,argv+argc,"--gconf"); + } + + if( GridCmdOptionExists(argv,argv+argc,"--phase") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--phase"); + pfile.open(arg); + assert(pfile); + expect = 0; + while( pfile >> vstr ) { + if ( vstr.compare("boundary_phase") == 0 ) { + pfile >> vstr; + GridCmdOptionInt(vstr,idx); + assert(expect==idx); + pfile >> vstr; + GridCmdOptionFloat(vstr,re); + pfile >> vstr; + GridCmdOptionFloat(vstr,im); + boundary_phase.push_back({re,im}); + expect++; + } + } + pfile.close(); + } else { + for (int i=0; i<4; ++i) boundary_phase.push_back({1.,0.}); + } + + if( GridCmdOptionExists(argv,argv+argc,"--omega") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--omega"); + pfile.open(arg); + assert(pfile); + Ls = 0; + while( pfile >> vstr ) { + if ( vstr.compare("omega") == 0 ) { + pfile >> vstr; + GridCmdOptionInt(vstr,idx); + assert(Ls==idx); + pfile >> vstr; + GridCmdOptionFloat(vstr,re); + pfile >> vstr; + GridCmdOptionFloat(vstr,im); + omega.push_back({re,im}); + Ls++; + } + } + pfile.close(); + } else { + if( GridCmdOptionExists(argv,argv+argc,"--Ls") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--Ls"); + GridCmdOptionInt(arg,Ls); + } + } + + if( GridCmdOptionExists(argv,argv+argc,"--mass") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--mass"); + GridCmdOptionFloat(arg,mass); + } + + if( GridCmdOptionExists(argv,argv+argc,"--M5") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--M5"); + GridCmdOptionFloat(arg,M5); + } + + if( GridCmdOptionExists(argv,argv+argc,"--mob_b") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--mob_b"); + GridCmdOptionFloat(arg,mob_b); + } + + if( GridCmdOptionExists(argv,argv+argc,"--irbl") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--irbl"); + GridCmdOptionIntVector(arg,vi); + Nu = vi[0]; + Nk = vi[1]; + Np = vi[2]; + Nstop = vi[3]; + MaxIter = vi[4]; + // ypj[fixme] mode overriding message is needed. 
+ Impl = LanczosType::irbl; + Nm = Nk+Np; + } + + // block Lanczos with explicit extension of its dimensions + if( GridCmdOptionExists(argv,argv+argc,"--rbl") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--rbl"); + GridCmdOptionIntVector(arg,vi); + Nu = vi[0]; + Nk = vi[1]; + Np = vi[2]; // vector space is enlarged by adding Np vectors + Nstop = vi[3]; + MaxIter = vi[4]; + // ypj[fixme] mode overriding message is needed. + Impl = LanczosType::rbl; + Nm = Nk+Np*MaxIter; + } + +#if 1 + // block Lanczos with explicit extension of its dimensions + if( GridCmdOptionExists(argv,argv+argc,"--split") ){ + arg = GridCmdOptionPayload(argv,argv+argc,"--split"); + GridCmdOptionIntVector(arg,vi); + for(int i=0;i seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGridF); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + // ypj [note] why seed RNG5 again? bug? In this case, run with a default seed(). + GridParallelRNG RNG5rb(FrbGridF); RNG5rb.SeedFixedIntegers(seeds5); + + LatticeGaugeField Umu(UGrid); + std::vector U(4,UGrid); + LatticeGaugeFieldF UmuF(UGridF); + std::vector UF(4,UGridF); + + if ( JP.gaugefile.compare("Hot") == 0 ) { + SU3::HotConfiguration(RNG4, Umu); + } else { + FieldMetaData header; + NerscIO::readConfiguration(Umu,header,JP.gaugefile); + // ypj [fixme] additional checks for the loaded configuration? + } + precisionChange (UmuF,Umu); + + for(int mu=0;mu(Umu,mu); + } + + RealD mass = JP.mass; + RealD M5 = JP.M5; + +// ypj [fixme] flexible support for a various Fermions +// RealD mob_b = JP.mob_b; // Gparity +// std::vector omega; // ZMobius + +// GparityMobiusFermionD ::ImplParams params; +// std::vector twists({1,1,1,0}); +// params.twists = twists; +// GparityMobiusFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params); +// SchurDiagTwoOperator HermOp(Ddwf); + + +// int mrhs = JP.Nu; + int Ndir=4; + auto mpi_layout = GridDefaultMpi(); + std::vector mpi_split (Ndir,1); +#if 0 + int tmp=mrhs, dir=0; + std::cout << GridLogMessage << "dir= "<_processor,re); + src_tmp=re; + pickCheckerboard(Odd,src[i],src_tmp); + } + RNG5.Report(); +} else { + std::cout << GridLogMessage << "Using RNG5rb"< evec(JP.Nm,FrbGridF); + for(int i=0;i<1;++i){ + std::cout << GridLogMessage << i <<" / "<< JP.Nm <<" grid pointer "<< evec[i].Grid() << std::endl; + }; + + int Nconv; + IRBL.calc(eval,evec,src,Nconv,JP.Impl); + + + Grid_finalize(); +} diff --git a/tests/lanczos/Test_dwf_lanczos.cc b/tests/lanczos/Test_dwf_lanczos.cc index 1723e756..d10c62d3 100644 --- a/tests/lanczos/Test_dwf_lanczos.cc +++ b/tests/lanczos/Test_dwf_lanczos.cc @@ -35,26 +35,45 @@ template struct Setup{}; template<> -struct Setup{ - static GparityMobiusFermionD* getAction(LatticeGaugeField &Umu, +struct Setup{ + static GparityMobiusFermionF* getAction(LatticeGaugeFieldF &Umu, GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){ - RealD mass=0.01; + RealD mass=0.00054; RealD M5=1.8; RealD mob_b=1.5; GparityMobiusFermionD ::ImplParams params; std::vector twists({1,1,1,0}); params.twists = twists; - return new GparityMobiusFermionD(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params); + return new GparityMobiusFermionF(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params); } }; template<> +struct Setup{ + static DomainWallFermionF* getAction(LatticeGaugeFieldF &Umu, struct Setup{ static DomainWallFermionD* getAction(LatticeGaugeField &Umu, GridCartesian* 
FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){ - RealD mass=0.01; + RealD mass=0.00054; RealD M5=1.8; - return new DomainWallFermionD(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + return new DomainWallFermionF(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + } +}; + +template<> +struct Setup{ + static MobiusFermionF* getAction(LatticeGaugeFieldF &Umu, + GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){ + RealD mass=0.00054; + RealD M5=1.8; + RealD mob_b=1.5; + std::vector boundary = {1,1,1,-1}; + MobiusFermionF::ImplParams Params(boundary); + + std::cout << GridLogMessage << "mass "<{ template void run(){ typedef typename Action::FermionField FermionField; - const int Ls=8; + const int Ls=12; GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); - printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n",UGrid,UrbGrid,FGrid,FrbGrid); +// printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n",UGrid,UrbGrid,FGrid,FrbGrid); + + GridCartesian* UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi()); + GridRedBlackCartesian* UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF); + GridCartesian* FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls, UGridF); + GridRedBlackCartesian* FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGridF); + std::vector seeds4({1,2,3,4}); std::vector seeds5({5,6,7,8}); - GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); - GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); - GridParallelRNG RNG5rb(FrbGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG5(FGridF); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGridF); RNG4.SeedFixedIntegers(seeds4); + GridParallelRNG RNG5rb(FrbGridF); RNG5.SeedFixedIntegers(seeds5); LatticeGaugeField Umu(UGrid); - SU::HotConfiguration(RNG4, Umu); +// SU::HotConfiguration(RNG4, Umu); + FieldMetaData header; + std::string file("./config"); - Action *action = Setup::getAction(Umu,FGrid,FrbGrid,UGrid,UrbGrid); +// int precision32 = 0; +// int tworow = 0; +// NerscIO::writeConfiguration(Umu,file,tworow,precision32); + NerscIO::readConfiguration(Umu,header,file); + + LatticeGaugeFieldF UmuF(UGridF); + precisionChange(UmuF, Umu); + + Action *action = Setup::getAction(UmuF,FGridF,FrbGridF,UGridF,UrbGridF); //MdagMLinearOperator HermOp(Ddwf); - SchurDiagTwoOperator HermOp(*action); +// SchurDiagTwoOperator HermOp(*action); + SchurDiagOneOperator HermOp(*action); - const int Nstop = 30; - const int Nk = 40; + const int Nstop = 150; + const int Nk = 160; const int Np = 40; const int Nm = Nk+Np; const int MaxIt= 10000; - RealD resid = 1.0e-8; + RealD resid = 1.0e-6; + std::cout << GridLogMessage << "Nstop "< Coeffs { 0.,-1.}; Polynomial PolyX(Coeffs); - Chebyshev Cheby(0.2,5.,11); + Chebyshev Cheby(0.0000006,5.5,4001); + std::cout << GridLogMessage << "Cheby(0.0000006,5.5,4001) "< OpCheby(Cheby,HermOp); - PlainHermOp Op (HermOp); + PlainHermOp Op (HermOp); ImplicitlyRestartedLanczos IRL(OpCheby,Op,Nstop,Nk,Nm,resid,MaxIt); std::vector eval(Nm); - FermionField src(FrbGrid); + FermionField src(FrbGridF); gaussian(RNG5rb,src); - std::vector 
evec(Nm,FrbGrid); + std::vector evec(Nm,FrbGridF); for(int i=0;i<1;i++){ std::cout << GridLogMessage <(); + run(); }else if(action == "DWF"){ - run(); + run(); + }else if(action == "Mobius"){ + run(); }else{ std::cout << "Unknown action" << std::endl; exit(1); diff --git a/tests/solver/Test_dwf_mixedcg_prec.cc b/tests/solver/Test_dwf_mixedcg_prec.cc new file mode 100644 index 00000000..dc88018e --- /dev/null +++ b/tests/solver/Test_dwf_mixedcg_prec.cc @@ -0,0 +1,122 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_cg_prec.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +//using namespace std; +using namespace Grid; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + const int Ls=12; + + std::cout << GridLogMessage << "::::: NB: to enable a quick bit reproducibility check use the --checksums flag. 
" << std::endl; + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + GridCartesian * UGrid_f = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_f); + GridCartesian * FGrid_f = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_f); + GridRedBlackCartesian * FrbGrid_f = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_f); + + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeFermionD src(FGrid); random(RNG5,src); + LatticeFermionD result(FGrid); result=Zero(); + LatticeGaugeFieldD Umu(UGrid); + LatticeGaugeFieldF Umu_f(UGrid_f); + + SU::HotConfiguration(RNG4,Umu); + + precisionChange(Umu_f,Umu); + + RealD mass=0.1; + RealD M5=1.8; + DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + DomainWallFermionF Ddwf_f(Umu_f,*FGrid_f,*FrbGrid_f,*UGrid_f,*UrbGrid_f,mass,M5); + + LatticeFermionD src_o(FrbGrid); + LatticeFermionD result_o(FrbGrid); + LatticeFermionD result_o_2(FrbGrid); + pickCheckerboard(Odd,src_o,src); + result_o.Checkerboard() = Odd; + result_o = Zero(); + result_o_2.Checkerboard() = Odd; + result_o_2 = Zero(); + + SchurDiagMooeeOperator HermOpEO(Ddwf); + SchurDiagMooeeOperator HermOpEO_f(Ddwf_f); + + std::cout << GridLogMessage << "::::::::::::: Starting mixed CG" << std::endl; + MixedPrecisionConjugateGradient mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO); + double t1,t2,flops; + double MdagMsiteflops = 1452; // Mobius (real coeffs) + // CG overhead: 8 inner product, 4+8 axpy_norm, 4+4 linear comb (2 of) + double CGsiteflops = (8+4+8+4+4)*Nc*Ns ; + std:: cout << " MdagM site flops = "<< 4*MdagMsiteflops< CG(1.0e-8,10000); + result_o_2 = Zero(); + t1=usecond(); + CG(HermOpEO,src_o,result_o_2); + t2=usecond(); + iters = CG.IterationsToComplete; + flops = MdagMsiteflops*4*FrbGrid->gSites()*iters; + flops+= CGsiteflops*FrbGrid->gSites()*iters; + + std::cout << " DoublePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.< +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+    /* END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace std;
+using namespace Grid;
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  double relup_delta = 0.2;
+  for(int i=1;i<argc;i++){
+    std::string sarg(argv[i]);
+    if(sarg == "--relup_delta"){
+      std::stringstream ss; ss << argv[i+1]; ss >> relup_delta;
+      std::cout << GridLogMessage << "Set reliable update Delta to " << relup_delta << std::endl;
+    }
+  }
+
+  const int Ls=12;
+
+  {
+    GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
+    GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+    GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
+    GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
+
+    GridCartesian * UGrid_f = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
+    GridRedBlackCartesian * UrbGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_f);
+    GridCartesian * FGrid_f = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_f);
+    GridRedBlackCartesian * FrbGrid_f = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_f);
+
+    std::vector<int> seeds4({1,2,3,4});
+    std::vector<int> seeds5({5,6,7,8});
+    GridParallelRNG RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
+    GridParallelRNG RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
+
+    LatticeFermionD src(FGrid); random(RNG5,src);
+    LatticeFermionD result(FGrid); result=Zero();
+    LatticeGaugeFieldD Umu(UGrid);
+    LatticeGaugeFieldF Umu_f(UGrid_f);
+
+    SU<Nc>::HotConfiguration(RNG4,Umu);
+
+    precisionChange(Umu_f,Umu);
+
+    RealD mass=0.1;
+    RealD M5=1.8;
+    DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+    DomainWallFermionF Ddwf_f(Umu_f,*FGrid_f,*FrbGrid_f,*UGrid_f,*UrbGrid_f,mass,M5);
+
+    LatticeFermionD src_o(FrbGrid);
+    LatticeFermionD result_o(FrbGrid);
+    LatticeFermionD result_o_2(FrbGrid);
+    pickCheckerboard(Odd,src_o,src);
+    result_o.Checkerboard() = Odd;
+    result_o = Zero();
+    result_o_2.Checkerboard() = Odd;
+    result_o_2 = Zero();
+
+    SchurDiagMooeeOperator<DomainWallFermionD,LatticeFermionD> HermOpEO(Ddwf);
+    SchurDiagMooeeOperator<DomainWallFermionF,LatticeFermionF> HermOpEO_f(Ddwf_f);
+
+    std::cout << GridLogMessage << "::::::::::::: Starting mixed CG" << std::endl;
+    ConjugateGradientReliableUpdate<LatticeFermionD,LatticeFermionF> mCG(1e-8, 10000, relup_delta, FrbGrid_f, HermOpEO_f, HermOpEO);
+    double t1,t2,flops;
+    double MdagMsiteflops = 1452; // Mobius (real coeffs)
+    // CG overhead: 8 inner product, 4+8 axpy_norm, 4+4 linear comb (2 of)
+    double CGsiteflops = (8+4+8+4+4)*Nc*Ns ;
+    std:: cout << " MdagM site flops = "<< 4*MdagMsiteflops<<std::endl;
+    std:: cout << " CG site flops = "<< CGsiteflops <<std::endl;
+    int iters;
+
+    result_o = Zero();
+    t1=usecond();
+    mCG(src_o,result_o);
+    t2=usecond();
+    iters = mCG.IterationsToComplete;
+    flops = MdagMsiteflops*4*FrbGrid->gSites()*iters;
+    flops+= CGsiteflops*FrbGrid->gSites()*iters;
+    std::cout << " SinglePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.<<std::endl;
+    std::cout << " SinglePrecision GF/s "<< flops/(t2-t1)/1000.<<std::endl;
+
+    std::cout << GridLogMessage << "::::::::::::: Starting regular CG" << std::endl;
+    ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
+    for(int i=0;i<1;i++){
+      result_o_2 = Zero();
+      t1=usecond();
+      CG(HermOpEO,src_o,result_o_2);
+      t2=usecond();
+      iters = CG.IterationsToComplete;
+      flops = MdagMsiteflops*4*FrbGrid->gSites()*iters;
+      flops+= CGsiteflops*FrbGrid->gSites()*iters;
+
+      std::cout << " DoublePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.<<std::endl;
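For context on the relup_delta parameter exercised by this last test: a reliable-update CG runs the iteration in single precision but, whenever the single-precision residual has fallen by the factor delta relative to the residual at the previous update, replaces it with a freshly computed double-precision true residual b - A x before continuing. A minimal sketch of that correction step, under the assumption of a standard reliable-update scheme (all names here are illustrative, not the internals of ConjugateGradientReliableUpdate):

    // inside the single-precision CG iteration (schematic; names are illustrative)
    if( norm2(r_f) < delta*delta * rsq_at_last_update ){
      precisionChange(x_d, x_f, wk_sp_to_dp);   // promote the current solution estimate
      Linop_d.HermOp(x_d, mmp_d);
      r_d = b_d - mmp_d;                        // recompute the true residual in double
      precisionChange(r_f, r_d, wk_dp_to_sp);   // continue the single-precision iteration
      rsq_at_last_update = norm2(r_f);
    }

Small delta means frequent (safer, slower) corrections; the default of 0.2 used above trades a handful of double-precision matrix applications for single-precision speed on the bulk of the iterations.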