diff --git a/Grid/algorithms/blas/BatchedBlas.h b/Grid/algorithms/blas/BatchedBlas.h index 3f4856f0..e376bb18 100644 --- a/Grid/algorithms/blas/BatchedBlas.h +++ b/Grid/algorithms/blas/BatchedBlas.h @@ -270,7 +270,6 @@ public: assert(err==CUBLAS_STATUS_SUCCESS); #endif #ifdef GRID_SYCL - // std::cerr << " Calling SYCL batched ZGEMM "<()); - synchronise(); + synchronise(); #endif #if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP) // Need a default/reference implementation; use Eigen diff --git a/Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczosCoarse.h b/Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczosCoarse.h index 8168aa9d..66d2812d 100644 --- a/Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczosCoarse.h +++ b/Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczosCoarse.h @@ -279,11 +279,11 @@ public: Qt = Eigen::MatrixXcd::Identity(Nm,Nm); diagonalize(eval2,lmd2,lme2,Nu,Nm,Nm,Qt,grid); _sort.push(eval2,Nm); - // Glog << "#Ritz value before shift: "<< std::endl; + Glog << "#Ritz value before shift: "<< std::endl; for(int i=0; i Btmp(Nstop,grid); // waste of space replicating @@ -642,7 +644,7 @@ private: // for (int u=0; u0) { for (int u=0; uo + // + // Left precon by Moo^-1 + // b) (Doo^{dag} M_oo^-dag) (Moo^-1 Doo) psi_o = [ (D_oo)^dag M_oo^-dag ] Moo^-1 L^{-1} eta_o + // eta_o' = (D_oo)^dag M_oo^-dag Moo^-1 (eta_o - Moe Mee^{-1} eta_e) + /////////////////////////////////////////////////////////////////////////////////////////////////////// + template class SchurRedBlackDiagOneSolve : public SchurRedBlackBase { + public: + typedef CheckerBoardedSparseMatrixBase Matrix; + + ///////////////////////////////////////////////////// + // Wrap the usual normal equations Schur trick + ///////////////////////////////////////////////////// + SchurRedBlackDiagOneSolve(OperatorFunction &HermitianRBSolver, const bool initSubGuess = false, + const bool _solnAsInitGuess = false) + : SchurRedBlackBase(HermitianRBSolver,initSubGuess,_solnAsInitGuess) {}; + + virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o) + { + GridBase *grid = _Matrix.RedBlackGrid(); + GridBase *fgrid= _Matrix.Grid(); + + SchurDiagOneOperator _HermOpEO(_Matrix); + + Field tmp(grid); + Field Mtmp(grid); + + pickCheckerboard(Even,src_e,src); + pickCheckerboard(Odd ,src_o,src); + + ///////////////////////////////////////////////////// + // src_o = Mpcdag *MooeeInv * (source_o - Moe MeeInv source_e) + ///////////////////////////////////////////////////// + _Matrix.MooeeInv(src_e,tmp); assert( tmp.Checkerboard() ==Even); + _Matrix.Meooe (tmp,Mtmp); assert( Mtmp.Checkerboard() ==Odd); + Mtmp=src_o-Mtmp; + _Matrix.MooeeInv(Mtmp,tmp); assert( tmp.Checkerboard() ==Odd); + + // get the right MpcDag + _HermOpEO.MpcDag(tmp,src_o); assert(src_o.Checkerboard() ==Odd); + } + + virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol) + { + GridBase *grid = _Matrix.RedBlackGrid(); + GridBase *fgrid= _Matrix.Grid(); + + Field tmp(grid); + Field sol_e(grid); + + + /////////////////////////////////////////////////// + // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... + /////////////////////////////////////////////////// + _Matrix.Meooe(sol_o,tmp); assert( tmp.Checkerboard() ==Even); + tmp = src_e-tmp; assert( src_e.Checkerboard() ==Even); + _Matrix.MooeeInv(tmp,sol_e); assert( sol_e.Checkerboard() ==Even); + + setCheckerboard(sol,sol_e); assert( sol_e.Checkerboard() ==Even); + setCheckerboard(sol,sol_o); assert( sol_o.Checkerboard() ==Odd ); + }; + + virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o) + { + SchurDiagOneOperator _HermOpEO(_Matrix); + this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); + }; + virtual void RedBlackSolve (Matrix & _Matrix,const std::vector &src_o, std::vector &sol_o) + { + SchurDiagOneOperator _HermOpEO(_Matrix); + this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); + } + }; + /////////////////////////////////////////////////////////////////////////////////////////////////////// // Site diagonal is identity, right preconditioned by Mee^inv // ( 1 - Meo Moo^inv Moe Mee^inv ) phi =( 1 - Meo Moo^inv Moe Mee^inv ) Mee psi = = eta = eta diff --git a/Grid/allocator/AlignedAllocator.h b/Grid/allocator/AlignedAllocator.h index 8a27f527..293ce2fb 100644 --- a/Grid/allocator/AlignedAllocator.h +++ b/Grid/allocator/AlignedAllocator.h @@ -54,6 +54,9 @@ public: size_type bytes = __n*sizeof(_Tp); profilerAllocate(bytes); _Tp *ptr = (_Tp*) MemoryManager::CpuAllocate(bytes); + if ( (_Tp*)ptr == (_Tp *) NULL ) { + printf("Grid CPU Allocator got NULL for %lu bytes\n",(unsigned long) bytes ); + } assert( ( (_Tp*)ptr != (_Tp *)NULL ) ); return ptr; } @@ -100,6 +103,9 @@ public: size_type bytes = __n*sizeof(_Tp); profilerAllocate(bytes); _Tp *ptr = (_Tp*) MemoryManager::SharedAllocate(bytes); + if ( (_Tp*)ptr == (_Tp *) NULL ) { + printf("Grid Shared Allocator got NULL for %lu bytes\n",(unsigned long) bytes ); + } assert( ( (_Tp*)ptr != (_Tp *)NULL ) ); return ptr; } @@ -145,6 +151,9 @@ public: size_type bytes = __n*sizeof(_Tp); profilerAllocate(bytes); _Tp *ptr = (_Tp*) MemoryManager::AcceleratorAllocate(bytes); + if ( (_Tp*)ptr == (_Tp *) NULL ) { + printf("Grid Device Allocator got NULL for %lu bytes\n",(unsigned long) bytes ); + } assert( ( (_Tp*)ptr != (_Tp *)NULL ) ); return ptr; } diff --git a/Grid/allocator/MemoryManager.h b/Grid/allocator/MemoryManager.h index af3b2c8b..e2d7b49e 100644 --- a/Grid/allocator/MemoryManager.h +++ b/Grid/allocator/MemoryManager.h @@ -212,6 +212,7 @@ private: #endif public: + static void DisplayMallinfo(void); static void NotifyDeletion(void * CpuPtr); static void Print(void); static void PrintAll(void); diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index a0abddd4..af62cefe 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -356,7 +356,8 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt nrm = real(TensorRemove(sum(inner_tmp_v,sites))); #else typedef decltype(innerProduct(x_v[0],y_v[0])) inner_t; - Vector inner_tmp(sites); + deviceVector inner_tmp; + inner_tmp.resize(sites); auto inner_tmp_v = &inner_tmp[0]; accelerator_for( ss, sites, nsimd,{ diff --git a/Grid/qcd/hmc/GenericHMCrunner.h b/Grid/qcd/hmc/GenericHMCrunner.h index 1429d848..b53755aa 100644 --- a/Grid/qcd/hmc/GenericHMCrunner.h +++ b/Grid/qcd/hmc/GenericHMCrunner.h @@ -90,6 +90,7 @@ public: exit(1); } Parameters.StartingType = arg; + std::cout < ivec(0); GridCmdOptionIntVector(arg, ivec); Parameters.StartTrajectory = ivec[0]; + std::cout < ivec(0); GridCmdOptionIntVector(arg, ivec); Parameters.Trajectories = ivec[0]; + std::cout << GridLogMessage<<" GenericHMCrunner Command Line --Trajectories "< ivec(0); GridCmdOptionIntVector(arg, ivec); Parameters.NoMetropolisUntil = ivec[0]; + std::cout << GridLogMessage<<" GenericHMCrunner --Thermalizations "<deriv_timer_start(); as[level].actions.at(a)->deriv(Smearer, force); // deriv should NOT include Ta as[level].actions.at(a)->deriv_timer_stop(); + MemoryManager::Print(); auto name = as[level].actions.at(a)->action_name(); @@ -246,7 +248,11 @@ public: } }; - virtual ~Integrator() {} + virtual ~Integrator() + { + // Pain in the ass to clean up the Level pointers + // Guido's design is at fault as per comment above in constructor + } virtual std::string integrator_name() = 0; @@ -460,6 +466,7 @@ public: for (int level = 0; level < as.size(); ++level) { for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) { + MemoryManager::Print(); // get gauge field from the SmearingPolicy and // based on the boolean is_smeared in actionID std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl; @@ -468,6 +475,7 @@ public: as[level].actions.at(actionID)->S_timer_stop(); std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl; H += Hterm; + MemoryManager::Print(); } as[level].apply(S_hireps, Representations, level, H); diff --git a/HMC/Mobius2p1f.cc b/HMC/Mobius2p1f.cc index 4ab1f20f..8042d6e6 100644 --- a/HMC/Mobius2p1f.cc +++ b/HMC/Mobius2p1f.cc @@ -58,7 +58,7 @@ int main(int argc, char **argv) { HMCparameters HMCparams; HMCparams.StartTrajectory = 0; HMCparams.Trajectories = 200; - HMCparams.NoMetropolisUntil= 20; + HMCparams.NoMetropolisUntil= 0; // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n"; HMCparams.StartingType =std::string("ColdStart"); HMCparams.MD = MD; @@ -70,7 +70,7 @@ int main(int argc, char **argv) { CheckpointerParameters CPparams; CPparams.config_prefix = "ckpoint_EODWF_lat"; CPparams.rng_prefix = "ckpoint_EODWF_rng"; - CPparams.saveInterval = 10; + CPparams.saveInterval = 1; CPparams.format = "IEEE64BIG"; TheHMC.Resources.LoadNerscCheckpointer(CPparams); @@ -186,6 +186,8 @@ int main(int argc, char **argv) { ///////////////////////////////////////////////////////////// // HMC parameters are serialisable + TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file + TheHMC.initializeGaugeFieldAndRNGs(U); std::cout << GridLogMessage << " Running the HMC "<< std::endl; TheHMC.Run(); // no smearing diff --git a/systems/Aurora/sourceme.sh b/systems/Aurora/sourceme.sh index 98f618ab..7abe667f 100644 --- a/systems/Aurora/sourceme.sh +++ b/systems/Aurora/sourceme.sh @@ -24,13 +24,12 @@ export INTELGT_AUTO_ATTACH_DISABLE=1 # -ftarget-register-alloc-mode=pvc:small # -ftarget-register-alloc-mode=pvc:large # -ftarget-register-alloc-mode=pvc:auto -# +#export MPIR_CVAR_CH4_OFI_ENABLE_HMEM=1 export HTTP_PROXY=http://proxy.alcf.anl.gov:3128 export HTTPS_PROXY=http://proxy.alcf.anl.gov:3128 export http_proxy=http://proxy.alcf.anl.gov:3128 export https_proxy=http://proxy.alcf.anl.gov:3128 -#export MPIR_CVAR_CH4_OFI_ENABLE_HMEM=1 git config --global http.proxy http://proxy.alcf.anl.gov:3128 #source ~/spack/share/spack/setup-env.sh diff --git a/systems/Frontier/sourceme.sh b/systems/Frontier/sourceme.sh index 652e5b45..a6f49d8c 100644 --- a/systems/Frontier/sourceme.sh +++ b/systems/Frontier/sourceme.sh @@ -3,7 +3,7 @@ spack load c-lime module load emacs module load PrgEnv-gnu module load rocm -module load cray-mpich/8.1.23 +module load cray-mpich module load gmp module load cray-fftw module load craype-accel-amd-gfx90a diff --git a/tests/debug/Test_8888.cc b/tests/debug/Test_8888.cc new file mode 100644 index 00000000..279bc8bd --- /dev/null +++ b/tests/debug/Test_8888.cc @@ -0,0 +1,118 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_general_coarse_hdcg.cc + + Copyright (C) 2023 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include +#include +#include + +using namespace std; +using namespace Grid; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + const int Ls=8; + const int nbasis = 40; + const int cb = 0 ; + RealD mass=0.01; + RealD M5=1.8; + RealD b=1.0; + RealD c=0.0; + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), + GridDefaultSimd(Nd,vComplex::Nsimd()), + GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + ///////////////////////// RNGs ///////////////////////////////// + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + std::vector cseeds({5,6,7,8}); + + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + ///////////////////////// Configuration ///////////////////////////////// + LatticeGaugeField Umu(UGrid); + + FieldMetaData header; + std::string file("ckpoint_EODWF_lat.125"); + NerscIO::readConfiguration(Umu,header,file); + + //////////////////////// Fermion action ////////////////////////////////// + MobiusFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c); + + MdagMLinearOperator HermOp(Ddwf); + + + std::cout << "**************************************"< fPM; + fPM(HermOp,pm_src); + + + std::cout << "**************************************"< IRLChebyLo(0.2,64.0,201); // 1 iter + Chebyshev IRLChebyLo(0.0,55.0,101); // 1 iter + FunctionHermOp PolyOp(IRLChebyLo,HermOp); + PlainHermOp Op(HermOp); + + ImplicitlyRestartedLanczos IRL(PolyOp, + Op, + Nk, // sought vecs + Nk, // sought vecs + Nm, // spare vecs + 1.0e-8, + 10 // Max iterations + ); + + int Nconv; + std::vector eval(Nm); + std::vector evec(Nm,FGrid); + LatticeFermionD irl_src(FGrid); + + IRL.calc(eval,evec,irl_src,Nconv); + + Grid_finalize(); + return 0; +} diff --git a/tests/debug/Test_cayley_cg.cc b/tests/debug/Test_cayley_cg.cc index 74492fd9..068c260f 100644 --- a/tests/debug/Test_cayley_cg.cc +++ b/tests/debug/Test_cayley_cg.cc @@ -392,9 +392,27 @@ void TestCGschur(What & Ddwf, GridParallelRNG *RNG5) { LatticeFermion src (FGrid); random(*RNG5,src); - LatticeFermion result(FGrid); result=Zero(); + LatticeFermion result1(FGrid); result1=Zero(); + LatticeFermion result2(FGrid); result2=Zero(); + LatticeFermion result3(FGrid); result3=Zero(); ConjugateGradient CG(1.0e-8,10000); SchurRedBlackDiagMooeeSolve SchurSolver(CG); - SchurSolver(Ddwf,src,result); + SchurSolver(Ddwf,src,result1); + + SchurRedBlackDiagOneSolve SchurSolverSymm1(CG); + SchurSolverSymm1(Ddwf,src,result2); + + SchurRedBlackDiagTwoSolve SchurSolverSymm2(CG); + SchurSolverSymm2(Ddwf,src,result3); + + std::cout << GridLogMessage << " Standard " < MrhsHermMatrix; - Chebyshev IRLCheby(0.0012,42.0,301); // 1 iter + // Chebyshev IRLCheby(0.0012,42.0,301); // 4.4.6.4 + // Chebyshev IRLCheby(0.0012,42.0,501); // for 4.4.4.4 blocking 350 evs + // Chebyshev IRLCheby(0.0014,42.0,501); // for 4.4.4.4 blocking 700 evs + // Chebyshev IRLCheby(0.002,42.0,501); // for 4.4.4.4 blocking 1226 evs + // Chebyshev IRLCheby(0.0025,42.0,501); // for 4.4.4.4 blocking 1059 evs + // 3e-4,2); + Chebyshev IRLCheby(0.0018,42.0,301); // for 4.4.4.4 blocking // 790 evs + MrhsHermMatrix MrhsCoarseOp (mrhs); CoarseVector pm_src(CoarseMrhs); pm_src = ComplexD(1.0); PowerMethod cPM; cPM(MrhsCoarseOp,pm_src); - int Nk=nrhs*30; + // int Nk=nrhs*30; // 4.4.6.4 // int Nk=nrhs*80; - int Nm=Nk*4; - int Nstop=Nk; + int Nk=nrhs*60; // 720 + int Nm=Nk*4; // 2880 ; generally finishes at 1440 + int Nstop=512; int Nconv_test_interval=1; ImplicitlyRestartedBlockLanczosCoarse IRL(MrhsCoarseOp, @@ -299,7 +308,7 @@ int main (int argc, char ** argv) nrhs, Nk, Nm, - 1e-4,20); + 3e-4,2); std::vector eval(Nm); std::vector evec(Nm,Coarse5d); @@ -331,7 +340,7 @@ int main (int argc, char ** argv) // Extra HDCG parameters ////////////////////////// int maxit=3000; - ConjugateGradient CG(5.0e-2,maxit,false); + ConjugateGradient CG(7.5e-2,maxit,false); RealD lo=2.0; int ord = 7;