diff --git a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h index e9a3a500..90bee389 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h @@ -423,7 +423,6 @@ void WilsonKernels::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S #define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier(); #define KERNEL_CALL_EXT(A) \ - const uint64_t NN = Nsite*Ls; \ const uint64_t sz = st.surface_list.size(); \ auto ptr = &st.surface_list[0]; \ accelerator_forNB( ss, sz, Simd::Nsimd(), { \ diff --git a/Grid/qcd/utils/SUn.h b/Grid/qcd/utils/SUn.h index 00cd7f40..7ead2084 100644 --- a/Grid/qcd/utils/SUn.h +++ b/Grid/qcd/utils/SUn.h @@ -40,18 +40,20 @@ Lattice > > > Determinant(const LatticelSites(); Lattice > > > ret(grid); - + typedef typename Vec::scalar_type scalar; autoView(Umu_v,Umu,CpuRead); autoView(ret_v,ret,CpuWrite); thread_for(site,lvol,{ Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N); Coordinate lcoor; grid->LocalIndexToLocalCoor(site, lcoor); - iScalar > > Us; + iScalar > > Us; peekLocalSite(Us, Umu_v, lcoor); for(int i=0;i accelerator_inline iVector Exponentiate(c // Specialisation: Cayley-Hamilton exponential for SU(3) -#ifndef GRID_ACCELERATED +#if 0 template::TensorLevel == 0>::type * =nullptr> accelerator_inline iMatrix Exponentiate(const iMatrix &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP ) { diff --git a/HMC/FTHMC2p1f.cc b/HMC/FTHMC2p1f.cc new file mode 100644 index 00000000..dd824138 --- /dev/null +++ b/HMC/FTHMC2p1f.cc @@ -0,0 +1,224 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Copyright (C) 2023 + +Author: Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +using namespace Grid; + +int main(int argc, char **argv) +{ + std::cout << std::setprecision(12); + + Grid_init(&argc, &argv); + int threads = GridThread::GetThreads(); + // here make a routine to print all the relevant information on the run + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + + // Typedefs to simplify notation + typedef WilsonImplR FermionImplPolicy; + typedef MobiusFermionD FermionAction; + typedef typename FermionAction::FermionField FermionField; + + typedef Grid::XmlReader Serialiser; + + //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + IntegratorParameters MD; + // typedef GenericHMCRunner HMCWrapper; + // MD.name = std::string("Leap Frog"); + // typedef GenericHMCRunner HMCWrapper; + // MD.name = std::string("Force Gradient"); + typedef GenericHMCRunner HMCWrapper; + MD.name = std::string("MinimumNorm2"); + MD.MDsteps = 12; + MD.trajL = 1.0; + + HMCparameters HMCparams; + HMCparams.StartTrajectory = 0; + HMCparams.Trajectories = 200; + HMCparams.NoMetropolisUntil= 20; + // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n"; + HMCparams.StartingType =std::string("HotStart"); + HMCparams.MD = MD; + HMCWrapper TheHMC(HMCparams); + + // Grid from the command line arguments --grid and --mpi + TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition + + CheckpointerParameters CPparams; + CPparams.config_prefix = "ckpoint_EODWF_lat"; + CPparams.smeared_prefix = "ckpoint_EODWF_lat_smr"; + CPparams.rng_prefix = "ckpoint_EODWF_rng"; + CPparams.saveInterval = 1; + CPparams.saveSmeared = true; + CPparams.format = "IEEE64BIG"; + TheHMC.Resources.LoadNerscCheckpointer(CPparams); + + RNGModuleParameters RNGpar; + RNGpar.serial_seeds = "1 2 3 4 5"; + RNGpar.parallel_seeds = "6 7 8 9 10"; + TheHMC.Resources.SetRNGSeeds(RNGpar); + + // Construct observables + // here there is too much indirection + typedef PlaquetteMod PlaqObs; + TheHMC.Resources.AddObservable(); + ////////////////////////////////////////////// + + const int Ls = 16; + Real beta = 2.13; + Real light_mass = 0.01; + Real strange_mass = 0.04; + Real pv_mass = 1.0; + RealD M5 = 1.8; + RealD b = 1.0; // Scale factor two + RealD c = 0.0; + + OneFlavourRationalParams OFRp; + OFRp.lo = 1.0e-2; + OFRp.hi = 64; + OFRp.MaxIter = 10000; + OFRp.tolerance= 1.0e-10; + OFRp.degree = 14; + OFRp.precision= 40; + + std::vector hasenbusch({ 0.1 }); + + auto GridPtr = TheHMC.Resources.GetCartesian(); + auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); + auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr); + auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr); + + IwasakiGaugeActionR GaugeAction(beta); + + // temporarily need a gauge field + LatticeGaugeField U(GridPtr); + LatticeGaugeField Uhot(GridPtr); + + // These lines are unecessary if BC are all periodic + std::vector boundary = {1,1,1,-1}; + FermionAction::ImplParams Params(boundary); + + double StoppingCondition = 1e-10; + double MaxCGIterations = 30000; + ConjugateGradient CG(StoppingCondition,MaxCGIterations); + + bool ApplySmearing = true; + + //////////////////////////////////// + // Collect actions + //////////////////////////////////// + ActionLevel Level1(1); + ActionLevel Level2(2); + ActionLevel Level3(4); + + //////////////////////////////////// + // Strange action + //////////////////////////////////// + + MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); + MobiusEOFAFermionD Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c); + ExactOneFlavourRatioPseudoFermionAction + EOFA(Strange_Op_L, Strange_Op_R, + CG, + CG, CG, + CG, CG, + OFRp, false); + + EOFA.is_smeared = ApplySmearing; + Level1.push_back(&EOFA); + + //////////////////////////////////// + // up down action + //////////////////////////////////// + std::vector light_den; + std::vector light_num; + + int n_hasenbusch = hasenbusch.size(); + light_den.push_back(light_mass); + for(int h=0;h Numerators; + std::vector Denominators; + std::vector *> Quotients; + + for(int h=0;h(*Numerators[h],*Denominators[h],CG,CG)); + } + + for(int h=0;his_smeared = ApplySmearing; + Level1.push_back(Quotients[h]); + } + + ///////////////////////////////////////////////////////////// + // lnDetJacobianAction + ///////////////////////////////////////////////////////////// + double rho = 0.1; // smearing parameter + int Nsmear = 1; // number of smearing levels - must be multiple of 2Nd + int Nstep = 8*Nsmear; // number of smearing levels - must be multiple of 2Nd + Smear_Stout Stout(rho); + SmearedConfigurationMasked SmearingPolicy(GridPtr, Nstep, Stout); + JacobianAction Jacobian(&SmearingPolicy); + if( ApplySmearing ) Level2.push_back(&Jacobian); + std::cout << GridLogMessage << " Built the Jacobian "<< std::endl; + + + ///////////////////////////////////////////////////////////// + // Gauge action + ///////////////////////////////////////////////////////////// + // GaugeAction.is_smeared = ApplySmearing; + GaugeAction.is_smeared = true; + Level3.push_back(&GaugeAction); + + std::cout << GridLogMessage << " ************************************************"<< std::endl; + std::cout << GridLogMessage << " Action complete -- NO FERMIONS FOR NOW -- FIXME"<< std::endl; + std::cout << GridLogMessage << " ************************************************"<< std::endl; + std::cout << GridLogMessage << std::endl; + std::cout << GridLogMessage << std::endl; + + + std::cout << GridLogMessage << " Running the FT HMC "<< std::endl; + + TheHMC.TheAction.push_back(Level1); + TheHMC.TheAction.push_back(Level2); + TheHMC.TheAction.push_back(Level3); + + TheHMC.Run(SmearingPolicy); // for smearing + + Grid_finalize(); +} // main + + + diff --git a/HMC/Mobius2p1f_EOFA_96I_hmc.cc b/HMC/Mobius2p1f_EOFA_96I_hmc.cc index 54bbe617..3d674db4 100644 --- a/HMC/Mobius2p1f_EOFA_96I_hmc.cc +++ b/HMC/Mobius2p1f_EOFA_96I_hmc.cc @@ -146,6 +146,8 @@ NAMESPACE_END(Grid); int main(int argc, char **argv) { using namespace Grid; + std::cout << " Grid Initialise "< HMCWrapper; // MD.name = std::string("Leap Frog"); - typedef GenericHMCRunner HMCWrapper; - MD.name = std::string("Force Gradient"); - //typedef GenericHMCRunner HMCWrapper; - // MD.name = std::string("MinimumNorm2"); + // typedef GenericHMCRunner HMCWrapper; + // MD.name = std::string("Force Gradient"); + typedef GenericHMCRunner HMCWrapper; + MD.name = std::string("MinimumNorm2"); // TrajL = 2 // 4/2 => 0.6 dH // 3/3 => 0.8 dH .. depth 3, slower //MD.MDsteps = 4; - MD.MDsteps = 12; + MD.MDsteps = 14; MD.trajL = 0.5; HMCparameters HMCparams; HMCparams.StartTrajectory = 1077; - HMCparams.Trajectories = 1; + HMCparams.Trajectories = 20; HMCparams.NoMetropolisUntil= 0; // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n"; - // HMCparams.StartingType =std::string("ColdStart"); - HMCparams.StartingType =std::string("CheckpointStart"); + HMCparams.StartingType =std::string("ColdStart"); + // HMCparams.StartingType =std::string("CheckpointStart"); HMCparams.MD = MD; HMCWrapper TheHMC(HMCparams); @@ -223,7 +225,7 @@ int main(int argc, char **argv) { Real pv_mass = 1.0; // std::vector hasenbusch({ 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // std::vector hasenbusch({ light_mass, 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass }); - std::vector hasenbusch({ 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated + std::vector hasenbusch({ 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 }); // Updated // std::vector hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass }); auto GridPtr = TheHMC.Resources.GetCartesian(); @@ -275,10 +277,10 @@ int main(int argc, char **argv) { // double StoppingCondition = 1e-14; // double MDStoppingCondition = 1e-9; - double StoppingCondition = 1e-8; - double MDStoppingCondition = 1e-7; - double MDStoppingConditionLoose = 1e-7; - double MDStoppingConditionStrange = 1e-7; + double StoppingCondition = 1e-9; + double MDStoppingCondition = 1e-8; + double MDStoppingConditionLoose = 1e-8; + double MDStoppingConditionStrange = 1e-8; double MaxCGIterations = 300000; ConjugateGradient CG(StoppingCondition,MaxCGIterations); ConjugateGradient MDCG(MDStoppingCondition,MaxCGIterations); diff --git a/systems/Lumi/benchmarks/bench2.slurm b/systems/Lumi/benchmarks/bench2.slurm new file mode 100755 index 00000000..fe02bfba --- /dev/null +++ b/systems/Lumi/benchmarks/bench2.slurm @@ -0,0 +1,44 @@ +#!/bin/bash -l +#SBATCH --job-name=bench_lehner +#SBATCH --partition=small-g +#SBATCH --nodes=2 +#SBATCH --ntasks-per-node=8 +#SBATCH --cpus-per-task=7 +#SBATCH --gpus-per-node=8 +#SBATCH --time=00:10:00 +#SBATCH --account=project_465000546 +#SBATCH --gpu-bind=none +#SBATCH --exclusive +#SBATCH --mem=0 + +CPU_BIND="map_cpu:48,56,32,40,16,24,1,8" +echo $CPU_BIND + +cat << EOF > select_gpu +#!/bin/bash +export GPU_MAP=(0 1 2 3 4 5 6 7) +export GPU=\${GPU_MAP[\$SLURM_LOCALID]} +export HIP_VISIBLE_DEVICES=\$GPU +unset ROCR_VISIBLE_DEVICES +echo RANK \$SLURM_LOCALID using GPU \$GPU +exec \$* +EOF + +chmod +x ./select_gpu + +root=/scratch/project_465000546/boylepet/Grid/systems/Lumi +source ${root}/sourceme.sh + +export OMP_NUM_THREADS=7 +export MPICH_GPU_SUPPORT_ENABLED=1 +export MPICH_SMP_SINGLE_COPY_MODE=XPMEM + +for vol in 16.16.16.64 32.32.32.64 32.32.32.128 +do +srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 0 --grid $vol > log.shm0.ov.$vol +#srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.ov.$vol + +srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 0 --grid $vol > log.shm0.seq.$vol +#srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.seq.$vol +done + diff --git a/systems/Lumi/config-command b/systems/Lumi/config-command index 98ae275c..3f7877c8 100644 --- a/systems/Lumi/config-command +++ b/systems/Lumi/config-command @@ -3,30 +3,28 @@ spack load gmp spack load mpfr CLIME=`spack find --paths c-lime | grep c-lime| cut -c 15-` GMP=`spack find --paths gmp | grep gmp | cut -c 12-` -MPFR=`spack find --paths mpfr | grep mpfr | cut -c 12-` -echo clime $CLIME -echo gmp $GMP -echo mpfr $MPFR +MPFR=`spack find --paths mpfr | grep mpfr | cut -c 13-` +echo clime X$CLIME +echo gmp X$GMP +echo mpfr X$MPFR -../../configure --enable-comms=mpi-auto \ +../../configure \ +--enable-comms=mpi-auto \ --with-lime=$CLIME \ --enable-unified=no \ --enable-shm=nvlink \ ---enable-tracing=timer \ --enable-accelerator=hip \ --enable-gen-simd-width=64 \ --enable-simd=GPU \ ---disable-accelerator-cshift \ ---with-gmp=$OLCF_GMP_ROOT \ +--enable-accelerator-cshift \ +--with-gmp=$GMP \ +--with-mpfr=$MPFR \ --with-fftw=$FFTW_DIR/.. \ ---with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \ --disable-fermion-reps \ --disable-gparity \ CXX=hipcc MPICXX=mpicxx \ -CXXFLAGS="-fPIC -I{$ROCM_PATH}/include/ -std=c++14 -I${MPICH_DIR}/include -L/lib64 --amdgpu-target=gfx90a" \ - LDFLAGS="-L/lib64 -L/opt/rocm-5.2.0/lib/ -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lamdhip64 --amdgpu-target=gfx90a " + CXXFLAGS="-fPIC --offload-arch=gfx90a -I/opt/rocm/include/ -std=c++14 -I/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/include" \ + LDFLAGS="-L/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/lib -lmpi -L/opt/cray/pe/mpich/8.1.23/gtl/lib -lmpi_gtl_hsa -lamdhip64 -fopenmp" -#--enable-simd=GPU-RRII \ - diff --git a/systems/Lumi/sourceme.sh b/systems/Lumi/sourceme.sh index 83eb6539..2aebbb87 100644 --- a/systems/Lumi/sourceme.sh +++ b/systems/Lumi/sourceme.sh @@ -1 +1,5 @@ -module load CrayEnv LUMI/22.12 partition/G cray-fftw/3.3.10.1 +source ~/spack/share/spack/setup-env.sh +module load CrayEnv LUMI/22.12 partition/G cray-fftw/3.3.10.1 rocm +spack load c-lime +spack load gmp +spack load mpfr