#!/usr/bin/env bash
# shellcheck disable=SC1091,SC2050,SC2170
# using options from https://github.com/paboyle/Grid/tree/develop/systems/Tursa
#SBATCH -J power-8A-930
#SBATCH -A dp207
#SBATCH -t 48:00:00
#SBATCH --nodes=8
#SBATCH --ntasks=32
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=8
#SBATCH --partition=gpu
#SBATCH --gres=gpu:4
#SBATCH --output=%x.%j.out
#SBATCH --error=%x.%j.err
#SBATCH --reservation=dc-port1_61
#SBATCH --qos=reservation
#SBATCH --no-requeue

set -e

# OpenMP/OpenMPI/UCX environment ###############################################
export OMP_NUM_THREADS=4
export OMPI_MCA_btl=^uct,openib
export OMPI_MCA_pml=ucx
export UCX_TLS=gdr_copy,rc,rc_x,sm,cuda_copy,cuda_ipc
export UCX_RNDV_SCHEME=put_zcopy
export UCX_RNDV_THRESH=16384
export UCX_IB_GPU_DIRECT_RDMA=yes
export UCX_MEMTYPE_CACHE=n

# IO environment ###############################################################
if [ 8 -eq 1 ]; then
    export OMPI_MCA_io=ompio
else
    export OMPI_MCA_io=romio321
fi
export OMPI_MCA_btl_openib_allow_ib=true
export OMPI_MCA_btl_openib_device_type=infiniband
export OMPI_MCA_btl_openib_if_exclude=mlx5_1,mlx5_2,mlx5_3

# load environment #############################################################
env_dir="$(readlink -f /mnt/lustre/tursafs1/home/dp207/dp207/shared/env/versions/220428)"
source "${env_dir}/env-base.sh"
if [ "${SLURM_JOB_PARTITION}" = 'gpu' ]; then
    source "${env_dir}/env-gpu.sh"
else
    echo "error: partition ${SLURM_JOB_PARTITION} not supported for this template" 1>&2
    exit 1
fi
spack load sshpass

# application and parameters ###################################################
app='/mnt/lustre/tursafs1/home/dp207/dp207/dc-port1/power-bench/2-racks/Benchmark_dwf_fp32'
opt=('--comms-overlap' '--comms-concurrent')
par=''

# collect job information ######################################################
job_info_dir=job/${SLURM_JOB_NAME}.${SLURM_JOB_ID}
mkdir -p "${job_info_dir}"
date > "${job_info_dir}/start-date"
echo "epoch $(date '+%s')" >> "${job_info_dir}/start-date"
set > "${job_info_dir}/env"
ldd ${app} > "${job_info_dir}/ldd"
md5sum ${app} > "${job_info_dir}/app-hash"
readelf -a ${app} > "${job_info_dir}/elf"
echo "${SLURM_JOB_NODELIST}" > "${job_info_dir}/nodes"
cp "${BASH_SOURCE[0]}" "${job_info_dir}/script"
if [ -n "${par}" ]; then cp "${par}" "${job_info_dir}/par"; fi

# GPU frequency control ########################################################
power_dir='/mnt/lustre/tursafs1/home/dp207/dp207/dc-port1/power-bench/2-racks/'
freq=930

# set frequency
for h in $(scontrol show hostnames "${SLURM_JOB_NODELIST}"); do
    ${power_dir}/remote-sudo.sh "$h" "nvidia-smi -ac 1215,${freq}"
done

# start NVIDIA SMI monitoring
tmp=$(mktemp)
sleep 1
coproc nvidia-smi dmon -o DT &> "${tmp}"

# run! #########################################################################
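# decomposition note (a sketch, assuming Grid's usual convention that the
# global --grid volume is divided by the --mpi layout in each dimension):
#   ranks         = 2*2*2*4 = 32, matching --ntasks=32 (8 nodes x 4 GPUs/node)
#   local lattice = 48/2 . 48/2 . 48/2 . 48/4 = 24.24.24.12 per rank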
mpirun -np "${SLURM_NTASKS}" -x LD_LIBRARY_PATH --bind-to none \
    ./gpu-mpi-wrapper.sh \
    ${app} "${par}" "${opt[@]}" \
    --mpi 2.2.2.4 \
    --accelerator-threads 8 \
    --grid 48.48.48.48 \
    --shm 2048 &> "${job_info_dir}/log"

# if we reach this point the application exited successfully ###################
touch "${job_info_dir}/success"
date > "${job_info_dir}/end-date"
echo "epoch $(date '+%s')" >> "${job_info_dir}/end-date"

# reset GPUs ###################################################################
# stop monitoring
kill -INT "${COPROC_PID}"

# make monitoring DB
${power_dir}/dmon-to-db.sh "${tmp}" smi-dmon-8A.db "clock_limit_${freq}"

# reset clocks
for h in $(scontrol show hostnames "${SLURM_JOB_NODELIST}"); do
    ${power_dir}/remote-sudo.sh "$h" 'nvidia-smi -ac 1215,1410'
done
################################################################################
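# usage sketch (assumptions, not established by this script alone: the
# submission file name is hypothetical, and dmon-to-db.sh is assumed to write
# an SQLite database with a table named after its third argument):
#   sbatch power-8A-930.sh
#   sqlite3 smi-dmon-8A.db '.tables'
#   sqlite3 smi-dmon-8A.db 'SELECT * FROM clock_limit_930 LIMIT 5;'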