#!/usr/bin/env bash
# shellcheck disable=SC1091,SC2050,SC2170
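# (the disabled checks cover the environment scripts sourced below, which
# shellcheck cannot follow, and the IO-backend test further down, which
# compares a template-substituted constant)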
# using options from https://github.com/paboyle/Grid/tree/develop/systems/Tursa
#SBATCH -J power-loc32-16A-795
#SBATCH -A dp207
#SBATCH -t 48:00:00
#SBATCH --nodes=16
#SBATCH --ntasks=64
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=8
#SBATCH --partition=gpu
#SBATCH --gres=gpu:4
#SBATCH --output=%x.%j.out
#SBATCH --error=%x.%j.err
#SBATCH --reservation=dc-port1_61
#SBATCH --qos=reservation
#SBATCH --no-requeue
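# job geometry: 16 nodes x 4 tasks per node = 64 MPI ranks, one rank per GPU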
set -e
# OpenMP/OpenMPI/UCX environment ###############################################
export OMP_NUM_THREADS=4
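# note: 4 OpenMP threads per rank despite 8 cores per task; the spare cores
# presumably leave headroom for MPI/UCX progress and the GPU runtime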
export OMPI_MCA_btl=^uct,openib
export OMPI_MCA_pml=ucx
export UCX_TLS=gdr_copy,rc,rc_x,sm,cuda_copy,cuda_ipc
export UCX_RNDV_SCHEME=put_zcopy
export UCX_RNDV_THRESH=16384
export UCX_IB_GPU_DIRECT_RDMA=yes
export UCX_MEMTYPE_CACHE=n
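# the UCX settings above enable GPUDirect RDMA with CUDA-aware transports,
# force the put_zcopy rendezvous protocol above a 16 KiB message threshold,
# and disable the memory-type cache, a common workaround for correctness
# issues with device buffers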
# IO environment ###############################################################
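# the constant tested below is the template-substituted node count:
# single-node jobs presumably use OpenMPI's native ompio, multi-node jobs
# the ROMIO backend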
if [ 16 -eq 1 ]; then
    export OMPI_MCA_io=ompio
else
    export OMPI_MCA_io=romio321
fi
export OMPI_MCA_btl_openib_allow_ib=true
export OMPI_MCA_btl_openib_device_type=infiniband
export OMPI_MCA_btl_openib_if_exclude=mlx5_1,mlx5_2,mlx5_3
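# restrict the openib btl to a single HCA; since openib is already excluded
# from OMPI_MCA_btl above, these settings presumably only matter if that
# exclusion is lifted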
# load environment #############################################################
env_dir="$(readlink -f /mnt/lustre/tursafs1/home/dp207/dp207/shared/env/versions/220428)"
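# (pinned environment snapshot, dated 220428, resolved to an absolute path)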
source "${env_dir}/env-base.sh"
if [ "${SLURM_JOB_PARTITION}" = 'gpu' ]; then
    source "${env_dir}/env-gpu.sh"
else
    echo "error: partition ${SLURM_JOB_PARTITION} not supported for this template" 1>&2
    exit 1
fi
spack load sshpass
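# sshpass is presumably required by the remote-sudo.sh helper invoked below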
# application and parameters ###################################################
app='/mnt/lustre/tursafs1/home/dp207/dp207/dc-port1/power-bench/2-racks/size-loc32/Benchmark_dwf_fp32'
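# (Benchmark_dwf_fp32 is Grid's single-precision domain-wall fermion benchmark)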
opt=('--comms-overlap' '--comms-concurrent')
par=''
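# optional parameter file; left empty here, and only archived below when set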
# collect job information ######################################################
job_info_dir=job/${SLURM_JOB_NAME}.${SLURM_JOB_ID}
mkdir -p "${job_info_dir}"

date > "${job_info_dir}/start-date"
echo "epoch $(date '+%s')" >> "${job_info_dir}/start-date"
set > "${job_info_dir}/env"
ldd "${app}" > "${job_info_dir}/ldd"
md5sum "${app}" > "${job_info_dir}/app-hash"
readelf -a "${app}" > "${job_info_dir}/elf"
echo "${SLURM_JOB_NODELIST}" > "${job_info_dir}/nodes"
cp "${BASH_SOURCE[0]}" "${job_info_dir}/script"
if [ -n "${par}" ]; then cp "${par}" "${job_info_dir}/par"; fi
# GPU frequency control ########################################################
power_dir='/mnt/lustre/tursafs1/home/dp207/dp207/dc-port1/power-bench/2-racks/size-loc32'
freq=795
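# nvidia-smi -ac <memory,graphics> pins the application clocks: memory at
# 1215 MHz, graphics capped at ${freq} MHz (the 795 in the job name)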
# set frequency
for h in $(scontrol show hostnames "${SLURM_JOB_NODELIST}"); do
    "${power_dir}/remote-sudo.sh" "$h" "nvidia-smi -ac 1215,${freq}"
done
# start NVIDIA SMI monitoring
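# nvidia-smi dmon samples per-GPU clocks, power, and utilisation (about once
# per second); -o DT prefixes each sample with date and time, and coproc
# exposes the monitor's PID as COPROC_PID so it can be stopped after the run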
tmp=$(mktemp)
sleep 1
coproc nvidia-smi dmon -o DT &> "${tmp}"
# run! #########################################################################
mpirun -np "${SLURM_NTASKS}" -x LD_LIBRARY_PATH --bind-to none \
./gpu-mpi-wrapper.sh \
"${app}" ${par:+"${par}"} "${opt[@]}" \
--mpi 2.2.2.8 \
--accelerator-threads 8 \
--grid 64.64.64.256 \
--shm 2048 &> "${job_info_dir}/log"
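# sanity check: --mpi 2.2.2.8 gives 2*2*2*8 = 64 ranks, matching SLURM_NTASKS,
# so the 64.64.64.256 global lattice splits into a 32.32.32.32 local volume
# per GPU (the "loc32" in the job name); --shm sets Grid's shared-memory
# segment size (presumably in MiB)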
# if we reach this point, the application exited successfully ##################
touch "${job_info_dir}/success"
date > "${job_info_dir}/end-date"
echo "epoch $(date '+%s')" >> "${job_info_dir}/end-date"
# reset GPUs ###################################################################
# stop monitoring
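# SIGINT lets dmon flush its output and exit cleanly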
kill -INT "${COPROC_PID}"
# make monitoring DB
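# collect the dmon samples into the database, tagged with the applied clock limit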
"${power_dir}/dmon-to-db.sh" "${tmp}" smi-dmon-16A.db "clock_limit_${freq}"
# reset clocks
for h in $(scontrol show hostnames "${SLURM_JOB_NODELIST}"); do
    "${power_dir}/remote-sudo.sh" "$h" 'nvidia-smi -ac 1215,1410'
done
################################################################################