From 829dc8ccebe53090cc79c411e5e2e25e92c5ab48 Mon Sep 17 00:00:00 2001
From: Peter Boyle
Date: Mon, 27 Apr 2026 14:38:02 -0700
Subject: [PATCH] 32 node

---
 systems/Perlmutter/tests/cg32.slurm | 60 +++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 systems/Perlmutter/tests/cg32.slurm

diff --git a/systems/Perlmutter/tests/cg32.slurm b/systems/Perlmutter/tests/cg32.slurm
new file mode 100644
index 00000000..ae2246eb
--- /dev/null
+++ b/systems/Perlmutter/tests/cg32.slurm
@@ -0,0 +1,60 @@
+#!/bin/bash
+#
+# Slurm batch job: runs Test_dwf_mixedcg_prec on 32 Perlmutter GPU nodes,
+# 4 tasks per node with one GPU per task (128 tasks in total).
+# Output of the run is written to job.log in the current working directory.
+#
+##SBATCH -A m5294_g
+#SBATCH -A mp13_g
+#m3886_g
+#SBATCH -C gpu
+#SBATCH -q premium
+# Slurm reads a two-field time as minutes:seconds, so "00:10" is a 10 second
+# limit; the run below asks for --seconds 300, so request 10 minutes.
+#SBATCH -t 00:10:00
+#SBATCH -c 32
+#SBATCH -N 32
+#SBATCH -n 128
+#SBATCH --ntasks-per-node=4
+#SBATCH --gpus-per-task=1
+#SBATCH --exclusive
+#SBATCH --gpu-bind=none
+
+# CPU binding for srun and MPICH GPU-related settings.
+export SLURM_CPU_BIND="cores"
+export MPICH_GPU_SUPPORT_ENABLED=1
+export MPICH_RDMA_ENABLED_CUDA=1
+export MPICH_GPU_IPC_ENABLED=1
+export MPICH_GPU_EAGER_REGISTER_HOST_MEM=0
+export MPICH_GPU_NO_ASYNC_MEMCPY=0
+#export MPICH_SMP_SINGLE_COPY_MODE=CMA
+
+# Generate the per-task wrapper: it pins each task to the GPU and NUMA node
+# numbered by its node-local rank (SLURM_LOCALID), then execs the command.
+# The delimiter is quoted so the wrapper text is written out verbatim.
+cat << 'EOF' > select_gpu
+#!/bin/bash
+export GPU_MAP=(0 1 2 3)
+export NUMA_MAP=( 0 1 2 3 )
+export GPU=$SLURM_LOCALID
+export NUMA=$SLURM_LOCALID
+export CUDA_VISIBLE_DEVICES=$GPU
+# "$@" keeps every wrapped argument intact (unquoted $* would re-split them).
+exec numactl -m "$NUMA" -N "$NUMA" "$@"
+EOF
+
+chmod +x ./select_gpu
+
+# Extra flags shared by the runs below; an array keeps each flag one word.
+OPT=(--comms-overlap --shm-mpi 0)
+#
+# Local volume WAS 32.16.32.24
+#
+# 384 nodes
+#srun ./select_gpu ./Test_dwf_mixedcg_prec --seconds 300 --grid 128.128.128.288 --mpi 4.8.4.12 --device-mem 16000 --accelerator-threads 8 --shm 2048 "${OPT[@]}" > job.log
+# 32 nodes, same volume per node
+# NOTE(review): --mpi 2.2.2.4 is 2*2*2*4 = 32 ranks, while this job starts
+# 128 tasks (-n 128); the 384-node line uses 4*8*4*12 = 1536 = 4 per node.
+# Confirm the intended rank layout and grid before relying on this run.
+srun ./select_gpu ./Test_dwf_mixedcg_prec --seconds 300 --grid 64.32.64.96 --mpi 2.2.2.4 --device-mem 16000 --accelerator-threads 8 --shm 2048 "${OPT[@]}" > job.log
+