mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-14 09:45:36 +00:00
43 lines
1.1 KiB
Bash
43 lines
1.1 KiB
Bash
#!/bin/bash
#SBATCH --partition csi
#SBATCH --time=00:10:00
#SBATCH -A csigeneral
#SBATCH --exclusive
#SBATCH --nodes=1
#SBATCH --ntasks=4
#SBATCH --qos csi
#SBATCH --gres=gpu:4

# Slurm batch job: runs Benchmark_dwf_fp32 on one node with 4 tasks, each task
# pinned to its own GPU through a generated ./select_gpu wrapper.
#
# Expects to be submitted from a directory that contains sourceme.sh and
# ./benchmarks/Benchmark_dwf_fp32.

# Environment setup provided alongside this script (contents not shown here).
source sourceme.sh

# Generate the per-rank launcher. The heredoc delimiter is quoted so the text
# below is written out literally and expanded only when the wrapper runs.
cat << 'EOF' > select_gpu
#!/bin/bash
# Select one GPU for this task based on its node-local Slurm rank, then
# replace this shell with the requested command.
GPU_MAP=(0 1 2 3)
export GPU=${GPU_MAP[$SLURM_LOCALID]}
export CUDA_VISIBLE_DEVICES=$GPU
unset ROCR_VISIBLE_DEVICES
echo "RANK $SLURM_LOCALID using GPU $GPU"
# "$@" keeps every argument intact; an unquoted $* would re-split arguments on
# whitespace and glob-expand them.
exec "$@"
EOF
chmod +x ./select_gpu

# OpenMP threads per task.
# NOTE(review): the script previously also exported OMP_NUM_THREAD=8 (missing
# the trailing S). OpenMP does not read that name, so 4 was the value actually
# in effect and is kept here. Raise it if 8 threads were intended.
export OMP_NUM_THREADS=4

# Open MPI / UCX transport settings.
export OMPI_MCA_btl=^uct,openib
export UCX_TLS=cuda,gdr_copy,rc,rc_x,sm,cuda_copy,cuda_ipc
export UCX_RNDV_SCHEME=put_zcopy
export UCX_RNDV_THRESH=16384
export UCX_IB_GPU_DIRECT_RDMA=no
export UCX_MEMTYPE_CACHE=n

# Optional node diagnostics (disabled).
#srun -N1 -n1 nvidia-smi
#srun -N1 -n1 numactl -H > numa.txt

# Write the node's hardware topology to A100-topo.pdf.
srun -N1 -n1 lstopo A100-topo.pdf

# Single-task reference run (disabled).
# 4.35 TF/s
#srun -N1 -n1 ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 16.32.32.32 --shm 2048 --shm-mpi 0 --accelerator-threads 16

# Four-task run: one task per GPU via the wrapper generated above.
srun -N1 -n4 ./select_gpu ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.2.2 --grid 32.32.64.64 --shm 2048 --shm-mpi 0 --accelerator-threads 16