#!/bin/bash
#
# Per-rank CPU/GPU binding wrapper for Open MPI launches.
#
# Usage: mpirun ... <this-script> <command> [args...]
#
# Inputs (environment):
#   OMPI_COMM_WORLD_RANK        global rank           (default 0 when unset)
#   OMPI_COMM_WORLD_LOCAL_RANK  rank within the node  (default 0 when unset)
#   OMPI_COMM_WORLD_LOCAL_SIZE  ranks on this node    (default 1 when unset)
#   OMP_NUM_THREADS             threads per rank; 1 is assumed for the
#                               affinity window when unset (the variable
#                               itself is left untouched)
#
# Optional tuning (a default is applied when unset or empty):
#   NUM_CPUS     CPU count used to derive the defaults below     (96)
#   RANK_STRIDE  CPU offset between consecutive local ranks
#                                          (NUM_CPUS / ranks_per_node)
#   OMP_STRIDE   CPU stride between a rank's threads             (1)
#   NUM_GPUS     GPU count used for the rank-to-GPU mapping      (8)
#   GPU_START    first GPU index; also shifts the CPU window by
#                GPU_START * (NUM_CPUS / NUM_GPUS)                (0)
#   GPU_STRIDE   multiplier applied to the local rank when
#                choosing a GPU                                   (1)
#
# Outputs (exported to the wrapped command):
#   global_rank, local_rank, ranks_per_node
#   GOMP_CPU_AFFINITY     "<first>-<last>:<stride>" CPU range for this rank
#   ROCR_VISIBLE_DEVICES  index of the GPU assigned to this rank
#
# The exit status is that of the wrapped command.

main() {
  # Outside an MPI launch the OMPI_* variables are unset; fall back to a
  # single-rank layout instead of producing arithmetic syntax errors.
  export global_rank="${OMPI_COMM_WORLD_RANK:-0}"
  export local_rank="${OMPI_COMM_WORLD_LOCAL_RANK:-0}"
  export ranks_per_node="${OMPI_COMM_WORLD_LOCAL_SIZE:-1}"

  # ':=' assigns only when the variable is unset or empty, so values
  # supplied by the caller always win.
  : "${NUM_CPUS:=96}"
  : "${RANK_STRIDE:=$(( NUM_CPUS / ranks_per_node ))}"
  : "${OMP_STRIDE:=1}"
  : "${NUM_GPUS:=8}"
  : "${GPU_START:=0}"
  : "${GPU_STRIDE:=1}"

  # Thread count used only to size the CPU window.
  local threads="${OMP_NUM_THREADS:-1}"

  # CPU ids are used directly (identity mapping), so the window is not
  # limited to a fixed-size lookup table when NUM_CPUS exceeds 96.
  local cpus_per_gpu=$(( NUM_CPUS / NUM_GPUS ))
  local cpu_start=$(( RANK_STRIDE * local_rank + GPU_START * cpus_per_gpu ))
  local cpu_stop=$(( cpu_start + threads * OMP_STRIDE - 1 ))

  # Ceiling division: how many local ranks share one GPU.
  local ranks_per_gpu=$(( (ranks_per_node + NUM_GPUS - 1) / NUM_GPUS ))
  # GPU ids are likewise used directly rather than through a fixed table.
  local my_gpu=$(( local_rank * GPU_STRIDE / ranks_per_gpu + GPU_START ))

  export GOMP_CPU_AFFINITY="${cpu_start}-${cpu_stop}:${OMP_STRIDE}"
  export ROCR_VISIBLE_DEVICES="${my_gpu}"

  # Run the wrapped command with the binding environment in place.
  "$@"
}

main "$@"