1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-04 19:25:56 +01:00

New scripts

This commit is contained in:
Peter Boyle 2022-03-03 17:00:37 -05:00
parent 0d80eeb545
commit bb5c16b97f
3 changed files with 40 additions and 18 deletions

View File

@ -3,28 +3,28 @@
#SBATCH -A LGT104
#SBATCH -t 01:00:00
##SBATCH -U openmpThu
##SBATCH -p ecp
#SBATCH -J DWF
#SBATCH -o DWF.%J
#SBATCH -e DWF.%J
#SBATCH -N 1
#SBATCH -n 1
#SBATCH --exclusive
#SBATCH -n 8
#SBATCH --exclusive
#SBATCH --gpu-bind=map_gpu:0,1,2,3,7,6,5,4
DIR=.
module list
#export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
export MPICH_GPU_SUPPORT_ENABLED=1
export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
#export MPICH_SMP_SINGLE_COPY_MODE=NONE
#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
export MPICH_SMP_SINGLE_COPY_MODE=NONE
#export MPICH_SMP_SINGLE_COPY_MODE=CMA
export OMP_NUM_THREADS=1
AT=8
echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE
PARAMS=" --accelerator-threads ${AT} --grid 24.24.24.24 --shm-mpi 0 --mpi 1.1.1.1"
srun --gpus-per-task 1 -n1 ./benchmarks/Benchmark_dwf_fp32 $PARAMS
PARAMS=" --accelerator-threads 16 --grid 32.32.32.256 --mpi 1.1.1.8 --comms-overlap --shm 2048 --shm-mpi 0"
echo $PARAMS
srun --gpus-per-task 1 -n8 ./benchmarks/Benchmark_dwf_fp32 $PARAMS

View File

@ -6,22 +6,43 @@
#SBATCH -J DWF
#SBATCH -o DWF.%J
#SBATCH -e DWF.%J
#SBATCH -N 1
#SBATCH -n 8
#SBATCH -N 8
#SBATCH -n 64
#SBATCH --exclusive
#SBATCH --gpu-bind=map_gpu:0,1,2,3,7,6,5,4
DIR=.
module list
export MPICH_OFI_NIC_POLICY=GPU
export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
export MPICH_GPU_SUPPORT_ENABLED=1
export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
#export MPICH_SMP_SINGLE_COPY_MODE=NONE
#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
#export MPICH_SMP_SINGLE_COPY_MODE=CMA
export MPICH_SMP_SINGLE_COPY_MODE=NONE
export OMP_NUM_THREADS=1
echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE
PARAMS=" --accelerator-threads 8 --grid 32.64.64.64 --mpi 1.2.2.2 --comms-overlap --shm 2048 --shm-mpi 0"
srun --gpus-per-task 1 -n8 ./mpiwrapper.sh ./benchmarks/Benchmark_dwf_fp32 $PARAMS
PARAMS=" --accelerator-threads 16 --grid 64.64.64.256 --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 0"
echo $PARAMS
#srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_dwf_fp32 $PARAMS > dwf.64.64.64.256.8node
PARAMS=" --accelerator-threads 16 --grid 64.64.64.32 --mpi 4.4.4.1 --comms-overlap --shm 2048 --shm-mpi 1"
echo $PARAMS
srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_dwf_fp32 $PARAMS > dwf.64.64.64.32.8node
PARAMS=" --accelerator-threads 16 --grid 64.64.64.32 --mpi 4.4.4.1 --comms-overlap --shm 2048 --shm-mpi 0"
echo $PARAMS
#srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_dwf_fp32 $PARAMS > dwf.64.64.64.32.8node.shm0
PARAMS=" --accelerator-threads 16 --grid 64.64.64.32 --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 1"
echo $PARAMS
#srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_ITT $PARAMS > itt.8node
PARAMS=" --accelerator-threads 16 --grid 64.64.64.32 --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 0"
echo $PARAMS
#srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_ITT $PARAMS > itt.8node_shm0

View File

@ -1,10 +1,11 @@
#!/bin/bash
lrank=$SLURM_LOCALID
lgpu=(0 1 2 3 7 6 5 4)
export ROCR_VISIBLE_DEVICES=$SLURM_LOCALID
export ROCR_VISIBLE_DEVICES=${lgpu[$lrank]}
echo "`hostname` - $lrank device=$ROCR_VISIBLE_DEVICES binding=$BINDING"
echo "`hostname` - $lrank device=$ROCR_VISIBLE_DEVICES "
$*