1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-05 03:35:55 +01:00
2023-12-21 23:20:17 +00:00

108 lines
3.4 KiB
Bash
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
## qsub -q EarlyAppAccess -A Aurora_Deployment -I -l select=1 -l walltime=60:00
#PBS -q EarlyAppAccess
#PBS -l select=2
#PBS -l walltime=01:00:00
#PBS -A LatticeQCD_aesp_CNDA
HDIR=/home/paboyle/
#module use /soft/testing/modulefiles/
#module load intel-UMD23.05.25593.11/23.05.25593.11
#module load tools/pti-gpu
#export LD_LIBRARY_PATH=$HDIR/tools/lib64:$LD_LIBRARY_PATH
#export PATH=$HDIR/tools/bin:$PATH
export TZ='/usr/share/zoneinfo/US/Central'
export OMP_PROC_BIND=spread
export OMP_NUM_THREADS=3
unset OMP_PLACES
cd $PBS_O_WORKDIR
source ../sourceme.sh
echo Jobid: $PBS_JOBID
echo Running on host `hostname`
echo Running on nodes `cat $PBS_NODEFILE`
echo NODES
cat $PBS_NODEFILE
NNODES=`wc -l < $PBS_NODEFILE`
NRANKS=12 # Number of MPI ranks per node
NDEPTH=4 # Number of hardware threads per rank, spacing between MPI ranks on a node
NTHREADS=$OMP_NUM_THREADS # Number of OMP threads per rank, given to OMP_NUM_THREADS
NTOTRANKS=$(( NNODES * NRANKS ))
echo "NUM_NODES=${NNODES} TOTAL_RANKS=${NTOTRANKS} RANKS_PER_NODE=${NRANKS} THREADS_PER_RANK=${OMP_NUM_THREADS}"
echo "OMP_PROC_BIND=$OMP_PROC_BIND OMP_PLACES=$OMP_PLACES"
CMD="mpiexec -np 2 -ppn 1 -d ${NDEPTH} -envall \
./gpu_tile_compact.sh \
./Benchmark_comms_host_device --mpi 1.1.1.2 --grid 32.24.32.192 \
--shm-mpi 1 --shm 2048 --device-mem 32000 --accelerator-threads 32"
export MPIR_CVAR_CH4_OFI_ENABLE_HMEM=0
#$CMD | tee 1-to-1.comms.hmem0
export MPIR_CVAR_CH4_OFI_ENABLE_HMEM=1
#$CMD | tee 1-to-1.comms.hmem1
CMD="mpiexec -np 4 -ppn 2 -d ${NDEPTH} --cpu-bind=depth -envall \
./gpu_tile_compact.sh \
./Benchmark_comms_host_device --mpi 2.2.1.1 --grid 32.24.32.96 \
--shm-mpi 1 --shm 2048 --device-mem 32000 --accelerator-threads 32"
export MPIR_CVAR_CH4_OFI_ENABLE_HMEM=1
#$CMD | tee 2-to-2.comms.hmem1
export MPIR_CVAR_CH4_OFI_ENABLE_HMEM=0
#$CMD | tee 2-to-2.comms.hmem0
CMD="mpiexec -np 6 -ppn 3 -d ${NDEPTH} --cpu-bind=depth -envall \
./gpu_tile_compact.sh \
./Benchmark_comms_host_device --mpi 3.2.1.1 --grid 32.24.32.96 \
--shm-mpi 1 --shm 2048 --device-mem 32000 --accelerator-threads 32"
export MPIR_CVAR_CH4_OFI_ENABLE_HMEM=1
#$CMD | tee 3-to-3.comms.hmem1
export MPIR_CVAR_CH4_OFI_ENABLE_HMEM=0
#$CMD | tee 3-to-3.comms.hmem0
CMD="mpiexec -np 8 -ppn 4 -d ${NDEPTH} --cpu-bind=depth -envall \
./gpu_tile_compact4.sh \
./Benchmark_comms_host_device --mpi 2.2.2.1 --grid 32.24.32.96 \
--shm-mpi 1 --shm 2048 --device-mem 32000 --accelerator-threads 32"
export MPIR_CVAR_CH4_OFI_ENABLE_HMEM=1
$CMD | tee 4-to-4.comms.hmem1.nic-affinity
export MPIR_CVAR_CH4_OFI_ENABLE_HMEM=0
$CMD | tee 4-to-4.comms.hmem0.nic-affinity
CMD="mpiexec -np 12 -ppn 6 -d ${NDEPTH} --cpu-bind=depth -envall \
./gpu_tile_compact.sh \
./Benchmark_comms_host_device --mpi 3.2.2.1 --grid 32.24.32.96 \
--shm-mpi 1 --shm 2048 --device-mem 32000 --accelerator-threads 32"
export MPIR_CVAR_CH4_OFI_ENABLE_HMEM=1
#$CMD | tee 6-to-6.comms.hmem1
export MPIR_CVAR_CH4_OFI_ENABLE_HMEM=0
#$CMD | tee 6-to-6.comms.hmem0
CMD="mpiexec -np ${NTOTRANKS} -ppn ${NRANKS} -d ${NDEPTH} --cpu-bind=depth -envall \
./gpu_tile_compact.sh \
./Benchmark_comms_host_device --mpi 3.2.2.2 --grid 32.24.32.192 \
--shm-mpi 1 --shm 2048 --device-mem 32000 --accelerator-threads 32"
export MPIR_CVAR_CH4_OFI_ENABLE_HMEM=1
#$CMD | tee 12-to-12.comms.hmem1
export MPIR_CVAR_CH4_OFI_ENABLE_HMEM=0
#$CMD | tee 12-to-12.comms.hmem0