1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00
Grid/systems/Tursa/dwf4.slurm
Peter Boyle aab3bcb46f Dirichlet first cut - wrong answers on dagger multiply.
Struggling to get a compute node so changing systems
2022-02-22 19:58:33 +00:00

38 lines
804 B
Bash

#!/bin/bash
# Slurm batch job: runs ./benchmarks/Benchmark_dwf_fp32 under Open MPI on
# 4 nodes with 16 MPI ranks (4 per node) and writes the benchmark's stdout
# to dwf.4node.perf in the submission directory.
#
# Usage: submit with sbatch from a directory containing ./mpiwrapper.sh and
# ./benchmarks/Benchmark_dwf_fp32 (both are referenced by relative path).
#
# NOTE: sbatch only honours SBATCH directives that appear before the first
# executable command, so keep every directive above the `set` line below.
#
# Job name (also expands as %x in the output/error file names below).
#SBATCH -J dslash
# Account/project to charge.
#SBATCH -A dp207
# Do not share the allocated nodes with other jobs.
#SBATCH --exclusive
# 4 nodes x 4 tasks per node = 16 tasks; must match the --mpi decomposition
# passed to the benchmark (2*2*2*2 = 16 ranks).
#SBATCH --nodes=4
#SBATCH --ntasks=16
#SBATCH --qos=standard
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=8
# Short wall-clock limit: this is a quick benchmark run.
#SBATCH --time=0:05:00
#SBATCH --partition=gpu
# 4 GPUs per node, i.e. one per task with --ntasks-per-node=4.
#SBATCH --gres=gpu:4
# %x = job name, %j = job id.
#SBATCH --output=%x.%j.out
#SBATCH --error=%x.%j.err

set -euo pipefail

# Fail early with a clear message when run outside a Slurm allocation;
# otherwise `-np` would silently consume the next mpirun option instead.
: "${SLURM_NTASKS:?SLURM_NTASKS is not set; submit this script with sbatch}"

# NOTE(review): 4 OpenMP threads per rank although --cpus-per-task=8 is
# requested above -- confirm the mismatch is intentional.
export OMP_NUM_THREADS=4

# Open MPI: the leading '^' excludes the listed BTL components.
export OMPI_MCA_btl=^uct,openib

# UCX transport selection and tuning for GPU-aware communication.
export UCX_TLS=gdr_copy,rc,rc_x,sm,cuda_copy,cuda_ipc
export UCX_RNDV_SCHEME=put_zcopy
export UCX_RNDV_THRESH=16384
export UCX_IB_GPU_DIRECT_RDMA=yes
export UCX_MEMTYPE_CACHE=n

# Communication-related flags passed to the benchmark; kept as an array so
# each flag stays a separate word without relying on unquoted word splitting.
OPT=(--comms-overlap --comms-concurrent)

# Launch one rank per Slurm task.
#   -x LD_LIBRARY_PATH : forward the library path to every rank.
#   --bind-to none     : mpirun itself applies no CPU binding.
#   ./mpiwrapper.sh    : per-rank wrapper (not shown here; presumably sets up
#                        GPU/NUMA affinity -- verify against that script).
#   --mpi / --grid     : presumably the 4-d rank decomposition and the global
#                        lattice size -- confirm against the Grid docs.
mpirun -np "$SLURM_NTASKS" -x LD_LIBRARY_PATH --bind-to none \
  ./mpiwrapper.sh \
  ./benchmarks/Benchmark_dwf_fp32 \
  "${OPT[@]}" \
  --mpi 2.2.2.2 \
  --accelerator-threads 8 \
  --grid 64.64.64.64 \
  --shm 2048 > dwf.4node.perf