#!/bin/bash
#BSUB -P LGT104
#BSUB -W 2:00
#BSUB -nnodes 4
#BSUB -J DWF

export OMP_NUM_THREADS=6
export PAMI_IBV_ADAPTER_AFFINITY=1
export PAMI_ENABLE_STRIPING=1
export PAMI_DISABLE_IPC=1
export OPT="--comms-concurrent --comms-overlap "


APP="./wrap.sh ./benchmarks/Benchmark_dwf_fp32 --grid 48.48.48.72 --mpi 2.2.2.3 --shm 1024 --device-mem 4000 --shm-force-mpi 1 $OPT "
jsrun --nrs 24 -a1 -g1 -c6 -dpacked -b packed:6 --latency_priority gpu-cpu --smpiargs="-gpu" $APP > dwf.24.4node

APP="./wrap.sh ./benchmarks/Benchmark_comms_host_device --grid 48.48.48.72 --mpi 2.2.2.3 --shm 1024 --device-mem 4000 --shm-force-mpi 1 $OPT "
jsrun  --smpiargs="-gpu"  --nrs 4 -a6 -g6 -c42 -dpacked -b packed:6  $APP > comms.24.4node