# Patch touching three files under systems/Perlmutter/ (git unified diff).
# NOTE(review): in this copy the line breaks inside the diff appear to have been
# flattened to single spaces, so original indentation and blank-line positions
# cannot be read back from it -- regenerate from the commit before applying.
#
# config-command
#   - adds DIR (set from `pwd`) and PREFIX=$DIR/../Prequisites/install/
#   - passes two more flags to ../../configure:
#     --disable-accelerator-cshift and --with-gmp=$PREFIX
#   NOTE(review): "Prequisites" is spelled this way in the path; presumably it
#   matches the real directory name -- verify before changing it.
#   NOTE(review): $PREFIX is expanded unquoted in the configure arguments, so a
#   working directory containing whitespace would be word-split; consider
#   quoting it.
#
# dwf4.slurm
#   - SBATCH changes: -A mp13 -> m3886_g, -q regular -> debug,
#     -t 0:20:00 -> 0:10:00, -n 16 -> 4,
#     --gpu-bind=map_gpu:0,1,2,3 -> --gpu-bind=none
#   - MPICH_RDMA_ENABLED_CUDA goes from 1 to 0; MPICH_GPU_IPC_ENABLED=0,
#     MPICH_GPU_EAGER_REGISTER_HOST_MEM=0 and MPICH_GPU_NO_ASYNC_MEMCPY=1 are
#     newly exported; MPICH_GPU_SUPPORT_ENABLED=1 is left as is
#   - removes the Benchmark_comms_host_device run, the "--shm-mpi 0" OPT
#     assignment and all four Benchmark_dwf_fp32 runs; the "--shm-mpi 1" OPT
#     assignment is kept
#   - the only srun added is Benchmark_ITT with --mpi 2.1.1.2
#     --grid 64.64.64.64, redirected to ITT.log
#   NOTE(review): after this change the script no longer runs a dwf benchmark
#   although it is still named dwf4.slurm -- confirm that is intended.
#
# sourceme.sh
#   - in the `module load` line, `cuda` is replaced by `cudatoolkit/11.4`
#   NOTE(review): the hunk header declares 4 lines per side but only 2 are
#   visible here; the tail of this hunk may be cut off.
#
diff --git a/systems/Perlmutter/config-command b/systems/Perlmutter/config-command index b399c535..b62704dc 100644 --- a/systems/Perlmutter/config-command +++ b/systems/Perlmutter/config-command @@ -1,9 +1,13 @@ +DIR=`pwd` +PREFIX=$DIR/../Prequisites/install/ ../../configure \ --enable-comms=mpi \ --enable-simd=GPU \ --enable-shm=nvlink \ --enable-gen-simd-width=64 \ --enable-accelerator=cuda \ + --disable-accelerator-cshift \ + --with-gmp=$PREFIX \ --disable-fermion-reps \ --disable-unified \ --disable-gparity \ diff --git a/systems/Perlmutter/dwf4.slurm b/systems/Perlmutter/dwf4.slurm index ba198595..f6d6a2a9 100644 --- a/systems/Perlmutter/dwf4.slurm +++ b/systems/Perlmutter/dwf4.slurm @@ -1,24 +1,20 @@ #!/bin/bash -#SBATCH -A mp13 +#SBATCH -A m3886_g #SBATCH -C gpu -#SBATCH -q regular -#SBATCH -t 0:20:00 -#SBATCH -n 16 +#SBATCH -q debug +#SBATCH -t 0:10:00 +#SBATCH -n 4 #SBATCH --ntasks-per-node=4 #SBATCH -c 32 #SBATCH --exclusive #SBATCH --gpus-per-task=1 -#SBATCH --gpu-bind=map_gpu:0,1,2,3 +#SBATCH --gpu-bind=none export SLURM_CPU_BIND="cores" -export MPICH_RDMA_ENABLED_CUDA=1 export MPICH_GPU_SUPPORT_ENABLED=1 -srun ./benchmarks/Benchmark_comms_host_device --mpi 2.2.2.2 --accelerator-threads 8 > comms.4node - -OPT="--comms-overlap --comms-concurrent --shm-mpi 0" -srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 64.64.64.64 --accelerator-threads 8 --shm 2048 $OPT > dwf.64.64.64.64.4node.opt0 -srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 48.48.48.48 --accelerator-threads 8 --shm 2048 $OPT > dwf.48.48.48.48.4node.opt0 - +export MPICH_RDMA_ENABLED_CUDA=0 +export MPICH_GPU_IPC_ENABLED=0 +export MPICH_GPU_EAGER_REGISTER_HOST_MEM=0 +export MPICH_GPU_NO_ASYNC_MEMCPY=1 OPT="--comms-overlap --comms-concurrent --shm-mpi 1" -srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 64.64.64.64 --accelerator-threads 8 --shm 2048 $OPT > dwf.64.64.64.64.4node.opt1 -srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 48.48.48.48 
--accelerator-threads 8 --shm 2048 $OPT > dwf.48.48.48.48.4node.opt1 +srun ./benchmarks/Benchmark_ITT --mpi 2.1.1.2 --grid 64.64.64.64 --accelerator-threads 8 --shm 2048 $OPT > ITT.log diff --git a/systems/Perlmutter/sourceme.sh b/systems/Perlmutter/sourceme.sh index 9359dea9..6d09b1c9 100644 --- a/systems/Perlmutter/sourceme.sh +++ b/systems/Perlmutter/sourceme.sh @@ -1,4 +1,4 @@ export CRAY_ACCEL_TARGET=nvidia80 -module load PrgEnv-gnu cpe-cuda cuda +module load PrgEnv-gnu cpe-cuda cudatoolkit/11.4