1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 01:05:38 +01:00

Merge branch 'develop' into feature/dirichlet

This commit is contained in:
Peter Boyle 2022-05-17 09:09:00 -07:00
commit 6fb6ca5b6b
3 changed files with 15 additions and 15 deletions

View File

@@ -1,9 +1,13 @@
DIR=`pwd`
PREFIX=$DIR/../Prequisites/install/
../../configure \ ../../configure \
--enable-comms=mpi \ --enable-comms=mpi \
--enable-simd=GPU \ --enable-simd=GPU \
--enable-shm=nvlink \ --enable-shm=nvlink \
--enable-gen-simd-width=64 \ --enable-gen-simd-width=64 \
--enable-accelerator=cuda \ --enable-accelerator=cuda \
--disable-accelerator-cshift \
--with-gmp=$PREFIX \
--disable-fermion-reps \ --disable-fermion-reps \
--disable-unified \ --disable-unified \
--disable-gparity \ --disable-gparity \

View File

@@ -1,24 +1,20 @@
#!/bin/bash #!/bin/bash
#SBATCH -A mp13 #SBATCH -A m3886_g
#SBATCH -C gpu #SBATCH -C gpu
#SBATCH -q regular #SBATCH -q debug
#SBATCH -t 0:20:00 #SBATCH -t 0:10:00
#SBATCH -n 16 #SBATCH -n 4
#SBATCH --ntasks-per-node=4 #SBATCH --ntasks-per-node=4
#SBATCH -c 32 #SBATCH -c 32
#SBATCH --exclusive #SBATCH --exclusive
#SBATCH --gpus-per-task=1 #SBATCH --gpus-per-task=1
#SBATCH --gpu-bind=map_gpu:0,1,2,3 #SBATCH --gpu-bind=none
export SLURM_CPU_BIND="cores" export SLURM_CPU_BIND="cores"
export MPICH_RDMA_ENABLED_CUDA=1
export MPICH_GPU_SUPPORT_ENABLED=1 export MPICH_GPU_SUPPORT_ENABLED=1
srun ./benchmarks/Benchmark_comms_host_device --mpi 2.2.2.2 --accelerator-threads 8 > comms.4node export MPICH_RDMA_ENABLED_CUDA=0
export MPICH_GPU_IPC_ENABLED=0
OPT="--comms-overlap --comms-concurrent --shm-mpi 0" export MPICH_GPU_EAGER_REGISTER_HOST_MEM=0
srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 64.64.64.64 --accelerator-threads 8 --shm 2048 $OPT > dwf.64.64.64.64.4node.opt0 export MPICH_GPU_NO_ASYNC_MEMCPY=1
srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 48.48.48.48 --accelerator-threads 8 --shm 2048 $OPT > dwf.48.48.48.48.4node.opt0
OPT="--comms-overlap --comms-concurrent --shm-mpi 1" OPT="--comms-overlap --comms-concurrent --shm-mpi 1"
srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 64.64.64.64 --accelerator-threads 8 --shm 2048 $OPT > dwf.64.64.64.64.4node.opt1 srun ./benchmarks/Benchmark_ITT --mpi 2.1.1.2 --grid 64.64.64.64 --accelerator-threads 8 --shm 2048 $OPT > ITT.log
srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 48.48.48.48 --accelerator-threads 8 --shm 2048 $OPT > dwf.48.48.48.48.4node.opt1

View File

@@ -1,4 +1,4 @@
export CRAY_ACCEL_TARGET=nvidia80 export CRAY_ACCEL_TARGET=nvidia80
module load PrgEnv-gnu cpe-cuda cuda module load PrgEnv-gnu cpe-cuda cudatoolkit/11.4