mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-13 20:57:06 +01:00
Merge branch 'feature/dirichlet' of https://github.com/paboyle/Grid into feature/dirichlet
This commit is contained in:
@ -1,9 +1,14 @@
|
||||
# Configure invocation: runs the configure script two directories up
# (../../configure) with MPI comms, the CUDA accelerator and NVLink shared
# memory enabled, and points --with-gmp at $PREFIX.
# PREFIX is derived from the current working directory, so the result depends
# on where this is launched from.
# NOTE(review): this span is one hunk of a rendered commit diff; the bare `|`
# and `||||` lines are page-table residue, not shell, and lines from the old
# and new revisions may be shown together — confirm against the repository.
# NOTE(review): "Prequisites" is kept exactly as written in the path
# (presumably "Prerequisites"); verify the directory name on disk.
DIR=`pwd`
|
||||
PREFIX=$DIR/../Prequisites/install/
|
||||
../../configure \
|
||||
--enable-comms=mpi \
|
||||
--enable-simd=GPU \
|
||||
--enable-shm=nvlink \
|
||||
--enable-gen-simd-width=64 \
|
||||
--enable-accelerator=cuda \
|
||||
--enable-setdevice \
|
||||
--disable-accelerator-cshift \
|
||||
--with-gmp=$PREFIX \
|
||||
--disable-fermion-reps \
|
||||
--disable-unified \
|
||||
--disable-gparity \
|
||||
|
@ -1,24 +1,27 @@
|
||||
#!/bin/bash
# Slurm batch header for a GPU benchmark job. The #SBATCH lines are scheduler
# directives, not ordinary comments, so their text must stay exactly as is.
# NOTE(review): this is a rendered diff hunk (-1,24 +1,27), so removed and
# added lines appear together without +/- markers. There are two -A, two -q,
# two -n, two --ntasks-per-node, two --exclusive and two --gpu-bind directives
# below; presumably each pair is an old/new revision — confirm which value the
# committed script actually uses before submitting.
|
||||
#SBATCH -A mp13
|
||||
#SBATCH -A m3886_g
|
||||
#SBATCH -C gpu
|
||||
#SBATCH -q regular
|
||||
#SBATCH -q debug
|
||||
#SBATCH -t 0:20:00
|
||||
#SBATCH -n 16
|
||||
#SBATCH --ntasks-per-node=4
|
||||
#SBATCH -c 32
|
||||
#SBATCH --exclusive
|
||||
#SBATCH -N 1
|
||||
#SBATCH -n 4
|
||||
#SBATCH --ntasks-per-node=4
|
||||
#SBATCH --gpus-per-task=1
|
||||
#SBATCH --gpu-bind=map_gpu:0,1,2,3
|
||||
#SBATCH --exclusive
|
||||
#SBATCH --gpu-bind=none
|
||||
|
||||
# Job body: exports CPU-binding and MPICH GPU environment variables, then
# launches the benchmark binaries under ./benchmarks with srun.
# NOTE(review): rendered diff hunk — lines from the old and new revisions are
# mixed here (MPICH_RDMA_ENABLED_CUDA is exported twice, and both the 2.2.2.2
# and 2.2.1.1 --mpi layouts appear); confirm which lines are current.
export SLURM_CPU_BIND="cores"
|
||||
export MPICH_RDMA_ENABLED_CUDA=1
|
||||
export MPICH_GPU_SUPPORT_ENABLED=1
|
||||
srun ./benchmarks/Benchmark_comms_host_device --mpi 2.2.2.2 --accelerator-threads 8 > comms.4node
|
||||
export MPICH_RDMA_ENABLED_CUDA=1
|
||||
export MPICH_GPU_IPC_ENABLED=1
|
||||
export MPICH_GPU_EAGER_REGISTER_HOST_MEM=0
|
||||
export MPICH_GPU_NO_ASYNC_MEMCPY=0
|
||||
#export MPICH_SMP_SINGLE_COPY_MODE=CMA
|
||||
|
||||
# Option set with --shm-mpi 0; the two runs below write to the *.opt0 files.
# $OPT is expanded unquoted so that it splits into separate arguments.
OPT="--comms-overlap --comms-concurrent --shm-mpi 0"
|
||||
srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 64.64.64.64 --accelerator-threads 8 --shm 2048 $OPT > dwf.64.64.64.64.4node.opt0
|
||||
srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 48.48.48.48 --accelerator-threads 8 --shm 2048 $OPT > dwf.48.48.48.48.4node.opt0
|
||||
# Option set with --shm-mpi 1 and without --comms-concurrent; the active run
# below uses the 2.2.1.1 layout on $VOL and is not redirected to a file.
OPT="--comms-overlap --shm-mpi 1"
|
||||
VOL=64.64.32.32
|
||||
srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.1.1 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT
|
||||
#srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.1.1.4 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT
|
||||
#srun ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.8 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT
|
||||
|
||||
# Same flags as the *.opt0 runs but with --shm-mpi 1; output goes to *.opt1.
OPT="--comms-overlap --comms-concurrent --shm-mpi 1"
|
||||
srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 64.64.64.64 --accelerator-threads 8 --shm 2048 $OPT > dwf.64.64.64.64.4node.opt1
|
||||
srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 48.48.48.48 --accelerator-threads 8 --shm 2048 $OPT > dwf.48.48.48.48.4node.opt1
|
||||
|
@ -1,4 +1,4 @@
|
||||
|
||||
# Environment setup: exports CRAY_ACCEL_TARGET and loads the compiler and CUDA
# modules.
# NOTE(review): the two `module load` lines are presumably the old and new
# sides of this diff hunk (unversioned `cuda` vs `cudatoolkit/11.4`); confirm
# which one is current rather than running both.
export CRAY_ACCEL_TARGET=nvidia80
|
||||
|
||||
module load PrgEnv-gnu cpe-cuda cuda
|
||||
module load PrgEnv-gnu cpe-cuda cudatoolkit/11.4
|
||||
|
Reference in New Issue
Block a user