diff --git a/systems/OEM/README b/systems/OEM/README new file mode 100644 index 00000000..ce62f914 --- /dev/null +++ b/systems/OEM/README @@ -0,0 +1,53 @@ +1. Prerequisites: +=================== +Make sure you have the latest Intel icpx release loaded (via modules or similar) +Make sure you have SYCL-aware MPICH or Intel MPI loaded (assumed as mpicxx) + +2. Obtain Grid: +=================== + +bash$ +git clone https://github.com/paboyle/Grid +cd Grid +./bootstrap.sh +cd systems/OEM + +3. Build Grid: +=================== + +The configure command is stored in the file config-command: + +bash$ +../../configure \ + --enable-simd=GPU \ + --enable-gen-simd-width=64 \ + --enable-comms=mpi-auto \ + --enable-accelerator-cshift \ + --disable-gparity \ + --disable-fermion-reps \ + --enable-shm=nvlink \ + --enable-accelerator=sycl \ + --enable-unified=no \ + MPICXX=mpicxx \ + CXX=icpx \ + LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader " \ + CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare " + +make all + +4. Run a benchmark: +=================== + +*** Assumes interactive access to node. 
***
+
+run Benchmark_dwf_fp32 using benchmarks/bench.sh
+
+bash$
+cd benchmarks
+./bench.sh
+
+
+
+
+
+
diff --git a/systems/OEM/benchmarks/bench.sh b/systems/OEM/benchmarks/bench.sh
new file mode 100755
index 00000000..29b728b3
--- /dev/null
+++ b/systems/OEM/benchmarks/bench.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+#
+# Single-node Benchmark_dwf_fp32 runs on one and two GPU tiles.
+# Run from this directory. Every rank is started through ./select_gpu.sh,
+# which sets ZE_AFFINITY_MASK for that rank, so no mask is exported here.
+
+export EnableImplicitScaling=0
+export ZE_ENABLE_PCI_ID_DEVICE_ORDER=1
+export ONEAPI_DEVICE_FILTER=gpu,level_zero
+export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=0
+export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:2
+export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY=1
+
+mpiexec -launcher ssh -n 1 -host localhost ./select_gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 32.32.32.32 --accelerator-threads 16 --shm-mpi 1 --shm 2048 --device-mem 32768 | tee 1tile.log
+mpiexec -launcher ssh -n 2 -host localhost ./select_gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads 16 --shm-mpi 1 --shm 2048 --device-mem 32768 | tee 2tile.log
+
+# Larger runs, disabled by default.
+#mpiexec -launcher ssh -n 4 -host localhost ./select_gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.2.2 --grid 16.16.64.64 --accelerator-threads 16 --shm-mpi 0 --shm 2048 --device-mem 32768 | tee 4tile.log
+#mpiexec -launcher ssh -n 8 -host localhost ./select_gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.2.4 --grid 16.16.64.128 --accelerator-threads 16 --shm-mpi 0 --shm 2048 --device-mem 32768 | tee 8tile.log
+
+
diff --git a/systems/OEM/benchmarks/select_gpu.sh b/systems/OEM/benchmarks/select_gpu.sh
new file mode 100755
index 00000000..2ef1f82d
--- /dev/null
+++ b/systems/OEM/benchmarks/select_gpu.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+#
+# Per-rank wrapper. Pins this MPI rank to one GPU tile through
+# ZE_AFFINITY_MASK, then runs the command passed as arguments.
+#
+# MPI_LOCALRANKID is the node-local rank set by the Hydra launcher of MPICH
+# and Intel MPI. An unset value is treated as rank 0.
+
+num_tile=2   # tiles per GPU, assumed to be 2 on the target node
+local_rank=${MPI_LOCALRANKID:-0}
+
+# Consecutive local ranks fill the tiles of one GPU before moving to the next.
+gpu_id=$(( local_rank / num_tile ))
+tile_id=$(( local_rank % num_tile ))
+
+# Mask format is device.subdevice
+export ZE_AFFINITY_MASK=$gpu_id.$tile_id
+
+echo "local rank $MPI_LOCALRANKID ; ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK"
+
+"$@"
+
diff --git a/systems/OEM/config-command
b/systems/OEM/config-command
new file mode 100644
index 00000000..49b3e044
--- /dev/null
+++ b/systems/OEM/config-command
@@ -0,0 +1,17 @@
+# Configure line for a SYCL build of Grid with icpx and an MPI wrapper named mpicxx.
+# The relative path assumes the current directory is two levels below the top of the tree.
+../../configure \
+    --enable-simd=GPU \
+    --enable-gen-simd-width=64 \
+    --enable-comms=mpi-auto \
+    --enable-accelerator-cshift \
+    --disable-gparity \
+    --disable-fermion-reps \
+    --enable-shm=nvlink \
+    --enable-accelerator=sycl \
+    --enable-unified=no \
+    MPICXX=mpicxx \
+    CXX=icpx \
+    LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader " \
+    CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare "
+
diff --git a/systems/OEM/setup.sh b/systems/OEM/setup.sh
new file mode 100644
index 00000000..3b8188f0
--- /dev/null
+++ b/systems/OEM/setup.sh
@@ -0,0 +1,7 @@
+# Environment for building and running Grid on this system.
+# Presumably meant to be sourced, since the file is not executable and has no shebang.
+
+# Keep a proxy that is already set, otherwise fall back to the site default.
+export https_proxy="${https_proxy:-http://proxy-chain.intel.com:911}"
+module load intel-release
+module load intel/mpich