mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 11:15:55 +01:00
Bland SYCL compile
This commit is contained in:
parent
3d437c5cc4
commit
80359e0d49
53
systems/OEM/README
Normal file
53
systems/OEM/README
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
1. Prerequisites:
|
||||||
|
===================
|
||||||
|
Make sure you have the latest Intel icpx release loaded (via modules or similar)
|
||||||
|
Make sure you have a SYCL-aware MPICH or Intel MPI loaded (the compiler wrapper is assumed to be mpicxx)
|
||||||
|
|
||||||
|
2. Obtain Grid:
|
||||||
|
===================
|
||||||
|
|
||||||
|
bash$
|
||||||
|
git clone https://github.com/paboyle/Grid
|
||||||
|
cd Grid
|
||||||
|
./bootstrap.sh
|
||||||
|
cd systems/OEM
|
||||||
|
|
||||||
|
3. Build Grid:
|
||||||
|
===================
|
||||||
|
|
||||||
|
The configure command below is also stored in the file config-command:
|
||||||
|
|
||||||
|
bash$
|
||||||
|
../../configure \
|
||||||
|
--enable-simd=GPU \
|
||||||
|
--enable-gen-simd-width=64 \
|
||||||
|
--enable-comms=mpi-auto \
|
||||||
|
--enable-accelerator-cshift \
|
||||||
|
--disable-gparity \
|
||||||
|
--disable-fermion-reps \
|
||||||
|
--enable-shm=nvlink \
|
||||||
|
--enable-accelerator=sycl \
|
||||||
|
--enable-unified=no \
|
||||||
|
MPICXX=mpicxx \
|
||||||
|
CXX=icpx \
|
||||||
|
LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader " \
|
||||||
|
CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare "
|
||||||
|
|
||||||
|
make all
|
||||||
|
|
||||||
|
4. Run a benchmark:
|
||||||
|
===================
|
||||||
|
|
||||||
|
*** Assumes interactive access to node. ***
|
||||||
|
|
||||||
|
Run Benchmark_dwf_fp32 using benchmarks/bench.sh:
|
||||||
|
|
||||||
|
bash$
|
||||||
|
cd benchmarks
|
||||||
|
./bench.sh
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
18
systems/OEM/benchmarks/bench.sh
Executable file
18
systems/OEM/benchmarks/bench.sh
Executable file
@ -0,0 +1,18 @@
|
|||||||
|
#!/bin/bash
#
# bench.sh - run Benchmark_dwf_fp32 on the local node with 1 and 2 MPI ranks.
#
# Must be started from the directory holding select_gpu.sh and
# Benchmark_dwf_fp32 (both are invoked through ./ relative paths).
# Each run's output is shown on the terminal and copied to <N>tile.log.

# Without pipefail the status of every pipeline below would be that of tee,
# hiding a failed mpiexec from whoever inspects the script's exit code.
set -o pipefail

# Runtime settings exported to every rank.
# NOTE(review): their exact meaning is defined by the Level Zero / SYCL
# runtime in use - confirm against the installed oneAPI release.
export EnableImplicitScaling=0
export ZE_ENABLE_PCI_ID_DEVICE_ORDER=1
export ONEAPI_DEVICE_FILTER=gpu,level_zero
export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=0
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:2
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY=1

# ZE_AFFINITY_MASK is intentionally not exported here: gpu_id and tile_id are
# not defined in this script (the old export produced the literal "."), and
# the ./select_gpu.sh wrapper used below exports the mask itself.

mpiexec -launcher ssh -n 1 -host localhost ./select_gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 32.32.32.32 --accelerator-threads 16 --shm-mpi 1 --shm 2048 --device-mem 32768 | tee 1tile.log
mpiexec -launcher ssh -n 2 -host localhost ./select_gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads 16 --shm-mpi 1 --shm 2048 --device-mem 32768 | tee 2tile.log

# Larger runs, disabled by default.
#mpiexec -launcher ssh -n 4 -host localhost ./select_gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.2.2 --grid 16.16.64.64 --accelerator-threads 16 --shm-mpi 0 --shm 2048 --device-mem 32768 | tee 4tile.log
#mpiexec -launcher ssh -n 8 -host localhost ./select_gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.2.4 --grid 16.16.64.128 --accelerator-threads 16 --shm-mpi 0 --shm 2048 --device-mem 32768 | tee 8tile.log
|
||||||
|
|
||||||
|
|
13
systems/OEM/benchmarks/select_gpu.sh
Executable file
13
systems/OEM/benchmarks/select_gpu.sh
Executable file
@ -0,0 +1,13 @@
|
|||||||
|
#!/bin/bash
#
# select_gpu.sh - pin the calling MPI rank to one GPU tile, then run a command.
#
# Usage:   mpiexec ... ./select_gpu.sh <command> [args...]
# Reads:   MPI_LOCALRANKID - node-local rank; treated as 0 when unset.
# Exports: ZE_AFFINITY_MASK=<gpu>.<tile> for the wrapped command.

# Number of tiles on each GPU.
num_tile=2

#######################################
# Print the "<gpu>.<tile>" affinity mask for a node-local rank.
# Consecutive ranks fill all tiles of one GPU before moving to the next,
# so the tile index is always smaller than the tile count.
# Arguments: $1 - node-local rank (non-negative integer)
#            $2 - tiles per GPU (positive integer)
# Outputs:   the mask on stdout
#######################################
affinity_mask() {
  local rank=$1
  local tiles=$2
  printf '%s.%s\n' "$(( rank / tiles ))" "$(( rank % tiles ))"
}

# The same variable must feed both the mask and the log line below; the
# previous arithmetic read a differently spelled (never set) name, so every
# rank ended up with mask 0.0.
local_rank=${MPI_LOCALRANKID:-0}

ZE_AFFINITY_MASK=$(affinity_mask "$local_rank" "$num_tile")
export ZE_AFFINITY_MASK

echo "local rank $local_rank ; ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK"

# Run the wrapped command with the mask in its environment.
"$@"
|
||||||
|
|
15
systems/OEM/config-command
Normal file
15
systems/OEM/config-command
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
# Configure command for the SYCL build with icpx and mpicxx.
# The configure script is addressed as ../../configure, so this must be run
# from a directory two levels below the one that holds it.

# Link and compile flags kept in variables so the configure call stays readable.
sycl_ldflags="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader "
sycl_cxxflags="-fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare "

../../configure \
  --enable-simd=GPU \
  --enable-gen-simd-width=64 \
  --enable-comms=mpi-auto \
  --enable-accelerator-cshift \
  --disable-gparity \
  --disable-fermion-reps \
  --enable-shm=nvlink \
  --enable-accelerator=sycl \
  --enable-unified=no \
  MPICXX=mpicxx \
  CXX=icpx \
  LDFLAGS="$sycl_ldflags" \
  CXXFLAGS="$sycl_cxxflags"
|
||||||
|
|
3
systems/OEM/setup.sh
Normal file
3
systems/OEM/setup.sh
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# Send HTTPS traffic through a fixed proxy host.
# NOTE(review): the host name looks site-specific; confirm it is reachable
# (or adjust/remove this line) on the system where this file is used.
export https_proxy=http://proxy-chain.intel.com:911
|
||||||
|
# Load the intel-release environment module.
# NOTE(review): presumably provides the icpx compiler used by config-command - confirm.
module load intel-release
|
||||||
|
# Load the intel/mpich environment module.
# NOTE(review): presumably provides the mpicxx wrapper and mpiexec launcher - confirm.
module load intel/mpich
|
Loading…
x
Reference in New Issue
Block a user