1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-13 04:37:05 +01:00

Merge branch 'develop' into feature/dirichlet

Conflicts:
	Grid/qcd/action/fermion/WilsonCompressor.h
	Grid/stencil/Stencil.h
This commit is contained in:
Peter Boyle
2022-12-13 08:24:58 -05:00
14 changed files with 425 additions and 228 deletions

4
systems/PVC/benchmarks/run-1tile.sh Normal file → Executable file
View File

@ -6,7 +6,7 @@
source /nfs/site/home/paboylex/ATS/GridNew/Grid/systems/PVC-nightly/setup.sh
export NT=16
export NT=8
export I_MPI_OFFLOAD=1
export I_MPI_OFFLOAD_TOPOLIB=level_zero
@ -21,7 +21,7 @@ export I_MPI_OFFLOAD_CELL=tile
export EnableImplicitScaling=0
export EnableWalkerPartition=0
export ZE_AFFINITY_MASK=0.0
mpiexec -launcher ssh -n 1 -host localhost ./Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 32.32.32.32 --accelerator-threads $NT --comms-sequential --shm-mpi 1 --cacheblocking 8.8.8.8
mpiexec -launcher ssh -n 1 -host localhost ./Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 32.32.32.32 --accelerator-threads $NT --comms-sequential --shm-mpi 1 --device-mem 32768
export ZE_AFFINITY_MASK=0
export I_MPI_OFFLOAD_CELL=device

View File

@ -1,13 +1,13 @@
#!/bin/bash
##SBATCH -p PVC-SPR-QZEH
##SBATCH -p PVC-ICX-QZNW
#SBATCH -p QZ1J-ICX-PVC
source /nfs/site/home/paboylex/ATS/GridNew/Grid/systems/PVC-nightly/setup.sh
export NT=16
# export IGC_EnableLSCFenceUGMBeforeEOT=0
# export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file=False"
#export IGC_ShaderDumpEnable=1
@ -19,8 +19,16 @@ export SYCL_DEVICE_FILTER=gpu,level_zero
export I_MPI_OFFLOAD_CELL=tile
export EnableImplicitScaling=0
export EnableWalkerPartition=0
export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=1
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0
mpiexec -launcher ssh -n 1 -host localhost ./wrap.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 32.32.32.32 --accelerator-threads $NT --comms-sequential --shm-mpi 1 > 1tile.log
for i in 0
do
mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads $NT --shm-mpi 1 --device-mem 32768
mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --shm-mpi 1 --device-mem 32768
done
#mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_halo --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads $NT --shm-mpi 1 > halo.2tile.1x2.log
#mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_halo --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --shm-mpi 1 > halo.2tile.2x1.log
mpiexec -launcher ssh -n 2 -host localhost ./wrap.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --comms-sequential --shm-mpi 1 > 2tile.log

View File

@ -7,8 +7,8 @@ echo Ranke $MPI_LOCALRANKID ZE_AFFINITY_MASK is $ZE_AFFINITY_MASK
if [ $MPI_LOCALRANKID = "0" ]
then
# ~psteinbr/build_pti/ze_tracer -c $@
onetrace --chrome-kernel-timeline $@
# ~psteinbr/build_pti/ze_tracer -h $@
onetrace --chrome-device-timeline $@
else
$@
fi

View File

@ -2,14 +2,15 @@ INSTALL=/nfs/site/home/azusayax/install
../../configure \
--enable-simd=GPU \
--enable-gen-simd-width=64 \
--enable-comms=mpi \
--enable-comms=mpi-auto \
--disable-accelerator-cshift \
--disable-gparity \
--disable-fermion-reps \
--enable-shm=nvlink \
--enable-accelerator=sycl \
--enable-unified=yes \
CXX=mpicxx \
--enable-unified=no \
MPICXX=mpicxx \
CXX=dpcpp \
LDFLAGS="-fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -L$INSTALL/lib" \
CXXFLAGS="-cxx=dpcpp -fsycl-unnamed-lambda -fsycl -no-fma -I$INSTALL/include -Wtautological-constant-compare"
CXXFLAGS="-fsycl-unnamed-lambda -fsycl -no-fma -I$INSTALL/include -Wno-tautological-compare"