1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-12 20:27:06 +01:00

Pipeline mode commit on Aurora. 5+ TF/s on 16^3x32 per tile at 384 nodes.
More concurrency/fine-grained scheduling is possible.
This commit is contained in:
2025-02-04 19:27:26 +00:00
parent b50fb34e71
commit 0baaddbe98
9 changed files with 151 additions and 81 deletions

View File

@ -32,15 +32,9 @@ export MPICH_OFI_NIC_POLICY=GPU
# Local vol 16.16.16.32
#
VOL 128.64.128.96
MPI 4.4.4.3
NPROC 192
mpiexec -np 192 -ppn 12 -envall ./gpu_tile.sh ./Benchmark_dwf_fp32 --mpi 4.4.4.3 --grid 128.64.128.96 --shm-mpi 0 --shm 2048 --device-mem 32000 --accelerator-threads 32 --comms-overlap
LX=32
LX=16
LY=16
LZ=32
LZ=16
LT=32
NX=2

View File

@ -19,7 +19,7 @@ export ONEAPI_DEVICE_FILTER=gpu,level_zero
export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=0
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:3
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:4
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY=1
#export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:2
#export SYCL_PI_LEVEL_ZERO_USM_RESIDENT=1