mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-12 20:27:06 +01:00
Pipeline mode commit on Aurora. 5+ TF/s on 16^3x32 per tile at 384
nodes. More concurrency/fine-grained scheduling is possible.
This commit is contained in:
@@ -32,15 +32,9 @@ export MPICH_OFI_NIC_POLICY=GPU
|
||||
# Local vol 16.16.16.32
|
||||
#
|
||||
|
||||
VOL 128.64.128.96
|
||||
MPI 4.4.4.3
|
||||
NPROC 192
|
||||
mpiexec -np 192 -ppn 12 -envall ./gpu_tile.sh ./Benchmark_dwf_fp32 --mpi 4.4.4.3 --grid 128.64.128.96 --shm-mpi 0 --shm 2048 --device-mem 32000 --accelerator-threads 32 --comms-overlap
|
||||
|
||||
|
||||
LX=32
|
||||
LX=16
|
||||
LY=16
|
||||
LZ=32
|
||||
LZ=16
|
||||
LT=32
|
||||
|
||||
NX=2
|
||||
|
@@ -19,7 +19,7 @@ export ONEAPI_DEVICE_FILTER=gpu,level_zero
|
||||
|
||||
export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=0
|
||||
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
|
||||
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:3
|
||||
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:4
|
||||
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY=1
|
||||
#export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:2
|
||||
#export SYCL_PI_LEVEL_ZERO_USM_RESIDENT=1
|
||||
|
Reference in New Issue
Block a user