1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-12 20:27:06 +01:00

Significantly better performance on Aurora without using pipeline mode

This commit is contained in:
2025-01-30 16:36:46 +00:00
parent d6b2727f86
commit 94019a922e
12 changed files with 306 additions and 101 deletions

View File

@ -27,10 +27,22 @@ export MPICH_OFI_NIC_POLICY=GPU
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_NUM_BUFFERS_PER_CHUNK=16
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_MAX_NUM_BUFFERS=16
#
# Local vol 16.16.16.32
#
#VOL=32.64.64.96
for VOL in 32.32.32.96 32.64.64.96
do
for AT in 32
do
CMD="mpiexec -np 24 -ppn 12 -envall \
./gpu_tile.sh ./Benchmark_dwf_fp32 --mpi 2.2.2.3 --grid 32.64.64.96 \
--shm-mpi 0 --shm 2048 --device-mem 32000 --accelerator-threads 8 "
./gpu_tile.sh ./Benchmark_dwf_fp32 --mpi 2.2.2.3 --grid $VOL \
--shm-mpi 0 --shm 2048 --device-mem 32000 --accelerator-threads $AT --comms-overlap "
echo $CMD
$CMD
done
done

View File

@ -5,11 +5,11 @@
#export GPU_MAP=(0.0 0.1 3.0 3.1 1.0 1.1 4.0 4.1 2.0 2.1 5.0 5.1)
export NUMA_PMAP=(0 0 0 1 1 1 0 0 0 1 1 1 );
export NUMA_MMAP=(2 2 2 3 3 3 3 2 2 2 2 3 3 3 );
export NUMA_HMAP=(2 2 2 3 3 3 3 2 2 2 2 3 3 3 );
export GPU_MAP=(0.0 1.0 2.0 3.0 4.0 5.0 0.1 1.1 2.1 3.1 4.1 5.1 )
export NUMAP=${NUMA_PMAP[$PALS_LOCAL_RANKID]}
export NUMAM=${NUMA_PMAP[$PALS_LOCAL_RANKID]}
export NUMAH=${NUMA_HMAP[$PALS_LOCAL_RANKID]}
export gpu_id=${GPU_MAP[$PALS_LOCAL_RANKID]}
unset EnableWalkerPartition
@ -19,17 +19,19 @@ export ONEAPI_DEVICE_FILTER=gpu,level_zero
export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=0
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:5
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:7
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY=1
#export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:2
#export SYCL_PI_LEVEL_ZERO_USM_RESIDENT=1
export MPI_BUF_NUMA=$NUMAH
echo "rank $PALS_RANKID ; local rank $PALS_LOCAL_RANKID ; ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK ; NUMA $NUMA "
if [ $PALS_RANKID = "0" ]
then
numactl -m $NUMAM -N $NUMAP unitrace --chrome-kernel-logging --chrome-mpi-logging --chrome-sycl-logging --demangle "$@"
# numactl -m $NUMAM -N $NUMAP "$@"
numactl -p $NUMAP -N $NUMAP unitrace --chrome-kernel-logging --chrome-mpi-logging --chrome-sycl-logging --demangle "$@"
# numactl -p $NUMAP -N $NUMAP "$@"
else
numactl -m $NUMAM -N $NUMAP "$@"
numactl -p $NUMAP -N $NUMAP "$@"
fi