1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-13 20:57:06 +01:00

Deprecate UVM

This commit is contained in:
2024-09-17 13:34:27 +00:00
parent 11be10d2c0
commit 066544281f
75 changed files with 668 additions and 1082 deletions

View File

@ -1,6 +1,6 @@
#!/bin/bash
#PBS -q debug
#PBS -q EarlyAppAccess
#PBS -l select=1
#PBS -l walltime=00:20:00
#PBS -A LatticeQCD_aesp_CNDA
@ -44,7 +44,7 @@ CMD="mpiexec -np 1 -ppn 1 -envall \
./gpu_tile_compact.sh \
./Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 16.32.32.32 \
--shm-mpi 0 --shm 2048 --device-mem 32000 --accelerator-threads 32 "
#$CMD | tee 1tile.dwf
$CMD | tee 1tile.dwf
CMD="mpiexec -np 12 -ppn 12 -envall \
./gpu_tile_compact.sh \

View File

@ -1,6 +1,6 @@
#!/bin/bash
#PBS -q workq
#PBS -q EarlyAppAccess
#PBS -l select=2
#PBS -l walltime=00:20:00
#PBS -A LatticeQCD_aesp_CNDA
@ -43,13 +43,13 @@ $CMD | tee 2node.comms
CMD="mpiexec -np 24 -ppn 12 -envall \
./gpu_tile_compact.sh \
./Benchmark_dwf_fp32 --mpi 2.2.2.3 --grid 32.32.64.48 \
--shm-mpi 0 --shm 2048 --device-mem 32000 --accelerator-threads 32 --comms-overlap"
--shm-mpi 1 --shm 2048 --device-mem 32000 --accelerator-threads 32 "
$CMD | tee 2node.32.32.64.48.dwf
CMD="mpiexec -np 24 -ppn 12 -envall \
./gpu_tile_compact.sh \
./Benchmark_dwf_fp32 --mpi 2.2.2.3 --grid 64.64.64.96 \
--shm-mpi 0 --shm 2048 --device-mem 32000 --accelerator-threads 32 --comms-overlap"
--shm-mpi 1 --shm 2048 --device-mem 32000 --accelerator-threads 32 "
$CMD | tee 2node.64.64.64.96.dwf

View File

@ -1,40 +1,12 @@
module load oneapi/release/2023.12.15.001
#module load intel_compute_runtime/release/821.35
source ~/spack/share/spack/setup-env.sh
spack load c-lime
spack load openssl
export CLIME=`spack find --paths c-lime | grep ^c-lime | awk '{print $2}' `
#spack load libefence
#export EFENCE=`spack find --paths libefence | grep ^libefence | awk '{print $2}' `
#export LD_LIBRARY_PATH=${EFENCE}/lib:$LD_LIBRARY_PATH
#spack load gperftools
export TCMALLOC=/home/paboyle/gperftools/install
export LD_LIBRARY_PATH=${TCMALLOC}/lib:$LD_LIBRARY_PATH
export INTELGT_AUTO_ATTACH_DISABLE=1
#export ONEAPI_DEVICE_SELECTOR=level_zero:0.0
#module load oneapi/release/2023.12.15.001
#module use /soft/modulefiles
#module load intel_compute_runtime/release/agama-devel-682.22
#export FI_CXI_DEFAULT_CQ_SIZE=131072
#export FI_CXI_CQ_FILL_PERCENT=20
#export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file"
#export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-intel-enable-auto-large-GRF-mode"
#
# -ftarget-register-alloc-mode=pvc:default
# -ftarget-register-alloc-mode=pvc:small
# -ftarget-register-alloc-mode=pvc:large
# -ftarget-register-alloc-mode=pvc:auto
#export MPIR_CVAR_CH4_OFI_ENABLE_HMEM=1
export HTTP_PROXY=http://proxy.alcf.anl.gov:3128
export HTTPS_PROXY=http://proxy.alcf.anl.gov:3128
export http_proxy=http://proxy.alcf.anl.gov:3128
export https_proxy=http://proxy.alcf.anl.gov:3128
git config --global http.proxy http://proxy.alcf.anl.gov:3128
#source ~/spack/share/spack/setup-env.sh
#spack load gperftools
#export TCMALLOC=`spack find --paths gperftools | grep ^gperftools | awk '{print $2}' `
#export LD_LIBRARY_PATH=${TCMALLOC}/lib:$LD_LIBRARY_PATH
export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file"

View File

@ -1,6 +1,6 @@
#!/bin/bash
#PBS -l select=16
#PBS -l select=32
#PBS -q EarlyAppAccess
#PBS -A LatticeQCD_aesp_CNDA
#PBS -l walltime=02:00:00
@ -15,7 +15,7 @@
# 56 cores / 6 threads ~9
export OMP_NUM_THREADS=6
export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1
#export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE=0
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE=0
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_BUFFER_SZ=1048576
@ -24,14 +24,14 @@ export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_MAX_NUM_BUFFERS=16
#export MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST=1
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
#export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=1
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY=1
export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file"
export GRID_PRINT_ENTIRE_LOG=0
export GRID_CHECKSUM_RECV_BUF=0
export GRID_CHECKSUM_SEND_BUF=0
export GRID_CHECKSUM_RECV_BUF=1
export GRID_CHECKSUM_SEND_BUF=1
export MPICH_OFI_NIC_POLICY=GPU
@ -51,10 +51,10 @@ cd $DIR
cp $PBS_NODEFILE nodefile
CMD="mpiexec -np 192 -ppn 12 -envall --hostfile nodefile \
CMD="mpiexec -np 384 -ppn 12 -envall --hostfile nodefile \
../gpu_tile_compact.sh \
../Test_dwf_mixedcg_prec --mpi 4.4.4.3 --grid 128.128.128.96 \
--shm-mpi 0 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 6000 --debug-stdout --log Message --comms-overlap"
../Test_dwf_mixedcg_prec --mpi 4.4.4.6 --grid 128.128.128.96 \
--shm-mpi 1 --comms-overlap --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 6000 --debug-stdout --log Message --debug-signals"
echo $CMD > command-line
env > environment