diff --git a/systems/PVC/benchmarks/run-1tile.sh b/systems/PVC/benchmarks/run-1tile.sh index 0fe80247..9a29b773 100755 --- a/systems/PVC/benchmarks/run-1tile.sh +++ b/systems/PVC/benchmarks/run-1tile.sh @@ -4,7 +4,7 @@ #SBATCH -p QZ1J-ICX-PVC ##SBATCH -p QZ1J-SPR-PVC-2C -source /nfs/site/home/paboylex/ATS/GridNew/Grid/systems/PVC-nightly/setup.sh +#source /nfs/site/home/paboylex/ATS/GridNew/Grid/systems/PVC-nightly/setup.sh export NT=8 diff --git a/systems/PVC/benchmarks/run-2tile-mpi.sh b/systems/PVC/benchmarks/run-2tile-mpi.sh index cefab776..5a6a9b8f 100755 --- a/systems/PVC/benchmarks/run-2tile-mpi.sh +++ b/systems/PVC/benchmarks/run-2tile-mpi.sh @@ -4,7 +4,7 @@ #SBATCH -p QZ1J-ICX-PVC -source /nfs/site/home/paboylex/ATS/GridNew/Grid/systems/PVC-nightly/setup.sh +#source /nfs/site/home/paboylex/ATS/GridNew/Grid/systems/PVC-nightly/setup.sh export NT=16 @@ -19,16 +19,14 @@ export SYCL_DEVICE_FILTER=gpu,level_zero export I_MPI_OFFLOAD_CELL=tile export EnableImplicitScaling=0 export EnableWalkerPartition=0 -export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=1 -export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 +#export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=1 +#export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0 -for i in 0 +for i in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 do -mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads $NT --shm-mpi 1 --device-mem 32768 -mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --shm-mpi 1 --device-mem 32768 +mpiexec -launcher ssh -n 2 -host localhost ./wrap.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads $NT --shm-mpi 0 --device-mem 32768 > 1.1.1.2.log$i +mpiexec -launcher ssh -n 2 -host localhost ./wrap.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --shm-mpi 0 --device-mem 32768 > 2.1.1.1.log$i done -#mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_halo --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads $NT --shm-mpi 1 > halo.2tile.1x2.log -#mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_halo --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --shm-mpi 1 > halo.2tile.2x1.log diff --git a/systems/PVC/benchmarks/wrap.sh b/systems/PVC/benchmarks/wrap.sh index bb7b517d..06ed0ca1 100755 --- a/systems/PVC/benchmarks/wrap.sh +++ b/systems/PVC/benchmarks/wrap.sh @@ -5,10 +5,10 @@ export ZE_AFFINITY_MASK=0.$MPI_LOCALRANKID echo Ranke $MPI_LOCALRANKID ZE_AFFINITY_MASK is $ZE_AFFINITY_MASK -if [ $MPI_LOCALRANKID = "0" ] -then +#if [ $MPI_LOCALRANKID = "0" ] +#then # ~psteinbr/build_pti/ze_tracer -h $@ - onetrace --chrome-device-timeline $@ -else +# onetrace --chrome-device-timeline $@ +#else $@ -fi +#fi