mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
SDCC benchmarking scripts for A100 nodes and IceLake nodes (AVX512)
This commit is contained in:
parent
b77a9b8947
commit
14643c0aab
@ -185,6 +185,7 @@ void Benchmark(int Ls, Coordinate Dirichlet)
|
||||
GaugeField Umu(UGrid);
|
||||
GaugeField UmuCopy(UGrid);
|
||||
SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||
// SU<Nc>::ColdConfiguration(Umu);
|
||||
UmuCopy=Umu;
|
||||
std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
|
||||
|
||||
@ -307,6 +308,14 @@ void Benchmark(int Ls, Coordinate Dirichlet)
|
||||
if(( n2e>1.0e-4) ) {
|
||||
std::cout<<GridLogMessage << "WRONG RESULT" << std::endl;
|
||||
FGrid->Barrier();
|
||||
std::cout<<GridLogMessage << "RESULT" << std::endl;
|
||||
// std::cout << result<<std::endl;
|
||||
std::cout << norm2(result)<<std::endl;
|
||||
std::cout<<GridLogMessage << "REF" << std::endl;
|
||||
std::cout << norm2(ref)<<std::endl;
|
||||
std::cout<<GridLogMessage << "ERR" << std::endl;
|
||||
std::cout << norm2(err)<<std::endl;
|
||||
FGrid->Barrier();
|
||||
exit(-1);
|
||||
}
|
||||
assert (n2e< 1.0e-4 );
|
||||
|
42
systems/SDCC-A100/bench.slurm
Normal file
42
systems/SDCC-A100/bench.slurm
Normal file
@ -0,0 +1,42 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --partition csi
|
||||
#SBATCH --time=00:10:00
|
||||
#SBATCH -A csigeneral
|
||||
#SBATCH --exclusive
|
||||
#SBATCH --nodes=1
|
||||
#SBATCH --ntasks=4
|
||||
#SBATCH --qos csi
|
||||
#SBATCH --gres=gpu:4
|
||||
|
||||
source sourceme.sh
|
||||
|
||||
cat << EOF > select_gpu
|
||||
#!/bin/bash
|
||||
export GPU_MAP=(0 1 2 3)
|
||||
export GPU=\${GPU_MAP[\$SLURM_LOCALID]}
|
||||
export CUDA_VISIBLE_DEVICES=\$GPU
|
||||
unset ROCR_VISIBLE_DEVICES
|
||||
echo RANK \$SLURM_LOCALID using GPU \$GPU
|
||||
exec "\$@"
|
||||
EOF
|
||||
chmod +x ./select_gpu
|
||||
|
||||
|
||||
export OMP_NUM_THREADS=4
|
||||
export OMPI_MCA_btl=^uct,openib
|
||||
export UCX_TLS=cuda,gdr_copy,rc,rc_x,sm,cuda_copy,cuda_ipc
|
||||
export UCX_RNDV_SCHEME=put_zcopy
|
||||
export UCX_RNDV_THRESH=16384
|
||||
export UCX_IB_GPU_DIRECT_RDMA=no
|
||||
export UCX_MEMTYPE_CACHE=n
|
||||
|
||||
# NOTE(review): OpenMP reads OMP_NUM_THREADS, not OMP_NUM_THREAD, so this export
# does not change the thread count; the OMP_NUM_THREADS=4 exported earlier in this
# script remains in effect. Confirm whether 8 threads per rank was intended.
export OMP_NUM_THREAD=8
|
||||
#srun -N1 -n1 nvidia-smi
|
||||
#srun -N1 -n1 numactl -H > numa.txt
|
||||
srun -N1 -n1 lstopo A100-topo.pdf
|
||||
|
||||
# 4.35 TF/s
|
||||
#srun -N1 -n1 ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 16.32.32.32 --shm 2048 --shm-mpi 0 --accelerator-threads 16
|
||||
|
||||
srun -N1 -n4 ./select_gpu ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.2.2 --grid 32.32.64.64 --shm 2048 --shm-mpi 0 --accelerator-threads 16
|
||||
|
@ -5,7 +5,7 @@
|
||||
--enable-accelerator=cuda \
|
||||
--enable-gen-simd-width=64 \
|
||||
--enable-simd=GPU \
|
||||
--enable-accelerator-cshift \
|
||||
--disable-accelerator-cshift \
|
||||
--disable-fermion-reps \
|
||||
--disable-gparity \
|
||||
CXX=nvcc \
|
||||
|
31
systems/SDCC-ICE/bench.slurm
Normal file
31
systems/SDCC-ICE/bench.slurm
Normal file
@ -0,0 +1,31 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --partition lqcd
|
||||
#SBATCH --time=00:20:00
|
||||
#SBATCH -A lqcdtest
|
||||
#SBATCH --exclusive
|
||||
#SBATCH --nodes=1
|
||||
#SBATCH --ntasks=2
|
||||
#SBATCH --qos lqcd
|
||||
|
||||
source sourceme.sh
|
||||
|
||||
# Threads per MPI rank; the variable OpenMP actually reads is OMP_NUM_THREADS.
export OMP_NUM_THREADS=24
|
||||
#srun -N1 -n1 numactl -H > numa.txt
|
||||
#srun -N1 -n1 lstopo ice-topo.pdf
|
||||
|
||||
cat << EOF > select_socket
|
||||
#!/bin/bash
|
||||
export NUMA_MAP=(0 1)
|
||||
export NUMA=\${NUMA_MAP[\$SLURM_LOCALID]}
|
||||
exec "\$@"
|
||||
EOF
|
||||
chmod +x ./select_socket
|
||||
|
||||
#for vol in 8.8.8.16 8.8.8.32 8.8.8.64
|
||||
#for vol in 8.8.16.16 8.8.16.32 8.8.16.64
|
||||
for vol in 8.16.16.16 8.16.16.32 8.16.16.64 16.16.16.32 16.16.16.64 24.24.24.64 32.32.32.32
|
||||
do
|
||||
srun --cpu-bind=ldoms -N1 -n2 ./select_socket ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid $vol --dslash-asm > $vol.2socket.out
|
||||
srun --cpu-bind=ldoms -N1 -n1 ./select_socket ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid $vol --dslash-asm > $vol.1socket.out
|
||||
done
|
||||
|
@ -1,13 +1,18 @@
|
||||
../../configure \
|
||||
--enable-comms=mpi \
|
||||
--enable-debug \
|
||||
--enable-comms=mpi-auto \
|
||||
--enable-unified=yes \
|
||||
--enable-shm=shmopen \
|
||||
--enable-shm-fast-path=shmopen \
|
||||
--enable-accelerator=none \
|
||||
--enable-simd=AVX2 \
|
||||
--enable-simd=AVX512 \
|
||||
--disable-accelerator-cshift \
|
||||
--disable-fermion-reps \
|
||||
--disable-gparity \
|
||||
CXX=mpicxx \
|
||||
CXX=clang++ \
|
||||
MPICXX=mpicxx \
|
||||
LDFLAGS=-L/direct/sdcc+u/paboyle/spack/opt/spack/linux-almalinux8-icelake/gcc-8.5.0/hwloc-2.9.1-hgkscnt5pferhtde4ahctlupb6qf3vtl/lib/ \
|
||||
LIBS=-lhwloc \
|
||||
CXXFLAGS="-std=c++17"
|
||||
|
||||
|
||||
|
@ -1 +1,2 @@
|
||||
export LD_LIBRARY_PATH=/direct/sdcc+u/paboyle/spack/opt/spack/linux-almalinux8-icelake/gcc-8.5.0/llvm-12.0.1-agey6vtuw3e375rewhhobvkznjh5ltz4/lib/:$LD_LIBRARY_PATH
|
||||
module load openmpi
|
||||
|
Loading…
Reference in New Issue
Block a user