From 0f5733df97077c86e8ec6ccfa0c603c94a5a991d Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 24 Jan 2023 18:17:43 +0000 Subject: [PATCH] working scripts for Tursa --- Grid/systems/tursa/bootstrap-env.sh | 24 +++ Grid/systems/tursa/env-cpu.sh | 5 - Grid/systems/tursa/env-gpu.sh | 5 - Grid/systems/tursa/env.sh | 8 + .../tursa/{ => files}/cpu-mpi-wrapper.sh | 0 Grid/systems/tursa/files/env-cpu.sh | 4 + Grid/systems/tursa/files/env-gpu.sh | 4 + .../tursa/{ => files}/gpu-mpi-wrapper.sh | 0 Grid/systems/tursa/grid-config.json | 25 +++ Grid/systems/tursa/grid-cpu.spack | 10 -- Grid/systems/tursa/grid-gpu.spack | 10 -- Grid/systems/tursa/spack-bootstrap.sh | 169 +++++++++--------- 12 files changed, 151 insertions(+), 113 deletions(-) create mode 100755 Grid/systems/tursa/bootstrap-env.sh delete mode 100644 Grid/systems/tursa/env-cpu.sh delete mode 100644 Grid/systems/tursa/env-gpu.sh create mode 100644 Grid/systems/tursa/env.sh rename Grid/systems/tursa/{ => files}/cpu-mpi-wrapper.sh (100%) create mode 100644 Grid/systems/tursa/files/env-cpu.sh create mode 100644 Grid/systems/tursa/files/env-gpu.sh rename Grid/systems/tursa/{ => files}/gpu-mpi-wrapper.sh (100%) create mode 100644 Grid/systems/tursa/grid-config.json delete mode 100644 Grid/systems/tursa/grid-cpu.spack delete mode 100644 Grid/systems/tursa/grid-gpu.spack mode change 100644 => 100755 Grid/systems/tursa/spack-bootstrap.sh diff --git a/Grid/systems/tursa/bootstrap-env.sh b/Grid/systems/tursa/bootstrap-env.sh new file mode 100755 index 0000000..d0e62c8 --- /dev/null +++ b/Grid/systems/tursa/bootstrap-env.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +set -euo pipefail + +if (( $# != 1 )); then + echo "usage: $(basename "$0") " 1>&2 + exit 1 +fi +dir=$1 + +call_dir=$(pwd -P) +script_dir="$(dirname "$(readlink -f "${BASH_SOURCE:-$0}")")" +if [ -d "${dir}" ]; then + echo "error: directory '${dir}' exists" + exit 1 +fi +mkdir -p "${dir}" +cd "${dir}" +git clone https://github.com/spack/spack.git +cd "${call_dir}" +cp "${script_dir}"/files/* "${dir}" +cp "${script_dir}/env.sh" "${script_dir}/grid-config.json" "${dir}" +source "${dir}"/spack/share/spack/setup-env.sh +"${script_dir}"/spack-bootstrap.sh "${dir}" diff --git a/Grid/systems/tursa/env-cpu.sh b/Grid/systems/tursa/env-cpu.sh deleted file mode 100644 index 2df240b..0000000 --- a/Grid/systems/tursa/env-cpu.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash -# shellcheck disable=SC2046 - -script_dir="$(dirname "$(readlink -f "${BASH_SOURCE:-$0}")")" -spack load $(cat "${script_dir}"/grid-cpu.spack) diff --git a/Grid/systems/tursa/env-gpu.sh b/Grid/systems/tursa/env-gpu.sh deleted file mode 100644 index 0d0d85e..0000000 --- a/Grid/systems/tursa/env-gpu.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash -# shellcheck disable=SC2046 - -script_dir="$(dirname "$(readlink -f "${BASH_SOURCE:-$0}")")" -spack load $(cat "${script_dir}"/grid-gpu.spack) diff --git a/Grid/systems/tursa/env.sh b/Grid/systems/tursa/env.sh new file mode 100644 index 0000000..9534176 --- /dev/null +++ b/Grid/systems/tursa/env.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +# shellcheck disable=SC1091 + +GRIDENVDIR="$(dirname "$(readlink -f "${BASH_SOURCE:-$0}")")" +export GRIDENVDIR +export PATH="${GRIDENVDIR}/prefix/base/bin:${PATH}" +export ACLOCAL_PATH="${GRIDENVDIR}/prefix/base/share/aclocal:${ACLOCAL_PATH}" +source "${GRIDENVDIR}"/spack/share/spack/setup-env.sh diff --git a/Grid/systems/tursa/cpu-mpi-wrapper.sh b/Grid/systems/tursa/files/cpu-mpi-wrapper.sh similarity index 100% rename from Grid/systems/tursa/cpu-mpi-wrapper.sh rename to Grid/systems/tursa/files/cpu-mpi-wrapper.sh diff --git a/Grid/systems/tursa/files/env-cpu.sh b/Grid/systems/tursa/files/env-cpu.sh new file mode 100644 index 0000000..ec64373 --- /dev/null +++ b/Grid/systems/tursa/files/env-cpu.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +spack env activate grid-cpu +spack env status diff --git a/Grid/systems/tursa/files/env-gpu.sh b/Grid/systems/tursa/files/env-gpu.sh new file mode 100644 index 0000000..231647f --- /dev/null +++ b/Grid/systems/tursa/files/env-gpu.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +spack env activate grid-gpu +spack env status diff --git a/Grid/systems/tursa/gpu-mpi-wrapper.sh b/Grid/systems/tursa/files/gpu-mpi-wrapper.sh similarity index 100% rename from Grid/systems/tursa/gpu-mpi-wrapper.sh rename to Grid/systems/tursa/files/gpu-mpi-wrapper.sh diff --git a/Grid/systems/tursa/grid-config.json b/Grid/systems/tursa/grid-config.json new file mode 100644 index 0000000..d91d9e6 --- /dev/null +++ b/Grid/systems/tursa/grid-config.json @@ -0,0 +1,25 @@ +{ + "configs": [ + { + "name": "gpu", + "env-script": "env-gpu.sh", + "commit": "796abfad80625d81bb16af7ff6ec612a836f17d8", + "config-options": "--enable-comms=mpi --enable-simd=GPU --enable-alloc-align=4k --enable-accelerator-cshift --enable-shm=nvlink --enable-gen-simd-width=64 --enable-accelerator=cuda --disable-unified --enable-gparity=no --enable-fermion-reps=no", + "env" : { + "CXX": "nvcc", + "LDFLAGS": "-cudart shared", + "CXXFLAGS": "-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++14 -cudart shared" + } + }, + { + "name": "cpu", + "env-script": "env-cpu.sh", + "commit": "796abfad80625d81bb16af7ff6ec612a836f17d8", + "config-options": "--enable-comms=mpi-auto --enable-simd=AVX2 --enable-alloc-align=4k --enable-shm=shmget --enable-gparity=no --enable-fermion-reps=no", + "env" : { + "CXX": "clang++", + "MPICXX": "mpicxx" + } + } + ] +} diff --git a/Grid/systems/tursa/grid-cpu.spack b/Grid/systems/tursa/grid-cpu.spack deleted file mode 100644 index 85e1164..0000000 --- a/Grid/systems/tursa/grid-cpu.spack +++ /dev/null @@ -1,10 +0,0 @@ -gcc@9.4.0 -llvm@12.0.1 -ucx@1.12.0.CPU%gcc@9.4.0 -openmpi@4.1.1.CPU%gcc@9.4.0 -hdf5^openmpi@4.1.1.CPU%gcc@9.4.0 -fftw^openmpi@4.1.1.CPU%gcc@9.4.0 -openssl -gmp%gcc@9.4.0 -mpfr%gcc@9.4.0 -c-lime diff --git a/Grid/systems/tursa/grid-gpu.spack b/Grid/systems/tursa/grid-gpu.spack deleted file mode 100644 index f3d9660..0000000 --- a/Grid/systems/tursa/grid-gpu.spack +++ /dev/null @@ -1,10 +0,0 @@ -gcc@9.4.0 -cuda@11.4.0 -ucx@1.12.0.GPU%gcc@9.4.0 -openmpi@4.1.1.GPU%gcc@9.4.0 -hdf5^openmpi@4.1.1.GPU%gcc@9.4.0 -fftw^openmpi@4.1.1.GPU%gcc@9.4.0 -openssl -gmp%gcc@9.4.0 -mpfr%gcc@9.4.0 -c-lime diff --git a/Grid/systems/tursa/spack-bootstrap.sh b/Grid/systems/tursa/spack-bootstrap.sh old mode 100644 new mode 100755 index 33e515c..582cb93 --- a/Grid/systems/tursa/spack-bootstrap.sh +++ b/Grid/systems/tursa/spack-bootstrap.sh @@ -2,16 +2,19 @@ # shellcheck disable=SC2016 set -euo pipefail -GCC='gcc@9.4.0' -CUDA='cuda@11.4.0' -HDF5='hdf5@1.10.7' +gcc_spec='gcc@9.4.0' +cuda_spec='cuda@11.4.0' +hdf5_spec='hdf5@1.10.7' if (( $# != 1 )); then echo "usage: $(basename "$0") " 1>&2 exit 1 fi -ENVDIR=$1 -CWD=$(pwd -P) +dir=$1 +cwd=$(pwd -P) +cd "${dir}" +dir=$(pwd -P) +cd "${cwd}" # General configuration ######################################################## # build with 128 tasks @@ -33,107 +36,92 @@ echo 'packages: spack config --scope site add -f external.yaml rm external.yaml +# Base compilers ############################################################### # configure system base spack compiler find --scope site -# Base packages ################################################################ -# install GCC -spack install ${GCC} -spack load ${GCC} +# install GCC, CUDA & LLVM +spack install ${gcc_spec} ${cuda_spec} llvm + +spack load llvm spack compiler find --scope site -spack unload ${GCC} +spack unload llvm -# clean -spack clean -spack gc -y - -# install CUDA -spack install ${CUDA} - -# install development tools -dev_tools=("autoconf" "automake" "libtool" "git") -spack install "${dev_tools[@]}" - -# create view for CLI & dev tools -spack view symlink -i "${ENVDIR}/prefix/base" "${dev_tools[@]}" - -# install clang -spack install llvm@12.0.1 - -# locate new compilers -spack load llvm@12.0.1 +spack load ${gcc_spec} spack compiler find --scope site -spack unload llvm@12.0.1 +spack unload ${gcc_spec} # Manual compilation of OpenMPI & UCX ########################################## # set build directories -mkdir -p "${ENVDIR}"/build -cd "${ENVDIR}"/build +mkdir -p "${dir}"/build +cd "${dir}"/build -spack load ${GCC} ${CUDA} +spack load ${gcc_spec} ${cuda_spec} -CUDA_PATH=$(which nvcc | sed "s/bin/@/g" | cut -d "@" -f1) -GDRCOPY_PATH=/mnt/lustre/tursafs1/apps/gdrcopy/2.3.1 +cuda_path=$(spack find --format "{prefix}" cuda) +gdrcopy_path=/mnt/lustre/tursafs1/apps/gdrcopy/2.3.1 # Install ucx 1.12.0 -UCX_URL=https://github.com/openucx/ucx/releases/download/v1.12.0/ucx-1.12.0.tar.gz +ucx_url=https://github.com/openucx/ucx/releases/download/v1.12.0/ucx-1.12.0.tar.gz echo "-- building UCX from source" -wget ${UCX_URL} -UCX_AR=$(basename ${UCX_URL}) -tar -xvf "${UCX_AR}" -cd "${UCX_AR%.tar.gz}" +wget ${ucx_url} +ucx_ar=$(basename ${ucx_url}) +tar -xvf "${ucx_ar}" +cd "${ucx_ar%.tar.gz}" # ucx gpu build -mkdir build_gpu; cd build_gpu +mkdir -p build_gpu; cd build_gpu ../configure --build=x86_64-redhat-linux-gnu --host=x86_64-redhat-linux-gnu \ - --disable-dependency-tracking --prefix="${ENVDIR}"/prefix/ucx_gpu \ + --disable-dependency-tracking --prefix="${dir}"/prefix/ucx_gpu \ --enable-devel-headers --enable-examples --enable-optimizations \ - --with-gdrcopy=${GDRCOPY_PATH} --with-verbs --disable-logging \ + --with-gdrcopy=${gdrcopy_path} --with-verbs --disable-logging \ --disable-debug --disable-assertions --enable-cma \ --with-knem=/opt/knem-1.1.4.90mlnx1/ --with-rdmacm \ --without-rocm --without-ugni --without-java \ - --enable-compiler-opt=3 --with-cuda="${CUDA_PATH}" --without-cm \ + --enable-compiler-opt=3 --with-cuda="${cuda_path}" --without-cm \ --with-rc --with-ud --with-dc --with-mlx5-dv --with-dm \ - --enable-mt LDFLAGS=-L${GDRCOPY_PATH}/lib + --enable-mt --without-go LDFLAGS=-L${gdrcopy_path}/lib make -j 128 make install cd .. # ucx cpu build -mkdir build_cpu; cd build_cpu +mkdir -p build_cpu; cd build_cpu ../configure --build=x86_64-redhat-linux-gnu --host=x86_64-redhat-linux-gnu \ - --disable-dependency-tracking --prefix="${ENVDIR}"/prefix/ucx_cpu \ + --disable-dependency-tracking --prefix="${dir}"/prefix/ucx_cpu \ --enable-devel-headers --enable-examples --enable-optimizations \ --with-verbs --disable-logging --disable-debug \ --disable-assertions --enable-mt --enable-cma \ --with-knem=/opt/knem-1.1.4.90mlnx1/ --with-rdmacm \ --without-rocm --without-ugni --without-java \ --enable-compiler-opt=3 --without-cm --without-ugni --with-rc \ - --with-ud --with-dc --with-mlx5-dv --with-dm --enable-mt + --with-ud --with-dc --with-mlx5-dv --with-dm --enable-mt --without-go make -j 128 make install -cd "${ENVDIR}"/build +cd "${dir}"/build -# Install openmpi 4.1.1 (needs to be done on a gpu node) -OMPI_URL=https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.1.tar.gz +# Install openmpi 4.1.1 +ompi_url=https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.1.tar.gz echo "-- building OpenMPI from source" -wget ${OMPI_URL} -OMPI_AR=$(basename ${OMPI_URL}) -tar -xvf "${OMPI_AR}" -cd "${OMPI_AR%.tar.gz}" +wget ${ompi_url} +ompi_ar=$(basename ${ompi_url}) +tar -xvf "${ompi_ar}" +cd "${ompi_ar%.tar.gz}" +export AUTOMAKE_JOBS=128 +./autogen.pl -f # openmpi gpu build mkdir build_gpu; cd build_gpu -../configure --prefix="${ENVDIR}"/prefix/ompi_gpu --without-xpmem \ - --with-ucx="${ENVDIR}"/prefix/ucx_gpu \ - --with-ucx-libdir="${ENVDIR}"/prefix/ucx_gpu/lib \ +../configure --prefix="${dir}"/prefix/ompi_gpu --without-xpmem \ + --with-ucx="${dir}"/prefix/ucx_gpu \ + --with-ucx-libdir="${dir}"/prefix/ucx_gpu/lib \ --with-knem=/opt/knem-1.1.4.90mlnx1/ \ --enable-mca-no-build=btl-uct \ - --with-cuda="${CUDA_PATH}" --disable-getpwuid \ + --with-cuda="${cuda_path}" --disable-getpwuid \ --with-verbs --with-slurm --enable-mpi-fortran=all \ --with-pmix=internal --with-libevent=internal make -j 128 @@ -142,61 +130,76 @@ cd .. # openmpi cpu build mkdir build_cpu; cd build_cpu -../configure --prefix="${ENVDIR}"/prefix/ompi_cpu --without-xpmem \ - --with-ucx="${ENVDIR}"/prefix/ucx_cpu \ - --with-ucx-libdir="${ENVDIR}"/prefix/ucx_cpu/lib \ +../configure --prefix="${dir}"/prefix/ompi_cpu --without-xpmem \ + --with-ucx="${dir}"/prefix/ucx_cpu \ + --with-ucx-libdir="${dir}"/prefix/ucx_cpu/lib \ --with-knem=/opt/knem-1.1.4.90mlnx1/ \ --enable-mca-no-build=btl-uct --disable-getpwuid \ --with-verbs --with-slurm --enable-mpi-fortran=all \ --with-pmix=internal --with-libevent=internal make -j 128 make install -cd "${ENVDIR}" +cd "${dir}" # Add externals to spack echo "packages: ucx: externals: - spec: \"ucx@1.12.0.GPU%gcc@9.4.0\" - prefix: ${ENVDIR}/prefix/ucx_gpu + prefix: ${dir}/prefix/ucx_gpu - spec: \"ucx@1.12.0.CPU%gcc@9.4.0\" - prefix: ${ENVDIR}/prefix/ucx_cpu + prefix: ${dir}/prefix/ucx_cpu buildable: False openmpi: externals: - spec: \"openmpi@4.1.1.GPU%gcc@9.4.0\" - prefix: ${ENVDIR}/prefix/ompi_gpu + prefix: ${dir}/prefix/ompi_gpu - spec: \"openmpi@4.1.1.CPU%gcc@9.4.0\" - prefix: ${ENVDIR}/prefix/ompi_cpu + prefix: ${dir}/prefix/ompi_cpu buildable: False" > spack.yaml spack config --scope site add -f spack.yaml rm spack.yaml -spack install ucx@1.12.0.GPU%gcc@9.4.0 -spack install ucx@1.12.0.CPU%gcc@9.4.0 -spack install openmpi@4.1.1.GPU%gcc@9.4.0 -spack install openmpi@4.1.1.CPU%gcc@9.4.0 +spack install ucx@1.12.0.GPU%gcc@9.4.0 openmpi@4.1.1.GPU%gcc@9.4.0 +spack install ucx@1.12.0.CPU%gcc@9.4.0 openmpi@4.1.1.CPU%gcc@9.4.0 -# Install Grid dependencies #################################################### -cd "${CWD}" +cd "${cwd}" -OPENMPIGPUHASH=$(spack find --format "{hash}" openmpi@4.1.1.GPU) -OPENMPICPUHASH=$(spack find --format "{hash}" openmpi@4.1.1.CPU) +# environments ################################################################# +dev_tools=("autoconf" "automake" "libtool" "jq") +ompi_gpu_hash=$(spack find --format "{hash}" openmpi@4.1.1.GPU) +ompi_cpu_hash=$(spack find --format "{hash}" openmpi@4.1.1.CPU) -spack install ${HDF5}+cxx+threadsafe ^/"${OPENMPIGPUHASH}" -spack install ${HDF5}+cxx+threadsafe ^/"${OPENMPICPUHASH}" -spack install fftw ^/"${OPENMPIGPUHASH}" -spack install fftw ^/"${OPENMPICPUHASH}" -spack install openssl gmp mpfr c-lime +spack env create grid-gpu +spack env activate grid-gpu +spack add ${gcc_spec} ${cuda_spec} "${dev_tools[@]}" +spack add ucx@1.12.0.GPU%gcc@9.4.0 openmpi@4.1.1.GPU%gcc@9.4.0 +spack add ${hdf5_spec}+cxx+threadsafe ^/"${ompi_gpu_hash}" +spack add fftw ^/"${ompi_gpu_hash}" +spack add openssl gmp mpfr c-lime +spack install +spack env deactivate + +spack env create grid-cpu +spack env activate grid-cpu +spack add llvm "${dev_tools[@]}" +spack add ucx@1.12.0.CPU%gcc@9.4.0 openmpi@4.1.1.CPU%gcc@9.4.0 +spack add ${hdf5_spec}+cxx+threadsafe ^/"${ompi_cpu_hash}" +spack add fftw ^/"${ompi_cpu_hash}" +spack add openssl gmp mpfr c-lime +spack install +spack env deactivate # Final setup ################################################################## spack clean +spack gc -y # add more environment variables in module loading -spack config --scope site add 'modules:prefix_inspections:lib:[LIBRARY_PATH]' +spack config --scope site add 'modules:prefix_inspections:lib:[LD_LIBRARY_PATH,LIBRARY_PATH]' +spack config --scope site add 'modules:prefix_inspections:lib64:[LD_LIBRARY_PATH,LIBRARY_PATH]' spack config --scope site add 'modules:prefix_inspections:include:[C_INCLUDE_PATH,CPLUS_INCLUDE_PATH,INCLUDE]' spack module tcl refresh -y # permission change for group access -chmod -R g+rw "${ENVDIR}/spack/var/spack/cache" -setfacl -d -R -m g::rwX "${ENVDIR}/spack/var/spack/cache" +chmod -R g+rw "${dir}/spack/var/spack/cache" +setfacl -d -R -m g::rwX "${dir}/spack/var/spack/cache"