Tursa specific scripts, untested
This commit is contained in:
parent
2a64c41a8c
commit
21c92f9fb7
13
Grid/systems/tursa/cpu-mpi-wrapper.sh
Normal file
13
Grid/systems/tursa/cpu-mpi-wrapper.sh
Normal file
@ -0,0 +1,13 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
lrank=$OMPI_COMM_WORLD_LOCAL_RANK
|
||||
numa=${lrank}
|
||||
cpus="$(( lrank*16 ))-$(( (lrank+1)*16-1 ))"
|
||||
places="$(( lrank*16 )):$(( (lrank+1)*16 ))"
|
||||
|
||||
BINDING="taskset -c ${cpus} numactl -m ${numa}"
|
||||
export OMP_PLACES=${places}
|
||||
|
||||
echo "$(hostname) - ${lrank} binding='${BINDING}'"
|
||||
|
||||
${BINDING} "$@"
|
5
Grid/systems/tursa/env-cpu.sh
Normal file
5
Grid/systems/tursa/env-cpu.sh
Normal file
@ -0,0 +1,5 @@
|
||||
#!/usr/bin/env bash
|
||||
# shellcheck disable=SC2046
|
||||
|
||||
script_dir="$(dirname "$(readlink -f "${BASH_SOURCE:-$0}")")"
|
||||
spack load $(cat "${script_dir}"/grid-cpu.spack)
|
5
Grid/systems/tursa/env-gpu.sh
Normal file
5
Grid/systems/tursa/env-gpu.sh
Normal file
@ -0,0 +1,5 @@
|
||||
#!/usr/bin/env bash
|
||||
# shellcheck disable=SC2046
|
||||
|
||||
script_dir="$(dirname "$(readlink -f "${BASH_SOURCE:-$0}")")"
|
||||
spack load $(cat "${script_dir}"/grid-gpu.spack)
|
14
Grid/systems/tursa/gpu-mpi-wrapper.sh
Normal file
14
Grid/systems/tursa/gpu-mpi-wrapper.sh
Normal file
@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
lrank=$OMPI_COMM_WORLD_LOCAL_RANK
|
||||
numa1=$(( 2 * lrank))
|
||||
numa2=$(( 2 * lrank + 1 ))
|
||||
netdev=mlx5_${lrank}:1
|
||||
|
||||
export CUDA_VISIBLE_DEVICES=$OMPI_COMM_WORLD_LOCAL_RANK
|
||||
export UCX_NET_DEVICES=${netdev}
|
||||
BINDING="--interleave=$numa1,$numa2"
|
||||
|
||||
echo "$(hostname) - $lrank device=$CUDA_VISIBLE_DEVICES binding=$BINDING"
|
||||
|
||||
numactl ${BINDING} "$@"
|
10
Grid/systems/tursa/grid-cpu.spack
Normal file
10
Grid/systems/tursa/grid-cpu.spack
Normal file
@ -0,0 +1,10 @@
|
||||
gcc@9.4.0
|
||||
llvm@12.0.1
|
||||
ucx@1.12.0.CPU%gcc@9.4.0
|
||||
openmpi@4.1.1.CPU%gcc@9.4.0
|
||||
hdf5^openmpi@4.1.1.CPU%gcc@9.4.0
|
||||
fftw^openmpi@4.1.1.CPU%gcc@9.4.0
|
||||
openssl
|
||||
gmp%gcc@9.4.0
|
||||
mpfr%gcc@9.4.0
|
||||
c-lime
|
10
Grid/systems/tursa/grid-gpu.spack
Normal file
10
Grid/systems/tursa/grid-gpu.spack
Normal file
@ -0,0 +1,10 @@
|
||||
gcc@9.4.0
|
||||
cuda@11.4.0
|
||||
ucx@1.12.0.GPU%gcc@9.4.0
|
||||
openmpi@4.1.1.GPU%gcc@9.4.0
|
||||
hdf5^openmpi@4.1.1.GPU%gcc@9.4.0
|
||||
fftw^openmpi@4.1.1.GPU%gcc@9.4.0
|
||||
openssl
|
||||
gmp%gcc@9.4.0
|
||||
mpfr%gcc@9.4.0
|
||||
c-lime
|
202
Grid/systems/tursa/spack-bootstrap.sh
Normal file
202
Grid/systems/tursa/spack-bootstrap.sh
Normal file
@ -0,0 +1,202 @@
|
||||
#!/usr/bin/env bash
|
||||
# shellcheck disable=SC2016
|
||||
set -euo pipefail
|
||||
|
||||
GCC='gcc@9.4.0'
|
||||
CUDA='cuda@11.4.0'
|
||||
HDF5='hdf5@1.10.7'
|
||||
|
||||
if (( $# != 1 )); then
|
||||
echo "usage: $(basename "$0") <env dir>" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
ENVDIR=$1
|
||||
CWD=$(pwd -P)
|
||||
|
||||
# General configuration ########################################################
|
||||
# build with 128 tasks
|
||||
echo 'config:
|
||||
build_jobs: 128
|
||||
build_stage:
|
||||
- $spack/var/spack/stage
|
||||
test_stage: $spack/var/spack/test
|
||||
misc_cache: $spack/var/spack/cache' > jobs.yaml
|
||||
spack config --scope site add -f jobs.yaml
|
||||
rm jobs.yaml
|
||||
|
||||
# add lustre as external package
|
||||
echo 'packages:
|
||||
lustre:
|
||||
externals:
|
||||
- spec: "lustre@2.12.6_ddn36"
|
||||
prefix: /usr' > external.yaml
|
||||
spack config --scope site add -f external.yaml
|
||||
rm external.yaml
|
||||
|
||||
# configure system base
|
||||
spack compiler find --scope site
|
||||
|
||||
# Base packages ################################################################
|
||||
# install GCC
|
||||
spack install ${GCC}
|
||||
spack load ${GCC}
|
||||
spack compiler find --scope site
|
||||
spack unload ${GCC}
|
||||
|
||||
# clean
|
||||
spack clean
|
||||
spack gc -y
|
||||
|
||||
# install CUDA
|
||||
spack install ${CUDA}
|
||||
|
||||
# install development tools
|
||||
dev_tools=("autoconf" "automake" "libtool" "git")
|
||||
spack install "${dev_tools[@]}"
|
||||
|
||||
# create view for CLI & dev tools
|
||||
spack view symlink -i "${ENVDIR}/prefix/base" "${dev_tools[@]}"
|
||||
|
||||
# install clang
|
||||
spack install llvm@12.0.1
|
||||
|
||||
# locate new compilers
|
||||
spack load llvm@12.0.1
|
||||
spack compiler find --scope site
|
||||
spack unload llvm@12.0.1
|
||||
|
||||
# Manual compilation of OpenMPI & UCX ##########################################
|
||||
# set build directories
|
||||
mkdir -p "${ENVDIR}"/build
|
||||
cd "${ENVDIR}"/build
|
||||
|
||||
spack load ${GCC} ${CUDA}
|
||||
|
||||
CUDA_PATH=$(which nvcc | sed "s/bin/@/g" | cut -d "@" -f1)
|
||||
GDRCOPY_PATH=/mnt/lustre/tursafs1/apps/gdrcopy/2.3.1
|
||||
|
||||
# Install ucx 1.12.0
|
||||
UCX_URL=https://github.com/openucx/ucx/releases/download/v1.12.0/ucx-1.12.0.tar.gz
|
||||
|
||||
echo "-- building UCX from source"
|
||||
wget ${UCX_URL}
|
||||
UCX_AR=$(basename ${UCX_URL})
|
||||
tar -xvf "${UCX_AR}"
|
||||
cd "${UCX_AR%.tar.gz}"
|
||||
|
||||
# ucx gpu build
|
||||
mkdir build_gpu; cd build_gpu
|
||||
../configure --build=x86_64-redhat-linux-gnu --host=x86_64-redhat-linux-gnu \
|
||||
--disable-dependency-tracking --prefix="${ENVDIR}"/prefix/ucx_gpu \
|
||||
--enable-devel-headers --enable-examples --enable-optimizations \
|
||||
--with-gdrcopy=${GDRCOPY_PATH} --with-verbs --disable-logging \
|
||||
--disable-debug --disable-assertions --enable-cma \
|
||||
--with-knem=/opt/knem-1.1.4.90mlnx1/ --with-rdmacm \
|
||||
--without-rocm --without-ugni --without-java \
|
||||
--enable-compiler-opt=3 --with-cuda="${CUDA_PATH}" --without-cm \
|
||||
--with-rc --with-ud --with-dc --with-mlx5-dv --with-dm \
|
||||
--enable-mt LDFLAGS=-L${GDRCOPY_PATH}/lib
|
||||
make -j 128
|
||||
make install
|
||||
cd ..
|
||||
|
||||
# ucx cpu build
|
||||
mkdir build_cpu; cd build_cpu
|
||||
../configure --build=x86_64-redhat-linux-gnu --host=x86_64-redhat-linux-gnu \
|
||||
--disable-dependency-tracking --prefix="${ENVDIR}"/prefix/ucx_cpu \
|
||||
--enable-devel-headers --enable-examples --enable-optimizations \
|
||||
--with-verbs --disable-logging --disable-debug \
|
||||
--disable-assertions --enable-mt --enable-cma \
|
||||
--with-knem=/opt/knem-1.1.4.90mlnx1/ --with-rdmacm \
|
||||
--without-rocm --without-ugni --without-java \
|
||||
--enable-compiler-opt=3 --without-cm --without-ugni --with-rc \
|
||||
--with-ud --with-dc --with-mlx5-dv --with-dm --enable-mt
|
||||
make -j 128
|
||||
make install
|
||||
|
||||
cd "${ENVDIR}"/build
|
||||
|
||||
# Install openmpi 4.1.1 (needs to be done on a gpu node)
|
||||
OMPI_URL=https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.1.tar.gz
|
||||
|
||||
echo "-- building OpenMPI from source"
|
||||
|
||||
wget ${OMPI_URL}
|
||||
OMPI_AR=$(basename ${OMPI_URL})
|
||||
tar -xvf "${OMPI_AR}"
|
||||
cd "${OMPI_AR%.tar.gz}"
|
||||
|
||||
# openmpi gpu build
|
||||
mkdir build_gpu; cd build_gpu
|
||||
../configure --prefix="${ENVDIR}"/prefix/ompi_gpu --without-xpmem \
|
||||
--with-ucx="${ENVDIR}"/prefix/ucx_gpu \
|
||||
--with-ucx-libdir="${ENVDIR}"/prefix/ucx_gpu/lib \
|
||||
--with-knem=/opt/knem-1.1.4.90mlnx1/ \
|
||||
--enable-mca-no-build=btl-uct \
|
||||
--with-cuda="${CUDA_PATH}" --disable-getpwuid \
|
||||
--with-verbs --with-slurm --enable-mpi-fortran=all \
|
||||
--with-pmix=internal --with-libevent=internal
|
||||
make -j 128
|
||||
make install
|
||||
cd ..
|
||||
|
||||
# openmpi cpu build
|
||||
mkdir build_cpu; cd build_cpu
|
||||
../configure --prefix="${ENVDIR}"/prefix/ompi_cpu --without-xpmem \
|
||||
--with-ucx="${ENVDIR}"/prefix/ucx_cpu \
|
||||
--with-ucx-libdir="${ENVDIR}"/prefix/ucx_cpu/lib \
|
||||
--with-knem=/opt/knem-1.1.4.90mlnx1/ \
|
||||
--enable-mca-no-build=btl-uct --disable-getpwuid \
|
||||
--with-verbs --with-slurm --enable-mpi-fortran=all \
|
||||
--with-pmix=internal --with-libevent=internal
|
||||
make -j 128
|
||||
make install
|
||||
cd "${ENVDIR}"
|
||||
|
||||
# Add externals to spack
|
||||
echo "packages:
|
||||
ucx:
|
||||
externals:
|
||||
- spec: \"ucx@1.12.0.GPU%gcc@9.4.0\"
|
||||
prefix: ${ENVDIR}/prefix/ucx_gpu
|
||||
- spec: \"ucx@1.12.0.CPU%gcc@9.4.0\"
|
||||
prefix: ${ENVDIR}/prefix/ucx_cpu
|
||||
buildable: False
|
||||
openmpi:
|
||||
externals:
|
||||
- spec: \"openmpi@4.1.1.GPU%gcc@9.4.0\"
|
||||
prefix: ${ENVDIR}/prefix/ompi_gpu
|
||||
- spec: \"openmpi@4.1.1.CPU%gcc@9.4.0\"
|
||||
prefix: ${ENVDIR}/prefix/ompi_cpu
|
||||
buildable: False" > spack.yaml
|
||||
|
||||
spack config --scope site add -f spack.yaml
|
||||
rm spack.yaml
|
||||
spack install ucx@1.12.0.GPU%gcc@9.4.0
|
||||
spack install ucx@1.12.0.CPU%gcc@9.4.0
|
||||
spack install openmpi@4.1.1.GPU%gcc@9.4.0
|
||||
spack install openmpi@4.1.1.CPU%gcc@9.4.0
|
||||
|
||||
# Install Grid dependencies ####################################################
|
||||
cd "${CWD}"
|
||||
|
||||
OPENMPIGPUHASH=$(spack find --format "{hash}" openmpi@4.1.1.GPU)
|
||||
OPENMPICPUHASH=$(spack find --format "{hash}" openmpi@4.1.1.CPU)
|
||||
|
||||
spack install ${HDF5}+cxx+threadsafe ^/"${OPENMPIGPUHASH}"
|
||||
spack install ${HDF5}+cxx+threadsafe ^/"${OPENMPICPUHASH}"
|
||||
spack install fftw ^/"${OPENMPIGPUHASH}"
|
||||
spack install fftw ^/"${OPENMPICPUHASH}"
|
||||
spack install openssl gmp mpfr c-lime
|
||||
|
||||
# Final setup ##################################################################
|
||||
spack clean
|
||||
|
||||
# add more environment variables in module loading
|
||||
spack config --scope site add 'modules:prefix_inspections:lib:[LIBRARY_PATH]'
|
||||
spack config --scope site add 'modules:prefix_inspections:include:[C_INCLUDE_PATH,CPLUS_INCLUDE_PATH,INCLUDE]'
|
||||
spack module tcl refresh -y
|
||||
|
||||
# permission change for group access
|
||||
chmod -R g+rw "${ENVDIR}/spack/var/spack/cache"
|
||||
setfacl -d -R -m g::rwX "${ENVDIR}/spack/var/spack/cache"
|
Loading…
Reference in New Issue
Block a user