6 Commits

3 changed files with 9 additions and 14 deletions

View File

@@ -273,7 +273,7 @@ class Benchmark
double rate = bidibytes / (timestat.mean / 1.e6) / 1024. / 1024. / 1024.;
double rate_err = rate * timestat.err / timestat.mean;
double rate_max = rate * timestat.mean / timestat.min;
grid_printf("%5d %5d %7s %15d %15.2f %15.2f %15.1f %15.2f\n", lat, dir,
grid_printf("%5d %5d %7s %15llu %15.2f %15.2f %15.1f %15.2f\n", lat, dir,
is_shm ? "yes"
: is_partial_shm ? "partial"
: "no",
@@ -438,7 +438,7 @@ class Benchmark
double rate_max = rate * timestat.mean / timestat.min;
double rate_min = rate * timestat.mean / timestat.max;
grid_printf("%2d %2d %15.4f %15.3f %15.4f %15d %15.2f\n", from, to, timestat.mean,
grid_printf("%2d %2d %15.4f %15.3f %15.4f %15zu %15.2f\n", from, to, timestat.mean,
timestat.err, timestat.min, bytes, rate);
nlohmann::json tmp;
@@ -1032,11 +1032,7 @@ int main(int argc, char **argv)
CartesianCommunicator::SetCommunicatorPolicy(
CartesianCommunicator::CommunicatorPolicySequential);
#ifdef KNL
LebesgueOrder::Block = std::vector<int>({8, 2, 2, 2});
#else
LebesgueOrder::Block = std::vector<int>({2, 2, 2, 2});
#endif
Benchmark::Decomposition();
int sel = 4;

View File

@@ -1,13 +1,12 @@
#!/usr/bin/env bash
lrank=$OMPI_COMM_WORLD_LOCAL_RANK
numa1=$(( 2 * lrank))
numa2=$(( 2 * lrank + 1 ))
numa1=$((lrank))
netdev=mlx5_${lrank}:1
export CUDA_VISIBLE_DEVICES=$OMPI_COMM_WORLD_LOCAL_RANK
export UCX_NET_DEVICES=${netdev}
BINDING="--interleave=$numa1,$numa2"
BINDING="--interleave=$numa1"
echo "$(hostname) - $lrank device=$CUDA_VISIBLE_DEVICES binding=$BINDING"

View File

@@ -80,7 +80,7 @@ mkdir -p build_gpu; cd build_gpu
--enable-devel-headers --enable-examples --enable-optimizations \
--with-gdrcopy=${gdrcopy_path} --with-verbs --disable-logging \
--disable-debug --disable-assertions --enable-cma \
--with-knem=/opt/knem-1.1.4.90mlnx1/ --with-rdmacm \
--with-knem=/opt/knem-1.1.4.90mlnx2/ --with-rdmacm \
--without-rocm --without-ugni --without-java \
--enable-compiler-opt=3 --with-cuda="${cuda_path}" --without-cm \
--with-rc --with-ud --with-dc --with-mlx5-dv --with-dm \
@@ -96,7 +96,7 @@ mkdir -p build_cpu; cd build_cpu
--enable-devel-headers --enable-examples --enable-optimizations \
--with-verbs --disable-logging --disable-debug \
--disable-assertions --enable-mt --enable-cma \
--with-knem=/opt/knem-1.1.4.90mlnx1/ --with-rdmacm \
--with-knem=/opt/knem-1.1.4.90mlnx2/ --with-rdmacm \
--without-rocm --without-ugni --without-java \
--enable-compiler-opt=3 --without-cm --without-ugni --with-rc \
--with-ud --with-dc --with-mlx5-dv --with-dm --enable-mt --without-go
@@ -122,7 +122,7 @@ mkdir build_gpu; cd build_gpu
../configure --prefix="${dir}"/prefix/ompi_gpu --without-xpmem \
--with-ucx="${dir}"/prefix/ucx_gpu \
--with-ucx-libdir="${dir}"/prefix/ucx_gpu/lib \
--with-knem=/opt/knem-1.1.4.90mlnx1/ \
--with-knem=/opt/knem-1.1.4.90mlnx2/ \
--enable-mca-no-build=btl-uct \
--with-cuda="${cuda_path}" --disable-getpwuid \
--with-verbs --with-slurm --enable-mpi-fortran=all \
@@ -136,7 +136,7 @@ mkdir build_cpu; cd build_cpu
../configure --prefix="${dir}"/prefix/ompi_cpu --without-xpmem \
--with-ucx="${dir}"/prefix/ucx_cpu \
--with-ucx-libdir="${dir}"/prefix/ucx_cpu/lib \
--with-knem=/opt/knem-1.1.4.90mlnx1/ \
--with-knem=/opt/knem-1.1.4.90mlnx2/ \
--enable-mca-no-build=btl-uct --disable-getpwuid \
--with-verbs --with-slurm --enable-mpi-fortran=all \
--with-pmix=internal --with-libevent=internal