tursa-energy-efficiency/make-result-table.sh
2022-09-07 17:31:28 +01:00

70 lines
3.0 KiB
Bash
Executable File

#!/usr/bin/env bash
# Builds a per-clock-limit result table from a job database and a rack power
# database; both are queried with sqlite3 further down in this script.
set -euo pipefail

# All four positional arguments are mandatory. Any other count prints the
# usage line on stderr and terminates with status 1.
(( $# == 4 )) || {
  printf '%s\n' "usage: $(basename "$0") <job db> <job table> <rack power db> <rack power table>" >&2
  exit 1
}

# Positional arguments, in the order shown in the usage line.
job_db=$1
job_table=$2
power_db=$3
power_table=$4
#######################################
# Print the column legend that precedes the data rows.
# Column 6 is an average power (computed later in this script as
# AVG(power)/1000.), so it is labelled in kW like column 4 rather than
# in kWh, which is an energy unit.
# Arguments: none
# Outputs:   ten '#'-prefixed lines on stdout
#######################################
print_column_legend() {
  printf '%s\n' \
    '# Columns' \
    '# -------' \
    '# 1 clock limit (MHz)' \
    '# 2 job duration (s)' \
    '# 3 rack energy (kWh)' \
    '# 4 rack median power (kW)' \
    '# 5 GPU energy (kWh)' \
    '# 6 GPU average power (kW)' \
    '# 7 Dhop average performance (TFlop/s/node)' \
    '# 8 DhopEO average performance (TFlop/s/node)'
}
print_column_legend
# Emit one table row per clock limit, from 210 to 1410 in steps of 15
# (MHz according to the column legend). Reads job_db, job_table, power_db and
# power_table, which are set from the positional arguments earlier in the
# script. Any inconsistency in the inputs aborts the script with status 1.

# Literal marker of the performance lines in each job log; every log is
# expected to contain exactly two of them (Dhop first, DhopEO second).
perf_tag='Average mflops/s per call per node (full)'

for ((c = 210; c <= 1410; c += 15)); do
  # Validate the job set before using it: with no matching rows, MAX/MIN
  # below come back empty and the rack power queries would fail with an SQL
  # syntax error instead of this message.
  njobs=$(sqlite3 "${job_db}" "SELECT COUNT(job_dir) FROM ${job_table} WHERE clock_limit = ${c}")
  if (( njobs != 4 )); then
    echo "error: number of jobs should be 4" 1>&2
    exit 1
  fi

  # Measurement window: latest job start to earliest job end, i.e. the
  # interval common to all jobs recorded at this clock limit.
  start=$(sqlite3 "${job_db}" "SELECT MAX(start) FROM ${job_table} WHERE clock_limit = ${c}")
  end=$(sqlite3 "${job_db}" "SELECT MIN(end) FROM ${job_table} WHERE clock_limit = ${c}")
  duration=$((end - start))

  # Rack-level figures over the window; at least one power sample is required.
  in_window="timestamp >= ${start} and timestamp <= ${end}"
  sample_count=$(sqlite3 "${power_db}" "SELECT COUNT(*) FROM ${power_table} WHERE ${in_window};")
  if (( sample_count == 0 )); then
    echo "error: no rack power sample" 1>&2
    exit 1
  fi
  energy=$(./get-rack12-energy.sh "${power_db}" "${power_table}" "${start}" "${end}")
  # Median of the summed rack_1 + rack_2 readings, scaled by 1/1000.
  power_med=$(sqlite3 "${power_db}" "SELECT (rack_1 + rack_2)/1000. FROM ${power_table} WHERE ${in_window};" | datamash median 1)

  # GPU energy and power, accumulated over the 8-node and 16-node jobs.
  gpu_energy='0.'
  gpu_power='0.'
  for nodes in 8 16; do
    smi_db_list=$(sqlite3 "${job_db}" "SELECT smi_db FROM ${job_table} WHERE clock_limit = ${c} AND nodes=${nodes}")
    # One path per line, read on fd 3 so that paths containing spaces or glob
    # characters stay intact and stdin is left alone for the commands below.
    while IFS= read -r -u 3 smi_db; do
      [[ -n "${smi_db}" ]] || continue
      # NOTE(review): energy is scaled by the node count only, power by
      # nodes*4 (presumably 4 GPUs per node, with the SMI table holding one
      # row per GPU so that SUM already covers them) -- confirm.
      job_energy=$(sqlite3 "${smi_db}" "SELECT ${nodes}*SUM(power) FROM clock_limit_${c};")
      job_power=$(sqlite3 "${smi_db}" "SELECT ${nodes}*4*AVG(power)/1000. FROM clock_limit_${c};")
      # SUM/AVG over an empty table yield NULL, printed as an empty line;
      # stop here rather than hand bc a broken expression.
      if [[ -z "${job_energy}" || -z "${job_power}" ]]; then
        echo "error: no GPU power sample in ${smi_db} for clock limit ${c}" 1>&2
        exit 1
      fi
      gpu_energy=$(bc -l <<<"${gpu_energy} + ${job_energy}")
      gpu_power=$(bc -l <<<"${gpu_power} + ${job_power}")
    done 3<<<"${smi_db_list}"
  done

  # Benchmark performance, summed over the job logs and averaged below.
  dhop_perf='0.'
  dhopeo_perf='0.'
  job_dir_list=$(sqlite3 "${job_db}" "SELECT job_dir FROM ${job_table} WHERE clock_limit = ${c}")
  while IFS= read -r -u 3 job_dir; do
    [[ -n "${job_dir}" ]] || continue
    log="${job_dir}/log"
    # grep -c exits non-zero on zero matches or an unreadable file; both cases
    # are reported as an ill-formed log by the comparison below.
    nmatch=$(grep -c -F -- "${perf_tag}" "${log}") || true
    if [[ "${nmatch}" != 2 ]]; then
      echo "error: log ${log} is ill-formed" 1>&2
      exit 1
    fi
    # Single pass over the log: last field of each marker line divided by 1e6
    # (mflops/s -> TFlop/s), first match is Dhop and second is DhopEO.
    perf_pair=$(awk -v tag="${perf_tag}" 'index($0, tag) { printf("%.6f ", $NF/1.e+6) }' "${log}")
    read -r tmp_dhop_perf tmp_dhopeo_perf <<<"${perf_pair}"
    dhop_perf=$(bc -l <<<"${dhop_perf} + ${tmp_dhop_perf}")
    dhopeo_perf=$(bc -l <<<"${dhopeo_perf} + ${tmp_dhopeo_perf}")
  done 3<<<"${job_dir_list}"

  # Average over the four jobs (count enforced at the top of the iteration).
  dhop_perf=$(bc -l <<<"${dhop_perf}/4.")
  dhopeo_perf=$(bc -l <<<"${dhopeo_perf}/4.")
  # 3600000 J = 1 kWh. Treating the summed power as joules presumes one
  # sample per second, in watts -- TODO confirm against the SMI sampling.
  gpu_energy=$(bc -l <<<"${gpu_energy}/3600000.")

  printf '%5d %5d %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f\n' "${c}" "${duration}" "${energy}" "${power_med}" "${gpu_energy}" "${gpu_power}" "${dhop_perf}" "${dhopeo_perf}"
done