#!/usr/bin/env bash
#
# Print a whitespace-separated table with one row per GPU clock limit,
# combining job metadata, rack power samples, per-job GPU power samples and
# benchmark logs.
#
# Usage: <script> <job_db> <job_table> <power_db> <power_table>
#   job_db       SQLite database holding the job table
#   job_table    table with columns clock_limit, start, end, nodes, smi_db,
#                job_dir (as referenced by the queries below)
#   power_db     SQLite database holding the rack power samples
#   power_table  table with columns timestamp, rack_1, rack_2
#
# External tools: sqlite3, datamash, bc, awk, grep, and the helper
# ./get-rack12-energy.sh (resolved relative to the current directory).
#
# Exits with status 1 and a message on stderr when the arguments are wrong,
# a clock limit does not have exactly EXPECTED_JOBS jobs, no rack power sample
# falls in the measurement window, a GPU power table is empty, or a job log
# does not contain exactly two performance lines.
set -euo pipefail

# Clock limits scanned, in MHz (inclusive range).
readonly CLOCK_MIN=210
readonly CLOCK_MAX=1410
readonly CLOCK_STEP=15

# Jobs required per clock limit; also the divisor for the average performance.
readonly EXPECTED_JOBS=4

# Log line carrying a performance figure; each log must contain it exactly
# twice (first occurrence is reported as Dhop, second as DhopEO).
readonly PERF_PATTERN='Average mflops/s per call per node (full)'

# Print the given message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the last field of the line $1 divided by 1e6, with 6 decimals
# (Mflop/s -> Tflop/s according to the log text and the column header).
last_field_to_tflops() {
  awk '{printf("%.6f", $NF/1.e+6)}' <<<"$1"
}

if (( $# != 4 )); then
  die "usage: $(basename "$0") <job_db> <job_table> <power_db> <power_table>"
fi

job_db=$1
job_table=$2
power_db=$3
power_table=$4

echo '# Columns'
echo '# -------'
echo '# 1 clock limit (MHz)'
echo '# 2 job duration (s)'
echo '# 3 rack energy (kWh)'
echo '# 4 rack median power (kW)'
echo '# 5 GPU energy (kWh)'
echo '# 6 GPU average power (kW)'
echo '# 7 Dhop average performance (TFlop/s/node)'
echo '# 8 DhopEO average performance (TFlop/s/node)'

for ((c = CLOCK_MIN; c <= CLOCK_MAX; c += CLOCK_STEP)); do
  # Validate the job count first: with no matching job, MAX()/MIN() below
  # return NULL (an empty string) and the duration arithmetic would fail with
  # an unhelpful syntax error.
  njobs=$(sqlite3 "${job_db}" \
    "SELECT COUNT(job_dir) FROM ${job_table} WHERE clock_limit = ${c}")
  if (( njobs != EXPECTED_JOBS )); then
    die "error: number of jobs should be ${EXPECTED_JOBS}"
  fi

  # Measurement window: latest job start to earliest job end, i.e. presumably
  # the interval during which all jobs of this clock limit ran concurrently.
  start=$(sqlite3 "${job_db}" \
    "SELECT MAX(start) FROM ${job_table} WHERE clock_limit = ${c}")
  end=$(sqlite3 "${job_db}" \
    "SELECT MIN(end) FROM ${job_table} WHERE clock_limit = ${c}")
  duration=$((end - start))

  sample_count=$(sqlite3 "${power_db}" \
    "SELECT COUNT(*) FROM ${power_table} WHERE timestamp >= ${start} and timestamp <= ${end};")
  if (( sample_count == 0 )); then
    die "error: no rack power sample"
  fi

  energy=$(./get-rack12-energy.sh "${power_db}" "${power_table}" "${start}" "${end}")
  power_med=$(sqlite3 "${power_db}" \
    "SELECT (rack_1 + rack_2)/1000. FROM ${power_table} WHERE timestamp >= ${start} and timestamp <= ${end};" \
    | datamash median 1)

  # Accumulate GPU energy and average power over every job of this clock limit.
  gpu_energy='0.'
  gpu_power='0.'
  for nodes in 8 16; do
    smi_db_list=$(sqlite3 "${job_db}" \
      "SELECT smi_db FROM ${job_table} WHERE clock_limit = ${c} AND nodes=${nodes}")
    # One path per line; mapfile avoids word splitting and globbing on paths.
    mapfile -t smi_dbs <<<"${smi_db_list}"
    for smi_db in "${smi_dbs[@]}"; do
      [[ -n "${smi_db}" ]] || continue
      # NOTE(review): the factor 4 presumably is the GPU count per node and
      # the samples presumably come from a single node - confirm.
      job_energy=$(sqlite3 "${smi_db}" \
        "SELECT ${nodes}*SUM(power) FROM clock_limit_${c};")
      job_power=$(sqlite3 "${smi_db}" \
        "SELECT ${nodes}*4*AVG(power)/1000. FROM clock_limit_${c};")
      # SUM()/AVG() yield NULL (empty output) on an empty table, which would
      # otherwise feed a malformed expression to bc.
      if [[ -z "${job_energy}" || -z "${job_power}" ]]; then
        die "error: no GPU power sample in ${smi_db} for clock limit ${c}"
      fi
      gpu_energy=$(bc -l <<<"${gpu_energy} + ${job_energy}")
      gpu_power=$(bc -l <<<"${gpu_power} + ${job_power}")
    done
  done

  # Sum the two performance figures of every job log, then average them.
  job_dir_list=$(sqlite3 "${job_db}" \
    "SELECT job_dir FROM ${job_table} WHERE clock_limit = ${c}")
  mapfile -t job_dirs <<<"${job_dir_list}"

  dhop_perf='0.'
  dhopeo_perf='0.'
  for job_dir in "${job_dirs[@]}"; do
    [[ -n "${job_dir}" ]] || continue
    log="${job_dir}/log"
    # Read the matching lines once; a missing or unreadable log yields zero
    # lines and is reported as ill-formed as well.
    mapfile -t perf_lines < <(grep -F -- "${PERF_PATTERN}" "${log}")
    if (( ${#perf_lines[@]} != 2 )); then
      die "error: log ${log} is ill-formed"
    fi
    tmp_dhop_perf=$(last_field_to_tflops "${perf_lines[0]}")
    tmp_dhopeo_perf=$(last_field_to_tflops "${perf_lines[1]}")
    dhop_perf=$(bc -l <<<"${dhop_perf} + ${tmp_dhop_perf}")
    dhopeo_perf=$(bc -l <<<"${dhopeo_perf} + ${tmp_dhopeo_perf}")
  done
  dhop_perf=$(bc -l <<<"${dhop_perf}/${EXPECTED_JOBS}")
  dhopeo_perf=$(bc -l <<<"${dhopeo_perf}/${EXPECTED_JOBS}")

  # 3.6e6 J = 1 kWh; assumes the summed samples amount to joules (power in W
  # sampled once per second) - TODO confirm the sampling interval.
  gpu_energy=$(bc -l <<<"${gpu_energy}/3600000.")

  printf '%5d %5d %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f\n' \
    "${c}" "${duration}" "${energy}" "${power_med}" \
    "${gpu_energy}" "${gpu_power}" "${dhop_perf}" "${dhopeo_perf}"
done