updates scripts

This commit is contained in:
2026-06-07 12:59:09 +02:00
parent 754564167f
commit 4e86091f7d
+62 -26
View File
@@ -75,31 +75,61 @@ data:
#!/usr/bin/env python3 #!/usr/bin/env python3
# irtt JSON (stdin) -> Prometheus sample lines (no HELP/TYPE; header is static). # irtt JSON (stdin) -> Prometheus sample lines (no HELP/TYPE; header is static).
# args: <condition> <target> # args: <condition> <target>
import json, sys, time # Hardened: tolerates missing/null/NaN/Inf fields and clamps ratios to 0..1
cond = sys.argv[1] if len(sys.argv) > 1 else "idle" # so a pathological irtt run can never emit an out-of-range or invalid sample.
import json, sys, time, math
cond = sys.argv[1] if len(sys.argv) > 1 else "idle"
target = sys.argv[2] if len(sys.argv) > 2 else "hetzner" target = sys.argv[2] if len(sys.argv) > 2 else "hetzner"
L = f'target="{target}",condition="{cond}"' L = f'target="{target}",condition="{cond}"'
ts = f'{time.time():.0f}' ts = f'{time.time():.0f}'
try:
s = json.load(sys.stdin)["stats"] def num(x, default=0.0):
except Exception: # finite float or default (handles None / str / missing / NaN / Inf)
try:
v = float(x)
except (TypeError, ValueError):
return default
return v if math.isfinite(v) else default
def pct_ratio(x):
# percent (0..100, possibly garbage) -> ratio clamped to 0..1
return max(0.0, min(1.0, num(x) / 100.0))
def fail():
print(f'wan_irtt_success{{{L}}} 0') print(f'wan_irtt_success{{{L}}} 0')
print(f'wan_probe_last_run_timestamp_seconds{{probe="irtt",{L}}} {ts}') print(f'wan_probe_last_run_timestamp_seconds{{probe="irtt",{L}}} {ts}')
sys.exit(0) sys.exit(0)
rtt = s["rtt"]
try:
s = json.load(sys.stdin).get("stats")
except Exception:
fail()
if not isinstance(s, dict):
fail()
rtt = s.get("rtt") or {}
for k in ("min", "mean", "median", "max", "stddev"): for k in ("min", "mean", "median", "max", "stddev"):
print(f'wan_irtt_rtt_seconds{{{L},stat="{k}"}} {rtt[k]/1e9}') print(f'wan_irtt_rtt_seconds{{{L},stat="{k}"}} {num(rtt.get(k)) / 1e9}')
print(f'wan_irtt_jitter_seconds{{{L},direction="round_trip"}} {s["ipdv_round_trip"]["mean"]/1e9}')
print(f'wan_irtt_jitter_seconds{{{L},direction="send"}} {s["ipdv_send"]["mean"]/1e9}') def ipdv(key):
print(f'wan_irtt_jitter_seconds{{{L},direction="receive"}} {s["ipdv_receive"]["mean"]/1e9}') d = s.get(key) or {}
print(f'wan_irtt_loss_ratio{{{L},direction="round_trip"}} {s["packet_loss_percent"]/100.0}') return num(d.get("mean")) / 1e9
print(f'wan_irtt_loss_ratio{{{L},direction="upstream"}} {s["upstream_loss_percent"]/100.0}')
print(f'wan_irtt_loss_ratio{{{L},direction="downstream"}} {s["downstream_loss_percent"]/100.0}') print(f'wan_irtt_jitter_seconds{{{L},direction="round_trip"}} {ipdv("ipdv_round_trip")}')
print(f'wan_irtt_late_ratio{{{L}}} {s["late_packets_percent"]/100.0}') print(f'wan_irtt_jitter_seconds{{{L},direction="send"}} {ipdv("ipdv_send")}')
print(f'wan_irtt_duplicate_ratio{{{L}}} {s["duplicate_percent"]/100.0}') print(f'wan_irtt_jitter_seconds{{{L},direction="receive"}} {ipdv("ipdv_receive")}')
print(f'wan_irtt_packets{{{L},kind="sent"}} {s["packets_sent"]}')
print(f'wan_irtt_packets{{{L},kind="received"}} {s["packets_received"]}') print(f'wan_irtt_loss_ratio{{{L},direction="round_trip"}} {pct_ratio(s.get("packet_loss_percent"))}')
print(f'wan_irtt_packets{{{L},kind="server_received"}} {s["server_packets_received"]}') print(f'wan_irtt_loss_ratio{{{L},direction="upstream"}} {pct_ratio(s.get("upstream_loss_percent"))}')
print(f'wan_irtt_loss_ratio{{{L},direction="downstream"}} {pct_ratio(s.get("downstream_loss_percent"))}')
print(f'wan_irtt_late_ratio{{{L}}} {pct_ratio(s.get("late_packets_percent"))}')
print(f'wan_irtt_duplicate_ratio{{{L}}} {pct_ratio(s.get("duplicate_percent"))}')
print(f'wan_irtt_packets{{{L},kind="sent"}} {int(num(s.get("packets_sent")))}')
print(f'wan_irtt_packets{{{L},kind="received"}} {int(num(s.get("packets_received")))}')
print(f'wan_irtt_packets{{{L},kind="server_received"}} {int(num(s.get("server_packets_received")))}')
print(f'wan_irtt_success{{{L}}} 1') print(f'wan_irtt_success{{{L}}} 1')
print(f'wan_probe_last_run_timestamp_seconds{{probe="irtt",{L}}} {ts}') print(f'wan_probe_last_run_timestamp_seconds{{probe="irtt",{L}}} {ts}')
@@ -129,8 +159,8 @@ data:
IPERF_PORT="${IPERF_PORT:-5201}" IPERF_PORT="${IPERF_PORT:-5201}"
IRTT_INTERVAL="${IRTT_INTERVAL:-20ms}" IRTT_INTERVAL="${IRTT_INTERVAL:-20ms}"
IRTT_DURATION="${IRTT_DURATION:-60}" # seconds (numeric, for timeout math) IRTT_DURATION="${IRTT_DURATION:-60}" # seconds (numeric, for timeout math)
TPUT_EVERY="${TPUT_EVERY:-900}" # seconds between throughput tests TPUT_EVERY="${TPUT_EVERY:-900}" # seconds between throughput tests
TPUT_TIME="${TPUT_TIME:-10}" # iperf3 seconds per direction TPUT_TIME="${TPUT_TIME:-10}" # iperf3 seconds per direction
IRTT_TARGET="${IRTT_TARGET:-hetzner}" IRTT_TARGET="${IRTT_TARGET:-hetzner}"
TPUT_TARGET="${TPUT_TARGET:-hetzner}" TPUT_TARGET="${TPUT_TARGET:-hetzner}"
HMAC_OPT="" HMAC_OPT=""
@@ -140,26 +170,32 @@ data:
: > "$SHARED/.irtt.prom"; : > "$SHARED/.irttload.prom"; : > "$SHARED/.tput.prom" : > "$SHARED/.irtt.prom"; : > "$SHARED/.irttload.prom"; : > "$SHARED/.tput.prom"
cp "$HDR" "$SHARED/metrics" # serve header immediately so first scrapes don't 404 cp "$HDR" "$SHARED/metrics" # serve header immediately so first scrapes don't 404
# Concatenate fragments into the served file via temp + atomic rename.
assemble() { assemble() {
cat "$HDR" "$SHARED/.irtt.prom" "$SHARED/.irttload.prom" "$SHARED/.tput.prom" \ cat "$HDR" "$SHARED/.irtt.prom" "$SHARED/.irttload.prom" "$SHARED/.tput.prom" \
> "$SHARED/.metrics.tmp" 2>/dev/null > "$SHARED/.metrics.tmp" 2>/dev/null
mv "$SHARED/.metrics.tmp" "$SHARED/metrics" mv "$SHARED/.metrics.tmp" "$SHARED/metrics"
} }
run_irtt() { # Each fragment is written to <file>.tmp then renamed, so assemble() never
# cats a partially written file (the cause of the impossible loss spikes).
run_irtt() { # $1 condition $2 outfile $3 duration(seconds)
timeout "$(( $3 + 25 ))" irtt client -i "$IRTT_INTERVAL" -d "${3}s" -q $HMAC_OPT \ timeout "$(( $3 + 25 ))" irtt client -i "$IRTT_INTERVAL" -d "${3}s" -q $HMAC_OPT \
-o - "${HETZNER}:${IRTT_PORT}" 2>/dev/null \ -o - "${HETZNER}:${IRTT_PORT}" 2>/dev/null \
| python3 /scripts/irtt_to_prom.py "$1" "$IRTT_TARGET" > "$2.tmp" | python3 /scripts/irtt_to_prom.py "$1" "$IRTT_TARGET" > "$2.tmp"
mv "$2.tmp" "$2" mv "$2.tmp" "$2"
} }
run_tput() { run_tput() {
P="${IPERF_PARALLEL:-4}" # parallel streams: a single stream can't fill 1 Gbps over the RTT P="${IPERF_PARALLEL:-4}" # parallel streams: a single stream can't fill the pipe over the RTT
TO="$(( TPUT_TIME + 20 ))" TO="$(( TPUT_TIME + 20 ))"
TMP="$SHARED/.tput.prom.partial"
: > "$TMP"
timeout "$TO" iperf3 -c "$HETZNER" -p "$IPERF_PORT" -t "$TPUT_TIME" -P "$P" --connect-timeout 5000 -R -J 2>/dev/null \ timeout "$TO" iperf3 -c "$HETZNER" -p "$IPERF_PORT" -t "$TPUT_TIME" -P "$P" --connect-timeout 5000 -R -J 2>/dev/null \
| python3 /scripts/tput_to_prom.py download "$TPUT_TARGET" > "$SHARED/.tput.prom" | python3 /scripts/tput_to_prom.py download "$TPUT_TARGET" > "$TMP"
timeout "$TO" iperf3 -c "$HETZNER" -p "$IPERF_PORT" -t "$TPUT_TIME" -P "$P" --connect-timeout 5000 -J 2>/dev/null \ timeout "$TO" iperf3 -c "$HETZNER" -p "$IPERF_PORT" -t "$TPUT_TIME" -P "$P" --connect-timeout 5000 -J 2>/dev/null \
| python3 /scripts/tput_to_prom.py upload "$TPUT_TARGET" >> "$SHARED/.tput.prom" | python3 /scripts/tput_to_prom.py upload "$TPUT_TARGET" >> "$TMP"
mv "$TMP" "$SHARED/.tput.prom"
} }
last_tput=0 last_tput=0