updates scripts
This commit is contained in:
@@ -75,31 +75,61 @@ data:
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# irtt JSON (stdin) -> Prometheus sample lines (no HELP/TYPE; header is static).
|
# irtt JSON (stdin) -> Prometheus sample lines (no HELP/TYPE; header is static).
|
||||||
# args: <condition> <target>
|
# args: <condition> <target>
|
||||||
import json, sys, time
|
# Hardened: tolerates missing/null/NaN/Inf fields and clamps ratios to 0..1
|
||||||
cond = sys.argv[1] if len(sys.argv) > 1 else "idle"
|
# so a pathological irtt run can never emit an out-of-range or invalid sample.
|
||||||
|
import json, sys, time, math
|
||||||
|
|
||||||
|
cond = sys.argv[1] if len(sys.argv) > 1 else "idle"
|
||||||
target = sys.argv[2] if len(sys.argv) > 2 else "hetzner"
|
target = sys.argv[2] if len(sys.argv) > 2 else "hetzner"
|
||||||
L = f'target="{target}",condition="{cond}"'
|
L = f'target="{target}",condition="{cond}"'
|
||||||
ts = f'{time.time():.0f}'
|
ts = f'{time.time():.0f}'
|
||||||
try:
|
|
||||||
s = json.load(sys.stdin)["stats"]
|
def num(x, default=0.0):
|
||||||
except Exception:
|
# finite float or default (handles None / str / missing / NaN / Inf)
|
||||||
|
try:
|
||||||
|
v = float(x)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return default
|
||||||
|
return v if math.isfinite(v) else default
|
||||||
|
|
||||||
|
def pct_ratio(x):
|
||||||
|
# percent (0..100, possibly garbage) -> ratio clamped to 0..1
|
||||||
|
return max(0.0, min(1.0, num(x) / 100.0))
|
||||||
|
|
||||||
|
def fail():
|
||||||
print(f'wan_irtt_success{{{L}}} 0')
|
print(f'wan_irtt_success{{{L}}} 0')
|
||||||
print(f'wan_probe_last_run_timestamp_seconds{{probe="irtt",{L}}} {ts}')
|
print(f'wan_probe_last_run_timestamp_seconds{{probe="irtt",{L}}} {ts}')
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
rtt = s["rtt"]
|
|
||||||
|
try:
|
||||||
|
s = json.load(sys.stdin).get("stats")
|
||||||
|
except Exception:
|
||||||
|
fail()
|
||||||
|
if not isinstance(s, dict):
|
||||||
|
fail()
|
||||||
|
|
||||||
|
rtt = s.get("rtt") or {}
|
||||||
for k in ("min", "mean", "median", "max", "stddev"):
|
for k in ("min", "mean", "median", "max", "stddev"):
|
||||||
print(f'wan_irtt_rtt_seconds{{{L},stat="{k}"}} {rtt[k]/1e9}')
|
print(f'wan_irtt_rtt_seconds{{{L},stat="{k}"}} {num(rtt.get(k)) / 1e9}')
|
||||||
print(f'wan_irtt_jitter_seconds{{{L},direction="round_trip"}} {s["ipdv_round_trip"]["mean"]/1e9}')
|
|
||||||
print(f'wan_irtt_jitter_seconds{{{L},direction="send"}} {s["ipdv_send"]["mean"]/1e9}')
|
def ipdv(key):
|
||||||
print(f'wan_irtt_jitter_seconds{{{L},direction="receive"}} {s["ipdv_receive"]["mean"]/1e9}')
|
d = s.get(key) or {}
|
||||||
print(f'wan_irtt_loss_ratio{{{L},direction="round_trip"}} {s["packet_loss_percent"]/100.0}')
|
return num(d.get("mean")) / 1e9
|
||||||
print(f'wan_irtt_loss_ratio{{{L},direction="upstream"}} {s["upstream_loss_percent"]/100.0}')
|
|
||||||
print(f'wan_irtt_loss_ratio{{{L},direction="downstream"}} {s["downstream_loss_percent"]/100.0}')
|
print(f'wan_irtt_jitter_seconds{{{L},direction="round_trip"}} {ipdv("ipdv_round_trip")}')
|
||||||
print(f'wan_irtt_late_ratio{{{L}}} {s["late_packets_percent"]/100.0}')
|
print(f'wan_irtt_jitter_seconds{{{L},direction="send"}} {ipdv("ipdv_send")}')
|
||||||
print(f'wan_irtt_duplicate_ratio{{{L}}} {s["duplicate_percent"]/100.0}')
|
print(f'wan_irtt_jitter_seconds{{{L},direction="receive"}} {ipdv("ipdv_receive")}')
|
||||||
print(f'wan_irtt_packets{{{L},kind="sent"}} {s["packets_sent"]}')
|
|
||||||
print(f'wan_irtt_packets{{{L},kind="received"}} {s["packets_received"]}')
|
print(f'wan_irtt_loss_ratio{{{L},direction="round_trip"}} {pct_ratio(s.get("packet_loss_percent"))}')
|
||||||
print(f'wan_irtt_packets{{{L},kind="server_received"}} {s["server_packets_received"]}')
|
print(f'wan_irtt_loss_ratio{{{L},direction="upstream"}} {pct_ratio(s.get("upstream_loss_percent"))}')
|
||||||
|
print(f'wan_irtt_loss_ratio{{{L},direction="downstream"}} {pct_ratio(s.get("downstream_loss_percent"))}')
|
||||||
|
print(f'wan_irtt_late_ratio{{{L}}} {pct_ratio(s.get("late_packets_percent"))}')
|
||||||
|
print(f'wan_irtt_duplicate_ratio{{{L}}} {pct_ratio(s.get("duplicate_percent"))}')
|
||||||
|
|
||||||
|
print(f'wan_irtt_packets{{{L},kind="sent"}} {int(num(s.get("packets_sent")))}')
|
||||||
|
print(f'wan_irtt_packets{{{L},kind="received"}} {int(num(s.get("packets_received")))}')
|
||||||
|
print(f'wan_irtt_packets{{{L},kind="server_received"}} {int(num(s.get("server_packets_received")))}')
|
||||||
|
|
||||||
print(f'wan_irtt_success{{{L}}} 1')
|
print(f'wan_irtt_success{{{L}}} 1')
|
||||||
print(f'wan_probe_last_run_timestamp_seconds{{probe="irtt",{L}}} {ts}')
|
print(f'wan_probe_last_run_timestamp_seconds{{probe="irtt",{L}}} {ts}')
|
||||||
|
|
||||||
@@ -129,8 +159,8 @@ data:
|
|||||||
IPERF_PORT="${IPERF_PORT:-5201}"
|
IPERF_PORT="${IPERF_PORT:-5201}"
|
||||||
IRTT_INTERVAL="${IRTT_INTERVAL:-20ms}"
|
IRTT_INTERVAL="${IRTT_INTERVAL:-20ms}"
|
||||||
IRTT_DURATION="${IRTT_DURATION:-60}" # seconds (numeric, for timeout math)
|
IRTT_DURATION="${IRTT_DURATION:-60}" # seconds (numeric, for timeout math)
|
||||||
TPUT_EVERY="${TPUT_EVERY:-900}" # seconds between throughput tests
|
TPUT_EVERY="${TPUT_EVERY:-900}" # seconds between throughput tests
|
||||||
TPUT_TIME="${TPUT_TIME:-10}" # iperf3 seconds per direction
|
TPUT_TIME="${TPUT_TIME:-10}" # iperf3 seconds per direction
|
||||||
IRTT_TARGET="${IRTT_TARGET:-hetzner}"
|
IRTT_TARGET="${IRTT_TARGET:-hetzner}"
|
||||||
TPUT_TARGET="${TPUT_TARGET:-hetzner}"
|
TPUT_TARGET="${TPUT_TARGET:-hetzner}"
|
||||||
HMAC_OPT=""
|
HMAC_OPT=""
|
||||||
@@ -140,26 +170,32 @@ data:
|
|||||||
: > "$SHARED/.irtt.prom"; : > "$SHARED/.irttload.prom"; : > "$SHARED/.tput.prom"
|
: > "$SHARED/.irtt.prom"; : > "$SHARED/.irttload.prom"; : > "$SHARED/.tput.prom"
|
||||||
cp "$HDR" "$SHARED/metrics" # serve header immediately so first scrapes don't 404
|
cp "$HDR" "$SHARED/metrics" # serve header immediately so first scrapes don't 404
|
||||||
|
|
||||||
|
# Concatenate fragments into the served file via temp + atomic rename.
|
||||||
assemble() {
|
assemble() {
|
||||||
cat "$HDR" "$SHARED/.irtt.prom" "$SHARED/.irttload.prom" "$SHARED/.tput.prom" \
|
cat "$HDR" "$SHARED/.irtt.prom" "$SHARED/.irttload.prom" "$SHARED/.tput.prom" \
|
||||||
> "$SHARED/.metrics.tmp" 2>/dev/null
|
> "$SHARED/.metrics.tmp" 2>/dev/null
|
||||||
mv "$SHARED/.metrics.tmp" "$SHARED/metrics"
|
mv "$SHARED/.metrics.tmp" "$SHARED/metrics"
|
||||||
}
|
}
|
||||||
|
|
||||||
run_irtt() {
|
# Each fragment is written to <file>.tmp then renamed, so assemble() never
|
||||||
|
# cats a partially written file (the cause of the impossible loss spikes).
|
||||||
|
run_irtt() { # $1 condition $2 outfile $3 duration(seconds)
|
||||||
timeout "$(( $3 + 25 ))" irtt client -i "$IRTT_INTERVAL" -d "${3}s" -q $HMAC_OPT \
|
timeout "$(( $3 + 25 ))" irtt client -i "$IRTT_INTERVAL" -d "${3}s" -q $HMAC_OPT \
|
||||||
-o - "${HETZNER}:${IRTT_PORT}" 2>/dev/null \
|
-o - "${HETZNER}:${IRTT_PORT}" 2>/dev/null \
|
||||||
| python3 /scripts/irtt_to_prom.py "$1" "$IRTT_TARGET" > "$2.tmp"
|
| python3 /scripts/irtt_to_prom.py "$1" "$IRTT_TARGET" > "$2.tmp"
|
||||||
mv "$2.tmp" "$2"
|
mv "$2.tmp" "$2"
|
||||||
}
|
}
|
||||||
|
|
||||||
run_tput() {
|
run_tput() {
|
||||||
P="${IPERF_PARALLEL:-4}" # parallel streams: a single stream can't fill 1 Gbps over the RTT
|
P="${IPERF_PARALLEL:-4}" # parallel streams: a single stream can't fill the pipe over the RTT
|
||||||
TO="$(( TPUT_TIME + 20 ))"
|
TO="$(( TPUT_TIME + 20 ))"
|
||||||
|
TMP="$SHARED/.tput.prom.partial"
|
||||||
|
: > "$TMP"
|
||||||
timeout "$TO" iperf3 -c "$HETZNER" -p "$IPERF_PORT" -t "$TPUT_TIME" -P "$P" --connect-timeout 5000 -R -J 2>/dev/null \
|
timeout "$TO" iperf3 -c "$HETZNER" -p "$IPERF_PORT" -t "$TPUT_TIME" -P "$P" --connect-timeout 5000 -R -J 2>/dev/null \
|
||||||
| python3 /scripts/tput_to_prom.py download "$TPUT_TARGET" > "$SHARED/.tput.prom"
|
| python3 /scripts/tput_to_prom.py download "$TPUT_TARGET" > "$TMP"
|
||||||
timeout "$TO" iperf3 -c "$HETZNER" -p "$IPERF_PORT" -t "$TPUT_TIME" -P "$P" --connect-timeout 5000 -J 2>/dev/null \
|
timeout "$TO" iperf3 -c "$HETZNER" -p "$IPERF_PORT" -t "$TPUT_TIME" -P "$P" --connect-timeout 5000 -J 2>/dev/null \
|
||||||
| python3 /scripts/tput_to_prom.py upload "$TPUT_TARGET" >> "$SHARED/.tput.prom"
|
| python3 /scripts/tput_to_prom.py upload "$TPUT_TARGET" >> "$TMP"
|
||||||
|
mv "$TMP" "$SHARED/.tput.prom"
|
||||||
}
|
}
|
||||||
|
|
||||||
last_tput=0
|
last_tput=0
|
||||||
|
|||||||
Reference in New Issue
Block a user