updates scripts
This commit is contained in:
@@ -75,31 +75,61 @@ data:
|
||||
#!/usr/bin/env python3
|
||||
# irtt JSON (stdin) -> Prometheus sample lines (no HELP/TYPE; header is static).
|
||||
# args: <condition> <target>
|
||||
import json, sys, time
|
||||
# Hardened: tolerates missing/null/NaN/Inf fields and clamps ratios to 0..1
|
||||
# so a pathological irtt run can never emit an out-of-range or invalid sample.
|
||||
import json, sys, time, math
|
||||
|
||||
cond = sys.argv[1] if len(sys.argv) > 1 else "idle"
|
||||
target = sys.argv[2] if len(sys.argv) > 2 else "hetzner"
|
||||
L = f'target="{target}",condition="{cond}"'
|
||||
ts = f'{time.time():.0f}'
|
||||
|
||||
def num(x, default=0.0):
|
||||
# finite float or default (handles None / str / missing / NaN / Inf)
|
||||
try:
|
||||
s = json.load(sys.stdin)["stats"]
|
||||
except Exception:
|
||||
v = float(x)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
return v if math.isfinite(v) else default
|
||||
|
||||
def pct_ratio(x):
|
||||
# percent (0..100, possibly garbage) -> ratio clamped to 0..1
|
||||
return max(0.0, min(1.0, num(x) / 100.0))
|
||||
|
||||
def fail():
|
||||
print(f'wan_irtt_success{{{L}}} 0')
|
||||
print(f'wan_probe_last_run_timestamp_seconds{{probe="irtt",{L}}} {ts}')
|
||||
sys.exit(0)
|
||||
rtt = s["rtt"]
|
||||
|
||||
try:
|
||||
s = json.load(sys.stdin).get("stats")
|
||||
except Exception:
|
||||
fail()
|
||||
if not isinstance(s, dict):
|
||||
fail()
|
||||
|
||||
rtt = s.get("rtt") or {}
|
||||
for k in ("min", "mean", "median", "max", "stddev"):
|
||||
print(f'wan_irtt_rtt_seconds{{{L},stat="{k}"}} {rtt[k]/1e9}')
|
||||
print(f'wan_irtt_jitter_seconds{{{L},direction="round_trip"}} {s["ipdv_round_trip"]["mean"]/1e9}')
|
||||
print(f'wan_irtt_jitter_seconds{{{L},direction="send"}} {s["ipdv_send"]["mean"]/1e9}')
|
||||
print(f'wan_irtt_jitter_seconds{{{L},direction="receive"}} {s["ipdv_receive"]["mean"]/1e9}')
|
||||
print(f'wan_irtt_loss_ratio{{{L},direction="round_trip"}} {s["packet_loss_percent"]/100.0}')
|
||||
print(f'wan_irtt_loss_ratio{{{L},direction="upstream"}} {s["upstream_loss_percent"]/100.0}')
|
||||
print(f'wan_irtt_loss_ratio{{{L},direction="downstream"}} {s["downstream_loss_percent"]/100.0}')
|
||||
print(f'wan_irtt_late_ratio{{{L}}} {s["late_packets_percent"]/100.0}')
|
||||
print(f'wan_irtt_duplicate_ratio{{{L}}} {s["duplicate_percent"]/100.0}')
|
||||
print(f'wan_irtt_packets{{{L},kind="sent"}} {s["packets_sent"]}')
|
||||
print(f'wan_irtt_packets{{{L},kind="received"}} {s["packets_received"]}')
|
||||
print(f'wan_irtt_packets{{{L},kind="server_received"}} {s["server_packets_received"]}')
|
||||
print(f'wan_irtt_rtt_seconds{{{L},stat="{k}"}} {num(rtt.get(k)) / 1e9}')
|
||||
|
||||
def ipdv(key):
|
||||
d = s.get(key) or {}
|
||||
return num(d.get("mean")) / 1e9
|
||||
|
||||
print(f'wan_irtt_jitter_seconds{{{L},direction="round_trip"}} {ipdv("ipdv_round_trip")}')
|
||||
print(f'wan_irtt_jitter_seconds{{{L},direction="send"}} {ipdv("ipdv_send")}')
|
||||
print(f'wan_irtt_jitter_seconds{{{L},direction="receive"}} {ipdv("ipdv_receive")}')
|
||||
|
||||
print(f'wan_irtt_loss_ratio{{{L},direction="round_trip"}} {pct_ratio(s.get("packet_loss_percent"))}')
|
||||
print(f'wan_irtt_loss_ratio{{{L},direction="upstream"}} {pct_ratio(s.get("upstream_loss_percent"))}')
|
||||
print(f'wan_irtt_loss_ratio{{{L},direction="downstream"}} {pct_ratio(s.get("downstream_loss_percent"))}')
|
||||
print(f'wan_irtt_late_ratio{{{L}}} {pct_ratio(s.get("late_packets_percent"))}')
|
||||
print(f'wan_irtt_duplicate_ratio{{{L}}} {pct_ratio(s.get("duplicate_percent"))}')
|
||||
|
||||
print(f'wan_irtt_packets{{{L},kind="sent"}} {int(num(s.get("packets_sent")))}')
|
||||
print(f'wan_irtt_packets{{{L},kind="received"}} {int(num(s.get("packets_received")))}')
|
||||
print(f'wan_irtt_packets{{{L},kind="server_received"}} {int(num(s.get("server_packets_received")))}')
|
||||
|
||||
print(f'wan_irtt_success{{{L}}} 1')
|
||||
print(f'wan_probe_last_run_timestamp_seconds{{probe="irtt",{L}}} {ts}')
|
||||
|
||||
@@ -140,13 +170,16 @@ data:
|
||||
: > "$SHARED/.irtt.prom"; : > "$SHARED/.irttload.prom"; : > "$SHARED/.tput.prom"
|
||||
cp "$HDR" "$SHARED/metrics" # serve header immediately so first scrapes don't 404
|
||||
|
||||
# Concatenate fragments into the served file via temp + atomic rename.
|
||||
assemble() {
|
||||
cat "$HDR" "$SHARED/.irtt.prom" "$SHARED/.irttload.prom" "$SHARED/.tput.prom" \
|
||||
> "$SHARED/.metrics.tmp" 2>/dev/null
|
||||
mv "$SHARED/.metrics.tmp" "$SHARED/metrics"
|
||||
}
|
||||
|
||||
run_irtt() {
|
||||
# Each fragment is written to <file>.tmp then renamed, so assemble() never
|
||||
# cats a partially written file (the cause of the impossible loss spikes).
|
||||
run_irtt() { # $1 condition $2 outfile $3 duration(seconds)
|
||||
timeout "$(( $3 + 25 ))" irtt client -i "$IRTT_INTERVAL" -d "${3}s" -q $HMAC_OPT \
|
||||
-o - "${HETZNER}:${IRTT_PORT}" 2>/dev/null \
|
||||
| python3 /scripts/irtt_to_prom.py "$1" "$IRTT_TARGET" > "$2.tmp"
|
||||
@@ -154,12 +187,15 @@ data:
|
||||
}
|
||||
|
||||
run_tput() {
|
||||
P="${IPERF_PARALLEL:-4}" # parallel streams: a single stream can't fill 1 Gbps over the RTT
|
||||
P="${IPERF_PARALLEL:-4}" # parallel streams: a single stream can't fill the pipe over the RTT
|
||||
TO="$(( TPUT_TIME + 20 ))"
|
||||
TMP="$SHARED/.tput.prom.partial"
|
||||
: > "$TMP"
|
||||
timeout "$TO" iperf3 -c "$HETZNER" -p "$IPERF_PORT" -t "$TPUT_TIME" -P "$P" --connect-timeout 5000 -R -J 2>/dev/null \
|
||||
| python3 /scripts/tput_to_prom.py download "$TPUT_TARGET" > "$SHARED/.tput.prom"
|
||||
| python3 /scripts/tput_to_prom.py download "$TPUT_TARGET" > "$TMP"
|
||||
timeout "$TO" iperf3 -c "$HETZNER" -p "$IPERF_PORT" -t "$TPUT_TIME" -P "$P" --connect-timeout 5000 -J 2>/dev/null \
|
||||
| python3 /scripts/tput_to_prom.py upload "$TPUT_TARGET" >> "$SHARED/.tput.prom"
|
||||
| python3 /scripts/tput_to_prom.py upload "$TPUT_TARGET" >> "$TMP"
|
||||
mv "$TMP" "$SHARED/.tput.prom"
|
||||
}
|
||||
|
||||
last_tput=0
|
||||
|
||||
Reference in New Issue
Block a user