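Update WAN alert rules: replace invalid mul/div template calls with Prometheus humanize functions and show measured values in latency/jitter descriptions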

This commit is contained in:
2026-06-07 11:20:30 +02:00
parent e0fd669f7c
commit ef77ab9285
+9 -7
View File
@@ -216,6 +216,8 @@ data:
# "normal conditions" figures: 700 Mbit/s down / 28 Mbit/s up. # "normal conditions" figures: 700 Mbit/s down / 28 Mbit/s up.
# Throughput is sampled every ~15 min, so `for:` spans >=2 samples to avoid # Throughput is sampled every ~15 min, so `for:` spans >=2 samples to avoid
# firing on a single fluke. Recalibrate floors after a week of baseline data. # firing on a single fluke. Recalibrate floors after a week of baseline data.
# NOTE: uses Prometheus template funcs (humanize/humanizePercentage/humanizeDuration);
# mul/div are NOT valid Prometheus template functions.
wan-alerts.yml: | wan-alerts.yml: |
groups: groups:
- name: wan-quality-alerts - name: wan-quality-alerts
@@ -228,7 +230,7 @@ data:
severity: warning severity: warning
annotations: annotations:
summary: "WAN upstream packet loss to {{ $labels.target }}" summary: "WAN upstream packet loss to {{ $labels.target }}"
description: "irtt upstream loss {{ printf \"%.2f\" (mul $value 100) }}% (>1%) for 2m. Cable-upstream symptom; capture for ISP." description: "irtt upstream loss {{ $value | humanizePercentage }} (>1%) for 2m. Cable-upstream symptom; capture for ISP."
- alert: WanDownstreamPacketLoss - alert: WanDownstreamPacketLoss
expr: wan_irtt_loss_ratio{direction="downstream",condition="idle"} > 0.01 expr: wan_irtt_loss_ratio{direction="downstream",condition="idle"} > 0.01
@@ -237,7 +239,7 @@ data:
severity: warning severity: warning
annotations: annotations:
summary: "WAN downstream packet loss to {{ $labels.target }}" summary: "WAN downstream packet loss to {{ $labels.target }}"
description: "irtt downstream loss {{ printf \"%.2f\" (mul $value 100) }}% (>1%) for 2m." description: "irtt downstream loss {{ $value | humanizePercentage }} (>1%) for 2m."
# --- latency / jitter --- # --- latency / jitter ---
- alert: WanLatencyHigh - alert: WanLatencyHigh
@@ -247,7 +249,7 @@ data:
severity: warning severity: warning
annotations: annotations:
summary: "WAN RTT spikes to {{ $labels.target }}" summary: "WAN RTT spikes to {{ $labels.target }}"
description: "irtt max RTT > 80 ms for 5m (idle). Real-time apps will feel this." description: "irtt max RTT {{ $value | humanizeDuration }} (>80 ms) for 5m (idle). Real-time apps will feel this."
- alert: WanJitterHigh - alert: WanJitterHigh
expr: wan_irtt_jitter_seconds{direction="round_trip",condition="idle"} > 0.03 expr: wan_irtt_jitter_seconds{direction="round_trip",condition="idle"} > 0.03
@@ -256,7 +258,7 @@ data:
severity: warning severity: warning
annotations: annotations:
summary: "WAN jitter high to {{ $labels.target }}" summary: "WAN jitter high to {{ $labels.target }}"
description: "Round-trip jitter > 30 ms for 5m. Degrades VoIP/video." description: "Round-trip jitter {{ $value | humanizeDuration }} (>30 ms) for 5m. Degrades VoIP/video."
# --- bufferbloat: latency added while the line is saturated --- # --- bufferbloat: latency added while the line is saturated ---
- alert: WanBufferbloat - alert: WanBufferbloat
@@ -270,7 +272,7 @@ data:
severity: info severity: info
annotations: annotations:
summary: "WAN bufferbloat on {{ $labels.target }}" summary: "WAN bufferbloat on {{ $labels.target }}"
description: "RTT rises {{ printf \"%.0f\" (mul $value 1000) }} ms under load (>100 ms). Line buckles when saturated." description: "RTT rises {{ $value | humanizeDuration }} under load (>100 ms). Line buckles when saturated."
# --- throughput vs One.hu "normal" 700/28 (alert below 50%) --- # --- throughput vs One.hu "normal" 700/28 (alert below 50%) ---
- alert: WanDownloadDegraded - alert: WanDownloadDegraded
@@ -280,7 +282,7 @@ data:
severity: warning severity: warning
annotations: annotations:
summary: "WAN download below half of plan" summary: "WAN download below half of plan"
description: "Download {{ printf \"%.0f\" (div $value 1e6) }} Mbit/s (< 350, half of 700 normal) for 20m." description: "Download {{ $value | humanize }}bit/s (< 350M, half of 700 normal) for 20m."
- alert: WanUploadDegraded - alert: WanUploadDegraded
expr: wan_throughput_bits_per_second{direction="upload"} < 14e6 expr: wan_throughput_bits_per_second{direction="upload"} < 14e6
@@ -289,7 +291,7 @@ data:
severity: warning severity: warning
annotations: annotations:
summary: "WAN upload below half of plan" summary: "WAN upload below half of plan"
description: "Upload {{ printf \"%.1f\" (div $value 1e6) }} Mbit/s (< 14, half of 28 normal) for 20m." description: "Upload {{ $value | humanize }}bit/s (< 14M, half of 28 normal) for 20m."
# --- the monitor itself stopped producing data --- # --- the monitor itself stopped producing data ---
- alert: WanProbeStalled - alert: WanProbeStalled