volume alert modified

This commit is contained in:
kisfenyo
2025-12-31 18:41:11 +01:00
parent 81398d2db7
commit af54b576ba
+73
View File
@@ -0,0 +1,73 @@
# =============================================================================
# Prometheus Alerting Rules for Longhorn
# The volume-space alerts exclude the prometheus-data PVC, since it is
# designed to run at ~95% capacity.
# =============================================================================
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-rules
  namespace: mon-system
  labels:
    app: prometheus
data:
  # The value of this key is the text of a Prometheus rule file, kept as a
  # literal block scalar. Every line below (including the '#' lines) is part
  # of that text and becomes a comment inside the rule file itself.
  longhorn-alerts.yml: |
    groups:
      - name: longhorn-volume-alerts
        rules:
          # Critical: Volume at 95% capacity (excluding prometheus-data)
          # Usage is actual size / capacity per volume, as a percentage.
          # NOTE(review): the exclusion matches a hard-coded Longhorn volume
          # name; presumably it changes if the prometheus-data PVC is ever
          # re-created, and this matcher must then be updated - confirm.
          - alert: LonghornVolumeSpaceCritical
            expr: |
              (
                (avg by (volume) (longhorn_volume_actual_size_bytes{volume!="pvc-6c6f1864-de15-4f10-9d73-8fbb678c391f"}))
                /
                (avg by (volume) (longhorn_volume_capacity_bytes{volume!="pvc-6c6f1864-de15-4f10-9d73-8fbb678c391f"}))
              ) * 100 > 95
            for: 5m
            labels:
              severity: critical
            annotations:
              summary: "Longhorn volume {{ $labels.volume }} is critically full"
              description: "Volume {{ $labels.volume }} is at {{ printf \"%.1f\" $value }}% capacity. Immediate action required."

          # Warning: Volume at 85% capacity (excluding prometheus-data)
          # Same ratio and exclusion as the critical rule, with a lower
          # threshold and a longer hold time. A volume above 95% satisfies
          # both expressions.
          # NOTE(review): if duplicate notifications are unwanted, confirm an
          # Alertmanager inhibit rule covers warning-while-critical.
          - alert: LonghornVolumeSpaceWarning
            expr: |
              (
                (avg by (volume) (longhorn_volume_actual_size_bytes{volume!="pvc-6c6f1864-de15-4f10-9d73-8fbb678c391f"}))
                /
                (avg by (volume) (longhorn_volume_capacity_bytes{volume!="pvc-6c6f1864-de15-4f10-9d73-8fbb678c391f"}))
              ) * 100 > 85
            for: 10m
            labels:
              severity: warning
            annotations:
              summary: "Longhorn volume {{ $labels.volume }} is running low on space"
              description: "Volume {{ $labels.volume }} is at {{ printf \"%.1f\" $value }}% capacity. Consider expanding or cleaning up."

          # Volume degraded
          # longhorn_volume_robustness values per the Longhorn metrics docs:
          # 0 = unknown, 1 = healthy, 2 = degraded, 3 = faulted.
          # "> 1" matches degraded and faulted only. The previous "!= 1" also
          # matched 0 (unknown), which is not a replica-health problem.
          - alert: LonghornVolumeDegraded
            expr: longhorn_volume_robustness > 1
            for: 5m
            labels:
              severity: warning
            annotations:
              summary: "Longhorn volume {{ $labels.volume }} is degraded"
              description: "Volume {{ $labels.volume }} robustness is not healthy. Check replica status."

          # Node storage pressure
          # Fires when a node's used Longhorn storage exceeds 90% of its
          # capacity for 10 minutes.
          - alert: LonghornNodeStoragePressure
            expr: |
              (
                longhorn_node_storage_usage_bytes
                /
                longhorn_node_storage_capacity_bytes
              ) * 100 > 90
            for: 10m
            labels:
              severity: warning
            annotations:
              summary: "Longhorn node {{ $labels.node }} storage pressure"
              description: "Node {{ $labels.node }} disk usage is at {{ printf \"%.1f\" $value }}%."