volume alert modified
This commit is contained in:
@@ -0,0 +1,73 @@
|
|||||||
|
# =============================================================================
|
||||||
|
# Prometheus Alerting Rules for Longhorn
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Prometheus Alerting Rules for Longhorn
|
||||||
|
# Excludes prometheus-data PVC since it's designed to run at ~95% capacity
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# ConfigMap carrying the Prometheus alerting rules for Longhorn storage.
# The rule file is stored under the key `longhorn-alerts.yml`.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-rules
  namespace: mon-system
  labels:
    app: prometheus
data:
  longhorn-alerts.yml: |
    groups:
      - name: longhorn-volume-alerts
        rules:
          # NOTE: the volume ID excluded in the two space alerts below is
          # hard-coded and is presumably the Longhorn volume backing the
          # prometheus-data PVC (verify against the cluster). If that PVC is
          # ever recreated the ID changes and both matchers must be updated.

          # Critical: Volume at 95% capacity (excluding prometheus-data)
          - alert: LonghornVolumeSpaceCritical
            expr: |
              (
                (avg by (volume) (longhorn_volume_actual_size_bytes{volume!="pvc-6c6f1864-de15-4f10-9d73-8fbb678c391f"}))
                /
                (avg by (volume) (longhorn_volume_capacity_bytes{volume!="pvc-6c6f1864-de15-4f10-9d73-8fbb678c391f"}))
              ) * 100 > 95
            for: 5m
            labels:
              severity: critical
            annotations:
              summary: "Longhorn volume {{ $labels.volume }} is critically full"
              description: "Volume {{ $labels.volume }} is at {{ printf \"%.1f\" $value }}% capacity. Immediate action required."

          # Warning: Volume at 85% capacity (excluding prometheus-data)
          # This expression has no upper bound, so it also matches volumes
          # that are above the 95% critical threshold.
          - alert: LonghornVolumeSpaceWarning
            expr: |
              (
                (avg by (volume) (longhorn_volume_actual_size_bytes{volume!="pvc-6c6f1864-de15-4f10-9d73-8fbb678c391f"}))
                /
                (avg by (volume) (longhorn_volume_capacity_bytes{volume!="pvc-6c6f1864-de15-4f10-9d73-8fbb678c391f"}))
              ) * 100 > 85
            for: 10m
            labels:
              severity: warning
            annotations:
              summary: "Longhorn volume {{ $labels.volume }} is running low on space"
              description: "Volume {{ $labels.volume }} is at {{ printf \"%.1f\" $value }}% capacity. Consider expanding or cleaning up."

          # Volume degraded or faulted
          # longhorn_volume_robustness values per the Longhorn metrics docs:
          # 0 = unknown, 1 = healthy, 2 = degraded, 3 = faulted.
          # "> 1" is used instead of "!= 1" so that volumes reporting 0
          # (unknown) do not raise a false "degraded" alert.
          - alert: LonghornVolumeDegraded
            expr: longhorn_volume_robustness > 1
            for: 5m
            labels:
              severity: warning
            annotations:
              summary: "Longhorn volume {{ $labels.volume }} is degraded"
              description: "Volume {{ $labels.volume }} robustness is not healthy. Check replica status."

          # Node storage pressure: used storage above 90% of node capacity
          - alert: LonghornNodeStoragePressure
            expr: |
              (
                longhorn_node_storage_usage_bytes
                /
                longhorn_node_storage_capacity_bytes
              ) * 100 > 90
            for: 10m
            labels:
              severity: warning
            annotations:
              summary: "Longhorn node {{ $labels.node }} storage pressure"
              description: "Node {{ $labels.node }} disk usage is at {{ printf \"%.1f\" $value }}%."
|
||||||
Reference in New Issue
Block a user