volume alert modified
This commit is contained in:
@@ -0,0 +1,73 @@
# =============================================================================
# Prometheus Alerting Rules for Longhorn
# Excludes prometheus-data PVC since it's designed to run at ~95% capacity
# =============================================================================

---
# ConfigMap carrying the Prometheus rule file for Longhorn storage alerts.
# The single data key, longhorn-alerts.yml, holds one rule group with
# volume-capacity, volume-robustness and node-storage alerts.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-rules
  namespace: mon-system
  labels:
    app: prometheus
data:
  longhorn-alerts.yml: |
    groups:
      - name: longhorn-volume-alerts
        rules:
          # NOTE: the volume!="pvc-6c6f1864-..." matcher below is the
          # prometheus-data exclusion. It is a hard-coded volume ID, so it
          # presumably has to be updated if that PVC is ever recreated.

          # Critical: Volume at 95% capacity (excluding prometheus-data)
          - alert: LonghornVolumeSpaceCritical
            expr: |
              (
                (avg by (volume) (longhorn_volume_actual_size_bytes{volume!="pvc-6c6f1864-de15-4f10-9d73-8fbb678c391f"}))
                /
                (avg by (volume) (longhorn_volume_capacity_bytes{volume!="pvc-6c6f1864-de15-4f10-9d73-8fbb678c391f"}))
              ) * 100 > 95
            for: 5m
            labels:
              severity: critical
            annotations:
              summary: "Longhorn volume {{ $labels.volume }} is critically full"
              description: "Volume {{ $labels.volume }} is at {{ printf \"%.1f\" $value }}% capacity. Immediate action required."

          # Warning: Volume at 85% capacity (excluding prometheus-data)
          # This keeps firing above 95% alongside the critical alert.
          - alert: LonghornVolumeSpaceWarning
            expr: |
              (
                (avg by (volume) (longhorn_volume_actual_size_bytes{volume!="pvc-6c6f1864-de15-4f10-9d73-8fbb678c391f"}))
                /
                (avg by (volume) (longhorn_volume_capacity_bytes{volume!="pvc-6c6f1864-de15-4f10-9d73-8fbb678c391f"}))
              ) * 100 > 85
            for: 10m
            labels:
              severity: warning
            annotations:
              summary: "Longhorn volume {{ $labels.volume }} is running low on space"
              description: "Volume {{ $labels.volume }} is at {{ printf \"%.1f\" $value }}% capacity. Consider expanding or cleaning up."

          # Volume degraded
          # longhorn_volume_robustness: 0=unknown, 1=healthy, 2=degraded,
          # 3=faulted. Match 2 explicitly; "!= 1" would also fire for
          # volumes reporting 0 (unknown).
          - alert: LonghornVolumeDegraded
            expr: longhorn_volume_robustness == 2
            for: 5m
            labels:
              severity: warning
            annotations:
              summary: "Longhorn volume {{ $labels.volume }} is degraded"
              description: "Volume {{ $labels.volume }} robustness is not healthy. Check replica status."

          # Volume faulted (robustness 3), split out from the degraded
          # alert so it can carry critical severity.
          - alert: LonghornVolumeFaulted
            expr: longhorn_volume_robustness == 3
            for: 5m
            labels:
              severity: critical
            annotations:
              summary: "Longhorn volume {{ $labels.volume }} is faulted"
              description: "Volume {{ $labels.volume }} is faulted. Check replica status immediately."

          # Node storage pressure
          - alert: LonghornNodeStoragePressure
            expr: |
              (
                longhorn_node_storage_usage_bytes
                /
                longhorn_node_storage_capacity_bytes
              ) * 100 > 90
            for: 10m
            labels:
              severity: warning
            annotations:
              summary: "Longhorn node {{ $labels.node }} storage pressure"
              description: "Node {{ $labels.node }} disk usage is at {{ printf \"%.1f\" $value }}%."
Reference in New Issue
Block a user