v0.6.0: healthcheck + hub reporting implementation

- Add heartbeat ping (every 5 min, controller alive signal)
- Add backup integrity check (weekly restic check, Sunday 04:00)
- Add Heartbeat + BackupIntegrity fields to PingUUIDsConfig
- Add HubConfig for central hub reporting
- Add report package (types, builder, pusher) for hub push
- Wire hub reporting into scheduler (configurable interval)
- Update controller.yaml.example with new monitoring + hub sections
- Add monitoring/DEPRECATED.md for legacy bash scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-16 13:19:08 +01:00
parent 94efc39c34
commit 97074e7a0c
8 changed files with 525 additions and 9 deletions
+12 -3
View File
@@ -80,9 +80,11 @@ monitoring:
enabled: true
healthchecks_base: "https://status.felhom.eu"
ping_uuids:
-db_dump: "CHANGEME-uuid-for-db-dump"
-backup: "CHANGEME-uuid-for-backup"
-system_health: "CHANGEME-uuid-for-system-health"
+heartbeat: "" # Every 5 min — controller process alive
+system_health: "CHANGEME-uuid-for-system-health" # Every 5 min — comprehensive system check
+db_dump: "CHANGEME-uuid-for-db-dump" # Daily — after database dumps
+backup: "CHANGEME-uuid-for-backup" # Daily — after restic snapshot
+backup_integrity: "" # Weekly (Sunday) — restic check
system_health_interval: "5m"
health_check_schedule: "06:00"
thresholds:
@@ -93,6 +95,13 @@ monitoring:
memory_warn_percent: 85
temperature_warn_celsius: 75
+# --- Central hub (operator dashboard) ---
+hub:
+enabled: false # Enable central reporting
+url: "https://hub.felhom.eu" # Hub API endpoint
+api_key: "" # Shared secret for authentication
+push_interval: "15m" # How often to push reports
# --- Self-update ---
self_update:
enabled: true