v0.6.0: healthcheck + hub reporting implementation

- Add heartbeat ping (every 5 min, controller alive signal)
- Add backup integrity check (weekly restic check, Sunday 04:00)
- Add Heartbeat + BackupIntegrity fields to PingUUIDsConfig
- Add HubConfig for central hub reporting
- Add report package (types, builder, pusher) for hub push
- Wire hub reporting into scheduler (configurable interval)
- Update controller.yaml.example with new monitoring + hub sections
- Add monitoring/DEPRECATED.md for legacy bash scripts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-16 13:19:08 +01:00
parent 94efc39c34
commit 97074e7a0c
8 changed files with 525 additions and 9 deletions
+14 -3
View File
@@ -20,6 +20,7 @@ type Config struct {
Stacks StacksConfig `yaml:"stacks"`
Backup BackupConfig `yaml:"backup"`
Monitoring MonitoringConfig `yaml:"monitoring"`
Hub HubConfig `yaml:"hub"`
SelfUpdate SelfUpdateConfig `yaml:"self_update"`
Notifications NotificationsConfig `yaml:"notifications"`
Logging LoggingConfig `yaml:"logging"`
@@ -98,9 +99,11 @@ type MonitoringConfig struct {
}
// PingUUIDsConfig holds one string per monitored check, each read from the
// YAML key named in its struct tag (heartbeat, db_dump, backup,
// system_health, backup_integrity).
//
// NOTE(review): DBDump, Backup and SystemHealth are listed twice below. This
// looks like a diff rendering that shows both the removed lines and their
// re-aligned replacements; the real struct presumably declares each field
// once — confirm against the actual file.
type PingUUIDsConfig struct {
DBDump string `yaml:"db_dump"`
Backup string `yaml:"backup"`
SystemHealth string `yaml:"system_health"`
// Heartbeat is read from `heartbeat`. Presumably the ping identifier for the
// controller-alive signal — TODO confirm where it is consumed.
Heartbeat string `yaml:"heartbeat"`
DBDump string `yaml:"db_dump"`
Backup string `yaml:"backup"`
SystemHealth string `yaml:"system_health"`
// BackupIntegrity is read from `backup_integrity`. Presumably the ping
// identifier for the backup integrity check — TODO confirm where it is consumed.
BackupIntegrity string `yaml:"backup_integrity"`
}
type ThresholdsConfig struct {
@@ -136,6 +139,13 @@ type AssetsConfig struct {
SourceURL string `yaml:"source_url"` // Only used during build, not runtime
}
// HubConfig holds the settings read from the top-level `hub` YAML section
// (the Hub field of Config).
type HubConfig struct {
// Enabled is read from `enabled`. Presumably gates hub reporting — confirm
// at the call site.
Enabled bool `yaml:"enabled"`
// URL is read from `url`. Presumably the hub endpoint that reports are
// pushed to — TODO confirm.
URL string `yaml:"url"`
// APIKey is read from `api_key`. NOTE(review): looks like a credential;
// verify it is never written to logs.
APIKey string `yaml:"api_key"`
// PushInterval is read from `push_interval` and kept as a string.
// applyDefaults passes "15m" for it to the d helper, so it is presumably a
// duration string defaulting to 15 minutes — confirm where it is parsed.
PushInterval string `yaml:"push_interval"`
}
// Load reads and parses the config file, applies defaults, and validates.
func Load(path string) (*Config, error) {
data, err := os.ReadFile(path)
@@ -198,6 +208,7 @@ func applyDefaults(cfg *Config) {
di(&cfg.Monitoring.Thresholds.CPUWarnPercent, 90)
di(&cfg.Monitoring.Thresholds.MemoryWarnPercent, 85)
di(&cfg.Monitoring.Thresholds.TemperatureWarnCelsius, 75)
d(&cfg.Hub.PushInterval, "15m")
d(&cfg.SelfUpdate.CheckInterval, "6h")
di(&cfg.SelfUpdate.HealthTimeoutSeconds, 60)
d(&cfg.Logging.Level, "info")