v0.4.0: monitoring & backup — scheduler, CPU/temp metrics, healthchecks, restic backups
Phase 2 (Monitoring & Health):
- Central job scheduler replacing ad-hoc goroutines (internal/scheduler)
- CPU usage collector via /proc/stat background sampling (internal/system/cpu_linux.go)
- Temperature reading from /sys/class/thermal + /host/sys (Docker mount)
- Load average from /proc/loadavg
- Healthchecks.io-compatible HTTP pinger (internal/monitor/pinger.go)
- System health checks: disk, memory, CPU, temp, Docker, protected containers (internal/monitor/healthcheck.go)

Phase 3 (Backups):
- Database auto-discovery via docker ps + docker inspect (internal/backup/dbdump.go)
- Database dumping via docker exec (pg_dump / mariadb-dump) with atomic writes
- Restic backup integration with auto-password generation (internal/backup/restic.go)
- Backup orchestrator: DB dumps + restic snapshots + weekly prune (internal/backup/backup.go)
- Manual backup trigger via dashboard button and POST /api/backup/run

Dashboard UI:
- CPU usage bar with load average display
- Temperature with colored indicator dot
- Backup status card with last run time, DB count, repo stats
- "Mentés most" button for manual backup trigger

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -89,11 +89,12 @@ type RetentionConfig struct {
|
||||
}
|
||||
|
||||
type MonitoringConfig struct {
|
||||
Enabled bool `yaml:"enabled"`
|
||||
HealthchecksBase string `yaml:"healthchecks_base"`
|
||||
PingUUIDs PingUUIDsConfig `yaml:"ping_uuids"`
|
||||
HealthCheckSchedule string `yaml:"health_check_schedule"`
|
||||
Thresholds ThresholdsConfig `yaml:"thresholds"`
|
||||
Enabled bool `yaml:"enabled"`
|
||||
HealthchecksBase string `yaml:"healthchecks_base"`
|
||||
PingUUIDs PingUUIDsConfig `yaml:"ping_uuids"`
|
||||
HealthCheckSchedule string `yaml:"health_check_schedule"`
|
||||
SystemHealthInterval string `yaml:"system_health_interval"`
|
||||
Thresholds ThresholdsConfig `yaml:"thresholds"`
|
||||
}
|
||||
|
||||
type PingUUIDsConfig struct {
|
||||
@@ -187,8 +188,10 @@ func applyDefaults(cfg *Config) {
|
||||
di(&cfg.Backup.Retention.KeepDaily, 7)
|
||||
di(&cfg.Backup.Retention.KeepWeekly, 4)
|
||||
di(&cfg.Backup.Retention.KeepMonthly, 6)
|
||||
d(&cfg.Backup.ResticPasswordFile, "/opt/docker/felhom-controller/data/restic-password")
|
||||
d(&cfg.Monitoring.HealthchecksBase, "https://status.felhom.eu")
|
||||
d(&cfg.Monitoring.HealthCheckSchedule, "06:00")
|
||||
d(&cfg.Monitoring.SystemHealthInterval, "5m")
|
||||
di(&cfg.Monitoring.Thresholds.DiskWarnPercent, 80)
|
||||
di(&cfg.Monitoring.Thresholds.DiskCritPercent, 90)
|
||||
di(&cfg.Monitoring.Thresholds.BackupMaxAgeHours, 36)
|
||||
@@ -217,6 +220,7 @@ func applyEnvOverrides(cfg *Config) {
|
||||
envStr("FELHOM_PATHS_STACKS_DIR", &cfg.Paths.StacksDir)
|
||||
envStr("FELHOM_PATHS_HDD_PATH", &cfg.Paths.HDDPath)
|
||||
envStr("FELHOM_LOGGING_LEVEL", &cfg.Logging.Level)
|
||||
envStr("FELHOM_MONITORING_SYSTEM_HEALTH_INTERVAL", &cfg.Monitoring.SystemHealthInterval)
|
||||
}
|
||||
|
||||
func validate(cfg *Config) error {
|
||||
|
||||
Reference in New Issue
Block a user