From 7d69d96cf3e50aa0327470b379666f92277cbe74 Mon Sep 17 00:00:00 2001 From: kisfenyo Date: Fri, 20 Feb 2026 19:56:12 +0100 Subject: [PATCH] Remove ping_uuids from example config, update architecture diagram - Comment out ping_uuids section in controller.yaml.example (deprecated) - Architecture diagram: remove status.felhom.eu, update to Hub event system - Mark Healthchecks references as deprecated throughout README Co-Authored-By: Claude Opus 4.6 --- controller/README.md | 18 +++++++++--------- controller/configs/controller.yaml.example | 7 +------ 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/controller/README.md b/controller/README.md index eb6f25c..b6c5997 100644 --- a/controller/README.md +++ b/controller/README.md @@ -49,19 +49,19 @@ A single, lightweight Go container that replaces Portainer + scattered systemd s │ │ App │ │ └──────────┘ └─────────────────────────┘│ │ │ │ stacks │ │ ┌──────────┐ ┌─────────────────────────┐│ │ │ │ (docker │ │ │Scheduler │ │ Monitor & Metrics ││ │ -│ │ compose) │ │ │(cron-like│ │ (health, pings, SQLite ││ │ +│ │ compose) │ │ │(cron-like│ │ (health, SQLite ││ │ │ └──────────┘ │ │ jobs) │ │ time-series, Chart.js) ││ │ │ │ └──────────┘ └─────────────────────────┘│ │ │ │ ┌──────────┐ ┌─────────────────────────┐│ │ │ │ │ Notify │ │ REST API + Hub Reporter ││ │ -│ │ │ (email) │ │ (JSON push to hub) ││ │ +│ │ │ (events) │ │ (JSON push + events) ││ │ │ │ └──────────┘ └─────────────────────────┘│ │ │ └────────────────────────────────────────────┘ │ └─────────────────────────────────────────────────────────────────┘ - │ pings │ JSON push │ git pull - ▼ ▼ ▼ - status.felhom.eu hub.felhom.eu gitea.dooplex.hu - (Healthchecks) (central dashboard) (stack definitions) + │ events + reports │ git pull + ▼ ▼ + hub.felhom.eu gitea.dooplex.hu + (central dashboard) (stack definitions) ``` ### Key Architecture Decisions @@ -85,7 +85,7 @@ A single, lightweight Go container that replaces Portainer + scattered systemd s | **Backup** | `internal/backup/` | Per-drive 3-layer backup: DB dumps → restic snapshots → cross-drive copies, restore | | **Storage** | `internal/storage/` | Disk scanning (`lsblk`), partitioning (`sfdisk`), formatting (`mkfs.ext4`), mounting, data migration (`rsync`) | | **System** | `internal/system/` | System info (`/proc`), CPU collector, mount points, disk usage, FS info | -| **Monitor** | `internal/monitor/` | Healthchecks.io pinger, system health checks, storage watchdog | +| **Monitor** | `internal/monitor/` | System health checks, storage watchdog, legacy Healthchecks pinger (deprecated) | | **Metrics** | `internal/metrics/` | SQLite time-series store, system + container metric collection | | **Scheduler** | `internal/scheduler/` | Central job scheduler (periodic + daily, skip-if-running, panic recovery) | | **SelfUpdate** | `internal/selfupdate/` | Version checking (registry), update trigger, state persistence, startup verification | @@ -624,7 +624,7 @@ The controller can update itself — a Watchtower-style pull-and-restart mechani ##### Design Philosophy -- **No automatic rollback** — follows the Watchtower pattern (24k+ GitHub stars, no rollback). Docker's `restart: unless-stopped` policy is the crash safety net. Healthchecks.io detects when the controller goes down. +- **No automatic rollback** — follows the Watchtower pattern (24k+ GitHub stars, no rollback). Docker's `restart: unless-stopped` policy is the crash safety net. The Hub's dead man's switch detects when the controller goes down. - **Audit state file** — `update-state.json` in the data volume records every update attempt (previous version, target version, initiator, result). Operators can SSH in and revert using `PreviousImage` from this file. - **Backup-aware** — refuses to start an update while a backup is in progress (`backupRunning()` guard). @@ -980,7 +980,7 @@ Auto-generated during deployment. Contains env vars, locked fields list, deploy |-----|------|------|---------| | status-refresh | periodic | 30s | Refresh container states | | stack-scan | periodic | 2m | Rescan stacks directory | -| heartbeat | periodic | 5m | Ping Healthchecks "I'm alive" | +| heartbeat | periodic | 5m | Legacy Healthchecks ping (deprecated — Hub handles via event system) | | system-health | periodic | configurable | Health checks + alert refresh | | backup-cache | periodic | 5m | Refresh backup status cache | | hub-report | periodic | 15m | Push report to central hub | diff --git a/controller/configs/controller.yaml.example b/controller/configs/controller.yaml.example index 1645910..69eed41 100644 --- a/controller/configs/controller.yaml.example +++ b/controller/configs/controller.yaml.example @@ -80,12 +80,7 @@ backup: monitoring: enabled: true healthchecks_base: "https://status.felhom.eu" - ping_uuids: - heartbeat: "" # Every 5 min — controller process alive - system_health: "CHANGEME-uuid-for-system-health" # Every 5 min — comprehensive system check - db_dump: "CHANGEME-uuid-for-db-dump" # Daily — after database dumps - backup: "CHANGEME-uuid-for-backup" # Daily — after restic snapshot - backup_integrity: "" # Weekly (Sunday) — restic check + # ping_uuids: (deprecated — monitoring is now handled by the Hub event system) system_health_interval: "5m" health_check_schedule: "06:00" thresholds: