v0.22.0: First-run setup wizard, local infra backup, hub verification

New controller features:
- Web-based setup wizard replaces docker-setup.sh interactive config
  - Dual listener: :8080 (Traefik) + :8081 (direct HTTP for LAN)
  - Drive scanner finds .felhom-infra-backup/ on all block devices
  - Hub recovery pull (GET /api/v1/recovery/{id}) with retrieval password
  - Fresh install: Hub config download or manual wizard
  - CSRF protection, state persistence, Hungarian UI
- Local infra backup written to all connected drives after each backup cycle
  - .felhom-infra-backup/backup.json + metadata.json with SHA256 checksum
- Hub verification: parse customer_blocked from report push response
  - Limited mode after 7 days without verification
- Recovery info page on Settings + recovery-info.txt file generation
- Pending events queue: queued DR events are delivered after the next successful report push to the Hub
- docker-setup.sh v6.0.0: removed the interactive wizard; the script now writes only a minimal controller.yaml

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-21 12:33:17 +01:00
parent e217c3a445
commit 6eb75204b6
28 changed files with 2970 additions and 505 deletions
+151 -113
View File
@@ -2,7 +2,6 @@ package main
import (
"context"
"encoding/base64"
"encoding/json"
"flag"
"fmt"
@@ -22,14 +21,16 @@ import (
"gitea.dooplex.hu/admin/felhom-controller/internal/metrics"
"gitea.dooplex.hu/admin/felhom-controller/internal/monitor"
"gitea.dooplex.hu/admin/felhom-controller/internal/notify"
"gitea.dooplex.hu/admin/felhom-controller/internal/recovery"
"gitea.dooplex.hu/admin/felhom-controller/internal/report"
"gitea.dooplex.hu/admin/felhom-controller/internal/scheduler"
"gitea.dooplex.hu/admin/felhom-controller/internal/selfupdate"
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
"gitea.dooplex.hu/admin/felhom-controller/internal/setup"
"gitea.dooplex.hu/admin/felhom-controller/internal/stacks"
"gitea.dooplex.hu/admin/felhom-controller/internal/storage"
catalogsync "gitea.dooplex.hu/admin/felhom-controller/internal/sync"
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
"gitea.dooplex.hu/admin/felhom-controller/internal/storage"
"gitea.dooplex.hu/admin/felhom-controller/internal/web"
)
@@ -51,12 +52,23 @@ func main() {
}
// --- Load configuration ---
cfg, err := config.Load(*configPath)
// Use LoadPermissive to tolerate minimal configs (e.g. only domain set by docker-setup.sh).
// If even that fails (file missing/unreadable), fall back to defaults.
cfg, err := config.LoadPermissive(*configPath)
if err != nil {
log.Fatalf("[FATAL] Failed to load config from %s: %v", *configPath, err)
cfg = config.Default()
log.Printf("[WARN] Config load failed (%s), using defaults: %v", *configPath, err)
}
logger := setupLogger(cfg)
// --- Setup mode: if no customer ID configured, run setup wizard ---
if setup.NeedsSetup(cfg) {
logger.Printf("[INFO] felhom-controller %s — setup mode", Version)
runSetupMode(cfg, logger)
return
}
logger.Printf("[INFO] felhom-controller %s starting (customer: %s, domain: %s)",
Version, cfg.Customer.ID, cfg.Customer.Domain)
@@ -67,79 +79,6 @@ func main() {
logger.Fatalf("[FATAL] Failed to load settings from %s: %v", settingsPath, err)
}
// --- Detect fresh deployment (Phase 2+3: DR restore from Hub) ---
var restorePlan *backup.RestorePlan
isFreshDeployment := !fileExists(settingsPath)
if isFreshDeployment && cfg.Hub.Enabled && cfg.Hub.URL != "" {
logger.Println("[INFO] Fresh deployment detected — checking Hub for infra backup")
ib, pullErr := report.PullInfraBackup(cfg.Hub.URL, cfg.Hub.APIKey, cfg.Customer.ID)
if pullErr != nil {
logger.Printf("[WARN] Could not reach Hub for infra backup: %v", pullErr)
} else if ib != nil {
logger.Printf("[INFO] Found infra backup on Hub: %s (%s), %d stacks, synced %s",
ib.Domain, ib.CustomerID, len(ib.DeployedStacks), ib.Timestamp)
// Restore settings.json from Hub backup first
restoreSettingsFromHub(ib, cfg, logger)
// Re-load settings (now from restored file)
if restoredSett, loadErr := settings.Load(settingsPath, logger); loadErr == nil {
sett = restoredSett
logger.Println("[INFO] Settings reloaded after Hub restore")
}
// Restore restic passwords AFTER settings reload so cross-drive password persists
restorePasswordsFromHub(ib, cfg, sett, logger)
// Mount drives using stored disk layout
mountCtx, mountCancel := context.WithTimeout(context.Background(), 2*time.Minute)
mountedPaths, mountErr := backup.MountDrivesFromLayout(mountCtx, ib.DiskLayout, logger)
mountCancel()
if mountErr != nil {
logger.Printf("[WARN] Drive mounting error: %v", mountErr)
} else if len(mountedPaths) > 0 {
logger.Printf("[INFO] Mounted %d drives from Hub disk layout: %v", len(mountedPaths), mountedPaths)
} else {
logger.Println("[INFO] No matching drives found to mount from Hub disk layout")
}
// Phase 3: Scan mounted drives for backup data and build restore plan
if len(ib.DeployedStacks) > 0 {
// Collect mount paths from disk layout
var drivePaths []string
for _, dm := range ib.DiskLayout.Mounts {
if dm.MountPoint != "" {
drivePaths = append(drivePaths, dm.MountPoint)
}
}
// Convert report stacks to backup scan format
var infraStacks []backup.InfraStackInfo
for _, s := range ib.DeployedStacks {
infraStacks = append(infraStacks, backup.InfraStackInfo{
Name: s.Name,
DisplayName: s.DisplayName,
HDDPath: s.HDDPath,
NeedsHDD: s.NeedsHDD,
})
}
restorePlan = backup.ScanDrivesForBackups(drivePaths, infraStacks, logger)
if restorePlan != nil {
restorePlan.CustomerID = ib.CustomerID
restorePlan.Domain = ib.Domain
restorePlan.Timestamp = ib.Timestamp
logger.Printf("[INFO] DR restore plan ready: %d apps to restore", len(restorePlan.Apps))
} else {
logger.Println("[WARN] ScanDrivesForBackups returned nil — no restore plan created")
}
}
} else {
logger.Println("[INFO] No infra backup found on Hub for this customer")
}
}
// --- Auto-discover storage paths from deployed apps ---
discoveredPaths := discoverHDDPaths(cfg.Paths.StacksDir, logger)
sett.AutoDiscoverStoragePaths(discoveredPaths, cfg.Paths.HDDPath, logger)
@@ -304,6 +243,15 @@ func main() {
var hubPusher *report.Pusher
if cfg.Hub.URL != "" && cfg.Hub.APIKey != "" {
hubPusher = report.NewPusher(&cfg.Hub, logger)
// Wire hub verification: update settings when hub reports customer status
hubPusher.OnPushResponse = func(resp *report.PushResponse) {
if resp.CustomerBlocked {
sett.SetHubVerified(false, time.Now())
logger.Printf("[WARN] Customer blocked on Hub — new deployments may be restricted")
} else {
sett.SetHubVerified(true, time.Now())
}
}
// Wire hub push status into alert manager for dashboard alerts
alertMgr.SetHubPushStatus(func() web.HubPushStatusData {
s := hubPusher.GetStatus()
@@ -350,6 +298,8 @@ func main() {
if hubPusher != nil && cfg.Hub.Enabled {
go pushInfraBackup(cfg, sett, stackProv, hubPusher, logger)
}
// Write local infra backup to all connected drives
go writeLocalInfraBackup(cfg, sett, stackProv, logger)
return err
})
@@ -397,7 +347,17 @@ func main() {
}
sched.Every("hub-report", pushInterval, func(ctx context.Context) error {
r := report.BuildReport(cfg, *configPath, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths())
return hubPusher.Push(r)
if err := hubPusher.Push(r); err != nil {
return err
}
// Drain pending events (e.g., DR recovery completed) after successful push
if events := sett.DrainPendingEvents(); len(events) > 0 {
for _, ev := range events {
notifier.Notify(ev.EventType, ev.Severity, ev.Message, ev.Details)
}
logger.Printf("[INFO] Drained %d pending events to Hub", len(events))
}
return nil
})
logger.Printf("[INFO] Hub reporting enabled (every %s to %s)", pushInterval, cfg.Hub.URL)
} else {
@@ -468,6 +428,22 @@ func main() {
sched.Start(ctx)
defer sched.Stop()
// Generate recovery info file if retrieval password is set
if rp := sett.GetRetrievalPassword(); rp != "" {
go func() {
info := recovery.Info{
CustomerID: cfg.Customer.ID,
RetrievalPassword: rp,
HubURL: cfg.Hub.URL,
SupportEmail: "support@felhom.eu",
SupportURL: "https://felhom.eu/kapcsolat",
}
if err := recovery.GenerateRecoveryFile(info, Version, cfg.Paths.DataDir); err != nil {
logger.Printf("[WARN] Failed to generate recovery-info.txt: %v", err)
}
}()
}
// Fire startup pings + hub report immediately (don't wait for first scheduler tick)
go func() {
time.Sleep(5 * time.Second) // Let all subsystems fully initialize
@@ -511,6 +487,8 @@ func main() {
}
// Also push infra backup on startup
go pushInfraBackup(cfg, sett, stackProv, hubPusher, logger)
// Write local infra backup to all connected drives
go writeLocalInfraBackup(cfg, sett, stackProv, logger)
} else {
// Send a minimal "disabled" notification so hub knows reporting is intentionally off
r := &report.Report{
@@ -632,12 +610,6 @@ func main() {
backupMgr.MigrationActiveCheck = driveMigrator.IsActive
}
// Phase 3: Set DR restore mode if a restore plan was built
if restorePlan != nil && len(restorePlan.Apps) > 0 {
webServer.SetRestoreState(restorePlan)
logger.Println("[INFO] DR restore mode activated — all web routes redirect to /restore")
}
// --- Build HTTP mux ---
mux := http.NewServeMux()
@@ -923,46 +895,112 @@ func fileExists(path string) bool {
return err == nil
}
// restorePasswordsFromHub restores restic passwords from a Hub infra backup.
func restorePasswordsFromHub(ib *report.InfraBackup, cfg *config.Config,
sett *settings.Settings, logger *log.Logger) {
// runSetupMode starts the setup wizard on dual listeners and blocks until signal.
//
// Two http.Server instances are started, one on cfg.Web.Listen and one on
// cfg.Web.SetupListen. Both serve the same mux: "/api/health" answers with a
// JSON document carrying "setup_mode": true, and every other path is handed to
// the setup server's handler. The function returns only after SIGINT or
// SIGTERM has been received and both servers have been asked to shut down.
//
// A listener that fails to start is only logged; the function keeps waiting
// for a signal even if neither listener is serving.
//
// NOTE(review): this text comes from a diff rendering. The statements below
// that reference ib.ResticPassword / cfg.Backup.ResticPasswordFile look like
// removed lines of the old restorePasswordsFromHub body and are probably not
// part of this function — confirm against the real file.
func runSetupMode(cfg *config.Config, logger *log.Logger) {
// Log how the wizard can be reached (domain, detected local IPs, LAN listen address).
ips := setup.DetectLocalIPs()
setup.LogSetupMode(cfg.Customer.Domain, ips, cfg.Web.SetupListen, logger)
if ib.ResticPassword != "" {
decoded, err := base64.StdEncoding.DecodeString(ib.ResticPassword)
if err == nil && len(decoded) > 0 {
dir := filepath.Dir(cfg.Backup.ResticPasswordFile)
if err := os.MkdirAll(dir, 0700); err != nil {
logger.Printf("[WARN] Failed to create restic password directory %s: %v", dir, err)
} else if err := os.WriteFile(cfg.Backup.ResticPasswordFile, decoded, 0600); err == nil {
logger.Println("[INFO] Primary restic password restored from Hub")
} else {
logger.Printf("[WARN] Failed to write restic password file: %v", err)
}
}
setupSrv := setup.NewServer(cfg, cfg.Paths.DataDir, logger, Version)
handler := setupSrv.Handler()
// Health endpoint wrapper (returns setup_mode: true)
// The Encode error is not checked; a failed write to the client goes unreported.
healthHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(map[string]interface{}{
"ok": true, "message": "felhom-controller is healthy",
"setup_mode": true, "version": Version,
})
})
// Mux for both listeners
mux := http.NewServeMux()
mux.HandleFunc("/api/health", healthHandler)
mux.Handle("/", handler)
// Start main listener (:8080, behind Traefik for domain access)
mainServer := &http.Server{
Addr: cfg.Web.Listen,
Handler: mux,
ReadTimeout: 30 * time.Second,
WriteTimeout: 60 * time.Second,
IdleTimeout: 120 * time.Second,
}
go func() {
logger.Printf("[INFO] Setup wizard (main) listening on %s", cfg.Web.Listen)
// http.ErrServerClosed is the expected result of Shutdown below; anything else is logged.
if err := mainServer.ListenAndServe(); err != http.ErrServerClosed {
logger.Printf("[ERROR] Main HTTP server error: %v", err)
}
}()
// Start setup-only listener (:8081, direct HTTP for LAN access)
setupServer := &http.Server{
Addr: cfg.Web.SetupListen,
Handler: mux,
ReadTimeout: 30 * time.Second,
WriteTimeout: 60 * time.Second,
IdleTimeout: 120 * time.Second,
}
go func() {
logger.Printf("[INFO] Setup wizard (LAN) listening on %s", cfg.Web.SetupListen)
if err := setupServer.ListenAndServe(); err != http.ErrServerClosed {
logger.Printf("[ERROR] Setup HTTP server error: %v", err)
}
}()
// Wait for signal
sigCh := make(chan os.Signal, 1)
signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
sig := <-sigCh
logger.Printf("[INFO] Received signal %v, shutting down setup wizard...", sig)
// Both servers share one 10-second shutdown deadline and are shut down one
// after the other; the errors returned by Shutdown are discarded.
shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 10*time.Second)
defer shutdownCancel()
mainServer.Shutdown(shutdownCtx)
setupServer.Shutdown(shutdownCtx)
logger.Println("[INFO] Setup wizard stopped")
}
// restoreSettingsFromHub restores settings.json from a Hub infra backup.
func restoreSettingsFromHub(ib *report.InfraBackup, cfg *config.Config, logger *log.Logger) {
if ib.SettingsJSONB64 == "" {
return
}
decoded, err := base64.StdEncoding.DecodeString(ib.SettingsJSONB64)
// writeLocalInfraBackup builds an infra snapshot and writes it to all connected drives.
//
// The snapshot is produced by report.BuildInfraBackup, serialized with
// json.Marshal, and handed to backup.WriteLocalInfraBackup together with the
// list of target drive paths. The function returns nothing: build and marshal
// failures are logged at WARN level and abort the write, and nothing is
// written when no target drive is found.
//
// NOTE(review): this text comes from a diff rendering. The statements below
// that reference decoded, settingsPath, or "settings restore"/"restored
// settings" look like removed lines of the old restoreSettingsFromHub and are
// probably not part of this function — confirm against the real file.
func writeLocalInfraBackup(cfg *config.Config, sett *settings.Settings,
stackProv *stackAdapter, logger *log.Logger) {
// NOTE(review): the controller.yaml path is hard-coded here, whereas main()
// passes *configPath to report.BuildReport — confirm the two cannot diverge.
ib, err := report.BuildInfraBackup(
cfg.Customer.ID, cfg.Customer.Domain, Version,
"/opt/docker/felhom-controller/controller.yaml",
filepath.Join(cfg.Paths.DataDir, "settings.json"),
cfg.Backup.ResticPasswordFile,
cfg.Paths.SystemDataPath,
sett, stackProv, logger,
)
if err != nil {
logger.Printf("[WARN] Failed to decode settings from Hub: %v", err)
logger.Printf("[WARN] Failed to build infra backup for local write: %v", err)
return
}
if err := os.MkdirAll(cfg.Paths.DataDir, 0755); err != nil {
logger.Printf("[WARN] Failed to create data directory for settings restore: %v", err)
data, err := json.Marshal(ib)
if err != nil {
logger.Printf("[WARN] Failed to marshal infra backup for local write: %v", err)
return
}
settingsPath := filepath.Join(cfg.Paths.DataDir, "settings.json")
if err := os.WriteFile(settingsPath, decoded, 0600); err != nil {
logger.Printf("[WARN] Failed to write restored settings.json: %v", err)
} else {
logger.Println("[INFO] Settings restored from Hub backup")
// Collect all connected drive paths (skip disconnected and decommissioned)
var drives []string
for _, sp := range sett.GetStoragePaths() {
if !sp.Disconnected && !sp.Decommissioned {
drives = append(drives, sp.Path)
}
}
// Also include system data path if set
// No de-duplication is done: if the system data path is also one of the
// storage paths, it appears in the list twice.
if cfg.Paths.SystemDataPath != "" {
drives = append(drives, cfg.Paths.SystemDataPath)
}
if len(drives) == 0 {
logger.Println("[DEBUG] No connected drives for local infra backup")
return
}
// Nothing is returned from this call to inspect here; per-drive outcomes are
// presumably reported through the logger passed in — verify in the backup package.
backup.WriteLocalInfraBackup(data, cfg.Customer.ID, Version, ib.Timestamp, drives, logger)
}
// discoverHDDPaths scans deployed apps' app.yaml for HDD_PATH env values.