v0.22.0: First-run setup wizard, local infra backup, hub verification
New controller features:
- Web-based setup wizard replaces docker-setup.sh interactive config
- Dual listener: :8080 (Traefik) + :8081 (direct HTTP for LAN)
- Drive scanner finds .felhom-infra-backup/ on all block devices
- Hub recovery pull (GET /api/v1/recovery/{id}) with retrieval password
- Fresh install: Hub config download or manual wizard
- CSRF protection, state persistence, Hungarian UI
- Local infra backup written to all connected drives after each backup cycle
- .felhom-infra-backup/backup.json + metadata.json with SHA256 checksum
- Hub verification: parse customer_blocked from report push response
- Limited mode after 7 days without verification
- Recovery info page on Settings + recovery-info.txt file generation
- Pending events queue: DR events sent to Hub on next report push
- docker-setup.sh v6.0.0: removed interactive wizard, minimal controller.yaml only
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+151
-113
@@ -2,7 +2,6 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
@@ -22,14 +21,16 @@ import (
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/metrics"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/monitor"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/notify"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/recovery"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/report"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/scheduler"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/selfupdate"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/setup"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/stacks"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/storage"
|
||||
catalogsync "gitea.dooplex.hu/admin/felhom-controller/internal/sync"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/storage"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/web"
|
||||
)
|
||||
|
||||
@@ -51,12 +52,23 @@ func main() {
|
||||
}
|
||||
|
||||
// --- Load configuration ---
|
||||
cfg, err := config.Load(*configPath)
|
||||
// Use LoadPermissive to tolerate minimal configs (e.g. only domain set by docker-setup.sh).
|
||||
// If even that fails (file missing/unreadable), fall back to defaults.
|
||||
cfg, err := config.LoadPermissive(*configPath)
|
||||
if err != nil {
|
||||
log.Fatalf("[FATAL] Failed to load config from %s: %v", *configPath, err)
|
||||
cfg = config.Default()
|
||||
log.Printf("[WARN] Config load failed (%s), using defaults: %v", *configPath, err)
|
||||
}
|
||||
|
||||
logger := setupLogger(cfg)
|
||||
|
||||
// --- Setup mode: if no customer ID configured, run setup wizard ---
|
||||
if setup.NeedsSetup(cfg) {
|
||||
logger.Printf("[INFO] felhom-controller %s — setup mode", Version)
|
||||
runSetupMode(cfg, logger)
|
||||
return
|
||||
}
|
||||
|
||||
logger.Printf("[INFO] felhom-controller %s starting (customer: %s, domain: %s)",
|
||||
Version, cfg.Customer.ID, cfg.Customer.Domain)
|
||||
|
||||
@@ -67,79 +79,6 @@ func main() {
|
||||
logger.Fatalf("[FATAL] Failed to load settings from %s: %v", settingsPath, err)
|
||||
}
|
||||
|
||||
// --- Detect fresh deployment (Phase 2+3: DR restore from Hub) ---
|
||||
var restorePlan *backup.RestorePlan
|
||||
isFreshDeployment := !fileExists(settingsPath)
|
||||
if isFreshDeployment && cfg.Hub.Enabled && cfg.Hub.URL != "" {
|
||||
logger.Println("[INFO] Fresh deployment detected — checking Hub for infra backup")
|
||||
|
||||
ib, pullErr := report.PullInfraBackup(cfg.Hub.URL, cfg.Hub.APIKey, cfg.Customer.ID)
|
||||
if pullErr != nil {
|
||||
logger.Printf("[WARN] Could not reach Hub for infra backup: %v", pullErr)
|
||||
} else if ib != nil {
|
||||
logger.Printf("[INFO] Found infra backup on Hub: %s (%s), %d stacks, synced %s",
|
||||
ib.Domain, ib.CustomerID, len(ib.DeployedStacks), ib.Timestamp)
|
||||
|
||||
// Restore settings.json from Hub backup first
|
||||
restoreSettingsFromHub(ib, cfg, logger)
|
||||
|
||||
// Re-load settings (now from restored file)
|
||||
if restoredSett, loadErr := settings.Load(settingsPath, logger); loadErr == nil {
|
||||
sett = restoredSett
|
||||
logger.Println("[INFO] Settings reloaded after Hub restore")
|
||||
}
|
||||
|
||||
// Restore restic passwords AFTER settings reload so cross-drive password persists
|
||||
restorePasswordsFromHub(ib, cfg, sett, logger)
|
||||
|
||||
// Mount drives using stored disk layout
|
||||
mountCtx, mountCancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||
mountedPaths, mountErr := backup.MountDrivesFromLayout(mountCtx, ib.DiskLayout, logger)
|
||||
mountCancel()
|
||||
if mountErr != nil {
|
||||
logger.Printf("[WARN] Drive mounting error: %v", mountErr)
|
||||
} else if len(mountedPaths) > 0 {
|
||||
logger.Printf("[INFO] Mounted %d drives from Hub disk layout: %v", len(mountedPaths), mountedPaths)
|
||||
} else {
|
||||
logger.Println("[INFO] No matching drives found to mount from Hub disk layout")
|
||||
}
|
||||
|
||||
// Phase 3: Scan mounted drives for backup data and build restore plan
|
||||
if len(ib.DeployedStacks) > 0 {
|
||||
// Collect mount paths from disk layout
|
||||
var drivePaths []string
|
||||
for _, dm := range ib.DiskLayout.Mounts {
|
||||
if dm.MountPoint != "" {
|
||||
drivePaths = append(drivePaths, dm.MountPoint)
|
||||
}
|
||||
}
|
||||
|
||||
// Convert report stacks to backup scan format
|
||||
var infraStacks []backup.InfraStackInfo
|
||||
for _, s := range ib.DeployedStacks {
|
||||
infraStacks = append(infraStacks, backup.InfraStackInfo{
|
||||
Name: s.Name,
|
||||
DisplayName: s.DisplayName,
|
||||
HDDPath: s.HDDPath,
|
||||
NeedsHDD: s.NeedsHDD,
|
||||
})
|
||||
}
|
||||
|
||||
restorePlan = backup.ScanDrivesForBackups(drivePaths, infraStacks, logger)
|
||||
if restorePlan != nil {
|
||||
restorePlan.CustomerID = ib.CustomerID
|
||||
restorePlan.Domain = ib.Domain
|
||||
restorePlan.Timestamp = ib.Timestamp
|
||||
logger.Printf("[INFO] DR restore plan ready: %d apps to restore", len(restorePlan.Apps))
|
||||
} else {
|
||||
logger.Println("[WARN] ScanDrivesForBackups returned nil — no restore plan created")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
logger.Println("[INFO] No infra backup found on Hub for this customer")
|
||||
}
|
||||
}
|
||||
|
||||
// --- Auto-discover storage paths from deployed apps ---
|
||||
discoveredPaths := discoverHDDPaths(cfg.Paths.StacksDir, logger)
|
||||
sett.AutoDiscoverStoragePaths(discoveredPaths, cfg.Paths.HDDPath, logger)
|
||||
@@ -304,6 +243,15 @@ func main() {
|
||||
var hubPusher *report.Pusher
|
||||
if cfg.Hub.URL != "" && cfg.Hub.APIKey != "" {
|
||||
hubPusher = report.NewPusher(&cfg.Hub, logger)
|
||||
// Wire hub verification: update settings when hub reports customer status
|
||||
hubPusher.OnPushResponse = func(resp *report.PushResponse) {
|
||||
if resp.CustomerBlocked {
|
||||
sett.SetHubVerified(false, time.Now())
|
||||
logger.Printf("[WARN] Customer blocked on Hub — new deployments may be restricted")
|
||||
} else {
|
||||
sett.SetHubVerified(true, time.Now())
|
||||
}
|
||||
}
|
||||
// Wire hub push status into alert manager for dashboard alerts
|
||||
alertMgr.SetHubPushStatus(func() web.HubPushStatusData {
|
||||
s := hubPusher.GetStatus()
|
||||
@@ -350,6 +298,8 @@ func main() {
|
||||
if hubPusher != nil && cfg.Hub.Enabled {
|
||||
go pushInfraBackup(cfg, sett, stackProv, hubPusher, logger)
|
||||
}
|
||||
// Write local infra backup to all connected drives
|
||||
go writeLocalInfraBackup(cfg, sett, stackProv, logger)
|
||||
return err
|
||||
})
|
||||
|
||||
@@ -397,7 +347,17 @@ func main() {
|
||||
}
|
||||
sched.Every("hub-report", pushInterval, func(ctx context.Context) error {
|
||||
r := report.BuildReport(cfg, *configPath, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths())
|
||||
return hubPusher.Push(r)
|
||||
if err := hubPusher.Push(r); err != nil {
|
||||
return err
|
||||
}
|
||||
// Drain pending events (e.g., DR recovery completed) after successful push
|
||||
if events := sett.DrainPendingEvents(); len(events) > 0 {
|
||||
for _, ev := range events {
|
||||
notifier.Notify(ev.EventType, ev.Severity, ev.Message, ev.Details)
|
||||
}
|
||||
logger.Printf("[INFO] Drained %d pending events to Hub", len(events))
|
||||
}
|
||||
return nil
|
||||
})
|
||||
logger.Printf("[INFO] Hub reporting enabled (every %s to %s)", pushInterval, cfg.Hub.URL)
|
||||
} else {
|
||||
@@ -468,6 +428,22 @@ func main() {
|
||||
sched.Start(ctx)
|
||||
defer sched.Stop()
|
||||
|
||||
// Generate recovery info file if retrieval password is set
|
||||
if rp := sett.GetRetrievalPassword(); rp != "" {
|
||||
go func() {
|
||||
info := recovery.Info{
|
||||
CustomerID: cfg.Customer.ID,
|
||||
RetrievalPassword: rp,
|
||||
HubURL: cfg.Hub.URL,
|
||||
SupportEmail: "support@felhom.eu",
|
||||
SupportURL: "https://felhom.eu/kapcsolat",
|
||||
}
|
||||
if err := recovery.GenerateRecoveryFile(info, Version, cfg.Paths.DataDir); err != nil {
|
||||
logger.Printf("[WARN] Failed to generate recovery-info.txt: %v", err)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// Fire startup pings + hub report immediately (don't wait for first scheduler tick)
|
||||
go func() {
|
||||
time.Sleep(5 * time.Second) // Let all subsystems fully initialize
|
||||
@@ -511,6 +487,8 @@ func main() {
|
||||
}
|
||||
// Also push infra backup on startup
|
||||
go pushInfraBackup(cfg, sett, stackProv, hubPusher, logger)
|
||||
// Write local infra backup to all connected drives
|
||||
go writeLocalInfraBackup(cfg, sett, stackProv, logger)
|
||||
} else {
|
||||
// Send a minimal "disabled" notification so hub knows reporting is intentionally off
|
||||
r := &report.Report{
|
||||
@@ -632,12 +610,6 @@ func main() {
|
||||
backupMgr.MigrationActiveCheck = driveMigrator.IsActive
|
||||
}
|
||||
|
||||
// Phase 3: Set DR restore mode if a restore plan was built
|
||||
if restorePlan != nil && len(restorePlan.Apps) > 0 {
|
||||
webServer.SetRestoreState(restorePlan)
|
||||
logger.Println("[INFO] DR restore mode activated — all web routes redirect to /restore")
|
||||
}
|
||||
|
||||
// --- Build HTTP mux ---
|
||||
mux := http.NewServeMux()
|
||||
|
||||
@@ -923,46 +895,112 @@ func fileExists(path string) bool {
|
||||
return err == nil
|
||||
}
|
||||
|
||||
// restorePasswordsFromHub restores restic passwords from a Hub infra backup.
|
||||
func restorePasswordsFromHub(ib *report.InfraBackup, cfg *config.Config,
|
||||
sett *settings.Settings, logger *log.Logger) {
|
||||
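// runSetupMode serves the setup wizard on both cfg.Web.Listen and cfg.Web.SetupListen, blocks until SIGINT or SIGTERM arrives, then shuts both servers down with a 10-second timeout.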
|
||||
func runSetupMode(cfg *config.Config, logger *log.Logger) {
|
||||
ips := setup.DetectLocalIPs()
|
||||
setup.LogSetupMode(cfg.Customer.Domain, ips, cfg.Web.SetupListen, logger)
|
||||
|
||||
if ib.ResticPassword != "" {
|
||||
decoded, err := base64.StdEncoding.DecodeString(ib.ResticPassword)
|
||||
if err == nil && len(decoded) > 0 {
|
||||
dir := filepath.Dir(cfg.Backup.ResticPasswordFile)
|
||||
if err := os.MkdirAll(dir, 0700); err != nil {
|
||||
logger.Printf("[WARN] Failed to create restic password directory %s: %v", dir, err)
|
||||
} else if err := os.WriteFile(cfg.Backup.ResticPasswordFile, decoded, 0600); err == nil {
|
||||
logger.Println("[INFO] Primary restic password restored from Hub")
|
||||
} else {
|
||||
logger.Printf("[WARN] Failed to write restic password file: %v", err)
|
||||
}
|
||||
}
|
||||
setupSrv := setup.NewServer(cfg, cfg.Paths.DataDir, logger, Version)
|
||||
handler := setupSrv.Handler()
|
||||
|
||||
// Health endpoint wrapper (returns setup_mode: true)
|
||||
healthHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(map[string]interface{}{
|
||||
"ok": true, "message": "felhom-controller is healthy",
|
||||
"setup_mode": true, "version": Version,
|
||||
})
|
||||
})
|
||||
|
||||
// Mux for both listeners
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/api/health", healthHandler)
|
||||
mux.Handle("/", handler)
|
||||
|
||||
// Start main listener (cfg.Web.Listen, normally :8080, behind Traefik for domain access)
|
||||
mainServer := &http.Server{
|
||||
Addr: cfg.Web.Listen,
|
||||
Handler: mux,
|
||||
ReadTimeout: 30 * time.Second,
|
||||
WriteTimeout: 60 * time.Second,
|
||||
IdleTimeout: 120 * time.Second,
|
||||
}
|
||||
go func() {
|
||||
logger.Printf("[INFO] Setup wizard (main) listening on %s", cfg.Web.Listen)
|
||||
if err := mainServer.ListenAndServe(); err != http.ErrServerClosed {
|
||||
logger.Printf("[ERROR] Main HTTP server error: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
// Start setup-only listener (cfg.Web.SetupListen, normally :8081, direct HTTP for LAN access)
|
||||
setupServer := &http.Server{
|
||||
Addr: cfg.Web.SetupListen,
|
||||
Handler: mux,
|
||||
ReadTimeout: 30 * time.Second,
|
||||
WriteTimeout: 60 * time.Second,
|
||||
IdleTimeout: 120 * time.Second,
|
||||
}
|
||||
go func() {
|
||||
logger.Printf("[INFO] Setup wizard (LAN) listening on %s", cfg.Web.SetupListen)
|
||||
if err := setupServer.ListenAndServe(); err != http.ErrServerClosed {
|
||||
logger.Printf("[ERROR] Setup HTTP server error: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
// Wait for signal
|
||||
sigCh := make(chan os.Signal, 1)
|
||||
signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
|
||||
sig := <-sigCh
|
||||
logger.Printf("[INFO] Received signal %v, shutting down setup wizard...", sig)
|
||||
|
||||
shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer shutdownCancel()
|
||||
mainServer.Shutdown(shutdownCtx)
|
||||
setupServer.Shutdown(shutdownCtx)
|
||||
logger.Println("[INFO] Setup wizard stopped")
|
||||
}
|
||||
|
||||
// restoreSettingsFromHub restores settings.json from a Hub infra backup.
|
||||
func restoreSettingsFromHub(ib *report.InfraBackup, cfg *config.Config, logger *log.Logger) {
|
||||
if ib.SettingsJSONB64 == "" {
|
||||
return
|
||||
}
|
||||
decoded, err := base64.StdEncoding.DecodeString(ib.SettingsJSONB64)
|
||||
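// writeLocalInfraBackup builds an infra snapshot and writes it to every connected storage path (skipping disconnected and decommissioned ones) plus the system data path if set. Build and marshal failures are logged as warnings and not returned.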
|
||||
func writeLocalInfraBackup(cfg *config.Config, sett *settings.Settings,
|
||||
stackProv *stackAdapter, logger *log.Logger) {
|
||||
|
||||
ib, err := report.BuildInfraBackup(
|
||||
cfg.Customer.ID, cfg.Customer.Domain, Version,
|
||||
"/opt/docker/felhom-controller/controller.yaml",
|
||||
filepath.Join(cfg.Paths.DataDir, "settings.json"),
|
||||
cfg.Backup.ResticPasswordFile,
|
||||
cfg.Paths.SystemDataPath,
|
||||
sett, stackProv, logger,
|
||||
)
|
||||
if err != nil {
|
||||
logger.Printf("[WARN] Failed to decode settings from Hub: %v", err)
|
||||
logger.Printf("[WARN] Failed to build infra backup for local write: %v", err)
|
||||
return
|
||||
}
|
||||
if err := os.MkdirAll(cfg.Paths.DataDir, 0755); err != nil {
|
||||
logger.Printf("[WARN] Failed to create data directory for settings restore: %v", err)
|
||||
|
||||
data, err := json.Marshal(ib)
|
||||
if err != nil {
|
||||
logger.Printf("[WARN] Failed to marshal infra backup for local write: %v", err)
|
||||
return
|
||||
}
|
||||
settingsPath := filepath.Join(cfg.Paths.DataDir, "settings.json")
|
||||
if err := os.WriteFile(settingsPath, decoded, 0600); err != nil {
|
||||
logger.Printf("[WARN] Failed to write restored settings.json: %v", err)
|
||||
} else {
|
||||
logger.Println("[INFO] Settings restored from Hub backup")
|
||||
|
||||
// Collect all connected drive paths (skip disconnected and decommissioned)
|
||||
var drives []string
|
||||
for _, sp := range sett.GetStoragePaths() {
|
||||
if !sp.Disconnected && !sp.Decommissioned {
|
||||
drives = append(drives, sp.Path)
|
||||
}
|
||||
}
|
||||
// Also include system data path if set
|
||||
if cfg.Paths.SystemDataPath != "" {
|
||||
drives = append(drives, cfg.Paths.SystemDataPath)
|
||||
}
|
||||
|
||||
if len(drives) == 0 {
|
||||
logger.Println("[DEBUG] No connected drives for local infra backup")
|
||||
return
|
||||
}
|
||||
|
||||
backup.WriteLocalInfraBackup(data, cfg.Customer.ID, Version, ib.Timestamp, drives, logger)
|
||||
}
|
||||
|
||||
// discoverHDDPaths scans deployed apps' app.yaml for HDD_PATH env values.
|
||||
|
||||
Reference in New Issue
Block a user