Files
deploy-felhom-compose/controller/cmd/controller/main.go
T
admin 44f7fd2f19 feat: encrypt sensitive values in app.yaml with AES-256-GCM
Passwords and secrets from deploy fields (type: password/secret) are now
encrypted at rest in app.yaml using a per-node 32-byte key. Values stored
as ENC:base64(nonce+ciphertext), decrypted transparently for docker-compose
and web UI. Key included in infra backup bundle for disaster recovery.
Existing plaintext values migrated automatically on startup.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-23 19:12:24 +01:00

1140 lines
38 KiB
Go

package main
import (
"context"
"encoding/json"
"flag"
"fmt"
"io"
"log"
"net/http"
"os"
"os/signal"
"path/filepath"
"syscall"
"time"
"strings"
"gitea.dooplex.hu/admin/felhom-controller/internal/api"
"gitea.dooplex.hu/admin/felhom-controller/internal/assets"
"gitea.dooplex.hu/admin/felhom-controller/internal/backup"
"gitea.dooplex.hu/admin/felhom-controller/internal/crypto"
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
"gitea.dooplex.hu/admin/felhom-controller/internal/metrics"
"gitea.dooplex.hu/admin/felhom-controller/internal/monitor"
"gitea.dooplex.hu/admin/felhom-controller/internal/notify"
"gitea.dooplex.hu/admin/felhom-controller/internal/recovery"
"gitea.dooplex.hu/admin/felhom-controller/internal/report"
"gitea.dooplex.hu/admin/felhom-controller/internal/scheduler"
"gitea.dooplex.hu/admin/felhom-controller/internal/selftest"
"gitea.dooplex.hu/admin/felhom-controller/internal/selfupdate"
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
"gitea.dooplex.hu/admin/felhom-controller/internal/setup"
"gitea.dooplex.hu/admin/felhom-controller/internal/stacks"
"gitea.dooplex.hu/admin/felhom-controller/internal/storage"
catalogsync "gitea.dooplex.hu/admin/felhom-controller/internal/sync"
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
"gitea.dooplex.hu/admin/felhom-controller/internal/web"
)
var (
// Set at build time via ldflags
Version = "dev"
BuildTime = "unknown"
GitCommit = "unknown"
)
func main() {
configPath := flag.String("config", "/opt/docker/felhom-controller/controller.yaml", "Path to configuration file")
showVersion := flag.Bool("version", false, "Show version and exit")
flag.Parse()
if *showVersion {
fmt.Printf("felhom-controller %s (built %s, commit %s)\n", Version, BuildTime, GitCommit)
os.Exit(0)
}
startTime := time.Now()
// --- Load configuration ---
// Use LoadPermissive to tolerate minimal configs (e.g. only domain set by docker-setup.sh).
// If even that fails (file missing/unreadable), fall back to defaults.
cfg, err := config.LoadPermissive(*configPath)
if err != nil {
cfg = config.Default()
log.Printf("[WARN] Config load failed (%s), using defaults: %v", *configPath, err)
}
logger, logBuffer := setupLogger(cfg)
// --- Setup mode: if no customer ID configured, run setup wizard ---
if setup.NeedsSetup(cfg) {
logger.Printf("[INFO] felhom-controller %s — setup mode", Version)
runSetupMode(cfg, logger)
return
}
logger.Printf("[INFO] felhom-controller %s starting (customer: %s, domain: %s)",
Version, cfg.Customer.ID, cfg.Customer.Domain)
// --- Load settings ---
settingsPath := cfg.Paths.DataDir + "/settings.json"
sett, err := settings.Load(settingsPath, logger)
if err != nil {
logger.Fatalf("[FATAL] Failed to load settings from %s: %v", settingsPath, err)
}
// --- Auto-discover storage paths from deployed apps ---
discoveredPaths := discoverHDDPaths(cfg.Paths.StacksDir, logger)
sett.AutoDiscoverStoragePaths(discoveredPaths, cfg.Paths.HDDPath, logger)
// --- Load or create encryption key ---
encKeyPath := filepath.Join(cfg.Paths.DataDir, "encryption.key")
encKey, err := crypto.LoadOrCreateKey(encKeyPath)
if err != nil {
logger.Fatalf("[FATAL] Failed to load encryption key: %v", err)
}
logger.Printf("[INFO] Encryption key loaded from %s", encKeyPath)
// --- Initialize stack manager ---
stackMgr, err := stacks.NewManager(cfg, logger)
if err != nil {
logger.Fatalf("[FATAL] Failed to initialize stack manager: %v", err)
}
stackMgr.SetEncryptionKey(encKey)
// Initial stack scan
if err := stackMgr.ScanStacks(); err != nil {
logger.Printf("[WARN] Initial stack scan failed: %v", err)
}
// Inject missing deploy fields for all deployed stacks on startup
if names := stackMgr.DeployedStackNames(); len(names) > 0 {
stackMgr.InjectMissingFields(names)
}
// Migrate existing plaintext passwords to encrypted
stackMgr.MigrateEncryption()
// --- Initialize catalog syncer ---
syncer := catalogsync.New(cfg, logger, stackMgr.ScanStacks, func(updated []string) {
stackMgr.InjectMissingFields(updated)
})
syncer.Start()
defer syncer.Stop()
// --- Graceful shutdown context ---
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// --- Start CPU collector ---
cpuCollector := system.NewCPUCollector(5 * time.Second)
cpuCollector.Start(ctx)
defer cpuCollector.Stop()
// --- Initialize metrics store + collector ---
metricsDBPath := "/opt/docker/felhom-controller/data/metrics.db"
metricsStore, err := metrics.NewMetricsStore(metricsDBPath, logger)
if err != nil {
logger.Printf("[WARN] Failed to initialize metrics store: %v — monitoring disabled", err)
} else {
logger.Printf("[INFO] Metrics store opened at %s", metricsDBPath)
}
if metricsStore != nil {
defer metricsStore.Close()
metricsHDDPath := cfg.Paths.HDDPath
if p := sett.GetDefaultStoragePath(); p != "" {
metricsHDDPath = p
}
metricsCollector := metrics.NewMetricsCollector(metricsStore, cpuCollector, metricsHDDPath, logger)
metricsCollector.Start(ctx)
defer metricsCollector.Stop()
logger.Println("[INFO] Metrics collector started (60s interval)")
}
// --- Initialize health pinger (legacy, will be removed) ---
pinger := monitor.NewPinger(&cfg.Monitoring, logger)
// Deprecation notice for ping UUIDs
uuids := cfg.Monitoring.PingUUIDs
if uuids.Heartbeat != "" || uuids.SystemHealth != "" || uuids.DBDump != "" || uuids.Backup != "" || uuids.BackupIntegrity != "" {
logger.Println("[INFO] Healthchecks ping UUIDs configured but no longer used — monitoring is now handled by the Hub")
}
// --- Initialize backup manager ---
var backupMgr *backup.Manager
stackProv := &stackAdapter{
mgr: stackMgr,
getStoragePaths: func() []settings.StoragePath { return sett.GetStoragePaths() },
}
if cfg.Backup.Enabled {
backupMgr = backup.NewManager(cfg, pinger, sett, logger)
backupMgr.SetStackProvider(stackProv)
backupMgr.AfterBackup = func() {
nextDBDump := scheduler.NextDailyRun(cfg.Backup.DBDumpSchedule)
nextBackup := scheduler.NextDailyRun(cfg.Backup.ResticSchedule)
backupMgr.RefreshCache(nextDBDump, nextBackup)
}
go backupMgr.LoadSnapshotHistory()
}
// --- Initialize cross-drive backup runner ---
crossDriveRunner := backup.NewCrossDriveRunner(sett, stackProv, cfg.Paths.SystemDataPath, cfg.Paths.StacksDir, logger, cfg.Logging.Level == "debug")
// Wire cross-drive → backup manager for pre-backup DB dumps
if backupMgr != nil {
crossDriveRunner.SetDBDumper(backupMgr)
}
// --- Initialize alert manager ---
alertMgr := web.NewAlertManager(logger)
// --- Initialize notifier ---
notifier := notify.New(cfg.Hub.URL, cfg.Hub.APIKey, cfg.Customer.ID, sett, logger, cfg.Logging.Level == "debug")
// --- Initialize self-updater ---
var updater *selfupdate.Updater
if cfg.SelfUpdate.Enabled {
composePath := filepath.Join(filepath.Dir(cfg.Paths.DataDir), "docker-compose.yml")
updater = selfupdate.NewUpdater(&cfg.SelfUpdate, &cfg.Git, Version, cfg.Paths.DataDir, composePath, logger, cfg.Logging.Level == "debug")
updater.SetBackupRunningCheck(func() bool {
return backupMgr != nil && backupMgr.IsRunning()
})
// Check for post-update state (did a previous update succeed or fail?)
if state := updater.VerifyStartup(); state != nil {
notifier.NotifyControllerUpdated(state.PreviousVersion, state.TargetVersion, state.Status == "success")
}
logger.Printf("[INFO] Self-update enabled (check every %s, auto-update: %v, auto-update time: %s)",
cfg.SelfUpdate.CheckInterval, cfg.SelfUpdate.AutoUpdate, cfg.SelfUpdate.AutoUpdateTime)
}
// --- Initialize scheduler ---
sched := scheduler.New(logger)
// Existing periodic tasks (migrated from ad-hoc goroutines)
sched.Every("status-refresh", 30*time.Second, func(ctx context.Context) error {
return stackMgr.RefreshStatus()
})
sched.Every("stack-scan", 2*time.Minute, func(ctx context.Context) error {
return stackMgr.ScanStacks()
})
// Heartbeat — lightweight "I'm alive" signal
sched.Every("heartbeat", 5*time.Minute, func(ctx context.Context) error {
pinger.Ping(cfg.Monitoring.PingUUIDs.Heartbeat, "")
return nil
})
// System health ping
healthInterval, err := time.ParseDuration(cfg.Monitoring.SystemHealthInterval)
if err != nil {
healthInterval = 5 * time.Minute
}
sched.Every("system-health", healthInterval, func(ctx context.Context) error {
healthReport := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths(), logger)
body := healthReport.FormatMessage()
healthUUID := cfg.Monitoring.PingUUIDs.SystemHealth
if healthReport.Status == "fail" {
pinger.Fail(healthUUID, body)
} else {
pinger.Ping(healthUUID, body)
}
// Refresh dashboard alerts from health report
updateAvailable := false
latestVersion := ""
if updater != nil {
status := updater.GetStatus()
if status.LastCheck != nil {
updateAvailable = status.LastCheck.UpdateAvailable
latestVersion = status.LastCheck.LatestVersion
}
}
alertMgr.Refresh(healthReport, cfg, backupMgr, updateAvailable, latestVersion, sett.GetStoragePaths())
// Notify on health status changes
notifier.NotifyHealthChange(healthReport.Status, healthReport.Issues, healthReport.Warnings)
return nil
})
// --- Central hub pusher (declared early so backup closure can reference it) ---
var hubPusher *report.Pusher
if cfg.Hub.URL != "" && cfg.Hub.APIKey != "" {
hubPusher = report.NewPusher(&cfg.Hub, logger, cfg.Logging.Level == "debug")
// Wire hub verification: update settings when hub reports customer status
hubPusher.OnPushResponse = func(resp *report.PushResponse) {
if resp.CustomerBlocked {
sett.SetHubVerified(false, time.Now())
logger.Printf("[WARN] Customer blocked on Hub — new deployments may be restricted")
} else {
sett.SetHubVerified(true, time.Now())
}
}
// Wire hub push status into alert manager for dashboard alerts
alertMgr.SetHubPushStatus(func() web.HubPushStatusData {
s := hubPusher.GetStatus()
return web.HubPushStatusData{
LastAttempt: s.LastAttempt,
LastSuccess: s.LastSuccess,
LastError: s.LastError,
Consecutive: s.Consecutive,
}
})
}
// Backup daily jobs
if cfg.Backup.Enabled && backupMgr != nil {
sched.Daily("db-dump", cfg.Backup.DBDumpSchedule, func(ctx context.Context) error {
err := backupMgr.RunDBDumps(ctx)
if err != nil {
notifier.NotifyDBDumpFailed("Adatbázis mentés sikertelen", err.Error())
} else {
notifier.NotifyDBDumpCompleted(notify.DBDumpDetails{})
}
return err
})
sched.Daily("backup", cfg.Backup.ResticSchedule, func(ctx context.Context) error {
err := backupMgr.RunBackup(ctx)
if err != nil {
notifier.NotifyBackupFailed("Biztonsági mentés sikertelen", err.Error())
} else {
notifier.NotifyBackupCompleted(notify.BackupDetails{})
}
// Phase 3: Chain cross-drive backups immediately after restic (regardless of restic success)
// Daily jobs run every night; weekly jobs only on Sunday
if crossDriveRunner != nil {
if cdErr := crossDriveRunner.RunAllScheduled(ctx, "daily"); cdErr != nil {
logger.Printf("[WARN] Cross-drive daily backup had errors: %v", cdErr)
}
if time.Now().Weekday() == time.Sunday {
if cdErr := crossDriveRunner.RunAllScheduled(ctx, "weekly"); cdErr != nil {
logger.Printf("[WARN] Cross-drive weekly backup had errors: %v", cdErr)
}
}
}
// Push infra backup to Hub after all backup tiers complete
if hubPusher != nil && cfg.Hub.Enabled {
go pushInfraBackup(cfg, sett, stackProv, hubPusher, logger)
}
// Write local infra backup to all connected drives
go writeLocalInfraBackup(cfg, sett, stackProv, logger)
return err
})
// Weekly integrity check — Sunday 04:00
sched.Daily("backup-integrity", "04:00", func(ctx context.Context) error {
if time.Now().Weekday() != time.Sunday {
return nil
}
err := backupMgr.RunIntegrityCheck(ctx)
if err != nil {
notifier.NotifyIntegrityFailed("Mentés integritás ellenőrzés sikertelen", err.Error())
} else {
notifier.NotifyIntegrityOK("Mentés integritás ellenőrzés sikeres")
}
return err
})
// Cache refresh: every 5 minutes
sched.Every("backup-cache", 5*time.Minute, func(ctx context.Context) error {
nextDBDump := scheduler.NextDailyRun(cfg.Backup.DBDumpSchedule)
nextBackup := scheduler.NextDailyRun(cfg.Backup.ResticSchedule)
backupMgr.RefreshCache(nextDBDump, nextBackup)
return nil
})
}
// Metrics prune — daily at 04:00
if metricsStore != nil {
sched.Daily("metrics-prune", "04:00", func(ctx context.Context) error {
deleted, err := metricsStore.Prune(30 * 24 * time.Hour)
if err != nil {
return err
}
logger.Printf("[INFO] Pruned %d old metric rows", deleted)
return nil
})
}
// --- Central hub reporting schedule ---
if hubPusher != nil {
if cfg.Hub.Enabled {
pushInterval, err := time.ParseDuration(cfg.Hub.PushInterval)
if err != nil {
pushInterval = 15 * time.Minute
}
sched.Every("hub-report", pushInterval, func(ctx context.Context) error {
r := report.BuildReport(cfg, *configPath, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths(), logger)
if err := hubPusher.Push(r); err != nil {
return err
}
// Drain pending events (e.g., DR recovery completed) after successful push
if events := sett.DrainPendingEvents(); len(events) > 0 {
for _, ev := range events {
notifier.Notify(ev.EventType, ev.Severity, ev.Message, ev.Details)
}
logger.Printf("[INFO] Drained %d pending events to Hub", len(events))
}
return nil
})
logger.Printf("[INFO] Hub reporting enabled (every %s to %s)", pushInterval, cfg.Hub.URL)
} else {
logger.Printf("[INFO] Hub reporting disabled — will send disabled notification to %s", cfg.Hub.URL)
}
}
// Self-update scheduler jobs
if cfg.SelfUpdate.Enabled && updater != nil {
// Periodic version check (populates UI, never triggers update)
checkInterval, ciErr := time.ParseDuration(cfg.SelfUpdate.CheckInterval)
if ciErr != nil {
checkInterval = 6 * time.Hour
}
sched.Every("selfupdate-check", checkInterval, func(ctx context.Context) error {
result := updater.CheckForUpdate()
if result.UpdateAvailable {
logger.Printf("[INFO] Update available: %s -> %s", result.CurrentVersion, result.LatestVersion)
}
return nil
})
// Auto-update (daily, fires after typical backup completion)
if cfg.SelfUpdate.AutoUpdate {
sched.Daily("selfupdate-auto", cfg.SelfUpdate.AutoUpdateTime, func(ctx context.Context) error {
result := updater.CheckForUpdate()
if !result.UpdateAvailable {
return nil
}
if err := updater.TriggerUpdate("auto"); err != nil {
logger.Printf("[WARN] Auto-update skipped: %v", err)
}
return nil
})
}
}
// --- Storage watchdog ---
storageWatchdog := monitor.NewStorageWatchdog(sett, &watchdogStackAdapter{mgr: stackMgr}, notifier, cfg, logger)
storageWatchdog.SetAlertRefresh(func() {
healthReport := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths(), logger)
updateAvailable := false
latestVersion := ""
if updater != nil {
status := updater.GetStatus()
if status.LastCheck != nil {
updateAvailable = status.LastCheck.UpdateAvailable
latestVersion = status.LastCheck.LatestVersion
}
}
alertMgr.Refresh(healthReport, cfg, backupMgr, updateAvailable, latestVersion, sett.GetStoragePaths())
})
if hubPusher != nil {
storageWatchdog.SetHubReportPusher(func() {
r := report.BuildReport(cfg, *configPath, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths(), logger)
hubPusher.Push(r)
})
}
if backupMgr != nil {
storageWatchdog.SetRepoUnlocker(func(ctx context.Context, repoPath string) error {
return backupMgr.UnlockRepo(ctx, repoPath)
})
}
sched.Every("storage-watchdog", 5*time.Second, func(ctx context.Context) error {
return storageWatchdog.Check(ctx)
})
// --- Asset syncer (download from Hub) ---
var assetsSyncer *assets.Syncer
if cfg.Hub.Enabled && cfg.Assets.SyncEnabled && cfg.Hub.URL != "" && cfg.Hub.APIKey != "" {
assetsDir := filepath.Join(cfg.Paths.DataDir, "assets")
assetsSyncer = assets.New(cfg.Hub.URL, cfg.Hub.APIKey, assetsDir, "/usr/share/felhom/assets", logger, cfg.Logging.Level == "debug")
go func() {
time.Sleep(10 * time.Second)
if err := assetsSyncer.Sync(ctx); err != nil {
logger.Printf("[WARN] Initial asset sync failed: %v", err)
}
}()
sched.Daily("asset-sync", cfg.Assets.SyncSchedule, func(ctx context.Context) error {
return assetsSyncer.Sync(ctx)
})
logger.Printf("[INFO] Asset sync enabled (daily at %s from Hub)", cfg.Assets.SyncSchedule)
}
// --- Startup self-test ---
selfTestResult := selftest.Run(cfg, sett, logger)
sched.Start(ctx)
defer sched.Stop()
// Generate recovery info file if retrieval password is set
if rp := sett.GetRetrievalPassword(); rp != "" {
go func() {
info := recovery.Info{
CustomerID: cfg.Customer.ID,
RetrievalPassword: rp,
HubURL: cfg.Hub.URL,
SupportEmail: "support@felhom.eu",
SupportURL: "https://felhom.eu/kapcsolat",
}
if err := recovery.GenerateRecoveryFile(info, Version, cfg.Paths.DataDir); err != nil {
logger.Printf("[WARN] Failed to generate recovery-info.txt: %v", err)
}
}()
}
// Fire startup pings + hub report immediately (don't wait for first scheduler tick)
go func() {
time.Sleep(5 * time.Second) // Let all subsystems fully initialize
// Push controller startup event to Hub
notifier.NotifyControllerStarted(Version, map[string]interface{}{
"selftest_pass": selfTestResult.Pass,
"selftest_warn": selfTestResult.Warn,
"selftest_fail": selfTestResult.Fail,
})
// Heartbeat ping
pinger.Ping(cfg.Monitoring.PingUUIDs.Heartbeat, "startup")
logger.Println("[INFO] Startup heartbeat ping sent")
// System health ping
healthReport := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths(), logger)
body := healthReport.FormatMessage()
healthUUID := cfg.Monitoring.PingUUIDs.SystemHealth
if healthReport.Status == "fail" {
pinger.Fail(healthUUID, body)
} else {
pinger.Ping(healthUUID, body)
}
logger.Printf("[INFO] Startup health ping sent (status: %s)", healthReport.Status)
// Hub report
if hubPusher != nil {
if cfg.Hub.Enabled {
r := report.BuildReport(cfg, *configPath, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths(), logger)
var pushErr error
for attempt := 1; attempt <= 3; attempt++ {
pushErr = hubPusher.Push(r)
if pushErr == nil {
logger.Println("[INFO] Startup hub report sent")
break
}
logger.Printf("[WARN] Startup hub report attempt %d/3 failed: %v", attempt, pushErr)
if attempt < 3 {
time.Sleep(15 * time.Second)
}
}
if pushErr != nil {
logger.Printf("[WARN] Startup hub report failed after 3 attempts — next scheduled push in %s", cfg.Hub.PushInterval)
}
// Also push infra backup on startup
go pushInfraBackup(cfg, sett, stackProv, hubPusher, logger)
// Write local infra backup to all connected drives
go writeLocalInfraBackup(cfg, sett, stackProv, logger)
} else {
// Send a minimal "disabled" notification so hub knows reporting is intentionally off
r := &report.Report{
Version: 1,
CustomerID: cfg.Customer.ID,
CustomerName: cfg.Customer.Name,
ControllerVersion: Version,
Timestamp: time.Now().UTC(),
ReportingDisabled: true,
Health: report.HealthReport{Status: "disabled", Issues: []string{}, Warnings: []string{}},
Stacks: report.StacksReport{Deployed: []string{}, Available: []string{}},
Containers: report.ContainerReport{List: []report.ContainerDetailReport{}},
}
hubPusher.PushOnce(r)
}
}
// Initial self-update check (so settings page shows version info quickly)
if updater != nil {
time.Sleep(25 * time.Second) // Additional delay after hub report
result := updater.CheckForUpdate()
if result.UpdateAvailable {
logger.Printf("[INFO] Startup: update available %s -> %s", result.CurrentVersion, result.LatestVersion)
} else if result.Error != "" {
logger.Printf("[DEBUG] Startup version check: %s", result.Error)
}
}
}()
// Initial backup cache population (don't block startup)
if cfg.Backup.Enabled && backupMgr != nil {
go func() {
nextDBDump := scheduler.NextDailyRun(cfg.Backup.DBDumpSchedule)
nextBackup := scheduler.NextDailyRun(cfg.Backup.ResticSchedule)
backupMgr.RefreshCache(nextDBDump, nextBackup)
}()
}
// Sync notification preferences to hub on startup (handles hub DB rebuild recovery)
if notifier.IsEnabled() {
go func() {
prefs := sett.GetNotificationPrefs()
if prefs.Email != "" {
if err := notifier.SyncPreferences(prefs.Email, prefs.EnabledEvents, prefs.CooldownHours); err != nil {
logger.Printf("[WARN] Failed to sync notification preferences on startup: %v", err)
}
}
}()
}
// Initial alert refresh (so alerts appear immediately, not after first 5min health check)
go func() {
report := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths(), logger)
alertMgr.Refresh(report, cfg, backupMgr, false, "")
}()
// --- Initialize API router ---
apiRouter := api.NewRouter(cfg, *configPath, sett, stackMgr, syncer, cpuCollector, backupMgr, crossDriveRunner, metricsStore, updater, notifier, logger)
if hubPusher != nil {
apiRouter.OnConfigApplied = func() {
pushInfraBackup(cfg, sett, stackProv, hubPusher, logger)
}
apiRouter.OnCrossDriveComplete = func() {
pushInfraBackup(cfg, sett, stackProv, hubPusher, logger)
writeLocalInfraBackup(cfg, sett, stackProv, logger)
}
}
if assetsSyncer != nil {
apiRouter.SetAssetsSyncer(assetsSyncer)
}
// --- Initialize web server ---
webServer := web.NewServer(cfg, stackMgr, cpuCollector, backupMgr, crossDriveRunner, sched, sett, alertMgr, notifier, updater, logger, Version)
webServer.SetEncryptionKey(encKey)
webServer.SetStorageWatchdog(storageWatchdog)
if assetsSyncer != nil {
webServer.SetAssetsSyncer(assetsSyncer)
}
if hubPusher != nil {
webServer.SetHubPushStatus(func() web.HubPushStatusData {
s := hubPusher.GetStatus()
return web.HubPushStatusData{
LastAttempt: s.LastAttempt,
LastSuccess: s.LastSuccess,
LastError: s.LastError,
Consecutive: s.Consecutive,
}
})
}
if logBuffer != nil {
webServer.SetLogBuffer(logBuffer)
}
webServer.SetStartTime(startTime)
// Wire debug callbacks (only in debug mode)
if cfg.Logging.Level == "debug" {
dc := &web.DebugCallbacks{}
if hubPusher != nil {
dc.TriggerHubReportPush = func() error {
r := report.BuildReport(cfg, *configPath, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths(), logger)
return hubPusher.Push(r)
}
dc.TriggerHubInfraPush = func() error {
pushInfraBackup(cfg, sett, stackProv, hubPusher, logger)
return nil
}
}
dc.TriggerLocalInfraWrite = func() error {
writeLocalInfraBackup(cfg, sett, stackProv, logger)
return nil
}
dc.HubConnectivityTest = func() (int, int64, error) {
start := time.Now()
resp, err := http.Get(cfg.Hub.URL + "/healthz")
latency := time.Since(start).Milliseconds()
if err != nil {
return 0, latency, err
}
resp.Body.Close()
return resp.StatusCode, latency, nil
}
if cfg.Git.RepoURL != "" {
dc.GiteaConnectivityTest = func() (int, int64, error) {
start := time.Now()
client := &http.Client{Timeout: 5 * time.Second}
resp, err := client.Head(cfg.Git.RepoURL)
latency := time.Since(start).Milliseconds()
if err != nil {
return 0, latency, err
}
resp.Body.Close()
return resp.StatusCode, latency, nil
}
}
dc.GetTelemetryPreview = func() ([]report.AppTelemetry, error) {
return report.BuildAppTelemetryForDebug(stackMgr, metricsStore, logger), nil
}
webServer.SetDebugCallbacks(dc)
}
// --- Initialize drive migrator ---
driveMigrator := &storage.DriveMigrator{
Sett: sett,
StackProvider: &driveMigrateStackAdapter{mgr: stackMgr},
Logger: logger,
}
// Only set BackupTrigger if backup is enabled (avoid non-nil interface with nil concrete value)
if backupMgr != nil {
driveMigrator.BackupTrigger = backupMgr
}
driveMigrator.AlertRefresh = func() {
healthReport := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths(), logger)
updateAvailable := false
latestVersion := ""
if updater != nil {
status := updater.GetStatus()
if status.LastCheck != nil {
updateAvailable = status.LastCheck.UpdateAvailable
latestVersion = status.LastCheck.LatestVersion
}
}
alertMgr.Refresh(healthReport, cfg, backupMgr, updateAvailable, latestVersion, sett.GetStoragePaths())
}
if hubPusher != nil {
driveMigrator.PushHubReport = func() {
r := report.BuildReport(cfg, *configPath, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths(), logger)
hubPusher.Push(r)
}
driveMigrator.PushInfraBackup = func() {
pushInfraBackup(cfg, sett, stackProv, hubPusher, logger)
}
}
driveMigrator.SyncFBMounts = func() {
webServer.SyncFileBrowserMounts()
}
webServer.SetDriveMigrator(driveMigrator)
// Wire migration-active check into backup manager
if backupMgr != nil {
backupMgr.MigrationActiveCheck = driveMigrator.IsActive
}
// --- Build HTTP mux ---
mux := http.NewServeMux()
// API routes (no auth for health endpoint, auth for everything else)
mux.HandleFunc("/api/health", apiRouter.HealthHandler)
// Storage API routes handled by web server (longer prefix takes precedence over /api/)
mux.Handle("/api/storage/", webServer.RequireAuth(webServer.CsrfProtect(http.HandlerFunc(webServer.ServeStorageAPI))))
// Debug API routes handled by web server (debug-mode gating inside handler)
mux.Handle("/api/debug/", webServer.RequireAuth(webServer.CsrfProtect(http.HandlerFunc(webServer.ServeDebugAPI))))
// Self-update API — accepts session auth OR hub API key (for external triggering)
// CsrfProtect exempts Bearer-token requests automatically.
mux.Handle("/api/selfupdate/", selfUpdateAuthMiddleware(cfg, webServer, webServer.CsrfProtect(http.HandlerFunc(apiRouter.ServeHTTP))))
// Config API — accepts session auth OR hub API key (for Hub config push)
mux.Handle("/api/config/", selfUpdateAuthMiddleware(cfg, webServer, webServer.CsrfProtect(http.HandlerFunc(apiRouter.ServeHTTP))))
mux.Handle("/api/", webServer.RequireAuth(webServer.CsrfProtect(http.HandlerFunc(apiRouter.ServeHTTP))))
// Web UI routes (auth required)
mux.Handle("/", webServer.RequireAuth(webServer.CsrfProtect(http.HandlerFunc(webServer.ServeHTTP))))
// --- Start HTTP server ---
server := &http.Server{
Addr: cfg.Web.Listen,
Handler: webServer.CatchAllMiddleware(mux),
ReadTimeout: 30 * time.Second,
WriteTimeout: 60 * time.Second,
IdleTimeout: 120 * time.Second,
}
sigCh := make(chan os.Signal, 1)
signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
go func() {
sig := <-sigCh
logger.Printf("[INFO] Received signal %v, shutting down...", sig)
cancel()
shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 15*time.Second)
defer shutdownCancel()
if err := server.Shutdown(shutdownCtx); err != nil {
logger.Printf("[ERROR] HTTP server shutdown error: %v", err)
}
}()
logger.Printf("[INFO] Web UI listening on %s", cfg.Web.Listen)
if err := server.ListenAndServe(); err != http.ErrServerClosed {
logger.Fatalf("[FATAL] HTTP server error: %v", err)
}
logger.Println("[INFO] felhom-controller stopped")
}
// selfUpdateAuthMiddleware allows access via session auth (normal UI) OR hub API key bearer token (external).
func selfUpdateAuthMiddleware(cfg *config.Config, webServer *web.Server, next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Check bearer token first (for external API calls: hub, build scripts)
if auth := r.Header.Get("Authorization"); strings.HasPrefix(auth, "Bearer ") {
token := strings.TrimPrefix(auth, "Bearer ")
if token != "" && cfg.Hub.APIKey != "" && token == cfg.Hub.APIKey {
next.ServeHTTP(w, r)
return
}
}
// Fall back to session auth
webServer.RequireAuth(next).ServeHTTP(w, r)
})
}
func setupLogger(cfg *config.Config) (*log.Logger, *web.LogBuffer) {
if cfg.Logging.Level == "debug" {
logBuffer := web.NewLogBuffer(1000)
logger := log.New(io.MultiWriter(os.Stdout, logBuffer), "", log.LstdFlags|log.Lshortfile)
return logger, logBuffer
}
return log.New(os.Stdout, "", log.LstdFlags), nil
}
// stackAdapter implements backup.StackDataProvider using stacks.Manager.
type stackAdapter struct {
mgr *stacks.Manager
getStoragePaths func() []settings.StoragePath
}
func (a *stackAdapter) GetStackComposePath(name string) (string, bool) {
s, ok := a.mgr.GetStack(name)
if !ok {
return "", false
}
return s.ComposePath, true
}
func (a *stackAdapter) ListDeployedStacks() []backup.StackSummary {
var result []backup.StackSummary
for _, s := range a.mgr.GetStacks() {
if !s.Deployed {
continue
}
result = append(result, backup.StackSummary{
Name: s.Name,
DisplayName: s.Meta.DisplayName,
ComposePath: s.ComposePath,
NeedsHDD: s.Meta.Resources.NeedsHDD,
})
}
return result
}
func (a *stackAdapter) StopStack(name string) error {
return a.mgr.StopStack(name)
}
func (a *stackAdapter) StartStack(name string) error {
return a.mgr.StartStack(name)
}
func (a *stackAdapter) GetStackHDDMounts(name string) []string {
s, ok := a.mgr.GetStack(name)
if !ok {
return nil
}
// Priority 1: Read the app's own HDD_PATH from its app.yaml
stackDir := filepath.Dir(s.ComposePath)
appCfg := stacks.LoadAppConfig(stackDir)
if appCfg != nil && appCfg.Env["HDD_PATH"] != "" {
return stacks.ParseComposeHDDMounts(s.ComposePath, appCfg.Env["HDD_PATH"])
}
// Priority 2: Try all registered storage paths (fallback)
var allMounts []string
seen := make(map[string]bool)
for _, sp := range a.getStoragePaths() {
mounts := stacks.ParseComposeHDDMounts(s.ComposePath, sp.Path)
for _, m := range mounts {
if !seen[m] {
seen[m] = true
allMounts = append(allMounts, m)
}
}
}
return allMounts
}
func (a *stackAdapter) GetStackHDDPath(name string) string {
s, ok := a.mgr.GetStack(name)
if !ok {
return ""
}
stackDir := filepath.Dir(s.ComposePath)
appCfg := stacks.LoadAppConfig(stackDir)
if appCfg != nil && appCfg.Env["HDD_PATH"] != "" {
return filepath.Clean(appCfg.Env["HDD_PATH"])
}
return ""
}
// watchdogStackAdapter implements monitor.WatchdogStackProvider using stacks.Manager.
type watchdogStackAdapter struct {
mgr *stacks.Manager
}
func (a *watchdogStackAdapter) ListDeployedStacks() []monitor.WatchdogStackInfo {
var result []monitor.WatchdogStackInfo
for _, s := range a.mgr.GetStacks() {
if !s.Deployed {
continue
}
result = append(result, monitor.WatchdogStackInfo{Name: s.Name})
}
return result
}
func (a *watchdogStackAdapter) GetStackHDDPath(name string) string {
s, ok := a.mgr.GetStack(name)
if !ok {
return ""
}
stackDir := filepath.Dir(s.ComposePath)
appCfg := stacks.LoadAppConfig(stackDir)
if appCfg != nil && appCfg.Env["HDD_PATH"] != "" {
return filepath.Clean(appCfg.Env["HDD_PATH"])
}
return ""
}
func (a *watchdogStackAdapter) StopStack(name string) error {
return a.mgr.StopStack(name)
}
func (a *watchdogStackAdapter) StartStack(name string) error {
return a.mgr.StartStack(name)
}
// driveMigrateStackAdapter implements storage.StackProviderForMigration using stacks.Manager.
type driveMigrateStackAdapter struct {
mgr *stacks.Manager
}
func (a *driveMigrateStackAdapter) ListDeployedStacks() []storage.StackSummaryForMigration {
var result []storage.StackSummaryForMigration
for _, s := range a.mgr.GetStacks() {
if !s.Deployed {
continue
}
result = append(result, storage.StackSummaryForMigration{
Name: s.Name,
DisplayName: s.Meta.DisplayName,
})
}
return result
}
func (a *driveMigrateStackAdapter) GetStackHDDPath(name string) string {
s, ok := a.mgr.GetStack(name)
if !ok {
return ""
}
stackDir := filepath.Dir(s.ComposePath)
appCfg := stacks.LoadAppConfig(stackDir)
if appCfg != nil && appCfg.Env["HDD_PATH"] != "" {
return filepath.Clean(appCfg.Env["HDD_PATH"])
}
return ""
}
func (a *driveMigrateStackAdapter) StopStack(name string) error {
return a.mgr.StopStack(name)
}
func (a *driveMigrateStackAdapter) StartStack(name string) error {
return a.mgr.StartStack(name)
}
func (a *driveMigrateStackAdapter) UpdateStackHDDPath(name, newPath string) error {
s, ok := a.mgr.GetStack(name)
if !ok {
return fmt.Errorf("stack not found: %s", name)
}
stackDir := filepath.Dir(s.ComposePath)
appCfg := stacks.LoadAppConfig(stackDir)
if appCfg == nil {
return fmt.Errorf("app.yaml not found for stack: %s", name)
}
appCfg.Env["HDD_PATH"] = newPath
return stacks.SaveAppConfig(stackDir, appCfg, nil, nil)
}
func (a *driveMigrateStackAdapter) StackExists(name string) bool {
_, ok := a.mgr.GetStack(name)
return ok
}
// pushInfraBackup builds and sends the infrastructure snapshot to the Hub.
func pushInfraBackup(cfg *config.Config, sett *settings.Settings,
stackProv *stackAdapter, pusher *report.Pusher, logger *log.Logger) {
encKeyPath := filepath.Join(cfg.Paths.DataDir, "encryption.key")
ib, err := report.BuildInfraBackup(
cfg.Customer.ID, cfg.Customer.Domain, Version,
"/opt/docker/felhom-controller/controller.yaml",
filepath.Join(cfg.Paths.DataDir, "settings.json"),
cfg.Backup.ResticPasswordFile,
encKeyPath,
cfg.Paths.SystemDataPath,
sett, stackProv, logger,
)
if err != nil {
logger.Printf("[WARN] Failed to build infra backup: %v", err)
return
}
data, err := json.Marshal(ib)
if err != nil {
logger.Printf("[WARN] Failed to marshal infra backup: %v", err)
return
}
if err := pusher.PushInfraBackup(data); err != nil {
logger.Printf("[WARN] Failed to push infra backup to Hub: %v", err)
}
}
// fileExists returns true if the path exists (file or directory).
func fileExists(path string) bool {
_, err := os.Stat(path)
return err == nil
}
// runSetupMode starts the setup wizard on dual listeners and blocks until signal.
func runSetupMode(cfg *config.Config, logger *log.Logger) {
ips := setup.DetectLocalIPs()
setup.LogSetupMode(cfg.Customer.Domain, ips, cfg.Web.SetupListen, logger)
setupSrv := setup.NewServer(cfg, cfg.Paths.DataDir, logger, Version)
handler := setupSrv.Handler()
// Health endpoint wrapper (returns setup_mode: true)
healthHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(map[string]interface{}{
"ok": true, "message": "felhom-controller is healthy",
"setup_mode": true, "version": Version,
})
})
// Mux for both listeners
mux := http.NewServeMux()
mux.HandleFunc("/api/health", healthHandler)
mux.Handle("/", handler)
// Start main listener (:8080, behind Traefik for domain access)
mainServer := &http.Server{
Addr: cfg.Web.Listen,
Handler: mux,
ReadTimeout: 30 * time.Second,
WriteTimeout: 60 * time.Second,
IdleTimeout: 120 * time.Second,
}
go func() {
logger.Printf("[INFO] Setup wizard (main) listening on %s", cfg.Web.Listen)
if err := mainServer.ListenAndServe(); err != http.ErrServerClosed {
logger.Printf("[ERROR] Main HTTP server error: %v", err)
}
}()
// Start setup-only listener (:8081, direct HTTP for LAN access)
setupServer := &http.Server{
Addr: cfg.Web.SetupListen,
Handler: mux,
ReadTimeout: 30 * time.Second,
WriteTimeout: 60 * time.Second,
IdleTimeout: 120 * time.Second,
}
go func() {
logger.Printf("[INFO] Setup wizard (LAN) listening on %s", cfg.Web.SetupListen)
if err := setupServer.ListenAndServe(); err != http.ErrServerClosed {
logger.Printf("[ERROR] Setup HTTP server error: %v", err)
}
}()
// Wait for signal
sigCh := make(chan os.Signal, 1)
signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
sig := <-sigCh
logger.Printf("[INFO] Received signal %v, shutting down setup wizard...", sig)
shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 10*time.Second)
defer shutdownCancel()
mainServer.Shutdown(shutdownCtx)
setupServer.Shutdown(shutdownCtx)
logger.Println("[INFO] Setup wizard stopped")
}
// writeLocalInfraBackup builds an infra snapshot and writes it to all connected drives.
func writeLocalInfraBackup(cfg *config.Config, sett *settings.Settings,
stackProv *stackAdapter, logger *log.Logger) {
encKeyPath := filepath.Join(cfg.Paths.DataDir, "encryption.key")
ib, err := report.BuildInfraBackup(
cfg.Customer.ID, cfg.Customer.Domain, Version,
"/opt/docker/felhom-controller/controller.yaml",
filepath.Join(cfg.Paths.DataDir, "settings.json"),
cfg.Backup.ResticPasswordFile,
encKeyPath,
cfg.Paths.SystemDataPath,
sett, stackProv, logger,
)
if err != nil {
logger.Printf("[WARN] Failed to build infra backup for local write: %v", err)
return
}
data, err := json.Marshal(ib)
if err != nil {
logger.Printf("[WARN] Failed to marshal infra backup for local write: %v", err)
return
}
// Collect all connected drive paths (skip disconnected and decommissioned)
var drives []string
for _, sp := range sett.GetStoragePaths() {
if !sp.Disconnected && !sp.Decommissioned {
drives = append(drives, sp.Path)
}
}
// Also include system data path if set
if cfg.Paths.SystemDataPath != "" {
drives = append(drives, cfg.Paths.SystemDataPath)
}
if len(drives) == 0 {
logger.Println("[DEBUG] No connected drives for local infra backup")
return
}
backup.WriteLocalInfraBackup(data, cfg.Customer.ID, Version, ib.Timestamp, drives, logger, cfg.Logging.Level == "debug")
}
// discoverHDDPaths scans deployed apps' app.yaml for HDD_PATH env values.
func discoverHDDPaths(stacksDir string, logger *log.Logger) []string {
entries, err := os.ReadDir(stacksDir)
if err != nil {
logger.Printf("[WARN] Cannot read stacks dir for HDD path discovery: %v", err)
return nil
}
seen := make(map[string]bool)
var paths []string
for _, e := range entries {
if !e.IsDir() {
continue
}
appCfg := stacks.LoadAppConfig(filepath.Join(stacksDir, e.Name()))
if appCfg == nil || !appCfg.Deployed {
continue
}
if hddPath, ok := appCfg.Env["HDD_PATH"]; ok && hddPath != "" {
cleaned := filepath.Clean(hddPath)
if !seen[cleaned] {
seen[cleaned] = true
paths = append(paths, cleaned)
}
}
}
return paths
}