bdbe170a54
New storage watchdog monitors registered storage paths every 5s. On disconnect (3 consecutive probe failures), auto-stops affected apps, lazy-unmounts stale VFS entries, fires alerts/notifications/hub report. On reconnect (UUID detected), auto-remounts via fstab, cleans stale restic locks, offers app restart. Safe disconnect UI for USB drives: confirmation dialog, stop apps, sync, unmount. Disconnected state visible across all pages (dashboard, settings, backups, monitoring) with hatched red bars and badges. Backup guards skip disconnected drives. 22 files changed (1 new: monitor/watchdog.go), ~1500 lines added. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
854 lines
28 KiB
Go
854 lines
28 KiB
Go
package main
|
|
|
|
import (
	"context"
	"crypto/subtle"
	"encoding/base64"
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"net/http"
	"os"
	"os/signal"
	"path/filepath"
	"strings"
	"syscall"
	"time"

	"gitea.dooplex.hu/admin/felhom-controller/internal/api"
	"gitea.dooplex.hu/admin/felhom-controller/internal/backup"
	"gitea.dooplex.hu/admin/felhom-controller/internal/config"
	"gitea.dooplex.hu/admin/felhom-controller/internal/metrics"
	"gitea.dooplex.hu/admin/felhom-controller/internal/monitor"
	"gitea.dooplex.hu/admin/felhom-controller/internal/notify"
	"gitea.dooplex.hu/admin/felhom-controller/internal/report"
	"gitea.dooplex.hu/admin/felhom-controller/internal/scheduler"
	"gitea.dooplex.hu/admin/felhom-controller/internal/selfupdate"
	"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
	"gitea.dooplex.hu/admin/felhom-controller/internal/stacks"
	catalogsync "gitea.dooplex.hu/admin/felhom-controller/internal/sync"
	"gitea.dooplex.hu/admin/felhom-controller/internal/system"
	"gitea.dooplex.hu/admin/felhom-controller/internal/web"
)
|
|
|
|
// Build metadata. The values below are placeholders that are expected to be
// overridden at build time via ldflags; they are printed by the -version flag
// and Version is also passed to the updater, hub reports and web server.
var (
	// Version is the controller release identifier ("dev" when not injected).
	Version = "dev"
	// BuildTime is the build timestamp ("unknown" when not injected).
	BuildTime = "unknown"
	// GitCommit is the source revision ("unknown" when not injected).
	GitCommit = "unknown"
)
|
|
|
|
func main() {
|
|
configPath := flag.String("config", "/opt/docker/felhom-controller/controller.yaml", "Path to configuration file")
|
|
showVersion := flag.Bool("version", false, "Show version and exit")
|
|
flag.Parse()
|
|
|
|
if *showVersion {
|
|
fmt.Printf("felhom-controller %s (built %s, commit %s)\n", Version, BuildTime, GitCommit)
|
|
os.Exit(0)
|
|
}
|
|
|
|
// --- Load configuration ---
|
|
cfg, err := config.Load(*configPath)
|
|
if err != nil {
|
|
log.Fatalf("[FATAL] Failed to load config from %s: %v", *configPath, err)
|
|
}
|
|
|
|
logger := setupLogger(cfg)
|
|
logger.Printf("[INFO] felhom-controller %s starting (customer: %s, domain: %s)",
|
|
Version, cfg.Customer.ID, cfg.Customer.Domain)
|
|
|
|
// --- Load settings ---
|
|
settingsPath := cfg.Paths.DataDir + "/settings.json"
|
|
sett, err := settings.Load(settingsPath, logger)
|
|
if err != nil {
|
|
logger.Fatalf("[FATAL] Failed to load settings from %s: %v", settingsPath, err)
|
|
}
|
|
|
|
// --- Detect fresh deployment (Phase 2+3: DR restore from Hub) ---
|
|
var restorePlan *backup.RestorePlan
|
|
isFreshDeployment := !fileExists(settingsPath)
|
|
if isFreshDeployment && cfg.Hub.Enabled && cfg.Hub.URL != "" {
|
|
logger.Println("[INFO] Fresh deployment detected — checking Hub for infra backup")
|
|
|
|
ib, pullErr := report.PullInfraBackup(cfg.Hub.URL, cfg.Hub.APIKey, cfg.Customer.ID)
|
|
if pullErr != nil {
|
|
logger.Printf("[WARN] Could not reach Hub for infra backup: %v", pullErr)
|
|
} else if ib != nil {
|
|
logger.Printf("[INFO] Found infra backup on Hub: %s (%s), %d stacks, synced %s",
|
|
ib.Domain, ib.CustomerID, len(ib.DeployedStacks), ib.Timestamp)
|
|
|
|
// Restore settings.json from Hub backup first
|
|
restoreSettingsFromHub(ib, cfg, logger)
|
|
|
|
// Re-load settings (now from restored file)
|
|
if restoredSett, loadErr := settings.Load(settingsPath, logger); loadErr == nil {
|
|
sett = restoredSett
|
|
logger.Println("[INFO] Settings reloaded after Hub restore")
|
|
}
|
|
|
|
// Restore restic passwords AFTER settings reload so cross-drive password persists
|
|
restorePasswordsFromHub(ib, cfg, sett, logger)
|
|
|
|
// Mount drives using stored disk layout
|
|
mountCtx, mountCancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
|
mountedPaths, mountErr := backup.MountDrivesFromLayout(mountCtx, ib.DiskLayout, logger)
|
|
mountCancel()
|
|
if mountErr != nil {
|
|
logger.Printf("[WARN] Drive mounting error: %v", mountErr)
|
|
} else if len(mountedPaths) > 0 {
|
|
logger.Printf("[INFO] Mounted %d drives from Hub disk layout: %v", len(mountedPaths), mountedPaths)
|
|
} else {
|
|
logger.Println("[INFO] No matching drives found to mount from Hub disk layout")
|
|
}
|
|
|
|
// Phase 3: Scan mounted drives for backup data and build restore plan
|
|
if len(ib.DeployedStacks) > 0 {
|
|
// Collect mount paths from disk layout
|
|
var drivePaths []string
|
|
for _, dm := range ib.DiskLayout.Mounts {
|
|
if dm.MountPoint != "" {
|
|
drivePaths = append(drivePaths, dm.MountPoint)
|
|
}
|
|
}
|
|
|
|
// Convert report stacks to backup scan format
|
|
var infraStacks []backup.InfraStackInfo
|
|
for _, s := range ib.DeployedStacks {
|
|
infraStacks = append(infraStacks, backup.InfraStackInfo{
|
|
Name: s.Name,
|
|
DisplayName: s.DisplayName,
|
|
HDDPath: s.HDDPath,
|
|
NeedsHDD: s.NeedsHDD,
|
|
})
|
|
}
|
|
|
|
restorePlan = backup.ScanDrivesForBackups(drivePaths, infraStacks, logger)
|
|
if restorePlan != nil {
|
|
restorePlan.CustomerID = ib.CustomerID
|
|
restorePlan.Domain = ib.Domain
|
|
restorePlan.Timestamp = ib.Timestamp
|
|
logger.Printf("[INFO] DR restore plan ready: %d apps to restore", len(restorePlan.Apps))
|
|
} else {
|
|
logger.Println("[WARN] ScanDrivesForBackups returned nil — no restore plan created")
|
|
}
|
|
}
|
|
} else {
|
|
logger.Println("[INFO] No infra backup found on Hub for this customer")
|
|
}
|
|
}
|
|
|
|
// --- Auto-discover storage paths from deployed apps ---
|
|
discoveredPaths := discoverHDDPaths(cfg.Paths.StacksDir, logger)
|
|
sett.AutoDiscoverStoragePaths(discoveredPaths, cfg.Paths.HDDPath, logger)
|
|
|
|
// --- Initialize stack manager ---
|
|
stackMgr, err := stacks.NewManager(cfg, logger)
|
|
if err != nil {
|
|
logger.Fatalf("[FATAL] Failed to initialize stack manager: %v", err)
|
|
}
|
|
|
|
// Initial stack scan
|
|
if err := stackMgr.ScanStacks(); err != nil {
|
|
logger.Printf("[WARN] Initial stack scan failed: %v", err)
|
|
}
|
|
|
|
// --- Initialize catalog syncer ---
|
|
syncer := catalogsync.New(cfg, logger, stackMgr.ScanStacks)
|
|
syncer.Start()
|
|
defer syncer.Stop()
|
|
|
|
// --- Graceful shutdown context ---
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
// --- Start CPU collector ---
|
|
cpuCollector := system.NewCPUCollector(5 * time.Second)
|
|
cpuCollector.Start(ctx)
|
|
defer cpuCollector.Stop()
|
|
|
|
// --- Initialize metrics store + collector ---
|
|
metricsDBPath := "/opt/docker/felhom-controller/data/metrics.db"
|
|
metricsStore, err := metrics.NewMetricsStore(metricsDBPath, logger)
|
|
if err != nil {
|
|
logger.Printf("[WARN] Failed to initialize metrics store: %v — monitoring disabled", err)
|
|
} else {
|
|
logger.Printf("[INFO] Metrics store opened at %s", metricsDBPath)
|
|
}
|
|
|
|
if metricsStore != nil {
|
|
defer metricsStore.Close()
|
|
metricsHDDPath := cfg.Paths.HDDPath
|
|
if p := sett.GetDefaultStoragePath(); p != "" {
|
|
metricsHDDPath = p
|
|
}
|
|
metricsCollector := metrics.NewMetricsCollector(metricsStore, cpuCollector, metricsHDDPath, logger)
|
|
metricsCollector.Start(ctx)
|
|
defer metricsCollector.Stop()
|
|
logger.Println("[INFO] Metrics collector started (60s interval)")
|
|
}
|
|
|
|
// --- Initialize health pinger ---
|
|
pinger := monitor.NewPinger(&cfg.Monitoring, logger)
|
|
|
|
// --- Initialize backup manager ---
|
|
var backupMgr *backup.Manager
|
|
stackProv := &stackAdapter{
|
|
mgr: stackMgr,
|
|
getStoragePaths: func() []settings.StoragePath { return sett.GetStoragePaths() },
|
|
}
|
|
if cfg.Backup.Enabled {
|
|
backupMgr = backup.NewManager(cfg, pinger, sett, logger)
|
|
backupMgr.SetStackProvider(stackProv)
|
|
backupMgr.AfterBackup = func() {
|
|
nextDBDump := scheduler.NextDailyRun(cfg.Backup.DBDumpSchedule)
|
|
nextBackup := scheduler.NextDailyRun(cfg.Backup.ResticSchedule)
|
|
backupMgr.RefreshCache(nextDBDump, nextBackup)
|
|
}
|
|
go backupMgr.LoadSnapshotHistory()
|
|
}
|
|
|
|
// --- Initialize cross-drive backup runner ---
|
|
crossDriveRunner := backup.NewCrossDriveRunner(sett, stackProv, cfg.Paths.SystemDataPath, cfg.Paths.StacksDir, logger)
|
|
|
|
// Wire cross-drive → backup manager for pre-backup DB dumps
|
|
if backupMgr != nil {
|
|
crossDriveRunner.SetDBDumper(backupMgr)
|
|
}
|
|
|
|
// --- Initialize alert manager ---
|
|
alertMgr := web.NewAlertManager(logger)
|
|
|
|
// --- Initialize notifier ---
|
|
notifier := notify.New(cfg.Hub.URL, cfg.Hub.APIKey, cfg.Customer.ID, sett, logger)
|
|
|
|
// --- Initialize self-updater ---
|
|
var updater *selfupdate.Updater
|
|
if cfg.SelfUpdate.Enabled {
|
|
composePath := filepath.Join(filepath.Dir(cfg.Paths.DataDir), "docker-compose.yml")
|
|
updater = selfupdate.NewUpdater(&cfg.SelfUpdate, &cfg.Git, Version, cfg.Paths.DataDir, composePath, logger)
|
|
updater.SetBackupRunningCheck(func() bool {
|
|
return backupMgr != nil && backupMgr.IsRunning()
|
|
})
|
|
// Check for post-update state (did a previous update succeed or fail?)
|
|
if state := updater.VerifyStartup(); state != nil {
|
|
if state.Status == "success" {
|
|
notifier.NotifyUpdateSuccess(state.PreviousVersion, state.TargetVersion)
|
|
} else if state.Status == "failed" {
|
|
notifier.NotifyUpdateFailed(state.TargetVersion, state.Error)
|
|
}
|
|
}
|
|
logger.Printf("[INFO] Self-update enabled (check every %s, auto-update: %v, auto-update time: %s)",
|
|
cfg.SelfUpdate.CheckInterval, cfg.SelfUpdate.AutoUpdate, cfg.SelfUpdate.AutoUpdateTime)
|
|
}
|
|
|
|
// --- Initialize scheduler ---
|
|
sched := scheduler.New(logger)
|
|
|
|
// Existing periodic tasks (migrated from ad-hoc goroutines)
|
|
sched.Every("status-refresh", 30*time.Second, func(ctx context.Context) error {
|
|
return stackMgr.RefreshStatus()
|
|
})
|
|
sched.Every("stack-scan", 2*time.Minute, func(ctx context.Context) error {
|
|
return stackMgr.ScanStacks()
|
|
})
|
|
|
|
// Heartbeat — lightweight "I'm alive" signal
|
|
sched.Every("heartbeat", 5*time.Minute, func(ctx context.Context) error {
|
|
pinger.Ping(cfg.Monitoring.PingUUIDs.Heartbeat, "")
|
|
return nil
|
|
})
|
|
|
|
// System health ping
|
|
healthInterval, err := time.ParseDuration(cfg.Monitoring.SystemHealthInterval)
|
|
if err != nil {
|
|
healthInterval = 5 * time.Minute
|
|
}
|
|
sched.Every("system-health", healthInterval, func(ctx context.Context) error {
|
|
healthReport := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths())
|
|
body := healthReport.FormatMessage()
|
|
healthUUID := cfg.Monitoring.PingUUIDs.SystemHealth
|
|
if healthReport.Status == "fail" {
|
|
pinger.Fail(healthUUID, body)
|
|
} else {
|
|
pinger.Ping(healthUUID, body)
|
|
}
|
|
// Refresh dashboard alerts from health report
|
|
updateAvailable := false
|
|
latestVersion := ""
|
|
if updater != nil {
|
|
status := updater.GetStatus()
|
|
if status.LastCheck != nil {
|
|
updateAvailable = status.LastCheck.UpdateAvailable
|
|
latestVersion = status.LastCheck.LatestVersion
|
|
}
|
|
}
|
|
alertMgr.Refresh(healthReport, cfg, backupMgr, updateAvailable, latestVersion, sett.GetStoragePaths())
|
|
// Notify on health status changes
|
|
notifier.NotifyHealthChange(healthReport.Status, healthReport.Issues, healthReport.Warnings)
|
|
return nil
|
|
})
|
|
|
|
// --- Central hub pusher (declared early so backup closure can reference it) ---
|
|
var hubPusher *report.Pusher
|
|
if cfg.Hub.URL != "" && cfg.Hub.APIKey != "" {
|
|
hubPusher = report.NewPusher(&cfg.Hub, logger)
|
|
}
|
|
|
|
// Backup daily jobs
|
|
if cfg.Backup.Enabled && backupMgr != nil {
|
|
sched.Daily("db-dump", cfg.Backup.DBDumpSchedule, func(ctx context.Context) error {
|
|
err := backupMgr.RunDBDumps(ctx)
|
|
if err != nil {
|
|
notifier.NotifyDBDumpFailed("Adatbázis mentés sikertelen", err.Error())
|
|
}
|
|
return err
|
|
})
|
|
sched.Daily("backup", cfg.Backup.ResticSchedule, func(ctx context.Context) error {
|
|
err := backupMgr.RunBackup(ctx)
|
|
if err != nil {
|
|
notifier.NotifyBackupFailed("Biztonsági mentés sikertelen", err.Error())
|
|
}
|
|
// Phase 3: Chain cross-drive backups immediately after restic (regardless of restic success)
|
|
// Daily jobs run every night; weekly jobs only on Sunday
|
|
if crossDriveRunner != nil {
|
|
if cdErr := crossDriveRunner.RunAllScheduled(ctx, "daily"); cdErr != nil {
|
|
logger.Printf("[WARN] Cross-drive daily backup had errors: %v", cdErr)
|
|
}
|
|
if time.Now().Weekday() == time.Sunday {
|
|
if cdErr := crossDriveRunner.RunAllScheduled(ctx, "weekly"); cdErr != nil {
|
|
logger.Printf("[WARN] Cross-drive weekly backup had errors: %v", cdErr)
|
|
}
|
|
}
|
|
}
|
|
// Push infra backup to Hub after all backup tiers complete
|
|
if hubPusher != nil && cfg.Hub.Enabled {
|
|
go pushInfraBackup(cfg, sett, stackProv, hubPusher, logger)
|
|
}
|
|
return err
|
|
})
|
|
|
|
// Weekly integrity check — Sunday 04:00
|
|
sched.Daily("backup-integrity", "04:00", func(ctx context.Context) error {
|
|
if time.Now().Weekday() != time.Sunday {
|
|
return nil
|
|
}
|
|
err := backupMgr.RunIntegrityCheck(ctx)
|
|
if err != nil {
|
|
notifier.NotifyIntegrityFailed("Mentés integritás ellenőrzés sikertelen", err.Error())
|
|
}
|
|
return err
|
|
})
|
|
|
|
// Cache refresh: every 5 minutes
|
|
sched.Every("backup-cache", 5*time.Minute, func(ctx context.Context) error {
|
|
nextDBDump := scheduler.NextDailyRun(cfg.Backup.DBDumpSchedule)
|
|
nextBackup := scheduler.NextDailyRun(cfg.Backup.ResticSchedule)
|
|
backupMgr.RefreshCache(nextDBDump, nextBackup)
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// Metrics prune — daily at 04:00
|
|
if metricsStore != nil {
|
|
sched.Daily("metrics-prune", "04:00", func(ctx context.Context) error {
|
|
deleted, err := metricsStore.Prune(30 * 24 * time.Hour)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
logger.Printf("[INFO] Pruned %d old metric rows", deleted)
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// --- Central hub reporting schedule ---
|
|
if hubPusher != nil {
|
|
if cfg.Hub.Enabled {
|
|
pushInterval, err := time.ParseDuration(cfg.Hub.PushInterval)
|
|
if err != nil {
|
|
pushInterval = 15 * time.Minute
|
|
}
|
|
sched.Every("hub-report", pushInterval, func(ctx context.Context) error {
|
|
r := report.BuildReport(cfg, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths())
|
|
return hubPusher.Push(r)
|
|
})
|
|
logger.Printf("[INFO] Hub reporting enabled (every %s to %s)", pushInterval, cfg.Hub.URL)
|
|
} else {
|
|
logger.Printf("[INFO] Hub reporting disabled — will send disabled notification to %s", cfg.Hub.URL)
|
|
}
|
|
}
|
|
|
|
// Self-update scheduler jobs
|
|
if cfg.SelfUpdate.Enabled && updater != nil {
|
|
// Periodic version check (populates UI, never triggers update)
|
|
checkInterval, ciErr := time.ParseDuration(cfg.SelfUpdate.CheckInterval)
|
|
if ciErr != nil {
|
|
checkInterval = 6 * time.Hour
|
|
}
|
|
sched.Every("selfupdate-check", checkInterval, func(ctx context.Context) error {
|
|
result := updater.CheckForUpdate()
|
|
if result.UpdateAvailable {
|
|
logger.Printf("[INFO] Update available: %s -> %s", result.CurrentVersion, result.LatestVersion)
|
|
}
|
|
return nil
|
|
})
|
|
|
|
// Auto-update (daily, fires after typical backup completion)
|
|
if cfg.SelfUpdate.AutoUpdate {
|
|
sched.Daily("selfupdate-auto", cfg.SelfUpdate.AutoUpdateTime, func(ctx context.Context) error {
|
|
result := updater.CheckForUpdate()
|
|
if !result.UpdateAvailable {
|
|
return nil
|
|
}
|
|
if err := updater.TriggerUpdate("auto"); err != nil {
|
|
logger.Printf("[WARN] Auto-update skipped: %v", err)
|
|
}
|
|
return nil
|
|
})
|
|
}
|
|
}
|
|
|
|
// --- Storage watchdog ---
|
|
storageWatchdog := monitor.NewStorageWatchdog(sett, &watchdogStackAdapter{mgr: stackMgr}, notifier, cfg, logger)
|
|
storageWatchdog.SetAlertRefresh(func() {
|
|
healthReport := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths())
|
|
updateAvailable := false
|
|
latestVersion := ""
|
|
if updater != nil {
|
|
status := updater.GetStatus()
|
|
if status.LastCheck != nil {
|
|
updateAvailable = status.LastCheck.UpdateAvailable
|
|
latestVersion = status.LastCheck.LatestVersion
|
|
}
|
|
}
|
|
alertMgr.Refresh(healthReport, cfg, backupMgr, updateAvailable, latestVersion, sett.GetStoragePaths())
|
|
})
|
|
if hubPusher != nil {
|
|
storageWatchdog.SetHubReportPusher(func() {
|
|
r := report.BuildReport(cfg, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths())
|
|
hubPusher.Push(r)
|
|
})
|
|
}
|
|
if backupMgr != nil {
|
|
storageWatchdog.SetRepoUnlocker(func(ctx context.Context, repoPath string) error {
|
|
return backupMgr.UnlockRepo(ctx, repoPath)
|
|
})
|
|
}
|
|
sched.Every("storage-watchdog", 5*time.Second, func(ctx context.Context) error {
|
|
return storageWatchdog.Check(ctx)
|
|
})
|
|
|
|
sched.Start(ctx)
|
|
defer sched.Stop()
|
|
|
|
// Fire startup pings + hub report immediately (don't wait for first scheduler tick)
|
|
go func() {
|
|
time.Sleep(5 * time.Second) // Let all subsystems fully initialize
|
|
|
|
// Heartbeat ping
|
|
pinger.Ping(cfg.Monitoring.PingUUIDs.Heartbeat, "startup")
|
|
logger.Println("[INFO] Startup heartbeat ping sent")
|
|
|
|
// System health ping
|
|
healthReport := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths())
|
|
body := healthReport.FormatMessage()
|
|
healthUUID := cfg.Monitoring.PingUUIDs.SystemHealth
|
|
if healthReport.Status == "fail" {
|
|
pinger.Fail(healthUUID, body)
|
|
} else {
|
|
pinger.Ping(healthUUID, body)
|
|
}
|
|
logger.Printf("[INFO] Startup health ping sent (status: %s)", healthReport.Status)
|
|
|
|
// Hub report
|
|
if hubPusher != nil {
|
|
if cfg.Hub.Enabled {
|
|
r := report.BuildReport(cfg, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths())
|
|
var pushErr error
|
|
for attempt := 1; attempt <= 3; attempt++ {
|
|
pushErr = hubPusher.Push(r)
|
|
if pushErr == nil {
|
|
logger.Println("[INFO] Startup hub report sent")
|
|
break
|
|
}
|
|
logger.Printf("[WARN] Startup hub report attempt %d/3 failed: %v", attempt, pushErr)
|
|
if attempt < 3 {
|
|
time.Sleep(15 * time.Second)
|
|
}
|
|
}
|
|
if pushErr != nil {
|
|
logger.Printf("[WARN] Startup hub report failed after 3 attempts — next scheduled push in %s", cfg.Hub.PushInterval)
|
|
}
|
|
// Also push infra backup on startup
|
|
go pushInfraBackup(cfg, sett, stackProv, hubPusher, logger)
|
|
} else {
|
|
// Send a minimal "disabled" notification so hub knows reporting is intentionally off
|
|
r := &report.Report{
|
|
Version: 1,
|
|
CustomerID: cfg.Customer.ID,
|
|
CustomerName: cfg.Customer.Name,
|
|
ControllerVersion: Version,
|
|
Timestamp: time.Now().UTC(),
|
|
ReportingDisabled: true,
|
|
Health: report.HealthReport{Status: "disabled", Issues: []string{}, Warnings: []string{}},
|
|
Stacks: report.StacksReport{Deployed: []string{}, Available: []string{}},
|
|
Containers: report.ContainerReport{List: []report.ContainerDetailReport{}},
|
|
}
|
|
hubPusher.PushOnce(r)
|
|
}
|
|
}
|
|
|
|
// Initial self-update check (so settings page shows version info quickly)
|
|
if updater != nil {
|
|
time.Sleep(25 * time.Second) // Additional delay after hub report
|
|
result := updater.CheckForUpdate()
|
|
if result.UpdateAvailable {
|
|
logger.Printf("[INFO] Startup: update available %s -> %s", result.CurrentVersion, result.LatestVersion)
|
|
} else if result.Error != "" {
|
|
logger.Printf("[DEBUG] Startup version check: %s", result.Error)
|
|
}
|
|
}
|
|
}()
|
|
|
|
// Initial backup cache population (don't block startup)
|
|
if cfg.Backup.Enabled && backupMgr != nil {
|
|
go func() {
|
|
nextDBDump := scheduler.NextDailyRun(cfg.Backup.DBDumpSchedule)
|
|
nextBackup := scheduler.NextDailyRun(cfg.Backup.ResticSchedule)
|
|
backupMgr.RefreshCache(nextDBDump, nextBackup)
|
|
}()
|
|
}
|
|
|
|
// Sync notification preferences to hub on startup (handles hub DB rebuild recovery)
|
|
if notifier.IsEnabled() {
|
|
go func() {
|
|
prefs := sett.GetNotificationPrefs()
|
|
if prefs.Email != "" {
|
|
if err := notifier.SyncPreferences(prefs.Email, prefs.EnabledEvents); err != nil {
|
|
logger.Printf("[WARN] Failed to sync notification preferences on startup: %v", err)
|
|
}
|
|
}
|
|
}()
|
|
}
|
|
|
|
// Initial alert refresh (so alerts appear immediately, not after first 5min health check)
|
|
go func() {
|
|
report := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths())
|
|
alertMgr.Refresh(report, cfg, backupMgr, false, "")
|
|
}()
|
|
|
|
// --- Initialize API router ---
|
|
apiRouter := api.NewRouter(cfg, sett, stackMgr, syncer, cpuCollector, backupMgr, crossDriveRunner, metricsStore, updater, logger)
|
|
|
|
// --- Initialize web server ---
|
|
webServer := web.NewServer(cfg, stackMgr, cpuCollector, backupMgr, crossDriveRunner, sched, sett, alertMgr, notifier, updater, logger, Version)
|
|
webServer.SetStorageWatchdog(storageWatchdog)
|
|
|
|
// Phase 3: Set DR restore mode if a restore plan was built
|
|
if restorePlan != nil && len(restorePlan.Apps) > 0 {
|
|
webServer.SetRestoreState(restorePlan)
|
|
logger.Println("[INFO] DR restore mode activated — all web routes redirect to /restore")
|
|
}
|
|
|
|
// --- Build HTTP mux ---
|
|
mux := http.NewServeMux()
|
|
|
|
// API routes (no auth for health endpoint, auth for everything else)
|
|
mux.HandleFunc("/api/health", apiRouter.HealthHandler)
|
|
// Storage API routes handled by web server (longer prefix takes precedence over /api/)
|
|
mux.Handle("/api/storage/", webServer.RequireAuth(http.HandlerFunc(webServer.ServeStorageAPI)))
|
|
// Self-update API — accepts session auth OR hub API key (for external triggering)
|
|
mux.Handle("/api/selfupdate/", selfUpdateAuthMiddleware(cfg, webServer, http.HandlerFunc(apiRouter.ServeHTTP)))
|
|
mux.Handle("/api/", webServer.RequireAuth(http.HandlerFunc(apiRouter.ServeHTTP)))
|
|
|
|
// Web UI routes (auth required)
|
|
mux.Handle("/", webServer.RequireAuth(http.HandlerFunc(webServer.ServeHTTP)))
|
|
|
|
// --- Start HTTP server ---
|
|
server := &http.Server{
|
|
Addr: cfg.Web.Listen,
|
|
Handler: mux,
|
|
ReadTimeout: 30 * time.Second,
|
|
WriteTimeout: 60 * time.Second,
|
|
IdleTimeout: 120 * time.Second,
|
|
}
|
|
|
|
sigCh := make(chan os.Signal, 1)
|
|
signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
|
|
|
|
go func() {
|
|
sig := <-sigCh
|
|
logger.Printf("[INFO] Received signal %v, shutting down...", sig)
|
|
cancel()
|
|
|
|
shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 15*time.Second)
|
|
defer shutdownCancel()
|
|
|
|
if err := server.Shutdown(shutdownCtx); err != nil {
|
|
logger.Printf("[ERROR] HTTP server shutdown error: %v", err)
|
|
}
|
|
}()
|
|
|
|
logger.Printf("[INFO] Web UI listening on %s", cfg.Web.Listen)
|
|
if err := server.ListenAndServe(); err != http.ErrServerClosed {
|
|
logger.Fatalf("[FATAL] HTTP server error: %v", err)
|
|
}
|
|
|
|
logger.Println("[INFO] felhom-controller stopped")
|
|
}
|
|
|
|
// selfUpdateAuthMiddleware allows access via session auth (normal UI) OR hub API key bearer token (external).
|
|
func selfUpdateAuthMiddleware(cfg *config.Config, webServer *web.Server, next http.Handler) http.Handler {
|
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
// Check bearer token first (for external API calls: hub, build scripts)
|
|
if auth := r.Header.Get("Authorization"); strings.HasPrefix(auth, "Bearer ") {
|
|
token := strings.TrimPrefix(auth, "Bearer ")
|
|
if token != "" && cfg.Hub.APIKey != "" && token == cfg.Hub.APIKey {
|
|
next.ServeHTTP(w, r)
|
|
return
|
|
}
|
|
}
|
|
// Fall back to session auth
|
|
webServer.RequireAuth(next).ServeHTTP(w, r)
|
|
})
|
|
}
|
|
|
|
func setupLogger(cfg *config.Config) *log.Logger {
|
|
// For now, log to stdout. File logging will be added later.
|
|
logger := log.New(os.Stdout, "", log.LstdFlags)
|
|
|
|
if cfg.Logging.Level == "debug" {
|
|
logger.SetFlags(log.LstdFlags | log.Lshortfile)
|
|
}
|
|
|
|
return logger
|
|
}
|
|
|
|
// stackAdapter implements backup.StackDataProvider using stacks.Manager.
|
|
type stackAdapter struct {
|
|
mgr *stacks.Manager
|
|
getStoragePaths func() []settings.StoragePath
|
|
}
|
|
|
|
func (a *stackAdapter) GetStackComposePath(name string) (string, bool) {
|
|
s, ok := a.mgr.GetStack(name)
|
|
if !ok {
|
|
return "", false
|
|
}
|
|
return s.ComposePath, true
|
|
}
|
|
|
|
func (a *stackAdapter) ListDeployedStacks() []backup.StackSummary {
|
|
var result []backup.StackSummary
|
|
for _, s := range a.mgr.GetStacks() {
|
|
if !s.Deployed {
|
|
continue
|
|
}
|
|
result = append(result, backup.StackSummary{
|
|
Name: s.Name,
|
|
DisplayName: s.Meta.DisplayName,
|
|
ComposePath: s.ComposePath,
|
|
NeedsHDD: s.Meta.Resources.NeedsHDD,
|
|
})
|
|
}
|
|
return result
|
|
}
|
|
|
|
func (a *stackAdapter) StopStack(name string) error {
|
|
return a.mgr.StopStack(name)
|
|
}
|
|
|
|
func (a *stackAdapter) StartStack(name string) error {
|
|
return a.mgr.StartStack(name)
|
|
}
|
|
|
|
func (a *stackAdapter) GetStackHDDMounts(name string) []string {
|
|
s, ok := a.mgr.GetStack(name)
|
|
if !ok {
|
|
return nil
|
|
}
|
|
|
|
// Priority 1: Read the app's own HDD_PATH from its app.yaml
|
|
stackDir := filepath.Dir(s.ComposePath)
|
|
appCfg := stacks.LoadAppConfig(stackDir)
|
|
if appCfg != nil && appCfg.Env["HDD_PATH"] != "" {
|
|
return stacks.ParseComposeHDDMounts(s.ComposePath, appCfg.Env["HDD_PATH"])
|
|
}
|
|
|
|
// Priority 2: Try all registered storage paths (fallback)
|
|
var allMounts []string
|
|
seen := make(map[string]bool)
|
|
for _, sp := range a.getStoragePaths() {
|
|
mounts := stacks.ParseComposeHDDMounts(s.ComposePath, sp.Path)
|
|
for _, m := range mounts {
|
|
if !seen[m] {
|
|
seen[m] = true
|
|
allMounts = append(allMounts, m)
|
|
}
|
|
}
|
|
}
|
|
return allMounts
|
|
}
|
|
|
|
func (a *stackAdapter) GetStackHDDPath(name string) string {
|
|
s, ok := a.mgr.GetStack(name)
|
|
if !ok {
|
|
return ""
|
|
}
|
|
stackDir := filepath.Dir(s.ComposePath)
|
|
appCfg := stacks.LoadAppConfig(stackDir)
|
|
if appCfg != nil && appCfg.Env["HDD_PATH"] != "" {
|
|
return filepath.Clean(appCfg.Env["HDD_PATH"])
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// watchdogStackAdapter implements monitor.WatchdogStackProvider using stacks.Manager.
|
|
type watchdogStackAdapter struct {
|
|
mgr *stacks.Manager
|
|
}
|
|
|
|
func (a *watchdogStackAdapter) ListDeployedStacks() []monitor.WatchdogStackInfo {
|
|
var result []monitor.WatchdogStackInfo
|
|
for _, s := range a.mgr.GetStacks() {
|
|
if !s.Deployed {
|
|
continue
|
|
}
|
|
result = append(result, monitor.WatchdogStackInfo{Name: s.Name})
|
|
}
|
|
return result
|
|
}
|
|
|
|
func (a *watchdogStackAdapter) GetStackHDDPath(name string) string {
|
|
s, ok := a.mgr.GetStack(name)
|
|
if !ok {
|
|
return ""
|
|
}
|
|
stackDir := filepath.Dir(s.ComposePath)
|
|
appCfg := stacks.LoadAppConfig(stackDir)
|
|
if appCfg != nil && appCfg.Env["HDD_PATH"] != "" {
|
|
return filepath.Clean(appCfg.Env["HDD_PATH"])
|
|
}
|
|
return ""
|
|
}
|
|
|
|
func (a *watchdogStackAdapter) StopStack(name string) error {
|
|
return a.mgr.StopStack(name)
|
|
}
|
|
|
|
func (a *watchdogStackAdapter) StartStack(name string) error {
|
|
return a.mgr.StartStack(name)
|
|
}
|
|
|
|
// pushInfraBackup builds and sends the infrastructure snapshot to the Hub.
|
|
func pushInfraBackup(cfg *config.Config, sett *settings.Settings,
|
|
stackProv *stackAdapter, pusher *report.Pusher, logger *log.Logger) {
|
|
|
|
ib, err := report.BuildInfraBackup(
|
|
cfg.Customer.ID, cfg.Customer.Domain, Version,
|
|
"/opt/docker/felhom-controller/controller.yaml",
|
|
filepath.Join(cfg.Paths.DataDir, "settings.json"),
|
|
cfg.Backup.ResticPasswordFile,
|
|
cfg.Paths.SystemDataPath,
|
|
sett, stackProv, logger,
|
|
)
|
|
if err != nil {
|
|
logger.Printf("[WARN] Failed to build infra backup: %v", err)
|
|
return
|
|
}
|
|
|
|
data, err := json.Marshal(ib)
|
|
if err != nil {
|
|
logger.Printf("[WARN] Failed to marshal infra backup: %v", err)
|
|
return
|
|
}
|
|
|
|
if err := pusher.PushInfraBackup(data); err != nil {
|
|
logger.Printf("[WARN] Failed to push infra backup to Hub: %v", err)
|
|
}
|
|
}
|
|
|
|
// fileExists reports whether os.Stat succeeds for path, which is the case for
// both regular files and directories. Any Stat error — including errors other
// than "not found" — yields false.
func fileExists(path string) bool {
	if _, statErr := os.Stat(path); statErr != nil {
		return false
	}
	return true
}
|
|
|
|
// restorePasswordsFromHub restores restic passwords from a Hub infra backup.
|
|
func restorePasswordsFromHub(ib *report.InfraBackup, cfg *config.Config,
|
|
sett *settings.Settings, logger *log.Logger) {
|
|
|
|
if ib.ResticPassword != "" {
|
|
decoded, err := base64.StdEncoding.DecodeString(ib.ResticPassword)
|
|
if err == nil && len(decoded) > 0 {
|
|
dir := filepath.Dir(cfg.Backup.ResticPasswordFile)
|
|
if err := os.MkdirAll(dir, 0700); err != nil {
|
|
logger.Printf("[WARN] Failed to create restic password directory %s: %v", dir, err)
|
|
} else if err := os.WriteFile(cfg.Backup.ResticPasswordFile, decoded, 0600); err == nil {
|
|
logger.Println("[INFO] Primary restic password restored from Hub")
|
|
} else {
|
|
logger.Printf("[WARN] Failed to write restic password file: %v", err)
|
|
}
|
|
}
|
|
}
|
|
|
|
if ib.CrossDrivePassword != "" {
|
|
if err := sett.SetCrossDriveResticPassword(ib.CrossDrivePassword); err == nil {
|
|
logger.Println("[INFO] Cross-drive restic password restored from Hub")
|
|
} else {
|
|
logger.Printf("[WARN] Failed to set cross-drive password: %v", err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// restoreSettingsFromHub restores settings.json from a Hub infra backup.
|
|
func restoreSettingsFromHub(ib *report.InfraBackup, cfg *config.Config, logger *log.Logger) {
|
|
if ib.SettingsJSONB64 == "" {
|
|
return
|
|
}
|
|
decoded, err := base64.StdEncoding.DecodeString(ib.SettingsJSONB64)
|
|
if err != nil {
|
|
logger.Printf("[WARN] Failed to decode settings from Hub: %v", err)
|
|
return
|
|
}
|
|
if err := os.MkdirAll(cfg.Paths.DataDir, 0755); err != nil {
|
|
logger.Printf("[WARN] Failed to create data directory for settings restore: %v", err)
|
|
return
|
|
}
|
|
settingsPath := filepath.Join(cfg.Paths.DataDir, "settings.json")
|
|
if err := os.WriteFile(settingsPath, decoded, 0600); err != nil {
|
|
logger.Printf("[WARN] Failed to write restored settings.json: %v", err)
|
|
} else {
|
|
logger.Println("[INFO] Settings restored from Hub backup")
|
|
}
|
|
}
|
|
|
|
// discoverHDDPaths scans deployed apps' app.yaml for HDD_PATH env values.
|
|
func discoverHDDPaths(stacksDir string, logger *log.Logger) []string {
|
|
entries, err := os.ReadDir(stacksDir)
|
|
if err != nil {
|
|
logger.Printf("[WARN] Cannot read stacks dir for HDD path discovery: %v", err)
|
|
return nil
|
|
}
|
|
seen := make(map[string]bool)
|
|
var paths []string
|
|
for _, e := range entries {
|
|
if !e.IsDir() {
|
|
continue
|
|
}
|
|
appCfg := stacks.LoadAppConfig(filepath.Join(stacksDir, e.Name()))
|
|
if appCfg == nil || !appCfg.Deployed {
|
|
continue
|
|
}
|
|
if hddPath, ok := appCfg.Env["HDD_PATH"]; ok && hddPath != "" {
|
|
cleaned := filepath.Clean(hddPath)
|
|
if !seen[cleaned] {
|
|
seen[cleaned] = true
|
|
paths = append(paths, cleaned)
|
|
}
|
|
}
|
|
}
|
|
return paths
|
|
}
|