slice 8C Phase B.2 + C.1/C.2: retire disk subsystem + rewire disk mgmt to agent

Retired (~12.3k LOC): internal/storage/* (scan/format/attach/migrate/safety),
backup restic/crossdrive/restore_drives/disk_layout/local_infra/restore_scan/
paths + restore_app, report/infra_backup*/infra_pull, setup/scanner,
monitor/watchdog+pinger, web/storage_handlers+handler_restore. Surgically split
backup.Manager to app-data only (DB dumps + volume tars + app restore; dropped
restic + cross-drive + snapshot history). Fixed router/main/web wiring.
Added agent-backed disk API (web/agent_disk_handlers.go): /api/disks list/
assign/eject/format proxying agentapi; data-bearing format refusal -> HTTP 409
'operator authorization required'. report/config_pull.go keeps the setup
fresh-install config download. go build + go test green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-10 13:57:27 +02:00
parent 0294513906
commit abe4e8e619
47 changed files with 404 additions and 12317 deletions
+23 -361
View File
@@ -25,8 +25,8 @@ import (
"gitea.dooplex.hu/admin/felhom-controller/internal/backup"
"gitea.dooplex.hu/admin/felhom-controller/internal/bootstrap"
cf "gitea.dooplex.hu/admin/felhom-controller/internal/cloudflare"
"gitea.dooplex.hu/admin/felhom-controller/internal/crypto"
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
"gitea.dooplex.hu/admin/felhom-controller/internal/crypto"
"gitea.dooplex.hu/admin/felhom-controller/internal/integrations"
"gitea.dooplex.hu/admin/felhom-controller/internal/metrics"
"gitea.dooplex.hu/admin/felhom-controller/internal/monitor"
@@ -40,7 +40,6 @@ import (
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
"gitea.dooplex.hu/admin/felhom-controller/internal/setup"
"gitea.dooplex.hu/admin/felhom-controller/internal/stacks"
"gitea.dooplex.hu/admin/felhom-controller/internal/storage"
catalogsync "gitea.dooplex.hu/admin/felhom-controller/internal/sync"
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
"gitea.dooplex.hu/admin/felhom-controller/internal/web"
@@ -186,40 +185,22 @@ func main() {
logger.Println("[INFO] Metrics collector started (60s interval)")
}
// --- Initialize health pinger (legacy, will be removed) ---
pinger := monitor.NewPinger(&cfg.Monitoring, logger)
pinger.SetDebug(cfg.Logging.Level == "debug")
// Deprecation notice for ping UUIDs
// Deprecation notice for ping UUIDs (Healthchecks pinging retired — the Hub
// now owns monitoring; disk-tier backup moved to the host agent in slice 8C).
uuids := cfg.Monitoring.PingUUIDs
if uuids.Heartbeat != "" || uuids.SystemHealth != "" || uuids.DBDump != "" || uuids.Backup != "" || uuids.BackupIntegrity != "" {
logger.Println("[INFO] Healthchecks ping UUIDs configured but no longer used — monitoring is now handled by the Hub")
}
// --- Initialize backup manager ---
// --- Initialize backup manager (app-data only: DB dumps + Docker-volume tars) ---
var backupMgr *backup.Manager
stackProv := &stackAdapter{
mgr: stackMgr,
getStoragePaths: func() []settings.StoragePath { return sett.GetStoragePaths() },
}
if cfg.Backup.Enabled {
backupMgr = backup.NewManager(cfg, pinger, sett, logger)
backupMgr = backup.NewManager(cfg, sett, logger)
backupMgr.SetStackProvider(stackProv)
backupMgr.AfterBackup = func() {
nextDBDump := scheduler.NextDailyRun(cfg.Backup.DBDumpSchedule)
nextBackup := scheduler.NextDailyRun(cfg.Backup.ResticSchedule)
backupMgr.RefreshCache(nextDBDump, nextBackup)
}
go backupMgr.LoadSnapshotHistory()
}
// --- Initialize cross-drive backup runner ---
crossDriveRunner := backup.NewCrossDriveRunner(sett, stackProv, cfg.Paths.SystemDataPath, cfg.Paths.StacksDir, logger, cfg.Logging.Level == "debug")
// Wire cross-drive → backup manager for pre-backup DB dumps
if backupMgr != nil {
crossDriveRunner.SetDBDumper(backupMgr)
crossDriveRunner.SetVolumeDumper(backupMgr)
}
// --- Initialize alert manager ---
@@ -259,26 +240,14 @@ func main() {
return stackMgr.RunHealthProbes()
})
// Heartbeat — lightweight "I'm alive" signal
sched.Every("heartbeat", 5*time.Minute, func(ctx context.Context) error {
pinger.Ping(cfg.Monitoring.PingUUIDs.Heartbeat, "")
return nil
})
// System health ping
// System health check — refreshes dashboard alerts and notifies on changes.
// Healthchecks.io pinging has been retired (the Hub now owns monitoring).
healthInterval, err := time.ParseDuration(cfg.Monitoring.SystemHealthInterval)
if err != nil {
healthInterval = 5 * time.Minute
}
sched.Every("system-health", healthInterval, func(ctx context.Context) error {
healthReport := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths(), logger)
body := healthReport.FormatMessage()
healthUUID := cfg.Monitoring.PingUUIDs.SystemHealth
if healthReport.Status == "fail" {
pinger.Fail(healthUUID, body)
} else {
pinger.Ping(healthUUID, body)
}
// Refresh dashboard alerts from health report
updateAvailable := false
latestVersion := ""
@@ -322,6 +291,8 @@ func main() {
// Backup daily jobs
if cfg.Backup.Enabled && backupMgr != nil {
// App-data backup: daily database dumps. Disk-tier (restic snapshots,
// cross-drive, integrity check, infra backup) has moved to the host agent.
sched.Daily("db-dump", cfg.Backup.DBDumpSchedule, func(ctx context.Context) error {
err := backupMgr.RunDBDumps(ctx)
if err != nil {
@@ -331,53 +302,11 @@ func main() {
}
return err
})
sched.Daily("backup", cfg.Backup.ResticSchedule, func(ctx context.Context) error {
err := backupMgr.RunBackup(ctx)
if err != nil {
notifier.NotifyBackupFailed("Biztonsági mentés sikertelen", err.Error())
} else {
notifier.NotifyBackupCompleted(notify.BackupDetails{})
}
// Phase 3: Chain cross-drive backups immediately after restic (regardless of restic success)
// Daily jobs run every night; weekly jobs only on Sunday
if crossDriveRunner != nil {
if cdErr := crossDriveRunner.RunAllScheduled(ctx, "daily"); cdErr != nil {
logger.Printf("[WARN] Cross-drive daily backup had errors: %v", cdErr)
}
if time.Now().Weekday() == time.Sunday {
if cdErr := crossDriveRunner.RunAllScheduled(ctx, "weekly"); cdErr != nil {
logger.Printf("[WARN] Cross-drive weekly backup had errors: %v", cdErr)
}
}
}
// Push infra backup to Hub after all backup tiers complete
if hubPusher != nil && cfg.Hub.Enabled {
go pushInfraBackup(cfg, sett, stackProv, hubPusher, logger)
}
// Write local infra backup to all connected drives
go writeLocalInfraBackup(cfg, sett, stackProv, logger)
return err
})
// Weekly integrity check — Sunday 04:00
sched.Daily("backup-integrity", "04:00", func(ctx context.Context) error {
if time.Now().Weekday() != time.Sunday {
return nil
}
err := backupMgr.RunIntegrityCheck(ctx)
if err != nil {
notifier.NotifyIntegrityFailed("Mentés integritás ellenőrzés sikertelen", err.Error())
} else {
notifier.NotifyIntegrityOK("Mentés integritás ellenőrzés sikeres")
}
return err
})
// Cache refresh: every 5 minutes
sched.Every("backup-cache", 5*time.Minute, func(ctx context.Context) error {
nextDBDump := scheduler.NextDailyRun(cfg.Backup.DBDumpSchedule)
nextBackup := scheduler.NextDailyRun(cfg.Backup.ResticSchedule)
backupMgr.RefreshCache(nextDBDump, nextBackup)
backupMgr.RefreshCache(nextDBDump)
return nil
})
}
@@ -451,35 +380,8 @@ func main() {
}
}
// --- Storage watchdog ---
storageWatchdog := monitor.NewStorageWatchdog(sett, &watchdogStackAdapter{mgr: stackMgr}, notifier, cfg, logger)
storageWatchdog.SetAlertRefresh(func() {
healthReport := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths(), logger)
updateAvailable := false
latestVersion := ""
if updater != nil {
status := updater.GetStatus()
if status.LastCheck != nil {
updateAvailable = status.LastCheck.UpdateAvailable
latestVersion = status.LastCheck.LatestVersion
}
}
alertMgr.Refresh(healthReport, cfg, backupMgr, updateAvailable, latestVersion, sett.GetStoragePaths())
})
if hubPusher != nil {
storageWatchdog.SetHubReportPusher(func() {
r := report.BuildReport(cfg, *configPath, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths(), sett.GetGeoRestriction(), logger)
hubPusher.Push(r)
})
}
if backupMgr != nil {
storageWatchdog.SetRepoUnlocker(func(ctx context.Context, repoPath string) error {
return backupMgr.UnlockRepo(ctx, repoPath)
})
}
sched.Every("storage-watchdog", 5*time.Second, func(ctx context.Context) error {
return storageWatchdog.Check(ctx)
})
// Storage watchdog (disk disconnect/reconnect detection) has moved to the host
// agent (slice 8C) — the controller no longer owns disk-level monitoring.
// --- Asset syncer (download from Hub) ---
var assetsSyncer *assets.Syncer
@@ -531,21 +433,6 @@ func main() {
"selftest_fail": selfTestResult.Fail,
})
// Heartbeat ping
pinger.Ping(cfg.Monitoring.PingUUIDs.Heartbeat, "startup")
logger.Println("[INFO] Startup heartbeat ping sent")
// System health ping
healthReport := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths(), logger)
body := healthReport.FormatMessage()
healthUUID := cfg.Monitoring.PingUUIDs.SystemHealth
if healthReport.Status == "fail" {
pinger.Fail(healthUUID, body)
} else {
pinger.Ping(healthUUID, body)
}
logger.Printf("[INFO] Startup health ping sent (status: %s)", healthReport.Status)
// Hub report
if hubPusher != nil {
if cfg.Hub.Enabled {
@@ -565,10 +452,6 @@ func main() {
if pushErr != nil {
logger.Printf("[WARN] Startup hub report failed after 3 attempts — next scheduled push in %s", cfg.Hub.PushInterval)
}
// Also push infra backup on startup
go pushInfraBackup(cfg, sett, stackProv, hubPusher, logger)
// Write local infra backup to all connected drives
go writeLocalInfraBackup(cfg, sett, stackProv, logger)
} else {
// Send a minimal "disabled" notification so hub knows reporting is intentionally off
r := &report.Report{
@@ -602,8 +485,7 @@ func main() {
if cfg.Backup.Enabled && backupMgr != nil {
go func() {
nextDBDump := scheduler.NextDailyRun(cfg.Backup.DBDumpSchedule)
nextBackup := scheduler.NextDailyRun(cfg.Backup.ResticSchedule)
backupMgr.RefreshCache(nextDBDump, nextBackup)
backupMgr.RefreshCache(nextDBDump)
}()
}
@@ -626,14 +508,11 @@ func main() {
}()
// --- Initialize API router ---
apiRouter := api.NewRouter(cfg, *configPath, sett, stackMgr, syncer, cpuCollector, backupMgr, crossDriveRunner, metricsStore, updater, notifier, logger)
apiRouter := api.NewRouter(cfg, *configPath, sett, stackMgr, syncer, cpuCollector, backupMgr, metricsStore, updater, notifier, logger)
if hubPusher != nil {
apiRouter.OnConfigApplied = func() {
pushInfraBackup(cfg, sett, stackProv, hubPusher, logger)
}
apiRouter.OnCrossDriveComplete = func() {
pushInfraBackup(cfg, sett, stackProv, hubPusher, logger)
writeLocalInfraBackup(cfg, sett, stackProv, logger)
// Infra backup push is now the host agent's responsibility; the controller
// only refreshes the Hub report after a config apply.
}
}
if assetsSyncer != nil {
@@ -694,11 +573,10 @@ func main() {
apiRouter.SetDebug(cfg.Logging.Level == "debug")
// --- Initialize web server ---
webServer := web.NewServer(cfg, stackMgr, cpuCollector, backupMgr, crossDriveRunner, sched, sett, alertMgr, notifier, updater, logger, Version)
webServer := web.NewServer(cfg, stackMgr, cpuCollector, backupMgr, sched, sett, alertMgr, notifier, updater, logger, Version)
webServer.SetEncryptionKey(encKey)
webServer.SetAppExporter(appExporter)
webServer.SetIntegrationManager(integrationMgr)
webServer.SetStorageWatchdog(storageWatchdog)
if assetsSyncer != nil {
webServer.SetAssetsSyncer(assetsSyncer)
}
@@ -726,14 +604,6 @@ func main() {
r := report.BuildReport(cfg, *configPath, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths(), sett.GetGeoRestriction(), logger)
return hubPusher.Push(r)
}
dc.TriggerHubInfraPush = func() error {
pushInfraBackup(cfg, sett, stackProv, hubPusher, logger)
return nil
}
}
dc.TriggerLocalInfraWrite = func() error {
writeLocalInfraBackup(cfg, sett, stackProv, logger)
return nil
}
dc.HubConnectivityTest = func() (int, int64, error) {
start := time.Now()
@@ -765,55 +635,18 @@ func main() {
webServer.SetDebugCallbacks(dc)
}
// --- Initialize drive migrator ---
driveMigrator := &storage.DriveMigrator{
Sett: sett,
StackProvider: &driveMigrateStackAdapter{mgr: stackMgr},
Logger: logger,
}
// Only set BackupTrigger if backup is enabled (avoid non-nil interface with nil concrete value)
if backupMgr != nil {
driveMigrator.BackupTrigger = backupMgr
}
driveMigrator.AlertRefresh = func() {
healthReport := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths(), logger)
updateAvailable := false
latestVersion := ""
if updater != nil {
status := updater.GetStatus()
if status.LastCheck != nil {
updateAvailable = status.LastCheck.UpdateAvailable
latestVersion = status.LastCheck.LatestVersion
}
}
alertMgr.Refresh(healthReport, cfg, backupMgr, updateAvailable, latestVersion, sett.GetStoragePaths())
}
if hubPusher != nil {
driveMigrator.PushHubReport = func() {
r := report.BuildReport(cfg, *configPath, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths(), sett.GetGeoRestriction(), logger)
hubPusher.Push(r)
}
driveMigrator.PushInfraBackup = func() {
pushInfraBackup(cfg, sett, stackProv, hubPusher, logger)
}
}
driveMigrator.SyncFBMounts = func() {
webServer.SyncFileBrowserMounts()
}
webServer.SetDriveMigrator(driveMigrator)
// Wire migration-active check into backup manager
if backupMgr != nil {
backupMgr.MigrationActiveCheck = driveMigrator.IsActive
}
// Drive migration (full-drive move) has moved to the host agent (slice 8C);
// the controller no longer runs a DriveMigrator.
// --- Build HTTP mux ---
mux := http.NewServeMux()
// API routes (no auth for health endpoint, auth for everything else)
mux.HandleFunc("/api/health", apiRouter.HealthHandler)
// Storage API routes handled by web server (longer prefix takes precedence over /api/)
mux.Handle("/api/storage/", webServer.RequireAuth(webServer.CsrfProtect(http.HandlerFunc(webServer.ServeStorageAPI))))
// Disk management API — thin proxy to the host agent (slice 8C). The agent owns
// disk execution; the controller forwards list/assign/eject/format.
mux.Handle("/api/disks", webServer.RequireAuth(webServer.CsrfProtect(http.HandlerFunc(webServer.ServeDiskAPI))))
mux.Handle("/api/disks/", webServer.RequireAuth(webServer.CsrfProtect(http.HandlerFunc(webServer.ServeDiskAPI))))
// App export/import API routes handled by web server
mux.Handle("/api/export/", webServer.RequireAuth(webServer.CsrfProtect(http.HandlerFunc(webServer.ServeExportAPI))))
// Debug API routes handled by web server (debug-mode gating inside handler)
@@ -1028,102 +861,6 @@ func (a *geoStackAdapter) GetDeployedHostnames() map[string]string {
return result
}
// watchdogStackAdapter implements monitor.WatchdogStackProvider using stacks.Manager.
type watchdogStackAdapter struct {
mgr *stacks.Manager
}
func (a *watchdogStackAdapter) ListDeployedStacks() []monitor.WatchdogStackInfo {
var result []monitor.WatchdogStackInfo
for _, s := range a.mgr.GetStacks() {
if !s.Deployed {
continue
}
result = append(result, monitor.WatchdogStackInfo{Name: s.Name})
}
return result
}
func (a *watchdogStackAdapter) GetStackHDDPath(name string) string {
s, ok := a.mgr.GetStack(name)
if !ok {
return ""
}
stackDir := filepath.Dir(s.ComposePath)
appCfg := stacks.LoadAppConfig(stackDir)
if appCfg != nil && appCfg.Env["HDD_PATH"] != "" {
return filepath.Clean(appCfg.Env["HDD_PATH"])
}
return ""
}
func (a *watchdogStackAdapter) StopStack(name string) error {
return a.mgr.StopStack(name)
}
func (a *watchdogStackAdapter) StartStack(name string) error {
return a.mgr.StartStack(name)
}
// driveMigrateStackAdapter implements storage.StackProviderForMigration using stacks.Manager.
type driveMigrateStackAdapter struct {
mgr *stacks.Manager
}
func (a *driveMigrateStackAdapter) ListDeployedStacks() []storage.StackSummaryForMigration {
var result []storage.StackSummaryForMigration
for _, s := range a.mgr.GetStacks() {
if !s.Deployed {
continue
}
result = append(result, storage.StackSummaryForMigration{
Name: s.Name,
DisplayName: s.Meta.DisplayName,
})
}
return result
}
func (a *driveMigrateStackAdapter) GetStackHDDPath(name string) string {
s, ok := a.mgr.GetStack(name)
if !ok {
return ""
}
stackDir := filepath.Dir(s.ComposePath)
appCfg := stacks.LoadAppConfig(stackDir)
if appCfg != nil && appCfg.Env["HDD_PATH"] != "" {
return filepath.Clean(appCfg.Env["HDD_PATH"])
}
return ""
}
func (a *driveMigrateStackAdapter) StopStack(name string) error {
return a.mgr.StopStack(name)
}
func (a *driveMigrateStackAdapter) StartStack(name string) error {
return a.mgr.StartStack(name)
}
func (a *driveMigrateStackAdapter) UpdateStackHDDPath(name, newPath string) error {
s, ok := a.mgr.GetStack(name)
if !ok {
return fmt.Errorf("stack not found: %s", name)
}
stackDir := filepath.Dir(s.ComposePath)
appCfg := stacks.LoadAppConfig(stackDir)
if appCfg == nil {
return fmt.Errorf("app.yaml not found for stack: %s", name)
}
appCfg.Env["HDD_PATH"] = newPath
return stacks.SaveAppConfig(stackDir, appCfg, nil, nil)
}
func (a *driveMigrateStackAdapter) StackExists(name string) bool {
_, ok := a.mgr.GetStack(name)
return ok
}
// exportAdapter implements appexport.ExportStackProvider using stacks.Manager.
type exportAdapter struct {
mgr *stacks.Manager
@@ -1271,36 +1008,6 @@ func (a *exportAdapter) RemoveStackVolumes(name string) error {
return nil
}
// pushInfraBackup builds and sends the infrastructure snapshot to the Hub.
func pushInfraBackup(cfg *config.Config, sett *settings.Settings,
stackProv *stackAdapter, pusher *report.Pusher, logger *log.Logger) {
encKeyPath := filepath.Join(cfg.Paths.DataDir, "encryption.key")
ib, err := report.BuildInfraBackup(
cfg.Customer.ID, cfg.Customer.Domain, Version,
"/opt/docker/felhom-controller/controller.yaml",
filepath.Join(cfg.Paths.DataDir, "settings.json"),
cfg.Backup.ResticPasswordFile,
encKeyPath,
cfg.Paths.SystemDataPath,
sett, stackProv, logger,
)
if err != nil {
logger.Printf("[WARN] Failed to build infra backup: %v", err)
return
}
data, err := json.Marshal(ib)
if err != nil {
logger.Printf("[WARN] Failed to marshal infra backup: %v", err)
return
}
if err := pusher.PushInfraBackup(data); err != nil {
logger.Printf("[WARN] Failed to push infra backup to Hub: %v", err)
}
}
// fileExists returns true if the path exists (file or directory).
func fileExists(path string) bool {
_, err := os.Stat(path)
@@ -1461,51 +1168,6 @@ func runSetupMode(cfg *config.Config, logger *log.Logger) {
logger.Println("[INFO] Setup wizard stopped")
}
// writeLocalInfraBackup builds an infra snapshot and writes it to all connected drives.
func writeLocalInfraBackup(cfg *config.Config, sett *settings.Settings,
stackProv *stackAdapter, logger *log.Logger) {
encKeyPath := filepath.Join(cfg.Paths.DataDir, "encryption.key")
ib, err := report.BuildInfraBackup(
cfg.Customer.ID, cfg.Customer.Domain, Version,
"/opt/docker/felhom-controller/controller.yaml",
filepath.Join(cfg.Paths.DataDir, "settings.json"),
cfg.Backup.ResticPasswordFile,
encKeyPath,
cfg.Paths.SystemDataPath,
sett, stackProv, logger,
)
if err != nil {
logger.Printf("[WARN] Failed to build infra backup for local write: %v", err)
return
}
data, err := json.Marshal(ib)
if err != nil {
logger.Printf("[WARN] Failed to marshal infra backup for local write: %v", err)
return
}
// Collect all connected drive paths (skip disconnected and decommissioned)
var drives []string
for _, sp := range sett.GetStoragePaths() {
if !sp.Disconnected && !sp.Decommissioned {
drives = append(drives, sp.Path)
}
}
// Also include system data path if set
if cfg.Paths.SystemDataPath != "" {
drives = append(drives, cfg.Paths.SystemDataPath)
}
if len(drives) == 0 {
logger.Println("[DEBUG] No connected drives for local infra backup")
return
}
backup.WriteLocalInfraBackup(data, cfg.Customer.ID, Version, ib.Timestamp, drives, logger, cfg.Logging.Level == "debug")
}
// discoverHDDPaths scans deployed apps' app.yaml for HDD_PATH env values.
func discoverHDDPaths(stacksDir string, logger *log.Logger) []string {
entries, err := os.ReadDir(stacksDir)