// Package backup orchestrates database dumps and restic backups and keeps a
// cached status snapshot for the backup page.
package backup

import (
	"context"
	"fmt"
	"log"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"gitea.dooplex.hu/admin/felhom-controller/internal/config"
	"gitea.dooplex.hu/admin/felhom-controller/internal/monitor"
	"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
)

// Manager orchestrates database dumps and restic backups.
//
// It records the outcome of the most recent dump, backup and integrity check,
// keeps a short in-memory snapshot history, and holds a cached
// FullBackupStatus that is handed out for page rendering.
type Manager struct {
	cfg           *config.Config     // paths, schedules, retention and healthcheck ping UUIDs
	restic        *ResticManager     // wrapper used for every restic operation
	logger        *log.Logger        // destination for all log lines of this package
	pinger        *monitor.Pinger    // healthcheck pinger (Ping on success, Fail on failure)
	settings      *settings.Settings // may be nil; several methods check for that before use
	stackProvider StackDataProvider  // nil until SetStackProvider is called

	// mu is locked by the methods of Manager around accesses to the status
	// fields declared below it.
	mu              sync.Mutex
	lastDBDump      *DBDumpStatus    // result of the most recent DB dump run; nil before the first run
	lastBackup      *BackupStatus    // result of the most recent restic backup; nil before the first run
	running         bool             // true while RunFullBackup is executing
	snapshotHistory []SnapshotRecord // ring buffer, last 20 entries
	lastCheckTime   time.Time        // when restic check last ran
	lastCheckOK     bool             // whether that check returned no error

	// Cached status for page rendering (refreshed periodically)
	cachedStatus *FullBackupStatus
	cacheTime    time.Time // when cachedStatus was last rebuilt

	// AfterBackup, if non-nil, is invoked at the end of a successful
	// RunBackup so the cache can be refreshed.
	// Set by main.go to avoid circular import with scheduler.
	AfterBackup func()
}

// SnapshotRecord combines restic snapshot metadata with our run stats.
type SnapshotRecord struct {
	SnapshotID   string        `json:"snapshot_id"`   // restic snapshot identifier
	Time         time.Time     `json:"time"`          // snapshot time
	FilesNew     int           `json:"files_new"`     // new files reported for the run
	FilesChanged int           `json:"files_changed"` // changed files reported for the run
	DataAdded    string        `json:"data_added"`    // amount of data added, as reported for the run
	Duration     time.Duration `json:"duration"`      // wall-clock duration of the run
	Success      bool          `json:"success"`       // whether the run succeeded
	HasStats     bool          `json:"has_stats"`     // false for historical entries loaded from restic
}

// CrossDriveSummaryItem holds display data for one app's cross-drive backup.
type CrossDriveSummaryItem struct {
	StackName     string
	DisplayName   string
	Method        string // "rsync" or "restic"
	MethodLabel   string // UI label: "Egyszerű másolat" (Hungarian for "simple copy") or "Restic"
	DestPath      string
	DestLabel     string // storage path label
	Schedule      string
	ScheduleLabel string // UI label: "Naponta" (daily), "Hetente" (weekly) or "Kézi" (manual)
	LastStatus    string // "ok", "error", "running", ""
	LastRunShort  string // formatted short time e.g. "03:15"
	SizeHuman     string
}

// FullBackupStatus contains everything the backup page needs.
type FullBackupStatus struct { Enabled bool Running bool // DB Dumps LastDBDump *DBDumpStatus DumpFiles []DumpFileInfo DiscoveredDBs []DiscoveredDB // Restic LastBackup *BackupStatus SnapshotHistory []SnapshotRecord RepoStats *RepoStats // Schedule DBDumpSchedule string ResticSchedule string PruneSchedule string NextDBDump time.Time NextBackup time.Time Retention config.RetentionConfig // Repository health RepoPath string BackupPaths []string LastCheckTime time.Time LastCheckOK bool // Remote (placeholder) RemoteEnabled bool // App data backup AppDataInfo []AppBackupInfo // Cross-drive backup summary CrossDriveSummary []CrossDriveSummaryItem UnconfiguredApps []CrossDriveSummaryItem // apps with HDD data but no cross-drive config CrossDriveWarnings []string // destination health warnings // Flash messages (set by handlers, passed through redirect) FlashSuccess string FlashError string } // DBDumpStatus holds the last DB dump result. type DBDumpStatus struct { LastRun time.Time Results []DumpResult Success bool Duration time.Duration } // BackupStatus holds the last backup result. type BackupStatus struct { LastRun time.Time Snapshot *SnapshotResult Success bool Duration time.Duration RepoStats *RepoStats } // NewManager creates a new backup manager. func NewManager(cfg *config.Config, pinger *monitor.Pinger, sett *settings.Settings, logger *log.Logger) *Manager { return &Manager{ cfg: cfg, restic: NewResticManager(cfg, logger), logger: logger, pinger: pinger, settings: sett, } } // RunDBDumps discovers and dumps all databases. 
func (m *Manager) RunDBDumps(ctx context.Context) error { start := time.Now() m.logger.Printf("[INFO] Starting database dump run") dbs, err := DiscoverDatabases(ctx, m.logger) if err != nil { m.logger.Printf("[ERROR] Database discovery failed: %v", err) return err } if len(dbs) == 0 { m.logger.Printf("[INFO] No database containers found") m.mu.Lock() m.lastDBDump = &DBDumpStatus{ LastRun: time.Now(), Success: true, Duration: time.Since(start), } m.mu.Unlock() return nil } m.logger.Printf("[INFO] Discovered %d database(s): %s", len(dbs), dbNames(dbs)) results := DumpAll(ctx, dbs, m.cfg.Paths.DBDumpDir, m.logger) // Check results and persist validations allOK := true var summary []string var totalSize int64 for _, r := range results { if r.Error != nil { allOK = false summary = append(summary, fmt.Sprintf("FAIL %s: %v", r.DB.ContainerName, r.Error)) m.logger.Printf("[ERROR] DB dump failed for %s: %v", r.DB.ContainerName, r.Error) } else { totalSize += r.Size summary = append(summary, fmt.Sprintf("OK %s (%s)", r.DB.ContainerName, formatBytes(r.Size))) // Persist validation result to settings.json if m.settings != nil && r.FilePath != "" { filename := filepath.Base(r.FilePath) cache := settings.DBValidationCache{ ValidatedAt: time.Now().Format(time.RFC3339), TableCount: r.Validation.TableCount, HasHeader: r.Validation.Valid, } if !r.Validation.Valid { cache.Error = r.Validation.Error } if err := m.settings.SetDBValidation(filename, cache); err != nil { m.logger.Printf("[WARN] Failed to cache validation for %s: %v", filename, err) } } } } duration := time.Since(start) m.mu.Lock() m.lastDBDump = &DBDumpStatus{ LastRun: time.Now(), Results: results, Success: allOK, Duration: duration, } m.mu.Unlock() // Ping healthcheck uuid := m.cfg.Monitoring.PingUUIDs.DBDump body := fmt.Sprintf("DB dump: %d databases, %s total\n%s", len(results), formatBytes(totalSize), strings.Join(summary, "\n")) if allOK { m.pinger.Ping(uuid, body) m.logger.Printf("[INFO] DB dump completed: %d 
databases, %s total (%s)", len(results), formatBytes(totalSize), duration.Round(time.Millisecond)) } else { m.pinger.Fail(uuid, body) return fmt.Errorf("some database dumps failed") } return nil } // RunBackup runs a restic backup snapshot. func (m *Manager) RunBackup(ctx context.Context) error { start := time.Now() m.logger.Printf("[INFO] Starting restic backup") // Ensure repo is initialized if err := m.restic.EnsureInitialized(); err != nil { m.logger.Printf("[ERROR] Restic init failed: %v", err) m.pinger.Fail(m.cfg.Monitoring.PingUUIDs.Backup, fmt.Sprintf("Restic init failed: %v", err)) return err } // Backup paths: base + dynamic app data paths := []string{ m.cfg.Paths.StacksDir, m.cfg.Paths.DBDumpDir, "/opt/docker/felhom-controller/controller.yaml", } appPaths := m.resolveAppBackupPaths() if len(appPaths) > 0 { paths = append(paths, appPaths...) m.logger.Printf("[INFO] Backup paths (%d total, %d app data): %v", len(paths), len(appPaths), paths) } tags := []string{"felhom", m.cfg.Customer.ID} result, err := m.restic.Snapshot(paths, tags) if err != nil { m.logger.Printf("[ERROR] Restic backup failed: %v", err) m.pinger.Fail(m.cfg.Monitoring.PingUUIDs.Backup, fmt.Sprintf("Backup failed: %v", err)) m.mu.Lock() m.lastBackup = &BackupStatus{ LastRun: time.Now(), Success: false, Duration: time.Since(start), } m.mu.Unlock() return err } // Prune check (weekly — Sunday) if shouldPrune(m.cfg.Backup.PruneSchedule) { m.logger.Printf("[INFO] Running weekly prune") if err := m.restic.Prune(m.cfg.Backup.Retention); err != nil { m.logger.Printf("[WARN] Restic prune failed: %v", err) } checkErr := m.restic.Check() if checkErr != nil { m.logger.Printf("[WARN] Restic check failed: %v", checkErr) } m.mu.Lock() m.lastCheckTime = time.Now() m.lastCheckOK = checkErr == nil m.mu.Unlock() } // Get stats stats, _ := m.restic.Stats() duration := time.Since(start) m.mu.Lock() m.lastBackup = &BackupStatus{ LastRun: time.Now(), Snapshot: result, Success: true, Duration: duration, 
RepoStats: stats, } // Append to snapshot history m.appendSnapshotRecord(SnapshotRecord{ SnapshotID: result.SnapshotID, Time: time.Now(), FilesNew: result.FilesNew, FilesChanged: result.FilesChanged, DataAdded: result.DataAdded, Duration: duration, Success: true, HasStats: true, }) m.mu.Unlock() body := fmt.Sprintf("Backup OK\nSnapshot: %s\nNew files: %d, Changed: %d\nData added: %s\nDuration: %s", result.SnapshotID, result.FilesNew, result.FilesChanged, result.DataAdded, duration.Round(time.Second)) m.pinger.Ping(m.cfg.Monitoring.PingUUIDs.Backup, body) m.logger.Printf("[INFO] Restic backup completed: snapshot %s, %d new, %d changed, %s added (%s)", result.SnapshotID, result.FilesNew, result.FilesChanged, result.DataAdded, duration.Round(time.Millisecond)) // Refresh cache so the page shows updated data immediately if m.AfterBackup != nil { m.AfterBackup() } return nil } // RunIntegrityCheck runs restic check and pings healthchecks with the result. func (m *Manager) RunIntegrityCheck(ctx context.Context) error { m.logger.Printf("[INFO] Starting restic integrity check") start := time.Now() if err := m.restic.EnsureInitialized(); err != nil { m.logger.Printf("[ERROR] Restic init failed for integrity check: %v", err) return err } err := m.restic.Check() duration := time.Since(start) uuid := m.cfg.Monitoring.PingUUIDs.BackupIntegrity m.mu.Lock() m.lastCheckTime = time.Now() m.lastCheckOK = err == nil m.mu.Unlock() if err != nil { m.logger.Printf("[ERROR] Restic integrity check failed (%s): %v", duration.Round(time.Second), err) m.pinger.Fail(uuid, fmt.Sprintf("restic check failed: %v", err)) return err } m.logger.Printf("[INFO] Restic integrity check passed (%s)", duration.Round(time.Second)) m.pinger.Ping(uuid, fmt.Sprintf("restic check passed (%s)", duration.Round(time.Second))) return nil } // RunFullBackup runs DB dumps followed by restic backup. 
func (m *Manager) RunFullBackup(ctx context.Context) error { m.mu.Lock() if m.running { m.mu.Unlock() return fmt.Errorf("backup already in progress") } m.running = true m.mu.Unlock() defer func() { m.mu.Lock() m.running = false m.mu.Unlock() }() // Step 1: DB dumps if err := m.RunDBDumps(ctx); err != nil { m.logger.Printf("[WARN] DB dump had errors, continuing with backup anyway") } // Step 2: Restic backup return m.RunBackup(ctx) } // GetStatus returns the current backup status. func (m *Manager) GetStatus() (*DBDumpStatus, *BackupStatus) { m.mu.Lock() defer m.mu.Unlock() return m.lastDBDump, m.lastBackup } // GetRepoStats returns repository statistics. func (m *Manager) GetRepoStats() (*RepoStats, error) { return m.restic.Stats() } // IsRunning returns whether a backup or restore is currently in progress. func (m *Manager) IsRunning() bool { m.mu.Lock() defer m.mu.Unlock() return m.running } // GetResticPassword returns the restic repository encryption password. func (m *Manager) GetResticPassword() (string, error) { return m.restic.GetPassword() } // ListSnapshots returns snapshots from the restic repository. func (m *Manager) ListSnapshots(limit int) ([]SnapshotInfo, error) { return m.restic.ListSnapshots(limit) } // SetStackProvider sets the stack data provider for app data discovery. func (m *Manager) SetStackProvider(provider StackDataProvider) { m.stackProvider = provider } // GetStackHDDMounts returns HDD mount paths for the named stack via the stack provider. func (m *Manager) GetStackHDDMounts(name string) []string { if m.stackProvider == nil { return nil } return m.stackProvider.GetStackHDDMounts(name) } // resolveAppBackupPaths returns HDD paths for all enabled app backups. 
func (m *Manager) resolveAppBackupPaths() []string { if m.stackProvider == nil || m.settings == nil { return nil } appBackupMap := m.settings.GetAppBackupMap() if len(appBackupMap) == 0 { return nil } var paths []string seen := make(map[string]bool) for stackName, enabled := range appBackupMap { if !enabled { continue } hddMounts := m.stackProvider.GetStackHDDMounts(stackName) for _, mount := range hddMounts { if seen[mount] { continue } if _, err := os.Stat(mount); err == nil { paths = append(paths, mount) seen[mount] = true m.logger.Printf("[DEBUG] Including app data: %s (from %s)", mount, stackName) } } } return paths } func shouldPrune(schedule string) bool { loc, err := time.LoadLocation("Europe/Budapest") if err != nil { loc = time.UTC } now := time.Now().In(loc) switch strings.ToLower(schedule) { case "weekly": return now.Weekday() == time.Sunday case "daily": return true default: return now.Weekday() == time.Sunday } } // appendSnapshotRecord adds a record to the ring buffer (max 20). Caller must hold m.mu. func (m *Manager) appendSnapshotRecord(rec SnapshotRecord) { m.snapshotHistory = append(m.snapshotHistory, rec) if len(m.snapshotHistory) > 20 { m.snapshotHistory = m.snapshotHistory[len(m.snapshotHistory)-20:] } } // LoadSnapshotHistory populates the snapshot history from restic on startup. func (m *Manager) LoadSnapshotHistory() { snapshots, err := m.restic.ListSnapshots(20) if err != nil { m.logger.Printf("[WARN] Could not load snapshot history: %v", err) return } m.mu.Lock() defer m.mu.Unlock() for _, s := range snapshots { m.snapshotHistory = append(m.snapshotHistory, SnapshotRecord{ SnapshotID: s.ID, Time: s.Time, HasStats: false, // historical — no delta stats available Success: true, }) } if len(m.snapshotHistory) > 20 { m.snapshotHistory = m.snapshotHistory[len(m.snapshotHistory)-20:] } m.logger.Printf("[INFO] Loaded %d historical snapshots", len(m.snapshotHistory)) } // RefreshCache updates the cached full status. 
Called by scheduler every 5 minutes // and after each backup run. func (m *Manager) RefreshCache(nextDBDump, nextBackup time.Time) { status := &FullBackupStatus{ Enabled: m.cfg.Backup.Enabled, DBDumpSchedule: m.cfg.Backup.DBDumpSchedule, ResticSchedule: m.cfg.Backup.ResticSchedule, PruneSchedule: m.cfg.Backup.PruneSchedule, NextDBDump: nextDBDump, NextBackup: nextBackup, Retention: m.cfg.Backup.Retention, RepoPath: m.cfg.Backup.ResticRepo, BackupPaths: []string{ m.cfg.Paths.StacksDir, m.cfg.Paths.DBDumpDir, "/opt/docker/felhom-controller/controller.yaml", }, } // Expensive calls (outside lock) if stats, err := m.restic.Stats(); err == nil { status.RepoStats = stats } files, filesErr := ListDumpFiles(m.cfg.Paths.DBDumpDir) if filesErr == nil { status.DumpFiles = files } ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() if dbs, err := DiscoverDatabases(ctx, m.logger); err == nil { status.DiscoveredDBs = dbs } // Discover app data (for per-app backup toggles) if m.stackProvider != nil { backupPrefs := m.settings.GetAppBackupMap() status.AppDataInfo = DiscoverAppData(m.stackProvider, backupPrefs, status.DiscoveredDBs) // Include enabled app backup paths in the displayed BackupPaths appPaths := m.resolveAppBackupPaths() if len(appPaths) > 0 { status.BackupPaths = append(status.BackupPaths, appPaths...) } } // Cross-check: if LastDBDump results have empty validation but files exist, // re-validate from disk. This handles controller restarts and race conditions. 
if m.lastDBDump != nil && filesErr == nil { fileValidation := make(map[string]DumpValidation) // keyed by filename for _, f := range files { fileValidation[f.FileName] = f.Validation } for i, r := range m.lastDBDump.Results { if !r.Validation.Valid && r.Validation.Error == "" && r.FilePath != "" { filename := filepath.Base(r.FilePath) if fv, ok := fileValidation[filename]; ok { m.lastDBDump.Results[i].Validation = fv m.logger.Printf("[INFO] Re-validated %s from disk: valid=%v tables=%d", filename, fv.Valid, fv.TableCount) } } } } // Fill in dynamic fields under lock m.mu.Lock() status.Running = m.running status.LastDBDump = m.lastDBDump status.LastBackup = m.lastBackup status.LastCheckTime = m.lastCheckTime status.LastCheckOK = m.lastCheckOK status.SnapshotHistory = make([]SnapshotRecord, len(m.snapshotHistory)) copy(status.SnapshotHistory, m.snapshotHistory) m.cachedStatus = status m.cacheTime = time.Now() m.mu.Unlock() // Reverse so newest first for i, j := 0, len(status.SnapshotHistory)-1; i < j; i, j = i+1, j-1 { status.SnapshotHistory[i], status.SnapshotHistory[j] = status.SnapshotHistory[j], status.SnapshotHistory[i] } m.logger.Printf("[INFO] Backup status cache refreshed") } // GetFullStatus returns the cached backup status for page rendering. // Returns instantly — no subprocess calls. // Returns a deep copy so callers can safely append to slice fields without // polluting the cache (which would cause duplicate entries on repeated calls). func (m *Manager) GetFullStatus(nextDBDump, nextBackup time.Time) *FullBackupStatus { m.mu.Lock() defer m.mu.Unlock() if m.cachedStatus != nil { // Deep copy — callers (backupsHandler) append to CrossDriveSummary, // UnconfiguredApps, and CrossDriveWarnings. If we returned the cache // pointer directly, every page load would accumulate more entries. 
status := *m.cachedStatus status.AppDataInfo = make([]AppBackupInfo, len(m.cachedStatus.AppDataInfo)) copy(status.AppDataInfo, m.cachedStatus.AppDataInfo) // These three slices are assembled by the handler from AppDataInfo + settings; // they must always start empty so the handler builds them fresh. status.CrossDriveSummary = nil status.UnconfiguredApps = nil status.CrossDriveWarnings = nil // Update dynamic fields that don't need subprocess calls status.Running = m.running status.NextDBDump = nextDBDump status.NextBackup = nextBackup status.LastDBDump = m.lastDBDump status.LastBackup = m.lastBackup // Update snapshot history status.SnapshotHistory = make([]SnapshotRecord, len(m.snapshotHistory)) copy(status.SnapshotHistory, m.snapshotHistory) // Reverse so newest first for i, j := 0, len(status.SnapshotHistory)-1; i < j; i, j = i+1, j-1 { status.SnapshotHistory[i], status.SnapshotHistory[j] = status.SnapshotHistory[j], status.SnapshotHistory[i] } // Synthesize LastBackup from snapshot history if not in memory (e.g., after restart) if status.LastBackup == nil && len(status.SnapshotHistory) > 0 { latest := status.SnapshotHistory[0] // already reversed, newest first status.LastBackup = &BackupStatus{ LastRun: latest.Time, Success: latest.Success, Snapshot: &SnapshotResult{ SnapshotID: latest.SnapshotID, }, } } // Synthesize LastDBDump from DumpFiles on disk if not in memory if status.LastDBDump == nil && len(status.DumpFiles) > 0 { var results []DumpResult var latestTime time.Time for _, f := range status.DumpFiles { results = append(results, DumpResult{ DB: DiscoveredDB{StackName: f.StackName, DBType: f.DBType, ContainerName: f.StackName}, FilePath: f.FileName, Size: f.Size, }) if f.ModTime.After(latestTime) { latestTime = f.ModTime } } status.LastDBDump = &DBDumpStatus{ LastRun: latestTime, Results: results, Success: true, } } return &status } // No cache yet — return a minimal status (first page load before cache is populated) return &FullBackupStatus{ Enabled: 
m.cfg.Backup.Enabled, Running: m.running, LastDBDump: m.lastDBDump, LastBackup: m.lastBackup, DBDumpSchedule: m.cfg.Backup.DBDumpSchedule, ResticSchedule: m.cfg.Backup.ResticSchedule, PruneSchedule: m.cfg.Backup.PruneSchedule, NextDBDump: nextDBDump, NextBackup: nextBackup, Retention: m.cfg.Backup.Retention, RepoPath: m.cfg.Backup.ResticRepo, LastCheckTime: m.lastCheckTime, LastCheckOK: m.lastCheckOK, BackupPaths: []string{ m.cfg.Paths.StacksDir, m.cfg.Paths.DBDumpDir, "/opt/docker/felhom-controller/controller.yaml", }, } } func dbNames(dbs []DiscoveredDB) string { var names []string for _, db := range dbs { names = append(names, fmt.Sprintf("%s(%s)", db.ContainerName, db.DBType)) } return strings.Join(names, ", ") }