package backup

import (
	"context"
	"encoding/json"
	"fmt"
	"log"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"sync"
	"time"

	"gitea.dooplex.hu/admin/felhom-controller/internal/config"
	"gitea.dooplex.hu/admin/felhom-controller/internal/monitor"
	"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
)

// Manager orchestrates database dumps and restic backups.
//
// A single Manager tracks the outcome of the most recent DB dump, backup and
// integrity check, and keeps a bounded history of snapshot records that is
// also persisted to disk as JSON.
type Manager struct {
	cfg            *config.Config
	restic         *ResticManager
	logger         *log.Logger
	pinger         *monitor.Pinger    // healthcheck pings (Ping on success, Fail on failure)
	settings       *settings.Settings // may be nil; every use in this file is nil-checked
	stackProvider  StackDataProvider  // installed via SetStackProvider; nil until then
	systemDataPath string             // fallback drive for SSD-only apps

	// mu guards the status fields below.
	mu         sync.Mutex
	lastDBDump *DBDumpStatus
	lastBackup *BackupStatus
	// running is the "an operation is in progress" flag; it is set by
	// acquireRunning / tryAcquireRunning and cleared by releaseRunning.
	running             bool
	snapshotHistory     []SnapshotRecord // ring buffer, last 20 entries
	snapshotHistoryFile string           // path to persist snapshot history JSON
	lastCheckTime       time.Time        // when RunIntegrityCheck last finished
	lastCheckOK         bool             // whether that check found no errors

	// Cached status for page rendering (refreshed periodically)
	cachedStatus *FullBackupStatus
	cacheTime    time.Time

	// AfterBackup is called after a backup completes to refresh the cache.
	// Set by main.go to avoid circular import with scheduler.
	AfterBackup func()

	// MigrationActiveCheck returns true if a full drive migration is in progress.
	// Set by main.go to coordinate with DriveMigrator.
	MigrationActiveCheck func() bool
}

// SnapshotRecord combines restic snapshot metadata with our run stats.
// Records are serialized to the snapshot history file using the JSON tags below.
type SnapshotRecord struct {
	SnapshotID   string        `json:"snapshot_id"`
	Time         time.Time     `json:"time"`
	FilesNew     int           `json:"files_new"`
	FilesChanged int           `json:"files_changed"`
	DataAdded    string        `json:"data_added"`
	Duration     time.Duration `json:"duration"`
	Success      bool          `json:"success"`
	HasStats     bool          `json:"has_stats"` // false for historical entries loaded from restic
}

// DriveRepoInfo holds per-drive restic repository statistics for the "Részletek" (Details) section.
type DriveRepoInfo struct {
	DrivePath      string // drive whose primary repo the numbers describe
	DriveLabel     string // filled by handler from settings
	TotalSize      string // human-readable size as reported by the restic stats call
	TotalSizeBytes int64
	SnapshotCount  int
}

// CrossDriveSummaryItem holds display data for one app's cross-drive backup.
type CrossDriveSummaryItem struct {
	StackName     string
	DisplayName   string
	Method        string // "rsync" or "restic"
	MethodLabel   string // UI label: "Egyszerű másolat" (simple copy) or "Restic"
	DestPath      string
	DestLabel     string // storage path label
	Schedule      string
	ScheduleLabel string // UI label: "Naponta" (daily), "Hetente" (weekly) or "Kézi" (manual)
	LastStatus    string // "ok", "error", "running", ""
	LastRunShort  string // formatted short time e.g. "03:15"
	SizeHuman     string
}

// FullBackupStatus contains everything the backup page needs.
type FullBackupStatus struct {
	Enabled bool
	Running bool

	// DB Dumps
	LastDBDump    *DBDumpStatus
	DumpFiles     []DumpFileInfo
	DiscoveredDBs []DiscoveredDB

	// Restic
	LastBackup        *BackupStatus
	SnapshotHistory   []SnapshotRecord
	RepoStats         *RepoStats
	PerDriveRepoStats []DriveRepoInfo // per-drive Tier 1 restic stats

	// Schedule
	DBDumpSchedule string
	ResticSchedule string
	PruneSchedule  string
	NextDBDump     time.Time
	NextBackup     time.Time
	Retention      config.RetentionConfig

	// Repository health
	LastCheckTime time.Time
	LastCheckOK   bool

	// Remote (placeholder)
	RemoteEnabled bool

	// App data backup
	AppDataInfo []AppBackupInfo

	// Cross-drive backup summary
	CrossDriveSummary  []CrossDriveSummaryItem
	UnconfiguredApps   []CrossDriveSummaryItem // apps with HDD data but no cross-drive config
	CrossDriveWarnings []string                // destination health warnings

	// Flash messages (set by handlers, passed through redirect)
	FlashSuccess string
	FlashError   string
}

// DBDumpStatus holds the last DB dump result.
type DBDumpStatus struct {
	LastRun  time.Time
	Results  []DumpResult  // one entry per database that was actually dumped (skipped DBs are absent)
	Success  bool          // true when no dump reported an error
	Duration time.Duration // wall-clock time of the whole dump run
}

// BackupStatus holds the last backup result.
type BackupStatus struct { LastRun time.Time Snapshot *SnapshotResult Success bool Duration time.Duration RepoStats *RepoStats } // NewManager creates a new backup manager. func NewManager(cfg *config.Config, pinger *monitor.Pinger, sett *settings.Settings, logger *log.Logger) *Manager { if cfg.Paths.SystemDataPath == "" { logger.Printf("[WARN] SystemDataPath is empty in config — SSD-only apps will not have correct backup paths") } dataDir := cfg.Paths.DataDir if dataDir == "" { dataDir = "/opt/docker/felhom-controller/data" } return &Manager{ cfg: cfg, restic: NewResticManager(cfg, logger), logger: logger, pinger: pinger, settings: sett, systemDataPath: cfg.Paths.SystemDataPath, snapshotHistoryFile: filepath.Join(dataDir, "snapshot-history.json"), } } // GetAppDrivePath returns the drive path for an app. // Uses HDD_PATH from app.yaml if set, otherwise falls back to system data path. func (m *Manager) GetAppDrivePath(stackName string) string { if m.stackProvider != nil { if hddPath := m.stackProvider.GetStackHDDPath(stackName); hddPath != "" { return hddPath } } if m.systemDataPath == "" { m.logger.Printf("[ERROR] systemDataPath is empty — cannot determine drive for %s", stackName) } return m.systemDataPath } // groupStacksByDrive groups deployed stacks by their home drive path. func (m *Manager) groupStacksByDrive() map[string][]StackSummary { if m.stackProvider == nil { return nil } result := make(map[string][]StackSummary) for _, stack := range m.stackProvider.ListDeployedStacks() { drive := m.GetAppDrivePath(stack.Name) result[drive] = append(result[drive], stack) } if m.isDebug() { for drive, stacks := range result { names := make([]string, len(stacks)) for i, s := range stacks { names[i] = s.Name } m.logger.Printf("[DEBUG] groupStacksByDrive: %s → [%s]", drive, strings.Join(names, ", ")) } } return result } // activeDrives returns sorted list of drives that have deployed apps. 
func (m *Manager) activeDrives() []string { groups := m.groupStacksByDrive() var drives []string var disconnected []string for d := range groups { if m.settings != nil && (m.settings.IsDisconnected(d) || m.settings.IsDecommissioned(d)) { disconnected = append(disconnected, d) } drives = append(drives, d) } sort.Strings(drives) if m.isDebug() { m.logger.Printf("[DEBUG] activeDrives: %d total (%s), %d disconnected/decommissioned", len(drives), strings.Join(drives, ", "), len(disconnected)) } return drives } // RunDBDumps discovers and dumps all databases to per-drive, per-app paths. func (m *Manager) RunDBDumps(ctx context.Context) error { if err := m.acquireRunning(); err != nil { return err } defer m.releaseRunning() return m.runDBDumpsInternal(ctx) } // runDBDumpsInternal is the implementation of RunDBDumps. Caller must hold the running flag. func (m *Manager) runDBDumpsInternal(ctx context.Context) error { start := time.Now() m.logger.Printf("[INFO] Starting database dump run") dbs, err := DiscoverDatabases(ctx, m.logger, m.isDebug()) if err != nil { m.logger.Printf("[ERROR] Database discovery failed: %v", err) return err } if len(dbs) == 0 { m.logger.Printf("[INFO] No database containers found") m.mu.Lock() m.lastDBDump = &DBDumpStatus{ LastRun: time.Now(), Success: true, Duration: time.Since(start), } m.mu.Unlock() return nil } m.logger.Printf("[INFO] Discovered %d database(s): %s", len(dbs), dbNames(dbs)) // Dump each DB to its app's drive path var results []DumpResult allOK := true var summary []string var totalSize int64 for _, db := range dbs { drivePath := m.GetAppDrivePath(db.StackName) // Skip if drive is disconnected or decommissioned if m.settings != nil && m.settings.IsDisconnected(drivePath) { m.logger.Printf("[WARN] Skipping DB dump for %s — drive disconnected: %s", db.StackName, drivePath) summary = append(summary, fmt.Sprintf("SKIP %s (drive disconnected)", db.ContainerName)) continue } if m.settings != nil && 
m.settings.IsDecommissioned(drivePath) { m.logger.Printf("[WARN] Skipping DB dump for %s — drive decommissioned: %s", db.StackName, drivePath) summary = append(summary, fmt.Sprintf("SKIP %s (drive decommissioned)", db.ContainerName)) continue } dumpDir := AppDBDumpPath(drivePath, db.StackName) result := DumpOne(ctx, db, dumpDir, m.logger, m.isDebug()) results = append(results, result) if result.Error != nil { allOK = false summary = append(summary, fmt.Sprintf("FAIL %s: %v", result.DB.ContainerName, result.Error)) m.logger.Printf("[ERROR] DB dump failed for %s: %v", result.DB.ContainerName, result.Error) } else { totalSize += result.Size summary = append(summary, fmt.Sprintf("OK %s (%s)", result.DB.ContainerName, humanizeBytes(result.Size))) // Persist validation result to settings.json if m.settings != nil && result.FilePath != "" { filename := filepath.Base(result.FilePath) cache := settings.DBValidationCache{ ValidatedAt: time.Now().Format(time.RFC3339), TableCount: result.Validation.TableCount, HasHeader: result.Validation.Valid, } if !result.Validation.Valid { cache.Error = result.Validation.Error } if err := m.settings.SetDBValidation(filename, cache); err != nil { m.logger.Printf("[WARN] Failed to cache validation for %s: %v", filename, err) } } } } duration := time.Since(start) m.mu.Lock() m.lastDBDump = &DBDumpStatus{ LastRun: time.Now(), Results: results, Success: allOK, Duration: duration, } m.mu.Unlock() // Ping healthcheck uuid := m.cfg.Monitoring.PingUUIDs.DBDump body := fmt.Sprintf("DB dump: %d databases, %s total\n%s", len(results), humanizeBytes(totalSize), strings.Join(summary, "\n")) if allOK { m.pinger.Ping(uuid, body) m.logger.Printf("[INFO] DB dump completed: %d databases, %s total (%s)", len(results), humanizeBytes(totalSize), duration.Round(time.Millisecond)) } else { m.pinger.Fail(uuid, body) return fmt.Errorf("some database dumps failed") } return nil } // RunBackup runs per-drive restic backup snapshots. 
func (m *Manager) RunBackup(ctx context.Context) error { if err := m.acquireRunning(); err != nil { return err } defer m.releaseRunning() return m.runBackupInternal(ctx) } // runBackupInternal is the implementation of RunBackup. Caller must hold the running flag. func (m *Manager) runBackupInternal(ctx context.Context) error { // Skip if a full drive migration is in progress if m.MigrationActiveCheck != nil && m.MigrationActiveCheck() { m.logger.Printf("[WARN] Skipping nightly backup — drive migration in progress") return nil } start := time.Now() m.logger.Printf("[INFO] Starting restic backup (per-drive)") driveStacks := m.groupStacksByDrive() if len(driveStacks) == 0 { m.logger.Printf("[INFO] No deployed stacks — skipping backup") return nil } // Infrastructure paths included in every drive's primary repo infraPaths := []string{ m.cfg.Paths.StacksDir, "/opt/docker/felhom-controller/controller.yaml", } var lastResult *SnapshotResult var anyErr error driveCount := 0 for drivePath, stacks := range driveStacks { if m.isDebug() { m.logger.Printf("[DEBUG] runBackupInternal: processing drive %s (%d stacks)", drivePath, len(stacks)) } result, err := m.backupDrive(ctx, drivePath, stacks, infraPaths) if err != nil { anyErr = err continue } if result != nil { lastResult = result driveCount++ } } duration := time.Since(start) if anyErr != nil && driveCount == 0 { // All drives failed m.pinger.Fail(m.cfg.Monitoring.PingUUIDs.Backup, fmt.Sprintf("Backup failed: %v", anyErr)) m.mu.Lock() m.lastBackup = &BackupStatus{ LastRun: time.Now(), Success: false, Duration: duration, } m.mu.Unlock() return anyErr } // Get aggregated stats stats := m.aggregateRepoStats() m.mu.Lock() m.lastBackup = &BackupStatus{ LastRun: time.Now(), Snapshot: lastResult, Success: anyErr == nil, Duration: duration, RepoStats: stats, } if lastResult != nil { m.appendSnapshotRecord(SnapshotRecord{ SnapshotID: lastResult.SnapshotID, Time: time.Now(), FilesNew: lastResult.FilesNew, FilesChanged: 
lastResult.FilesChanged, DataAdded: lastResult.DataAdded, Duration: duration, Success: true, HasStats: true, }) } m.mu.Unlock() if lastResult != nil { body := fmt.Sprintf("Backup OK (%d drives)\nSnapshot: %s\nNew files: %d, Changed: %d\nData added: %s\nDuration: %s", driveCount, lastResult.SnapshotID, lastResult.FilesNew, lastResult.FilesChanged, lastResult.DataAdded, duration.Round(time.Second)) m.pinger.Ping(m.cfg.Monitoring.PingUUIDs.Backup, body) m.logger.Printf("[INFO] Restic backup completed: %d drives, snapshot %s, %d new, %d changed, %s added (%s)", driveCount, lastResult.SnapshotID, lastResult.FilesNew, lastResult.FilesChanged, lastResult.DataAdded, duration.Round(time.Millisecond)) } // Refresh cache so the page shows updated data immediately if m.AfterBackup != nil { m.AfterBackup() } return anyErr } // backupDrive runs restic backup for a single drive. Returns nil result if skipped. // Caller must hold the running flag. func (m *Manager) backupDrive(ctx context.Context, drivePath string, stacks []StackSummary, infraPaths []string) (*SnapshotResult, error) { // Skip disconnected or decommissioned drives if m.settings != nil && m.settings.IsDisconnected(drivePath) { m.logger.Printf("[WARN] Skipping backup for drive %s — disconnected", drivePath) return nil, nil } if m.settings != nil && m.settings.IsDecommissioned(drivePath) { m.logger.Printf("[WARN] Skipping backup for drive %s — decommissioned", drivePath) return nil, nil } repoPath := PrimaryResticRepoPath(drivePath) // Ensure repo is initialized if err := m.restic.EnsureInitialized(repoPath); err != nil { m.logger.Printf("[ERROR] Restic init failed for %s: %v", repoPath, err) return nil, err } // Build paths for this drive var paths []string paths = append(paths, infraPaths...) 
for _, stack := range stacks { // App data (appdata//) appData := AppDataDir(drivePath, stack.Name) if _, err := os.Stat(appData); err == nil { paths = append(paths, appData) } // HDD mounts (for apps with custom mount points) if m.stackProvider != nil { for _, mount := range m.stackProvider.GetStackHDDMounts(stack.Name) { if _, err := os.Stat(mount); err == nil { paths = append(paths, mount) } } } // DB dumps for this stack dumpDir := AppDBDumpPath(drivePath, stack.Name) if _, err := os.Stat(dumpDir); err == nil { paths = append(paths, dumpDir) } } // Deduplicate paths paths = dedup(paths) if m.isDebug() { m.logger.Printf("[DEBUG] backupDrive %s: repo=%s, %d include paths:", drivePath, repoPath, len(paths)) for _, p := range paths { m.logger.Printf("[DEBUG] %s", p) } } tags := []string{"felhom", m.cfg.Customer.ID, filepath.Base(drivePath)} m.logger.Printf("[INFO] Backing up drive %s (%d apps, %d paths)", drivePath, len(stacks), len(paths)) result, err := m.restic.Snapshot(repoPath, paths, tags) if err != nil { m.logger.Printf("[ERROR] Restic backup failed for drive %s: %v", drivePath, err) return nil, err } // Prune check (weekly — Sunday) if shouldPrune(m.cfg.Backup.PruneSchedule) { m.logger.Printf("[INFO] Running weekly prune for %s", repoPath) if err := m.restic.Prune(repoPath, m.cfg.Backup.Retention); err != nil { m.logger.Printf("[WARN] Restic prune failed for %s: %v", repoPath, err) } } return result, nil } // tryAcquireRunning attempts to set the running flag without blocking. // Returns true if acquired, false if already running. func (m *Manager) tryAcquireRunning() bool { m.mu.Lock() defer m.mu.Unlock() if m.running { return false } m.running = true return true } // TryRunDriveBackup runs a backup for a single drive if no other backup is in progress. // Returns error if the backup lock cannot be acquired or if backup fails. 
func (m *Manager) TryRunDriveBackup(ctx context.Context, drivePath string) error { if !m.tryAcquireRunning() { return fmt.Errorf("backup already in progress") } defer m.releaseRunning() driveStacks := m.groupStacksByDrive() stacks, ok := driveStacks[drivePath] if !ok || len(stacks) == 0 { m.logger.Printf("[INFO] No deployed stacks on drive %s — skipping backup", drivePath) return nil } infraPaths := []string{ m.cfg.Paths.StacksDir, "/opt/docker/felhom-controller/controller.yaml", } result, err := m.backupDrive(ctx, drivePath, stacks, infraPaths) if err != nil { return err } if result != nil { m.logger.Printf("[INFO] Single-drive backup for %s: snapshot %s, %d new, %d changed, %s added", drivePath, result.SnapshotID, result.FilesNew, result.FilesChanged, result.DataAdded) } return nil } // RunIntegrityCheck runs restic check on all primary repos and pings healthchecks. func (m *Manager) RunIntegrityCheck(ctx context.Context) error { m.logger.Printf("[INFO] Starting restic integrity check") start := time.Now() drives := m.activeDrives() if len(drives) == 0 { m.logger.Printf("[INFO] No active drives — skipping integrity check") return nil } if m.isDebug() { m.logger.Printf("[DEBUG] RunIntegrityCheck: checking %d drives", len(drives)) } var checkErr error for _, drive := range drives { repoPath := PrimaryResticRepoPath(drive) if !m.restic.RepoExists(repoPath) { if m.isDebug() { m.logger.Printf("[DEBUG] RunIntegrityCheck: skipping %s (repo does not exist)", repoPath) } continue } if m.isDebug() { m.logger.Printf("[DEBUG] RunIntegrityCheck: checking repo %s", repoPath) } if err := m.restic.Check(repoPath); err != nil { m.logger.Printf("[ERROR] Restic check failed for %s: %v", repoPath, err) checkErr = err } else if m.isDebug() { m.logger.Printf("[DEBUG] RunIntegrityCheck: repo %s OK", repoPath) } } duration := time.Since(start) uuid := m.cfg.Monitoring.PingUUIDs.BackupIntegrity m.mu.Lock() m.lastCheckTime = time.Now() m.lastCheckOK = checkErr == nil m.mu.Unlock() if 
checkErr != nil { m.logger.Printf("[ERROR] Restic integrity check failed (%s): %v", duration.Round(time.Second), checkErr) m.pinger.Fail(uuid, fmt.Sprintf("restic check failed: %v", checkErr)) return checkErr } m.logger.Printf("[INFO] Restic integrity check passed (%d repos, %s)", len(drives), duration.Round(time.Second)) m.pinger.Ping(uuid, fmt.Sprintf("restic check passed (%d repos, %s)", len(drives), duration.Round(time.Second))) return nil } // RunFullBackup runs DB dumps followed by restic backup. func (m *Manager) RunFullBackup(ctx context.Context) error { if err := m.acquireRunning(); err != nil { return err } defer m.releaseRunning() if m.isDebug() { drives := m.activeDrives() driveStacks := m.groupStacksByDrive() totalStacks := 0 for _, s := range driveStacks { totalStacks += len(s) } m.logger.Printf("[DEBUG] RunFullBackup: starting full backup — %d active drives, %d stacks", len(drives), totalStacks) } // Step 1: DB dumps if m.isDebug() { m.logger.Printf("[DEBUG] RunFullBackup: phase 1 — database dumps") } if err := m.runDBDumpsInternal(ctx); err != nil { m.logger.Printf("[WARN] DB dump had errors, continuing with backup anyway") } // Step 2: Restic backup if m.isDebug() { m.logger.Printf("[DEBUG] RunFullBackup: phase 2 — restic snapshots") } return m.runBackupInternal(ctx) } // GetStatus returns the current backup status. func (m *Manager) GetStatus() (*DBDumpStatus, *BackupStatus) { m.mu.Lock() defer m.mu.Unlock() return m.lastDBDump, m.lastBackup } // GetRepoStats returns aggregated repository statistics across all primary repos. func (m *Manager) GetRepoStats() (*RepoStats, error) { stats := m.aggregateRepoStats() if stats.SnapshotCount == 0 && stats.TotalSize == "" { return stats, fmt.Errorf("no repos available") } return stats, nil } // IsRunning returns whether a backup or restore is currently in progress. func (m *Manager) IsRunning() bool { m.mu.Lock() defer m.mu.Unlock() return m.running } // acquireRunning atomically sets the running flag. 
Returns error if already running. func (m *Manager) acquireRunning() error { m.mu.Lock() defer m.mu.Unlock() if m.running { return fmt.Errorf("backup already in progress") } m.running = true return nil } // releaseRunning clears the running flag. func (m *Manager) releaseRunning() { m.mu.Lock() m.running = false m.mu.Unlock() } // GetResticPassword returns the restic repository encryption password. func (m *Manager) GetResticPassword() (string, error) { return m.restic.GetPassword() } // ListSnapshots returns snapshots from all primary restic repositories, merged and sorted. func (m *Manager) ListSnapshots(limit int) ([]SnapshotInfo, error) { drives := m.activeDrives() var allSnapshots []SnapshotInfo for _, drive := range drives { repoPath := PrimaryResticRepoPath(drive) if !m.restic.RepoExists(repoPath) { continue } snapshots, err := m.restic.ListSnapshots(repoPath, 0) if err != nil { m.logger.Printf("[WARN] Could not list snapshots from %s: %v", repoPath, err) continue } for i := range snapshots { snapshots[i].RepoPath = repoPath } allSnapshots = append(allSnapshots, snapshots...) } // Sort newest first sort.Slice(allSnapshots, func(i, j int) bool { return allSnapshots[i].Time.After(allSnapshots[j].Time) }) if limit > 0 && len(allSnapshots) > limit { allSnapshots = allSnapshots[:limit] } return allSnapshots, nil } // ListAllSnapshots returns snapshots from primary restic repos across all active drives. // All snapshots get Tier=1. func (m *Manager) ListAllSnapshots(limit int) ([]SnapshotInfo, error) { drives := m.activeDrives() var allSnapshots []SnapshotInfo for _, drive := range drives { repoPath := PrimaryResticRepoPath(drive) if !m.restic.RepoExists(repoPath) { continue } snapshots, err := m.restic.ListSnapshots(repoPath, 0) if err != nil { m.logger.Printf("[WARN] Could not list snapshots from %s: %v", repoPath, err) continue } for i := range snapshots { snapshots[i].RepoPath = repoPath snapshots[i].Tier = 1 } allSnapshots = append(allSnapshots, snapshots...) 
} // Sort newest first sort.Slice(allSnapshots, func(i, j int) bool { return allSnapshots[i].Time.After(allSnapshots[j].Time) }) if limit > 0 && len(allSnapshots) > limit { allSnapshots = allSnapshots[:limit] } return allSnapshots, nil } // SetStackProvider sets the stack data provider for app data discovery. // C3: Write is protected by mutex since stackProvider is read by concurrent goroutines. func (m *Manager) SetStackProvider(provider StackDataProvider) { m.mu.Lock() m.stackProvider = provider m.mu.Unlock() } // UnlockRepo runs restic unlock on the given repo path. func (m *Manager) UnlockRepo(ctx context.Context, repoPath string) error { if !m.restic.RepoExists(repoPath) { return nil // no repo to unlock } cmd := m.restic.UnlockCommand(ctx, repoPath) out, err := cmd.CombinedOutput() if err != nil { return fmt.Errorf("restic unlock: %v (%s)", err, strings.TrimSpace(string(out))) } m.logger.Printf("[INFO] Restic repo unlocked: %s", repoPath) return nil } // GetStackHDDMounts returns HDD mount paths for the named stack via the stack provider. func (m *Manager) GetStackHDDMounts(name string) []string { if m.stackProvider == nil { return nil } return m.stackProvider.GetStackHDDMounts(name) } // DumpStackDB runs a database dump for containers belonging to a specific stack. // Dumps to the stack's home drive: /backups/primary//db-dumps/. 
func (m *Manager) DumpStackDB(ctx context.Context, stackName string) error { dbs, err := DiscoverDatabases(ctx, m.logger, m.isDebug()) if err != nil { return fmt.Errorf("database discovery failed: %w", err) } var stackDBs []DiscoveredDB for _, db := range dbs { if db.StackName == stackName { stackDBs = append(stackDBs, db) } } if len(stackDBs) == 0 { m.logger.Printf("[DEBUG] No databases found for stack %s — skipping pre-backup dump", stackName) return nil } drivePath := m.GetAppDrivePath(stackName) if drivePath == "" || !filepath.IsAbs(drivePath) { return fmt.Errorf("cannot determine absolute drive path for %s (systemDataPath not configured?)", stackName) } dumpDir := AppDBDumpPath(drivePath, stackName) m.logger.Printf("[INFO] Running pre-backup DB dump for %s (%d database(s)) → %s", stackName, len(stackDBs), dumpDir) for _, db := range stackDBs { result := DumpOne(ctx, db, dumpDir, m.logger, m.isDebug()) if result.Error != nil { return fmt.Errorf("DB dump failed for %s: %w", result.DB.ContainerName, result.Error) } m.logger.Printf("[INFO] Pre-backup DB dump OK: %s (%s)", result.DB.ContainerName, humanizeBytes(result.Size)) // Persist validation to settings if m.settings != nil && result.FilePath != "" { filename := filepath.Base(result.FilePath) cache := settings.DBValidationCache{ ValidatedAt: time.Now().Format(time.RFC3339), TableCount: result.Validation.TableCount, HasHeader: result.Validation.Valid, } if !result.Validation.Valid { cache.Error = result.Validation.Error } _ = m.settings.SetDBValidation(filename, cache) } } return nil } // perDriveRepoStats returns per-drive restic repository statistics for the Részletek section. 
func (m *Manager) perDriveRepoStats() []DriveRepoInfo { drives := m.activeDrives() var infos []DriveRepoInfo for _, drive := range drives { repoPath := PrimaryResticRepoPath(drive) if !m.restic.RepoExists(repoPath) { continue } stats, err := m.restic.Stats(repoPath) if err != nil { continue } infos = append(infos, DriveRepoInfo{ DrivePath: drive, TotalSize: stats.TotalSize, TotalSizeBytes: stats.TotalSizeBytes, SnapshotCount: stats.SnapshotCount, }) } return infos } // aggregateRepoStats combines stats from all primary restic repos. func (m *Manager) aggregateRepoStats() *RepoStats { drives := m.activeDrives() agg := &RepoStats{} var totalBytes int64 for _, drive := range drives { repoPath := PrimaryResticRepoPath(drive) if !m.restic.RepoExists(repoPath) { continue } stats, err := m.restic.Stats(repoPath) if err != nil { continue } agg.SnapshotCount += stats.SnapshotCount totalBytes += stats.TotalSizeBytes if stats.LatestSnapshot != nil { if agg.LatestSnapshot == nil || stats.LatestSnapshot.Time.After(agg.LatestSnapshot.Time) { agg.LatestSnapshot = stats.LatestSnapshot } } } agg.TotalSizeBytes = totalBytes if totalBytes > 0 { agg.TotalSize = humanizeBytes(totalBytes) } return agg } // listAllDumpFiles scans per-drive per-stack DB dump directories. func (m *Manager) listAllDumpFiles() []DumpFileInfo { var allFiles []DumpFileInfo for drive, stacks := range m.groupStacksByDrive() { for _, stack := range stacks { dumpDir := AppDBDumpPath(drive, stack.Name) if files, err := ListDumpFiles(dumpDir); err == nil { allFiles = append(allFiles, files...) } } } return allFiles } func shouldPrune(schedule string) bool { loc, err := time.LoadLocation("Europe/Budapest") if err != nil { loc = time.UTC } now := time.Now().In(loc) switch strings.ToLower(schedule) { case "weekly": return now.Weekday() == time.Sunday case "daily": return true default: return now.Weekday() == time.Sunday } } // appendSnapshotRecord adds a record to the ring buffer (max 20). Caller must hold m.mu. 
func (m *Manager) appendSnapshotRecord(rec SnapshotRecord) { m.snapshotHistory = append(m.snapshotHistory, rec) if len(m.snapshotHistory) > 20 { m.snapshotHistory = m.snapshotHistory[len(m.snapshotHistory)-20:] } m.saveSnapshotHistory() } // saveSnapshotHistory persists the current snapshotHistory to disk as JSON. // Caller must hold m.mu. Writes atomically (tmp file + rename). func (m *Manager) saveSnapshotHistory() { if m.snapshotHistoryFile == "" { return } data, err := json.Marshal(m.snapshotHistory) if err != nil { m.logger.Printf("[WARN] Could not marshal snapshot history: %v", err) return } tmp := m.snapshotHistoryFile + ".tmp" if err := os.WriteFile(tmp, data, 0644); err != nil { m.logger.Printf("[WARN] Could not write snapshot history tmp file: %v", err) return } if err := os.Rename(tmp, m.snapshotHistoryFile); err != nil { m.logger.Printf("[WARN] Could not rename snapshot history file: %v", err) } } // loadSnapshotHistoryFromFile reads the persisted snapshot history from disk. // Returns nil if the file does not exist or cannot be read. func (m *Manager) loadSnapshotHistoryFromFile() []SnapshotRecord { if m.snapshotHistoryFile == "" { return nil } data, err := os.ReadFile(m.snapshotHistoryFile) if err != nil { if !os.IsNotExist(err) { m.logger.Printf("[WARN] Could not read snapshot history file: %v", err) } return nil } var records []SnapshotRecord if err := json.Unmarshal(data, &records); err != nil { m.logger.Printf("[WARN] Could not parse snapshot history file: %v", err) return nil } return records } // LoadSnapshotHistory populates the snapshot history on startup. // First tries to load persisted history (with delta stats) from disk. // Merges with restic repo snapshots to pick up any entries not in the persisted file. 
func (m *Manager) LoadSnapshotHistory() { // Try loading persisted records (contains delta stats from actual backup runs) persisted := m.loadSnapshotHistoryFromFile() // Build a lookup map of persisted records by SnapshotID persistedByID := make(map[string]SnapshotRecord, len(persisted)) for _, r := range persisted { persistedByID[r.SnapshotID] = r } // Query restic repos for any snapshots not in the persisted file drives := m.activeDrives() var allSnapshots []SnapshotInfo for _, drive := range drives { repoPath := PrimaryResticRepoPath(drive) if !m.restic.RepoExists(repoPath) { continue } snapshots, err := m.restic.ListSnapshots(repoPath, 20) if err != nil { m.logger.Printf("[WARN] Could not load snapshot history from %s: %v", repoPath, err) continue } allSnapshots = append(allSnapshots, snapshots...) } // Sort by time (oldest first for ring buffer) sort.Slice(allSnapshots, func(i, j int) bool { return allSnapshots[i].Time.Before(allSnapshots[j].Time) }) m.mu.Lock() defer m.mu.Unlock() if len(persisted) > 0 { // Start from persisted records, add any restic snapshots not already there m.snapshotHistory = persisted for _, s := range allSnapshots { if _, found := persistedByID[s.ID]; !found { m.snapshotHistory = append(m.snapshotHistory, SnapshotRecord{ SnapshotID: s.ID, Time: s.Time, HasStats: false, Success: true, }) } } // Re-sort by time after merge (oldest first for ring buffer) sort.Slice(m.snapshotHistory, func(i, j int) bool { return m.snapshotHistory[i].Time.Before(m.snapshotHistory[j].Time) }) m.logger.Printf("[INFO] Loaded %d snapshots from persisted history (merged with %d restic entries)", len(persisted), len(allSnapshots)) } else { // No persisted file — fall back to restic-only loading (first run) for _, s := range allSnapshots { m.snapshotHistory = append(m.snapshotHistory, SnapshotRecord{ SnapshotID: s.ID, Time: s.Time, HasStats: false, Success: true, }) } m.logger.Printf("[INFO] Loaded %d historical snapshots from %d restic repos (no persisted 
history)", len(m.snapshotHistory), len(drives)) } if len(m.snapshotHistory) > 20 { m.snapshotHistory = m.snapshotHistory[len(m.snapshotHistory)-20:] } } // RefreshCache updates the cached full status. Called by scheduler every 5 minutes // and after each backup run. func (m *Manager) RefreshCache(nextDBDump, nextBackup time.Time) { status := &FullBackupStatus{ Enabled: m.cfg.Backup.Enabled, DBDumpSchedule: m.cfg.Backup.DBDumpSchedule, ResticSchedule: m.cfg.Backup.ResticSchedule, PruneSchedule: m.cfg.Backup.PruneSchedule, NextDBDump: nextDBDump, NextBackup: nextBackup, Retention: m.cfg.Backup.Retention, } // Expensive calls (outside lock) status.RepoStats = m.aggregateRepoStats() status.PerDriveRepoStats = m.perDriveRepoStats() // Scan dump files from per-drive per-stack paths files := m.listAllDumpFiles() status.DumpFiles = files ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() if dbs, err := DiscoverDatabases(ctx, m.logger, m.isDebug()); err == nil { status.DiscoveredDBs = dbs } // Discover app data — all deployed stacks, backup is mandatory if m.stackProvider != nil { status.AppDataInfo = DiscoverAppData(m.stackProvider, status.DiscoveredDBs) } // Fill in dynamic fields under lock. // C1: lastDBDump mutation also happens here to prevent data races with GetFullStatus. // C2: snapshot history reversal happens before cachedStatus assignment (inside lock). m.mu.Lock() status.Running = m.running status.LastDBDump = m.lastDBDump status.LastBackup = m.lastBackup status.LastCheckTime = m.lastCheckTime status.LastCheckOK = m.lastCheckOK status.SnapshotHistory = make([]SnapshotRecord, len(m.snapshotHistory)) copy(status.SnapshotHistory, m.snapshotHistory) // C1: Cross-check lastDBDump results inside lock to prevent torn writes. 
if m.lastDBDump != nil && len(files) > 0 { fileValidation := make(map[string]DumpValidation) // keyed by filename for _, f := range files { fileValidation[f.FileName] = f.Validation } for i, r := range m.lastDBDump.Results { if !r.Validation.Valid && r.Validation.Error == "" && r.FilePath != "" { filename := filepath.Base(r.FilePath) if fv, ok := fileValidation[filename]; ok { m.lastDBDump.Results[i].Validation = fv m.logger.Printf("[INFO] Re-validated %s from disk: valid=%v tables=%d", filename, fv.Valid, fv.TableCount) } } } } // C2: Reverse snapshot history before assigning to cachedStatus (inside lock). for i, j := 0, len(status.SnapshotHistory)-1; i < j; i, j = i+1, j-1 { status.SnapshotHistory[i], status.SnapshotHistory[j] = status.SnapshotHistory[j], status.SnapshotHistory[i] } m.cachedStatus = status m.cacheTime = time.Now() m.mu.Unlock() m.logger.Printf("[INFO] Backup status cache refreshed") } // GetFullStatus returns the cached backup status for page rendering. // Returns instantly — no subprocess calls. // Returns a deep copy so callers can safely append to slice fields without // polluting the cache (which would cause duplicate entries on repeated calls). func (m *Manager) GetFullStatus(nextDBDump, nextBackup time.Time) *FullBackupStatus { m.mu.Lock() defer m.mu.Unlock() if m.cachedStatus != nil { // Deep copy — callers (backupsHandler) append to CrossDriveSummary, // UnconfiguredApps, and CrossDriveWarnings. If we returned the cache // pointer directly, every page load would accumulate more entries. status := *m.cachedStatus status.AppDataInfo = make([]AppBackupInfo, len(m.cachedStatus.AppDataInfo)) copy(status.AppDataInfo, m.cachedStatus.AppDataInfo) status.PerDriveRepoStats = make([]DriveRepoInfo, len(m.cachedStatus.PerDriveRepoStats)) copy(status.PerDriveRepoStats, m.cachedStatus.PerDriveRepoStats) // These three slices are assembled by the handler from AppDataInfo + settings; // they must always start empty so the handler builds them fresh. 
status.CrossDriveSummary = nil status.UnconfiguredApps = nil status.CrossDriveWarnings = nil // Update dynamic fields that don't need subprocess calls status.Running = m.running status.NextDBDump = nextDBDump status.NextBackup = nextBackup // C4: Deep-copy lastDBDump and lastBackup so callers cannot mutate shared state. if m.lastDBDump != nil { copyDump := *m.lastDBDump if len(m.lastDBDump.Results) > 0 { copyDump.Results = make([]DumpResult, len(m.lastDBDump.Results)) copy(copyDump.Results, m.lastDBDump.Results) } status.LastDBDump = ©Dump } if m.lastBackup != nil { copyBackup := *m.lastBackup status.LastBackup = ©Backup } // Update snapshot history status.SnapshotHistory = make([]SnapshotRecord, len(m.snapshotHistory)) copy(status.SnapshotHistory, m.snapshotHistory) // Reverse so newest first for i, j := 0, len(status.SnapshotHistory)-1; i < j; i, j = i+1, j-1 { status.SnapshotHistory[i], status.SnapshotHistory[j] = status.SnapshotHistory[j], status.SnapshotHistory[i] } // Synthesize LastBackup from snapshot history if not in memory (e.g., after restart) if status.LastBackup == nil && len(status.SnapshotHistory) > 0 { latest := status.SnapshotHistory[0] // already reversed, newest first status.LastBackup = &BackupStatus{ LastRun: latest.Time, Success: latest.Success, Snapshot: &SnapshotResult{ SnapshotID: latest.SnapshotID, }, } } // Synthesize LastDBDump from DumpFiles on disk if not in memory if status.LastDBDump == nil && len(status.DumpFiles) > 0 { var results []DumpResult var latestTime time.Time for _, f := range status.DumpFiles { results = append(results, DumpResult{ DB: DiscoveredDB{StackName: f.StackName, DBType: f.DBType, ContainerName: f.StackName}, FilePath: f.FileName, Size: f.Size, Validation: f.Validation, }) if f.ModTime.After(latestTime) { latestTime = f.ModTime } } status.LastDBDump = &DBDumpStatus{ LastRun: latestTime, Results: results, Success: true, } } return &status } // No cache yet — return a minimal status (first page load before cache 
is populated) return &FullBackupStatus{ Enabled: m.cfg.Backup.Enabled, Running: m.running, LastDBDump: m.lastDBDump, LastBackup: m.lastBackup, DBDumpSchedule: m.cfg.Backup.DBDumpSchedule, ResticSchedule: m.cfg.Backup.ResticSchedule, PruneSchedule: m.cfg.Backup.PruneSchedule, NextDBDump: nextDBDump, NextBackup: nextBackup, Retention: m.cfg.Backup.Retention, LastCheckTime: m.lastCheckTime, LastCheckOK: m.lastCheckOK, } } // isDebug returns true if logging level is "debug". func (m *Manager) isDebug() bool { return m.cfg.Logging.Level == "debug" } func dbNames(dbs []DiscoveredDB) string { var names []string for _, db := range dbs { names = append(names, fmt.Sprintf("%s(%s)", db.ContainerName, db.DBType)) } return strings.Join(names, ", ") } // dedup removes duplicate strings from a slice, preserving order. func dedup(items []string) []string { seen := make(map[string]bool) var result []string for _, item := range items { if !seen[item] { seen[item] = true result = append(result, item) } } return result }