// Package backup coordinates database dumps and restic snapshots and keeps
// an in-memory view of their results for status pages.
package backup

import (
	"context"
	"fmt"
	"log"
	"strings"
	"sync"
	"time"

	"gitea.dooplex.hu/admin/felhom-controller/internal/config"
	"gitea.dooplex.hu/admin/felhom-controller/internal/monitor"
)

// Manager orchestrates database dumps and restic backups.
//
// The first four fields are set once by NewManager. Everything between mu
// and cacheTime is mutable state that the methods in this file read and
// write while holding mu.
type Manager struct {
	cfg    *config.Config
	restic *ResticManager
	logger *log.Logger
	pinger *monitor.Pinger

	mu              sync.Mutex
	lastDBDump      *DBDumpStatus    // result of the most recent RunDBDumps, nil until one has run
	lastBackup      *BackupStatus    // result of the most recent RunBackup, nil until one has run
	running         bool             // set for the duration of RunFullBackup
	snapshotHistory []SnapshotRecord // ring buffer, last 20 entries
	lastCheckTime   time.Time        // when the repository check last ran (set in RunBackup's prune step)
	lastCheckOK     bool             // whether that check returned no error

	// Cached status for page rendering (refreshed periodically)
	cachedStatus *FullBackupStatus
	cacheTime    time.Time

	// AfterBackup is called after a backup completes to refresh the cache.
	// Set by main.go to avoid circular import with scheduler.
	// RunBackup invokes it only on its success path and only when non-nil.
	AfterBackup func()
}

// SnapshotRecord combines restic snapshot metadata with our run stats.
type SnapshotRecord struct {
	SnapshotID   string        `json:"snapshot_id"`
	Time         time.Time     `json:"time"`
	FilesNew     int           `json:"files_new"`
	FilesChanged int           `json:"files_changed"`
	DataAdded    string        `json:"data_added"`
	Duration     time.Duration `json:"duration"`
	Success      bool          `json:"success"`
	HasStats     bool          `json:"has_stats"` // false for historical entries loaded from restic
}

// FullBackupStatus contains everything the backup page needs.
// Instances are assembled by RefreshCache and GetFullStatus.
type FullBackupStatus struct {
	Enabled bool
	Running bool

	// DB Dumps
	LastDBDump    *DBDumpStatus
	DumpFiles     []DumpFileInfo
	DiscoveredDBs []DiscoveredDB

	// Restic
	LastBackup      *BackupStatus
	SnapshotHistory []SnapshotRecord // copy of the manager's history in reversed order
	RepoStats       *RepoStats

	// Schedule
	DBDumpSchedule string
	ResticSchedule string
	PruneSchedule  string
	NextDBDump     time.Time
	NextBackup     time.Time
	Retention      config.RetentionConfig

	// Repository health
	RepoPath      string
	BackupPaths   []string
	LastCheckTime time.Time
	LastCheckOK   bool

	// Remote (placeholder)
	RemoteEnabled bool
}

// DBDumpStatus holds the last DB dump result.
type DBDumpStatus struct { LastRun time.Time Results []DumpResult Success bool Duration time.Duration } // BackupStatus holds the last backup result. type BackupStatus struct { LastRun time.Time Snapshot *SnapshotResult Success bool Duration time.Duration RepoStats *RepoStats } // NewManager creates a new backup manager. func NewManager(cfg *config.Config, pinger *monitor.Pinger, logger *log.Logger) *Manager { return &Manager{ cfg: cfg, restic: NewResticManager(cfg, logger), logger: logger, pinger: pinger, } } // RunDBDumps discovers and dumps all databases. func (m *Manager) RunDBDumps(ctx context.Context) error { start := time.Now() m.logger.Printf("[INFO] Starting database dump run") dbs, err := DiscoverDatabases(ctx, m.logger) if err != nil { m.logger.Printf("[ERROR] Database discovery failed: %v", err) return err } if len(dbs) == 0 { m.logger.Printf("[INFO] No database containers found") m.mu.Lock() m.lastDBDump = &DBDumpStatus{ LastRun: time.Now(), Success: true, Duration: time.Since(start), } m.mu.Unlock() return nil } m.logger.Printf("[INFO] Discovered %d database(s): %s", len(dbs), dbNames(dbs)) results := DumpAll(ctx, dbs, m.cfg.Paths.DBDumpDir, m.logger) // Check results allOK := true var summary []string var totalSize int64 for _, r := range results { if r.Error != nil { allOK = false summary = append(summary, fmt.Sprintf("FAIL %s: %v", r.DB.ContainerName, r.Error)) m.logger.Printf("[ERROR] DB dump failed for %s: %v", r.DB.ContainerName, r.Error) } else { totalSize += r.Size summary = append(summary, fmt.Sprintf("OK %s (%s)", r.DB.ContainerName, formatBytes(r.Size))) } } duration := time.Since(start) m.mu.Lock() m.lastDBDump = &DBDumpStatus{ LastRun: time.Now(), Results: results, Success: allOK, Duration: duration, } m.mu.Unlock() // Ping healthcheck uuid := m.cfg.Monitoring.PingUUIDs.DBDump body := fmt.Sprintf("DB dump: %d databases, %s total\n%s", len(results), formatBytes(totalSize), strings.Join(summary, "\n")) if allOK { m.pinger.Ping(uuid, body) 
m.logger.Printf("[INFO] DB dump completed: %d databases, %s total (%s)", len(results), formatBytes(totalSize), duration.Round(time.Millisecond)) } else { m.pinger.Fail(uuid, body) return fmt.Errorf("some database dumps failed") } return nil } // RunBackup runs a restic backup snapshot. func (m *Manager) RunBackup(ctx context.Context) error { start := time.Now() m.logger.Printf("[INFO] Starting restic backup") // Ensure repo is initialized if err := m.restic.EnsureInitialized(); err != nil { m.logger.Printf("[ERROR] Restic init failed: %v", err) m.pinger.Fail(m.cfg.Monitoring.PingUUIDs.Backup, fmt.Sprintf("Restic init failed: %v", err)) return err } // Backup paths paths := []string{ m.cfg.Paths.StacksDir, m.cfg.Paths.DBDumpDir, "/opt/docker/felhom-controller/controller.yaml", } tags := []string{"felhom", m.cfg.Customer.ID} result, err := m.restic.Snapshot(paths, tags) if err != nil { m.logger.Printf("[ERROR] Restic backup failed: %v", err) m.pinger.Fail(m.cfg.Monitoring.PingUUIDs.Backup, fmt.Sprintf("Backup failed: %v", err)) m.mu.Lock() m.lastBackup = &BackupStatus{ LastRun: time.Now(), Success: false, Duration: time.Since(start), } m.mu.Unlock() return err } // Prune check (weekly — Sunday) if shouldPrune(m.cfg.Backup.PruneSchedule) { m.logger.Printf("[INFO] Running weekly prune") if err := m.restic.Prune(m.cfg.Backup.Retention); err != nil { m.logger.Printf("[WARN] Restic prune failed: %v", err) } checkErr := m.restic.Check() if checkErr != nil { m.logger.Printf("[WARN] Restic check failed: %v", checkErr) } m.mu.Lock() m.lastCheckTime = time.Now() m.lastCheckOK = checkErr == nil m.mu.Unlock() } // Get stats stats, _ := m.restic.Stats() duration := time.Since(start) m.mu.Lock() m.lastBackup = &BackupStatus{ LastRun: time.Now(), Snapshot: result, Success: true, Duration: duration, RepoStats: stats, } // Append to snapshot history m.appendSnapshotRecord(SnapshotRecord{ SnapshotID: result.SnapshotID, Time: time.Now(), FilesNew: result.FilesNew, FilesChanged: 
result.FilesChanged, DataAdded: result.DataAdded, Duration: duration, Success: true, HasStats: true, }) m.mu.Unlock() body := fmt.Sprintf("Backup OK\nSnapshot: %s\nNew files: %d, Changed: %d\nData added: %s\nDuration: %s", result.SnapshotID, result.FilesNew, result.FilesChanged, result.DataAdded, duration.Round(time.Second)) m.pinger.Ping(m.cfg.Monitoring.PingUUIDs.Backup, body) m.logger.Printf("[INFO] Restic backup completed: snapshot %s, %d new, %d changed, %s added (%s)", result.SnapshotID, result.FilesNew, result.FilesChanged, result.DataAdded, duration.Round(time.Millisecond)) // Refresh cache so the page shows updated data immediately if m.AfterBackup != nil { m.AfterBackup() } return nil } // RunFullBackup runs DB dumps followed by restic backup. func (m *Manager) RunFullBackup(ctx context.Context) error { m.mu.Lock() if m.running { m.mu.Unlock() return fmt.Errorf("backup already in progress") } m.running = true m.mu.Unlock() defer func() { m.mu.Lock() m.running = false m.mu.Unlock() }() // Step 1: DB dumps if err := m.RunDBDumps(ctx); err != nil { m.logger.Printf("[WARN] DB dump had errors, continuing with backup anyway") } // Step 2: Restic backup return m.RunBackup(ctx) } // GetStatus returns the current backup status. func (m *Manager) GetStatus() (*DBDumpStatus, *BackupStatus) { m.mu.Lock() defer m.mu.Unlock() return m.lastDBDump, m.lastBackup } // GetRepoStats returns repository statistics. func (m *Manager) GetRepoStats() (*RepoStats, error) { return m.restic.Stats() } // IsRunning returns whether a backup is currently in progress. 
func (m *Manager) IsRunning() bool { m.mu.Lock() defer m.mu.Unlock() return m.running } func shouldPrune(schedule string) bool { loc, err := time.LoadLocation("Europe/Budapest") if err != nil { loc = time.UTC } now := time.Now().In(loc) switch strings.ToLower(schedule) { case "weekly": return now.Weekday() == time.Sunday case "daily": return true default: return now.Weekday() == time.Sunday } } // appendSnapshotRecord adds a record to the ring buffer (max 20). Caller must hold m.mu. func (m *Manager) appendSnapshotRecord(rec SnapshotRecord) { m.snapshotHistory = append(m.snapshotHistory, rec) if len(m.snapshotHistory) > 20 { m.snapshotHistory = m.snapshotHistory[len(m.snapshotHistory)-20:] } } // LoadSnapshotHistory populates the snapshot history from restic on startup. func (m *Manager) LoadSnapshotHistory() { snapshots, err := m.restic.ListSnapshots(20) if err != nil { m.logger.Printf("[WARN] Could not load snapshot history: %v", err) return } m.mu.Lock() defer m.mu.Unlock() for _, s := range snapshots { m.snapshotHistory = append(m.snapshotHistory, SnapshotRecord{ SnapshotID: s.ID, Time: s.Time, HasStats: false, // historical — no delta stats available Success: true, }) } if len(m.snapshotHistory) > 20 { m.snapshotHistory = m.snapshotHistory[len(m.snapshotHistory)-20:] } m.logger.Printf("[INFO] Loaded %d historical snapshots", len(m.snapshotHistory)) } // RefreshCache updates the cached full status. Called by scheduler every 5 minutes // and after each backup run. 
func (m *Manager) RefreshCache(nextDBDump, nextBackup time.Time) { status := &FullBackupStatus{ Enabled: m.cfg.Backup.Enabled, DBDumpSchedule: m.cfg.Backup.DBDumpSchedule, ResticSchedule: m.cfg.Backup.ResticSchedule, PruneSchedule: m.cfg.Backup.PruneSchedule, NextDBDump: nextDBDump, NextBackup: nextBackup, Retention: m.cfg.Backup.Retention, RepoPath: m.cfg.Backup.ResticRepo, BackupPaths: []string{ m.cfg.Paths.StacksDir, m.cfg.Paths.DBDumpDir, "/opt/docker/felhom-controller/controller.yaml", }, } // Expensive calls (outside lock) if stats, err := m.restic.Stats(); err == nil { status.RepoStats = stats } if files, err := ListDumpFiles(m.cfg.Paths.DBDumpDir); err == nil { status.DumpFiles = files } ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() if dbs, err := DiscoverDatabases(ctx, m.logger); err == nil { status.DiscoveredDBs = dbs } // Fill in dynamic fields under lock m.mu.Lock() status.Running = m.running status.LastDBDump = m.lastDBDump status.LastBackup = m.lastBackup status.LastCheckTime = m.lastCheckTime status.LastCheckOK = m.lastCheckOK status.SnapshotHistory = make([]SnapshotRecord, len(m.snapshotHistory)) copy(status.SnapshotHistory, m.snapshotHistory) m.cachedStatus = status m.cacheTime = time.Now() m.mu.Unlock() // Reverse so newest first for i, j := 0, len(status.SnapshotHistory)-1; i < j; i, j = i+1, j-1 { status.SnapshotHistory[i], status.SnapshotHistory[j] = status.SnapshotHistory[j], status.SnapshotHistory[i] } m.logger.Printf("[INFO] Backup status cache refreshed") } // GetFullStatus returns the cached backup status for page rendering. // Returns instantly — no subprocess calls. 
func (m *Manager) GetFullStatus(nextDBDump, nextBackup time.Time) *FullBackupStatus { m.mu.Lock() defer m.mu.Unlock() if m.cachedStatus != nil { // Update dynamic fields that don't need subprocess calls m.cachedStatus.Running = m.running m.cachedStatus.NextDBDump = nextDBDump m.cachedStatus.NextBackup = nextBackup m.cachedStatus.LastDBDump = m.lastDBDump m.cachedStatus.LastBackup = m.lastBackup // Update snapshot history m.cachedStatus.SnapshotHistory = make([]SnapshotRecord, len(m.snapshotHistory)) copy(m.cachedStatus.SnapshotHistory, m.snapshotHistory) // Reverse so newest first for i, j := 0, len(m.cachedStatus.SnapshotHistory)-1; i < j; i, j = i+1, j-1 { m.cachedStatus.SnapshotHistory[i], m.cachedStatus.SnapshotHistory[j] = m.cachedStatus.SnapshotHistory[j], m.cachedStatus.SnapshotHistory[i] } // Synthesize LastBackup from snapshot history if not in memory (e.g., after restart) if m.cachedStatus.LastBackup == nil && len(m.cachedStatus.SnapshotHistory) > 0 { latest := m.cachedStatus.SnapshotHistory[0] // already reversed, newest first m.cachedStatus.LastBackup = &BackupStatus{ LastRun: latest.Time, Success: latest.Success, Snapshot: &SnapshotResult{ SnapshotID: latest.SnapshotID, }, } } // Synthesize LastDBDump from DumpFiles on disk if not in memory if m.cachedStatus.LastDBDump == nil && len(m.cachedStatus.DumpFiles) > 0 { var results []DumpResult var latestTime time.Time for _, f := range m.cachedStatus.DumpFiles { results = append(results, DumpResult{ DB: DiscoveredDB{StackName: f.StackName, DBType: f.DBType, ContainerName: f.StackName}, FilePath: f.FileName, Size: f.Size, }) if f.ModTime.After(latestTime) { latestTime = f.ModTime } } m.cachedStatus.LastDBDump = &DBDumpStatus{ LastRun: latestTime, Results: results, Success: true, } } return m.cachedStatus } // No cache yet — return a minimal status (first page load before cache is populated) return &FullBackupStatus{ Enabled: m.cfg.Backup.Enabled, Running: m.running, LastDBDump: m.lastDBDump, LastBackup: 
m.lastBackup, DBDumpSchedule: m.cfg.Backup.DBDumpSchedule, ResticSchedule: m.cfg.Backup.ResticSchedule, PruneSchedule: m.cfg.Backup.PruneSchedule, NextDBDump: nextDBDump, NextBackup: nextBackup, Retention: m.cfg.Backup.Retention, RepoPath: m.cfg.Backup.ResticRepo, LastCheckTime: m.lastCheckTime, LastCheckOK: m.lastCheckOK, BackupPaths: []string{ m.cfg.Paths.StacksDir, m.cfg.Paths.DBDumpDir, "/opt/docker/felhom-controller/controller.yaml", }, } } func dbNames(dbs []DiscoveredDB) string { var names []string for _, db := range dbs { names = append(names, fmt.Sprintf("%s(%s)", db.ContainerName, db.DBType)) } return strings.Join(names, ", ") }