// Source: deploy-felhom-compose/controller/internal/backup/backup.go
// Repository viewer metadata: 1119 lines, 34 KiB, Go.

package backup
import (
"context"
"encoding/json"
"fmt"
"log"
"os"
"path/filepath"
"sort"
"strings"
"sync"
"time"
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
"gitea.dooplex.hu/admin/felhom-controller/internal/monitor"
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
)
// Manager orchestrates database dumps and restic backups.
//
// It keeps the result of the last dump and backup run, a bounded snapshot
// history that is persisted to disk as JSON, the outcome of the last
// integrity check, and a cached FullBackupStatus used for page rendering.
// The methods in this file take mu before reading or writing that state.
// Manager embeds a sync.Mutex by value, so it must be used through a pointer.
type Manager struct {
// cfg supplies paths, schedules, retention and healthcheck ping UUIDs.
cfg *config.Config
// restic wraps the per-repository restic operations used by this file
// (init, snapshot, prune, check, stats, list).
restic *ResticManager
logger *log.Logger
// pinger reports run results via Ping (success) and Fail (failure).
pinger *monitor.Pinger
// settings may be nil; every use in this file is nil-checked.
settings *settings.Settings
// stackProvider may be nil until SetStackProvider is called.
stackProvider StackDataProvider
systemDataPath string // fallback drive for SSD-only apps
// mu guards the mutable fields below.
mu sync.Mutex
lastDBDump *DBDumpStatus
lastBackup *BackupStatus
// running is set by acquireRunning and cleared by releaseRunning so that
// only one dump/backup run is active at a time.
running bool
snapshotHistory []SnapshotRecord // ring buffer, last 20 entries
snapshotHistoryFile string // path to persist snapshot history JSON
// lastCheckTime / lastCheckOK are written by RunIntegrityCheck.
lastCheckTime time.Time
lastCheckOK bool
// Cached status for page rendering (refreshed periodically)
cachedStatus *FullBackupStatus
// cacheTime is the time of the last RefreshCache.
cacheTime time.Time
// AfterBackup is called after a backup completes to refresh the cache.
// Set by main.go to avoid circular import with scheduler.
// It is only invoked when non-nil.
AfterBackup func()
}
// SnapshotRecord combines restic snapshot metadata with our run stats.
//
// Records are kept in Manager.snapshotHistory and serialized to the snapshot
// history JSON file using the field tags below.
type SnapshotRecord struct {
// SnapshotID is the restic snapshot identifier.
SnapshotID string `json:"snapshot_id"`
// Time is when the record was created (backup run) or the snapshot time
// reported by restic (historical entries).
Time time.Time `json:"time"`
FilesNew int `json:"files_new"`
FilesChanged int `json:"files_changed"`
// DataAdded is a preformatted string taken from the snapshot result.
DataAdded string `json:"data_added"`
// Duration is the wall-clock duration of the whole backup run.
Duration time.Duration `json:"duration"`
Success bool `json:"success"`
HasStats bool `json:"has_stats"` // false for historical entries loaded from restic
}
// DriveRepoInfo holds per-drive restic repository statistics for the
// "Details" (Részletek) section of the backup page.
type DriveRepoInfo struct {
// DrivePath is the drive whose primary restic repository was measured.
DrivePath string
DriveLabel string // filled by handler from settings
// TotalSize is the human-readable form of TotalSizeBytes.
TotalSize string
TotalSizeBytes int64
SnapshotCount int
}
// CrossDriveSummaryItem holds display data for one app's cross-drive backup.
//
// Nothing in this file populates these items; GetFullStatus always hands the
// related slices to its caller empty so the handler can build them.
type CrossDriveSummaryItem struct {
StackName string
DisplayName string
Method string // "rsync" or "restic"
MethodLabel string // "Egyszerű másolat" (simple copy) or "Restic"
DestPath string
DestLabel string // storage path label
Schedule string
ScheduleLabel string // "Naponta" (daily) or "Hetente" (weekly) or "Kézi" (manual)
LastStatus string // "ok", "error", "running", ""
LastRunShort string // formatted short time e.g. "03:15"
SizeHuman string
}
// FullBackupStatus contains everything the backup page needs.
//
// RefreshCache builds the expensive parts (repo stats, dump file listing,
// database discovery) and stores the result as the Manager's cache;
// GetFullStatus returns a copy of that cache with the dynamic fields
// (Running, next run times, last results, snapshot history) refreshed.
type FullBackupStatus struct {
// Enabled mirrors cfg.Backup.Enabled.
Enabled bool
// Running reports whether a run currently holds the Manager's running flag.
Running bool
// DB Dumps
LastDBDump *DBDumpStatus
DumpFiles []DumpFileInfo
DiscoveredDBs []DiscoveredDB
// Restic
LastBackup *BackupStatus
// SnapshotHistory is ordered newest first.
SnapshotHistory []SnapshotRecord
// RepoStats is the aggregate across all primary repositories.
RepoStats *RepoStats
PerDriveRepoStats []DriveRepoInfo // per-drive Tier 1 restic stats
// Schedule
DBDumpSchedule string
ResticSchedule string
PruneSchedule string
NextDBDump time.Time
NextBackup time.Time
Retention config.RetentionConfig
// Repository health
LastCheckTime time.Time
LastCheckOK bool
// Remote (placeholder)
RemoteEnabled bool
// App data backup
AppDataInfo []AppBackupInfo
// Cross-drive backup summary
CrossDriveSummary []CrossDriveSummaryItem
UnconfiguredApps []CrossDriveSummaryItem // apps with HDD data but no cross-drive config
CrossDriveWarnings []string // destination health warnings
// Flash messages (set by handlers, passed through redirect)
FlashSuccess string
FlashError string
}
// DBDumpStatus holds the last DB dump result.
type DBDumpStatus struct {
// LastRun is when the dump run finished.
LastRun time.Time
// Results has one entry per database that was dumped; it is left empty
// when no database containers were found.
Results []DumpResult
// Success is true only when every dump in Results succeeded.
Success bool
Duration time.Duration
}
// BackupStatus holds the last backup result.
type BackupStatus struct {
// LastRun is when the backup run finished.
LastRun time.Time
// Snapshot is the result of the last drive that was backed up
// successfully in the run; nil when every drive failed.
Snapshot *SnapshotResult
// Success is true only when no drive reported an error.
Success bool
Duration time.Duration
// RepoStats is the aggregate across primary repositories, captured after
// the run; it is not set when every drive failed.
RepoStats *RepoStats
}
// NewManager builds a backup Manager from the controller configuration.
//
// An empty cfg.Paths.SystemDataPath is only warned about, not rejected. The
// snapshot history file lives in cfg.Paths.DataDir, falling back to
// /opt/docker/felhom-controller/data when that setting is empty.
func NewManager(cfg *config.Config, pinger *monitor.Pinger, sett *settings.Settings, logger *log.Logger) *Manager {
	systemData := cfg.Paths.SystemDataPath
	if systemData == "" {
		logger.Printf("[WARN] SystemDataPath is empty in config — SSD-only apps will not have correct backup paths")
	}

	historyDir := "/opt/docker/felhom-controller/data"
	if configured := cfg.Paths.DataDir; configured != "" {
		historyDir = configured
	}

	mgr := &Manager{
		cfg:                 cfg,
		restic:              NewResticManager(cfg, logger),
		logger:              logger,
		pinger:              pinger,
		settings:            sett,
		systemDataPath:      systemData,
		snapshotHistoryFile: filepath.Join(historyDir, "snapshot-history.json"),
	}
	return mgr
}
// GetAppDrivePath returns the drive path that holds an app's data.
//
// The HDD path reported by the stack provider wins when it is non-empty;
// otherwise the system data path is returned. If that fallback is empty too,
// an error is logged and the empty string is returned.
func (m *Manager) GetAppDrivePath(stackName string) string {
	if provider := m.stackProvider; provider != nil {
		hddPath := provider.GetStackHDDPath(stackName)
		if hddPath != "" {
			return hddPath
		}
	}

	fallback := m.systemDataPath
	if fallback == "" {
		m.logger.Printf("[ERROR] systemDataPath is empty — cannot determine drive for %s", stackName)
	}
	return fallback
}
// groupStacksByDrive buckets every deployed stack under its home drive path
// (as resolved by GetAppDrivePath). It returns nil when no stack provider
// has been set.
func (m *Manager) groupStacksByDrive() map[string][]StackSummary {
	provider := m.stackProvider
	if provider == nil {
		return nil
	}

	byDrive := map[string][]StackSummary{}
	for _, deployed := range provider.ListDeployedStacks() {
		home := m.GetAppDrivePath(deployed.Name)
		byDrive[home] = append(byDrive[home], deployed)
	}
	return byDrive
}
// activeDrives lists, in ascending lexical order, the drive paths that have
// at least one deployed app. The result is nil when there are none.
func (m *Manager) activeDrives() []string {
	var paths []string
	for drivePath := range m.groupStacksByDrive() {
		paths = append(paths, drivePath)
	}
	sort.Strings(paths)
	return paths
}
// RunDBDumps discovers all databases and dumps them to per-drive, per-app
// paths. It fails immediately if another run already holds the running flag;
// the flag is released when the dump run returns.
func (m *Manager) RunDBDumps(ctx context.Context) error {
	err := m.acquireRunning()
	if err != nil {
		return err
	}
	defer m.releaseRunning()

	dumpErr := m.runDBDumpsInternal(ctx)
	return dumpErr
}
// runDBDumpsInternal is the implementation of RunDBDumps. Caller must hold the running flag.
//
// It discovers database containers, dumps each one into its app's dump
// directory on the app's home drive, caches the validation result of every
// successful dump in settings, stores the outcome in m.lastDBDump and reports
// it to the DB-dump healthcheck.
//
// A database whose drive path is empty or not absolute is recorded as a
// failed dump instead of being written to a path relative to the process
// working directory (the same guard DumpStackDB applies).
//
// Returns the discovery error, or a generic error when at least one dump
// failed. When no databases are found the run counts as successful and no
// healthcheck ping is sent.
func (m *Manager) runDBDumpsInternal(ctx context.Context) error {
	start := time.Now()
	m.logger.Printf("[INFO] Starting database dump run")

	dbs, err := DiscoverDatabases(ctx, m.logger)
	if err != nil {
		m.logger.Printf("[ERROR] Database discovery failed: %v", err)
		return err
	}
	if len(dbs) == 0 {
		m.logger.Printf("[INFO] No database containers found")
		m.mu.Lock()
		m.lastDBDump = &DBDumpStatus{
			LastRun:  time.Now(),
			Success:  true,
			Duration: time.Since(start),
		}
		m.mu.Unlock()
		return nil
	}
	m.logger.Printf("[INFO] Discovered %d database(s): %s", len(dbs), dbNames(dbs))

	// Dump each DB to its app's drive path.
	results := make([]DumpResult, 0, len(dbs))
	summary := make([]string, 0, len(dbs))
	allOK := true
	var totalSize int64
	for _, db := range dbs {
		var result DumpResult
		drivePath := m.GetAppDrivePath(db.StackName)
		if drivePath == "" || !filepath.IsAbs(drivePath) {
			// Without an absolute drive path the dump directory would be
			// resolved relative to the working directory; fail this dump.
			result = DumpResult{
				DB:    db,
				Error: fmt.Errorf("cannot determine absolute drive path for %s (systemDataPath not configured?)", db.StackName),
			}
		} else {
			result = DumpOne(ctx, db, AppDBDumpPath(drivePath, db.StackName), m.logger)
		}
		results = append(results, result)

		if result.Error != nil {
			allOK = false
			summary = append(summary, fmt.Sprintf("FAIL %s: %v", result.DB.ContainerName, result.Error))
			m.logger.Printf("[ERROR] DB dump failed for %s: %v", result.DB.ContainerName, result.Error)
			continue
		}

		totalSize += result.Size
		summary = append(summary, fmt.Sprintf("OK %s (%s)", result.DB.ContainerName, humanizeBytes(result.Size)))

		// Persist validation result to settings.json.
		if m.settings != nil && result.FilePath != "" {
			filename := filepath.Base(result.FilePath)
			cache := settings.DBValidationCache{
				ValidatedAt: time.Now().Format(time.RFC3339),
				TableCount:  result.Validation.TableCount,
				HasHeader:   result.Validation.Valid,
			}
			if !result.Validation.Valid {
				cache.Error = result.Validation.Error
			}
			if err := m.settings.SetDBValidation(filename, cache); err != nil {
				m.logger.Printf("[WARN] Failed to cache validation for %s: %v", filename, err)
			}
		}
	}

	duration := time.Since(start)
	m.mu.Lock()
	m.lastDBDump = &DBDumpStatus{
		LastRun:  time.Now(),
		Results:  results,
		Success:  allOK,
		Duration: duration,
	}
	m.mu.Unlock()

	// Ping healthcheck.
	uuid := m.cfg.Monitoring.PingUUIDs.DBDump
	body := fmt.Sprintf("DB dump: %d databases, %s total\n%s",
		len(results), humanizeBytes(totalSize), strings.Join(summary, "\n"))
	if !allOK {
		m.pinger.Fail(uuid, body)
		return fmt.Errorf("some database dumps failed")
	}
	m.pinger.Ping(uuid, body)
	m.logger.Printf("[INFO] DB dump completed: %d databases, %s total (%s)",
		len(results), humanizeBytes(totalSize), duration.Round(time.Millisecond))
	return nil
}
// RunBackup takes a restic snapshot for every drive that hosts deployed apps.
// It fails immediately if another run already holds the running flag; the
// flag is released when the backup returns.
func (m *Manager) RunBackup(ctx context.Context) error {
	err := m.acquireRunning()
	if err != nil {
		return err
	}
	defer m.releaseRunning()

	backupErr := m.runBackupInternal(ctx)
	return backupErr
}
// runBackupInternal is the implementation of RunBackup. Caller must hold the running flag.
//
// For every drive that hosts deployed stacks it makes sure the drive's
// primary restic repository is initialized, snapshots the infrastructure
// paths plus each stack's app data, HDD mounts and DB dump directory (only
// paths that exist on disk), and prunes the repository when shouldPrune says
// so. Drives are processed in sorted order so that the snapshot recorded as
// "last" is the same from run to run.
//
// Healthcheck reporting:
//   - every drive failed: Fail ping, lastBackup marked unsuccessful;
//   - some drives failed: Fail ping describing the partial failure (this used
//     to be reported as "Backup OK");
//   - all drives succeeded: success ping with the last snapshot's stats.
//
// The returned error is the last per-drive error, or nil when every drive
// succeeded or there was nothing to back up. ctx is accepted for interface
// symmetry; the restic calls made here do not take a context.
func (m *Manager) runBackupInternal(ctx context.Context) error {
	start := time.Now()
	m.logger.Printf("[INFO] Starting restic backup (per-drive)")

	driveStacks := m.groupStacksByDrive()
	if len(driveStacks) == 0 {
		m.logger.Printf("[INFO] No deployed stacks — skipping backup")
		return nil
	}

	// Map iteration order is random; sort for a deterministic run order.
	drives := make([]string, 0, len(driveStacks))
	for drivePath := range driveStacks {
		drives = append(drives, drivePath)
	}
	sort.Strings(drives)

	// Infrastructure paths included in every drive's primary repo.
	infraPaths := []string{
		m.cfg.Paths.StacksDir,
		"/opt/docker/felhom-controller/controller.yaml",
	}

	var lastResult *SnapshotResult
	var anyErr error // last per-drive error; earlier ones are only logged
	driveCount := 0
	for _, drivePath := range drives {
		stacks := driveStacks[drivePath]
		repoPath := PrimaryResticRepoPath(drivePath)

		// Ensure repo is initialized.
		if err := m.restic.EnsureInitialized(repoPath); err != nil {
			m.logger.Printf("[ERROR] Restic init failed for %s: %v", repoPath, err)
			anyErr = err
			continue
		}

		// Build paths for this drive.
		var paths []string
		paths = append(paths, infraPaths...)
		for _, stack := range stacks {
			// App data (appdata/<stack>/).
			appData := AppDataDir(drivePath, stack.Name)
			if _, err := os.Stat(appData); err == nil {
				paths = append(paths, appData)
			}
			// HDD mounts (for apps with custom mount points).
			if m.stackProvider != nil {
				for _, mount := range m.stackProvider.GetStackHDDMounts(stack.Name) {
					if _, err := os.Stat(mount); err == nil {
						paths = append(paths, mount)
					}
				}
			}
			// DB dumps for this stack.
			dumpDir := AppDBDumpPath(drivePath, stack.Name)
			if _, err := os.Stat(dumpDir); err == nil {
				paths = append(paths, dumpDir)
			}
		}
		paths = dedup(paths)

		tags := []string{"felhom", m.cfg.Customer.ID, filepath.Base(drivePath)}
		m.logger.Printf("[INFO] Backing up drive %s (%d apps, %d paths)", drivePath, len(stacks), len(paths))
		result, err := m.restic.Snapshot(repoPath, paths, tags)
		if err != nil {
			m.logger.Printf("[ERROR] Restic backup failed for drive %s: %v", drivePath, err)
			anyErr = err
			continue
		}
		lastResult = result
		driveCount++

		// Prune check (weekly — Sunday). A prune failure is only a warning.
		if shouldPrune(m.cfg.Backup.PruneSchedule) {
			m.logger.Printf("[INFO] Running weekly prune for %s", repoPath)
			if err := m.restic.Prune(repoPath, m.cfg.Backup.Retention); err != nil {
				m.logger.Printf("[WARN] Restic prune failed for %s: %v", repoPath, err)
			}
		}
	}

	duration := time.Since(start)
	pingUUID := m.cfg.Monitoring.PingUUIDs.Backup

	if anyErr != nil && driveCount == 0 {
		// All drives failed.
		m.pinger.Fail(pingUUID, fmt.Sprintf("Backup failed: %v", anyErr))
		m.mu.Lock()
		m.lastBackup = &BackupStatus{
			LastRun:  time.Now(),
			Success:  false,
			Duration: duration,
		}
		m.mu.Unlock()
		return anyErr
	}

	// Get aggregated stats (runs restic; done before taking the lock).
	stats := m.aggregateRepoStats()
	m.mu.Lock()
	m.lastBackup = &BackupStatus{
		LastRun:   time.Now(),
		Snapshot:  lastResult,
		Success:   anyErr == nil,
		Duration:  duration,
		RepoStats: stats,
	}
	if lastResult != nil {
		m.appendSnapshotRecord(SnapshotRecord{
			SnapshotID:   lastResult.SnapshotID,
			Time:         time.Now(),
			FilesNew:     lastResult.FilesNew,
			FilesChanged: lastResult.FilesChanged,
			DataAdded:    lastResult.DataAdded,
			Duration:     duration,
			Success:      true,
			HasStats:     true,
		})
	}
	m.mu.Unlock()

	switch {
	case anyErr != nil:
		// Partial failure: at least one drive succeeded, at least one failed.
		// Report it as a failure so monitoring does not see a clean run.
		body := fmt.Sprintf("Backup partially failed (%d of %d drives OK)\nLast error: %v\nDuration: %s",
			driveCount, len(drives), anyErr, duration.Round(time.Second))
		m.pinger.Fail(pingUUID, body)
		m.logger.Printf("[ERROR] Restic backup partially failed: %d of %d drives OK, last error: %v (%s)",
			driveCount, len(drives), anyErr, duration.Round(time.Millisecond))
	case lastResult != nil:
		body := fmt.Sprintf("Backup OK (%d drives)\nSnapshot: %s\nNew files: %d, Changed: %d\nData added: %s\nDuration: %s",
			driveCount, lastResult.SnapshotID, lastResult.FilesNew, lastResult.FilesChanged, lastResult.DataAdded,
			duration.Round(time.Second))
		m.pinger.Ping(pingUUID, body)
		m.logger.Printf("[INFO] Restic backup completed: %d drives, snapshot %s, %d new, %d changed, %s added (%s)",
			driveCount, lastResult.SnapshotID, lastResult.FilesNew, lastResult.FilesChanged, lastResult.DataAdded,
			duration.Round(time.Millisecond))
	}

	// Refresh cache so the page shows updated data immediately.
	if m.AfterBackup != nil {
		m.AfterBackup()
	}
	return anyErr
}
// RunIntegrityCheck runs restic check on all primary repos and pings healthchecks.
//
// Drives whose primary repository does not exist are skipped and are not
// counted in the reported repo total (the total previously counted every
// active drive, including skipped ones). The outcome is stored in
// lastCheckTime / lastCheckOK. When several repositories fail, each failure
// is logged and the last error is returned.
//
// Returns nil when there are no active drives (nothing is checked and no
// ping is sent) or when every checked repository passed. ctx is currently
// unused because the restic check call made here does not take a context.
func (m *Manager) RunIntegrityCheck(ctx context.Context) error {
	m.logger.Printf("[INFO] Starting restic integrity check")
	start := time.Now()

	drives := m.activeDrives()
	if len(drives) == 0 {
		m.logger.Printf("[INFO] No active drives — skipping integrity check")
		return nil
	}

	var checkErr error
	checked := 0 // repositories that actually exist and were checked
	for _, drive := range drives {
		repoPath := PrimaryResticRepoPath(drive)
		if !m.restic.RepoExists(repoPath) {
			continue
		}
		checked++
		if err := m.restic.Check(repoPath); err != nil {
			m.logger.Printf("[ERROR] Restic check failed for %s: %v", repoPath, err)
			checkErr = err
		}
	}

	duration := time.Since(start)
	uuid := m.cfg.Monitoring.PingUUIDs.BackupIntegrity

	m.mu.Lock()
	m.lastCheckTime = time.Now()
	m.lastCheckOK = checkErr == nil
	m.mu.Unlock()

	if checkErr != nil {
		m.logger.Printf("[ERROR] Restic integrity check failed (%s): %v", duration.Round(time.Second), checkErr)
		m.pinger.Fail(uuid, fmt.Sprintf("restic check failed: %v", checkErr))
		return checkErr
	}

	m.logger.Printf("[INFO] Restic integrity check passed (%d repos, %s)", checked, duration.Round(time.Second))
	m.pinger.Ping(uuid, fmt.Sprintf("restic check passed (%d repos, %s)", checked, duration.Round(time.Second)))
	return nil
}
// RunFullBackup performs the DB dumps and then the restic backup under a
// single hold of the running flag. A dump failure is only logged as a
// warning; the backup still runs and its error is the one returned.
func (m *Manager) RunFullBackup(ctx context.Context) error {
	if busy := m.acquireRunning(); busy != nil {
		return busy
	}
	defer m.releaseRunning()

	// Step 1: DB dumps (best effort).
	dumpErr := m.runDBDumpsInternal(ctx)
	if dumpErr != nil {
		m.logger.Printf("[WARN] DB dump had errors, continuing with backup anyway")
	}

	// Step 2: Restic backup.
	return m.runBackupInternal(ctx)
}
// GetStatus returns the most recent DB dump and backup results. Either
// pointer may be nil if no such run has completed. The pointers refer to the
// Manager's own status objects, not copies.
func (m *Manager) GetStatus() (*DBDumpStatus, *BackupStatus) {
	m.mu.Lock()
	dump, backup := m.lastDBDump, m.lastBackup
	m.mu.Unlock()
	return dump, backup
}
// GetRepoStats returns repository statistics aggregated over all primary
// repos. When the aggregate has neither snapshots nor a total size, the
// (empty) stats are still returned together with a "no repos available"
// error.
func (m *Manager) GetRepoStats() (*RepoStats, error) {
	agg := m.aggregateRepoStats()
	hasData := agg.SnapshotCount != 0 || agg.TotalSize != ""
	if hasData {
		return agg, nil
	}
	return agg, fmt.Errorf("no repos available")
}
// IsRunning reports whether the running flag is currently held, i.e. whether
// a backup or restore is in progress.
func (m *Manager) IsRunning() bool {
	m.mu.Lock()
	busy := m.running
	m.mu.Unlock()
	return busy
}
// acquireRunning claims the running flag under the mutex. It returns an
// error, leaving the flag set, when another run already holds it.
func (m *Manager) acquireRunning() error {
	m.mu.Lock()
	alreadyRunning := m.running
	m.running = true // no-op when the flag was already set
	m.mu.Unlock()

	if alreadyRunning {
		return fmt.Errorf("backup already in progress")
	}
	return nil
}
// releaseRunning resets the running flag so that the next run can start.
func (m *Manager) releaseRunning() {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.running = false
}
// GetResticPassword returns the restic repository encryption password as
// provided by the underlying ResticManager, along with any error it reports.
func (m *Manager) GetResticPassword() (string, error) {
	password, err := m.restic.GetPassword()
	return password, err
}
// ListSnapshots gathers the snapshots of every existing primary restic
// repository, stamps each with its repository path and returns them newest
// first. A positive limit caps the number of entries returned. Repositories
// that cannot be listed are logged and skipped; the returned error is always
// nil.
func (m *Manager) ListSnapshots(limit int) ([]SnapshotInfo, error) {
	var merged []SnapshotInfo
	for _, drivePath := range m.activeDrives() {
		repo := PrimaryResticRepoPath(drivePath)
		if !m.restic.RepoExists(repo) {
			continue
		}
		found, err := m.restic.ListSnapshots(repo, 0)
		if err != nil {
			m.logger.Printf("[WARN] Could not list snapshots from %s: %v", repo, err)
			continue
		}
		for idx := range found {
			found[idx].RepoPath = repo
		}
		merged = append(merged, found...)
	}

	// Newest first.
	newestFirst := func(a, b int) bool {
		return merged[a].Time.After(merged[b].Time)
	}
	sort.Slice(merged, newestFirst)

	if limit > 0 && limit < len(merged) {
		merged = merged[:limit]
	}
	return merged, nil
}
// ListAllSnapshots gathers snapshots from the primary repositories (marked
// Tier 1) and from the secondary restic repositories found on cross-drive
// destinations configured with the "restic" method (marked Tier 2). Each
// snapshot is stamped with its repository path. The merged list is returned
// newest first, capped at limit when limit is positive. Repositories that
// cannot be listed are logged and skipped; the returned error is always nil.
func (m *Manager) ListAllSnapshots(limit int) ([]SnapshotInfo, error) {
	var merged []SnapshotInfo

	// stamp marks every snapshot in list with its origin.
	stamp := func(list []SnapshotInfo, repo string, tier int) {
		for idx := range list {
			list[idx].RepoPath = repo
			list[idx].Tier = tier
		}
	}

	// Tier 1: primary repos (same set as ListSnapshots).
	for _, drivePath := range m.activeDrives() {
		repo := PrimaryResticRepoPath(drivePath)
		if !m.restic.RepoExists(repo) {
			continue
		}
		found, err := m.restic.ListSnapshots(repo, 0)
		if err != nil {
			m.logger.Printf("[WARN] Could not list snapshots from %s: %v", repo, err)
			continue
		}
		stamp(found, repo, 1)
		merged = append(merged, found...)
	}

	// Tier 2: secondary restic repos on cross-drive destinations.
	if m.settings != nil {
		// Collect each destination once, however many apps share it.
		destinations := make(map[string]struct{})
		for _, cfg := range m.settings.GetAllCrossDriveConfigs() {
			if cfg == nil || cfg.Method != "restic" || cfg.DestinationPath == "" {
				continue
			}
			destinations[cfg.DestinationPath] = struct{}{}
		}
		for destPath := range destinations {
			repo := SecondaryResticRepoPath(destPath)
			if !m.restic.RepoExists(repo) {
				continue
			}
			found, err := m.restic.ListSnapshots(repo, 0)
			if err != nil {
				m.logger.Printf("[WARN] Could not list secondary snapshots from %s: %v", repo, err)
				continue
			}
			stamp(found, repo, 2)
			merged = append(merged, found...)
		}
	}

	// Newest first.
	sort.Slice(merged, func(a, b int) bool {
		return merged[a].Time.After(merged[b].Time)
	})

	if limit > 0 && limit < len(merged) {
		merged = merged[:limit]
	}
	return merged, nil
}
// SetStackProvider installs the stack data provider used for app data
// discovery.
// C3: the assignment happens under the mutex because stackProvider is read
// by concurrent goroutines.
func (m *Manager) SetStackProvider(provider StackDataProvider) {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.stackProvider = provider
}
// GetStackHDDMounts returns HDD mount paths for the named stack via the stack provider.
//
// The provider is read under m.mu, matching the locked write in
// SetStackProvider, so a concurrent SetStackProvider cannot race with this
// read. The lock is released before the provider is called. Because m.mu is
// not reentrant, this method must not be called while m.mu is held.
//
// Returns nil when no stack provider has been set.
func (m *Manager) GetStackHDDMounts(name string) []string {
	m.mu.Lock()
	provider := m.stackProvider
	m.mu.Unlock()

	if provider == nil {
		return nil
	}
	return provider.GetStackHDDMounts(name)
}
// DumpStackDB runs a database dump for containers belonging to a specific stack.
// Dumps to the stack's home drive: <drive>/backups/primary/<stack>/db-dumps/.
//
// It returns nil without doing anything when the stack has no discovered
// databases. It returns an error when discovery fails, when the stack's drive
// path is empty or not absolute, or on the first dump that fails (remaining
// databases are then not dumped).
//
// After each successful dump the validation result is cached in settings;
// a failure to store that cache entry is logged as a warning (it used to be
// silently discarded) and does not fail the dump.
func (m *Manager) DumpStackDB(ctx context.Context, stackName string) error {
	dbs, err := DiscoverDatabases(ctx, m.logger)
	if err != nil {
		return fmt.Errorf("database discovery failed: %w", err)
	}

	var stackDBs []DiscoveredDB
	for _, db := range dbs {
		if db.StackName == stackName {
			stackDBs = append(stackDBs, db)
		}
	}
	if len(stackDBs) == 0 {
		m.logger.Printf("[DEBUG] No databases found for stack %s — skipping pre-backup dump", stackName)
		return nil
	}

	drivePath := m.GetAppDrivePath(stackName)
	if drivePath == "" || !filepath.IsAbs(drivePath) {
		return fmt.Errorf("cannot determine absolute drive path for %s (systemDataPath not configured?)", stackName)
	}
	dumpDir := AppDBDumpPath(drivePath, stackName)
	m.logger.Printf("[INFO] Running pre-backup DB dump for %s (%d database(s)) → %s", stackName, len(stackDBs), dumpDir)

	for _, db := range stackDBs {
		result := DumpOne(ctx, db, dumpDir, m.logger)
		if result.Error != nil {
			return fmt.Errorf("DB dump failed for %s: %w", result.DB.ContainerName, result.Error)
		}
		m.logger.Printf("[INFO] Pre-backup DB dump OK: %s (%s)", result.DB.ContainerName, humanizeBytes(result.Size))

		// Persist validation to settings.
		if m.settings != nil && result.FilePath != "" {
			filename := filepath.Base(result.FilePath)
			cache := settings.DBValidationCache{
				ValidatedAt: time.Now().Format(time.RFC3339),
				TableCount:  result.Validation.TableCount,
				HasHeader:   result.Validation.Valid,
			}
			if !result.Validation.Valid {
				cache.Error = result.Validation.Error
			}
			// The dump itself succeeded, so a cache write failure is only a warning.
			if err := m.settings.SetDBValidation(filename, cache); err != nil {
				m.logger.Printf("[WARN] Failed to cache validation for %s: %v", filename, err)
			}
		}
	}
	return nil
}
// perDriveRepoStats collects restic statistics for the primary repository of
// each active drive, for the "Details" section of the backup page. Drives
// without a repository, or whose stats cannot be read, are left out.
// DriveLabel is not filled in here.
func (m *Manager) perDriveRepoStats() []DriveRepoInfo {
	var perDrive []DriveRepoInfo
	for _, drivePath := range m.activeDrives() {
		repo := PrimaryResticRepoPath(drivePath)
		if !m.restic.RepoExists(repo) {
			continue
		}
		repoStats, err := m.restic.Stats(repo)
		if err != nil {
			continue
		}

		entry := DriveRepoInfo{DrivePath: drivePath}
		entry.TotalSize = repoStats.TotalSize
		entry.TotalSizeBytes = repoStats.TotalSizeBytes
		entry.SnapshotCount = repoStats.SnapshotCount
		perDrive = append(perDrive, entry)
	}
	return perDrive
}
// aggregateRepoStats sums snapshot counts and sizes over every existing
// primary restic repository and keeps the most recent latest-snapshot among
// them. Repositories whose stats cannot be read are skipped. The result is
// never nil; TotalSize stays empty when the combined size is zero.
func (m *Manager) aggregateRepoStats() *RepoStats {
	combined := new(RepoStats)
	for _, drivePath := range m.activeDrives() {
		repo := PrimaryResticRepoPath(drivePath)
		if !m.restic.RepoExists(repo) {
			continue
		}
		repoStats, err := m.restic.Stats(repo)
		if err != nil {
			continue
		}

		combined.SnapshotCount += repoStats.SnapshotCount
		combined.TotalSizeBytes += repoStats.TotalSizeBytes

		latest := repoStats.LatestSnapshot
		if latest == nil {
			continue
		}
		if combined.LatestSnapshot == nil || latest.Time.After(combined.LatestSnapshot.Time) {
			combined.LatestSnapshot = latest
		}
	}

	if combined.TotalSizeBytes > 0 {
		combined.TotalSize = humanizeBytes(combined.TotalSizeBytes)
	}
	return combined
}
// listAllDumpFiles walks the DB dump directory of every deployed stack on
// every drive and returns all dump files found. Directories that cannot be
// listed are skipped silently.
func (m *Manager) listAllDumpFiles() []DumpFileInfo {
	var collected []DumpFileInfo
	for drivePath, stacks := range m.groupStacksByDrive() {
		for _, stack := range stacks {
			found, err := ListDumpFiles(AppDBDumpPath(drivePath, stack.Name))
			if err != nil {
				continue
			}
			collected = append(collected, found...)
		}
	}
	return collected
}
// shouldPrune reports whether a prune should run now for the given schedule.
// "daily" (case-insensitive) always prunes; "weekly" and any other value
// prune only on Sundays, evaluated in the Europe/Budapest time zone (UTC if
// that zone cannot be loaded).
func shouldPrune(schedule string) bool {
	if strings.ToLower(schedule) == "daily" {
		return true
	}

	// Everything else, including "weekly", means: Sundays only.
	zone, err := time.LoadLocation("Europe/Budapest")
	if err != nil {
		zone = time.UTC
	}
	return time.Now().In(zone).Weekday() == time.Sunday
}
// appendSnapshotRecord pushes rec onto the snapshot history, keeps only the
// newest 20 entries and persists the result to disk. Caller must hold m.mu.
func (m *Manager) appendSnapshotRecord(rec SnapshotRecord) {
	const maxRecords = 20

	history := append(m.snapshotHistory, rec)
	if overflow := len(history) - maxRecords; overflow > 0 {
		history = history[overflow:]
	}
	m.snapshotHistory = history
	m.saveSnapshotHistory()
}
// saveSnapshotHistory persists the current snapshotHistory to disk as JSON.
// Caller must hold m.mu. Writes atomically (tmp file + rename).
//
// Persistence is best effort: failures are logged as warnings and not
// returned. When the write or the rename fails, the temporary file is removed
// so no stale "<history>.tmp" is left next to the real file. Nothing is
// written when no history file path is configured.
func (m *Manager) saveSnapshotHistory() {
	if m.snapshotHistoryFile == "" {
		return
	}

	data, err := json.Marshal(m.snapshotHistory)
	if err != nil {
		m.logger.Printf("[WARN] Could not marshal snapshot history: %v", err)
		return
	}

	tmp := m.snapshotHistoryFile + ".tmp"
	if err := os.WriteFile(tmp, data, 0644); err != nil {
		m.logger.Printf("[WARN] Could not write snapshot history tmp file: %v", err)
		// A partial file may exist; the removal error is deliberately ignored
		// because the write failure has already been reported.
		_ = os.Remove(tmp)
		return
	}
	if err := os.Rename(tmp, m.snapshotHistoryFile); err != nil {
		m.logger.Printf("[WARN] Could not rename snapshot history file: %v", err)
		// Best-effort cleanup of the orphaned tmp file.
		_ = os.Remove(tmp)
	}
}
// loadSnapshotHistoryFromFile reads the persisted snapshot history JSON.
// It returns nil when no history file is configured, when the file is
// missing (silently), or when it cannot be read or parsed (with a warning).
func (m *Manager) loadSnapshotHistoryFromFile() []SnapshotRecord {
	path := m.snapshotHistoryFile
	if path == "" {
		return nil
	}

	raw, readErr := os.ReadFile(path)
	switch {
	case readErr == nil:
		// fall through to parsing
	case os.IsNotExist(readErr):
		// A missing file is the normal first-run situation.
		return nil
	default:
		m.logger.Printf("[WARN] Could not read snapshot history file: %v", readErr)
		return nil
	}

	var loaded []SnapshotRecord
	if parseErr := json.Unmarshal(raw, &loaded); parseErr != nil {
		m.logger.Printf("[WARN] Could not parse snapshot history file: %v", parseErr)
		return nil
	}
	return loaded
}
// LoadSnapshotHistory fills the in-memory snapshot history at startup.
//
// Persisted records (which carry the delta stats of real backup runs) are
// loaded first. Up to 20 snapshots are then listed from each existing primary
// restic repository; any of them not already present by ID is added as a
// stats-less historical record. Without persisted records the history is
// built from the restic listing alone. The history is ordered oldest first
// and trimmed to the newest 20 entries.
func (m *Manager) LoadSnapshotHistory() {
	// Persisted records and an index of their snapshot IDs.
	persisted := m.loadSnapshotHistoryFromFile()
	known := make(map[string]SnapshotRecord, len(persisted))
	for _, rec := range persisted {
		known[rec.SnapshotID] = rec
	}

	// Snapshots currently present in the primary repositories.
	drives := m.activeDrives()
	var fromRestic []SnapshotInfo
	for _, drivePath := range drives {
		repo := PrimaryResticRepoPath(drivePath)
		if !m.restic.RepoExists(repo) {
			continue
		}
		listed, err := m.restic.ListSnapshots(repo, 20)
		if err != nil {
			m.logger.Printf("[WARN] Could not load snapshot history from %s: %v", repo, err)
			continue
		}
		fromRestic = append(fromRestic, listed...)
	}
	// Oldest first, as the ring buffer expects.
	sort.Slice(fromRestic, func(a, b int) bool {
		return fromRestic[a].Time.Before(fromRestic[b].Time)
	})

	// historical converts a restic listing entry into a record without stats.
	historical := func(s SnapshotInfo) SnapshotRecord {
		return SnapshotRecord{
			SnapshotID: s.ID,
			Time:       s.Time,
			HasStats:   false,
			Success:    true,
		}
	}

	m.mu.Lock()
	defer m.mu.Unlock()

	if len(persisted) == 0 {
		// No persisted file — restic-only loading (first run).
		for _, s := range fromRestic {
			m.snapshotHistory = append(m.snapshotHistory, historical(s))
		}
		m.logger.Printf("[INFO] Loaded %d historical snapshots from %d restic repos (no persisted history)", len(m.snapshotHistory), len(drives))
	} else {
		// Persisted records first, then whatever restic knows that they lack.
		m.snapshotHistory = persisted
		for _, s := range fromRestic {
			if _, seen := known[s.ID]; seen {
				continue
			}
			m.snapshotHistory = append(m.snapshotHistory, historical(s))
		}
		// Restore oldest-first order after the merge.
		sort.Slice(m.snapshotHistory, func(a, b int) bool {
			return m.snapshotHistory[a].Time.Before(m.snapshotHistory[b].Time)
		})
		m.logger.Printf("[INFO] Loaded %d snapshots from persisted history (merged with %d restic entries)", len(persisted), len(fromRestic))
	}

	if excess := len(m.snapshotHistory) - 20; excess > 0 {
		m.snapshotHistory = m.snapshotHistory[excess:]
	}
}
// RefreshCache rebuilds the cached full status used by GetFullStatus.
// Per the original note it is called by the scheduler every 5 minutes and
// after each backup run.
//
// The expensive work (restic stats, dump file scan, database discovery with
// a 10 second timeout, app data discovery) runs without holding m.mu. The
// lock is then taken once to copy the dynamic state into the new status,
// back-fill missing dump validations and swap the cache.
//
// nextDBDump and nextBackup are stored in the cached status as given.
func (m *Manager) RefreshCache(nextDBDump, nextBackup time.Time) {
// Static configuration plus the caller-supplied schedule times.
status := &FullBackupStatus{
Enabled: m.cfg.Backup.Enabled,
DBDumpSchedule: m.cfg.Backup.DBDumpSchedule,
ResticSchedule: m.cfg.Backup.ResticSchedule,
PruneSchedule: m.cfg.Backup.PruneSchedule,
NextDBDump: nextDBDump,
NextBackup: nextBackup,
Retention: m.cfg.Backup.Retention,
}
// Expensive calls (outside lock)
status.RepoStats = m.aggregateRepoStats()
status.PerDriveRepoStats = m.perDriveRepoStats()
// Scan dump files from per-drive per-stack paths
files := m.listAllDumpFiles()
status.DumpFiles = files
// Database discovery is bounded to 10 seconds; on error DiscoveredDBs
// simply stays empty.
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
if dbs, err := DiscoverDatabases(ctx, m.logger); err == nil {
status.DiscoveredDBs = dbs
}
// Discover app data — all deployed stacks, backup is mandatory
if m.stackProvider != nil {
status.AppDataInfo = DiscoverAppData(m.stackProvider, status.DiscoveredDBs)
}
// Fill in dynamic fields under lock.
// C1: lastDBDump mutation also happens here to prevent data races with GetFullStatus.
// C2: snapshot history reversal happens before cachedStatus assignment (inside lock).
m.mu.Lock()
status.Running = m.running
status.LastDBDump = m.lastDBDump
status.LastBackup = m.lastBackup
status.LastCheckTime = m.lastCheckTime
status.LastCheckOK = m.lastCheckOK
// Copy the history so the reversal below does not reorder m.snapshotHistory.
status.SnapshotHistory = make([]SnapshotRecord, len(m.snapshotHistory))
copy(status.SnapshotHistory, m.snapshotHistory)
// C1: Cross-check lastDBDump results inside lock to prevent torn writes.
// A result that is marked invalid but carries no error message is replaced
// by the validation of the dump file with the same base name found on disk.
// This writes into m.lastDBDump.Results in place.
if m.lastDBDump != nil && len(files) > 0 {
fileValidation := make(map[string]DumpValidation) // keyed by filename
for _, f := range files {
fileValidation[f.FileName] = f.Validation
}
for i, r := range m.lastDBDump.Results {
if !r.Validation.Valid && r.Validation.Error == "" && r.FilePath != "" {
filename := filepath.Base(r.FilePath)
if fv, ok := fileValidation[filename]; ok {
m.lastDBDump.Results[i].Validation = fv
m.logger.Printf("[INFO] Re-validated %s from disk: valid=%v tables=%d",
filename, fv.Valid, fv.TableCount)
}
}
}
}
// C2: Reverse snapshot history before assigning to cachedStatus (inside lock).
// m.snapshotHistory is oldest first; the cached copy is newest first.
for i, j := 0, len(status.SnapshotHistory)-1; i < j; i, j = i+1, j-1 {
status.SnapshotHistory[i], status.SnapshotHistory[j] = status.SnapshotHistory[j], status.SnapshotHistory[i]
}
m.cachedStatus = status
m.cacheTime = time.Now()
m.mu.Unlock()
m.logger.Printf("[INFO] Backup status cache refreshed")
}
// GetFullStatus returns the cached backup status for page rendering.
// Returns instantly — no subprocess calls.
//
// The result is a copy that callers may modify freely: slice fields are
// copied so appending to them or editing their elements cannot pollute the
// cache (which would cause duplicate entries on repeated calls), and
// LastDBDump / LastBackup are copies of the Manager's status objects on both
// the cached path and the "no cache yet" fallback path (the fallback used to
// hand out the shared pointers). Objects reached through nested pointers
// (for example RepoStats or BackupStatus.Snapshot) are still shared and
// must be treated as read-only.
//
// nextDBDump and nextBackup are written into the returned status as given.
// When no last backup / last dump is held in memory (e.g. after a restart),
// they are synthesized from the snapshot history and from the dump files
// found on disk respectively.
func (m *Manager) GetFullStatus(nextDBDump, nextBackup time.Time) *FullBackupStatus {
	m.mu.Lock()
	defer m.mu.Unlock()

	// C4: Deep-copy lastDBDump and lastBackup so callers cannot mutate shared state.
	var dumpCopy *DBDumpStatus
	if m.lastDBDump != nil {
		d := *m.lastDBDump
		if len(m.lastDBDump.Results) > 0 {
			d.Results = make([]DumpResult, len(m.lastDBDump.Results))
			copy(d.Results, m.lastDBDump.Results)
		}
		dumpCopy = &d
	}
	var backupCopy *BackupStatus
	if m.lastBackup != nil {
		b := *m.lastBackup
		backupCopy = &b
	}

	if m.cachedStatus == nil {
		// No cache yet — return a minimal status (first page load before cache is populated).
		return &FullBackupStatus{
			Enabled:        m.cfg.Backup.Enabled,
			Running:        m.running,
			LastDBDump:     dumpCopy,
			LastBackup:     backupCopy,
			DBDumpSchedule: m.cfg.Backup.DBDumpSchedule,
			ResticSchedule: m.cfg.Backup.ResticSchedule,
			PruneSchedule:  m.cfg.Backup.PruneSchedule,
			NextDBDump:     nextDBDump,
			NextBackup:     nextBackup,
			Retention:      m.cfg.Backup.Retention,
			LastCheckTime:  m.lastCheckTime,
			LastCheckOK:    m.lastCheckOK,
		}
	}

	// Copy the cache — callers (backupsHandler) append to CrossDriveSummary,
	// UnconfiguredApps, and CrossDriveWarnings. If we returned the cache
	// pointer directly, every page load would accumulate more entries.
	status := *m.cachedStatus
	status.AppDataInfo = make([]AppBackupInfo, len(m.cachedStatus.AppDataInfo))
	copy(status.AppDataInfo, m.cachedStatus.AppDataInfo)
	status.PerDriveRepoStats = make([]DriveRepoInfo, len(m.cachedStatus.PerDriveRepoStats))
	copy(status.PerDriveRepoStats, m.cachedStatus.PerDriveRepoStats)
	// Detach the remaining cached slices as well; append onto a nil slice
	// keeps an empty source nil.
	status.DumpFiles = append([]DumpFileInfo(nil), m.cachedStatus.DumpFiles...)
	status.DiscoveredDBs = append([]DiscoveredDB(nil), m.cachedStatus.DiscoveredDBs...)

	// These three slices are assembled by the handler from AppDataInfo + settings;
	// they must always start empty so the handler builds them fresh.
	status.CrossDriveSummary = nil
	status.UnconfiguredApps = nil
	status.CrossDriveWarnings = nil

	// Update dynamic fields that don't need subprocess calls.
	status.Running = m.running
	status.NextDBDump = nextDBDump
	status.NextBackup = nextBackup
	if dumpCopy != nil {
		status.LastDBDump = dumpCopy
	}
	if backupCopy != nil {
		status.LastBackup = backupCopy
	}

	// Update snapshot history from the live ring buffer, newest first.
	status.SnapshotHistory = make([]SnapshotRecord, len(m.snapshotHistory))
	copy(status.SnapshotHistory, m.snapshotHistory)
	for i, j := 0, len(status.SnapshotHistory)-1; i < j; i, j = i+1, j-1 {
		status.SnapshotHistory[i], status.SnapshotHistory[j] = status.SnapshotHistory[j], status.SnapshotHistory[i]
	}

	// Synthesize LastBackup from snapshot history if not in memory (e.g., after restart).
	if status.LastBackup == nil && len(status.SnapshotHistory) > 0 {
		latest := status.SnapshotHistory[0] // already reversed, newest first
		status.LastBackup = &BackupStatus{
			LastRun: latest.Time,
			Success: latest.Success,
			Snapshot: &SnapshotResult{
				SnapshotID: latest.SnapshotID,
			},
		}
	}

	// Synthesize LastDBDump from DumpFiles on disk if not in memory.
	if status.LastDBDump == nil && len(status.DumpFiles) > 0 {
		results := make([]DumpResult, 0, len(status.DumpFiles))
		var latestTime time.Time
		for _, f := range status.DumpFiles {
			results = append(results, DumpResult{
				DB:         DiscoveredDB{StackName: f.StackName, DBType: f.DBType, ContainerName: f.StackName},
				FilePath:   f.FileName,
				Size:       f.Size,
				Validation: f.Validation,
			})
			if f.ModTime.After(latestTime) {
				latestTime = f.ModTime
			}
		}
		status.LastDBDump = &DBDumpStatus{
			LastRun: latestTime,
			Results: results,
			Success: true,
		}
	}
	return &status
}
// dbNames renders the discovered databases as a comma-separated list of
// "container(type)" entries, for log output. An empty input yields "".
func dbNames(dbs []DiscoveredDB) string {
	var sb strings.Builder
	for i, db := range dbs {
		if i > 0 {
			sb.WriteString(", ")
		}
		fmt.Fprintf(&sb, "%s(%s)", db.ContainerName, db.DBType)
	}
	return sb.String()
}
// dedup returns items with every repeated string dropped, keeping the first
// occurrence of each and the original order. The result is nil when items is
// empty.
func dedup(items []string) []string {
	seen := make(map[string]struct{}, len(items))
	var unique []string
	for _, item := range items {
		if _, dup := seen[item]; dup {
			continue
		}
		seen[item] = struct{}{}
		unique = append(unique, item)
	}
	return unique
}