c929948f27
- Add Docker named volume backup to Tier 1 (dump to tar, include in restic) and Tier 2 (copy tars to rsync mirror _volumes/ dir) - Fix volume name resolution: use project-prefixed names (mealie_mealie_data) - Fix double Tier 1 in restore dropdown: filter snapshots by app's home drive - Add Tier 2 restore: RestoreAppFromTier2() restores from rsync mirror - Show Tier 2 entry in restore dropdown when cross-drive backup succeeded - Add .fab import link in restore section - Volume-aware restore type banners and backup content labels Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1449 lines
45 KiB
Go
1449 lines
45 KiB
Go
package backup
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
|
|
"gitea.dooplex.hu/admin/felhom-controller/internal/monitor"
|
|
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
|
|
)
|
|
|
|
// Manager orchestrates database dumps and restic backups.
|
|
type Manager struct {
|
|
cfg *config.Config
|
|
restic *ResticManager
|
|
logger *log.Logger
|
|
pinger *monitor.Pinger
|
|
settings *settings.Settings
|
|
stackProvider StackDataProvider
|
|
systemDataPath string // fallback drive for SSD-only apps
|
|
|
|
mu sync.Mutex
|
|
lastDBDump *DBDumpStatus
|
|
lastBackup *BackupStatus
|
|
running bool
|
|
snapshotHistory []SnapshotRecord // ring buffer, last 20 entries
|
|
snapshotHistoryFile string // path to persist snapshot history JSON
|
|
lastCheckTime time.Time
|
|
lastCheckOK bool
|
|
|
|
// Cached status for page rendering (refreshed periodically)
|
|
cachedStatus *FullBackupStatus
|
|
cacheTime time.Time
|
|
|
|
// AfterBackup is called after a backup completes to refresh the cache.
|
|
// Set by main.go to avoid circular import with scheduler.
|
|
AfterBackup func()
|
|
|
|
// MigrationActiveCheck returns true if a full drive migration is in progress.
|
|
// Set by main.go to coordinate with DriveMigrator.
|
|
MigrationActiveCheck func() bool
|
|
}
|
|
|
|
// SnapshotRecord is one entry of the snapshot history: the restic snapshot
// identity plus the statistics collected for the run that produced it.
type SnapshotRecord struct {
	SnapshotID   string        `json:"snapshot_id"`
	Time         time.Time     `json:"time"`
	FilesNew     int           `json:"files_new"`
	FilesChanged int           `json:"files_changed"`
	DataAdded    string        `json:"data_added"`
	Duration     time.Duration `json:"duration"`
	Success      bool          `json:"success"`
	// HasStats is false for historical entries that were loaded from restic
	// and therefore carry no run statistics.
	HasStats bool `json:"has_stats"`
}
|
|
|
|
// DriveRepoInfo holds per-drive restic repository statistics for the Részletek section.
|
|
type DriveRepoInfo struct {
|
|
DrivePath string
|
|
DriveLabel string // filled by handler from settings
|
|
TotalSize string
|
|
TotalSizeBytes int64
|
|
SnapshotCount int
|
|
LatestSnapshot *SnapshotInfo `json:"-"` // used for aggregation, not serialized
|
|
}
|
|
|
|
// CrossDriveSummaryItem is the display data for the cross-drive backup of a
// single app.
type CrossDriveSummaryItem struct {
	StackName     string
	DisplayName   string
	Method        string // "rsync" or "restic"
	MethodLabel   string // display label for Method, e.g. "Egyszerű másolat" (simple copy) or "Restic"
	DestPath      string
	DestLabel     string // label of the destination storage path
	Schedule      string
	ScheduleLabel string // display label for Schedule: "Naponta" (daily), "Hetente" (weekly) or "Kézi" (manual)
	LastStatus    string // "ok", "error", "running" or empty
	LastRunShort  string // short formatted time, e.g. "03:15"
	SizeHuman     string
}
|
|
|
|
// FullBackupStatus contains everything the backup page needs.
|
|
type FullBackupStatus struct {
|
|
Enabled bool
|
|
Running bool
|
|
|
|
// DB Dumps
|
|
LastDBDump *DBDumpStatus
|
|
DumpFiles []DumpFileInfo
|
|
DiscoveredDBs []DiscoveredDB
|
|
|
|
// Restic
|
|
LastBackup *BackupStatus
|
|
SnapshotHistory []SnapshotRecord
|
|
RepoStats *RepoStats
|
|
PerDriveRepoStats []DriveRepoInfo // per-drive Tier 1 restic stats
|
|
|
|
// Schedule
|
|
DBDumpSchedule string
|
|
ResticSchedule string
|
|
PruneSchedule string
|
|
NextDBDump time.Time
|
|
NextBackup time.Time
|
|
Retention config.RetentionConfig
|
|
|
|
// Repository health
|
|
LastCheckTime time.Time
|
|
LastCheckOK bool
|
|
|
|
// Remote (placeholder)
|
|
RemoteEnabled bool
|
|
|
|
// App data backup
|
|
AppDataInfo []AppBackupInfo
|
|
|
|
// Cross-drive backup summary
|
|
CrossDriveSummary []CrossDriveSummaryItem
|
|
UnconfiguredApps []CrossDriveSummaryItem // apps with HDD data but no cross-drive config
|
|
CrossDriveWarnings []string // destination health warnings
|
|
|
|
// Flash messages (set by handlers, passed through redirect)
|
|
FlashSuccess string
|
|
FlashError string
|
|
}
|
|
|
|
// DBDumpStatus holds the last DB dump result.
|
|
type DBDumpStatus struct {
|
|
LastRun time.Time
|
|
Results []DumpResult
|
|
Success bool
|
|
Duration time.Duration
|
|
}
|
|
|
|
// BackupStatus holds the last backup result.
|
|
type BackupStatus struct {
|
|
LastRun time.Time
|
|
Snapshot *SnapshotResult
|
|
Success bool
|
|
Duration time.Duration
|
|
RepoStats *RepoStats
|
|
}
|
|
|
|
// NewManager creates a new backup manager.
|
|
func NewManager(cfg *config.Config, pinger *monitor.Pinger, sett *settings.Settings, logger *log.Logger) *Manager {
|
|
if cfg.Paths.SystemDataPath == "" {
|
|
logger.Printf("[WARN] [backup] SystemDataPath is empty in config — SSD-only apps will not have correct backup paths")
|
|
}
|
|
dataDir := cfg.Paths.DataDir
|
|
if dataDir == "" {
|
|
dataDir = "/opt/docker/felhom-controller/data"
|
|
}
|
|
restic := NewResticManager(cfg, logger)
|
|
restic.SetDebug(cfg.Logging.Level == "debug")
|
|
return &Manager{
|
|
cfg: cfg,
|
|
restic: restic,
|
|
logger: logger,
|
|
pinger: pinger,
|
|
settings: sett,
|
|
systemDataPath: cfg.Paths.SystemDataPath,
|
|
snapshotHistoryFile: filepath.Join(dataDir, "snapshot-history.json"),
|
|
}
|
|
}
|
|
|
|
// GetAppDrivePath returns the drive path for an app.
|
|
// Uses HDD_PATH from app.yaml if set, otherwise falls back to system data path.
|
|
func (m *Manager) GetAppDrivePath(stackName string) string {
|
|
if m.stackProvider != nil {
|
|
if hddPath := m.stackProvider.GetStackHDDPath(stackName); hddPath != "" {
|
|
return hddPath
|
|
}
|
|
}
|
|
if m.systemDataPath == "" {
|
|
m.logger.Printf("[ERROR] [backup] systemDataPath is empty — cannot determine drive for %s", stackName)
|
|
}
|
|
return m.systemDataPath
|
|
}
|
|
|
|
// groupStacksByDrive groups deployed stacks by their home drive path.
|
|
func (m *Manager) groupStacksByDrive() map[string][]StackSummary {
|
|
if m.stackProvider == nil {
|
|
return nil
|
|
}
|
|
result := make(map[string][]StackSummary)
|
|
for _, stack := range m.stackProvider.ListDeployedStacks() {
|
|
drive := m.GetAppDrivePath(stack.Name)
|
|
result[drive] = append(result[drive], stack)
|
|
}
|
|
if m.isDebug() {
|
|
for drive, stacks := range result {
|
|
names := make([]string, len(stacks))
|
|
for i, s := range stacks {
|
|
names[i] = s.Name
|
|
}
|
|
m.logger.Printf("[DEBUG] groupStacksByDrive: %s → [%s]", drive, strings.Join(names, ", "))
|
|
}
|
|
}
|
|
return result
|
|
}
|
|
|
|
// activeDrives returns sorted list of drives that have deployed apps.
|
|
// Disconnected and decommissioned drives are excluded.
|
|
func (m *Manager) activeDrives() []string {
|
|
groups := m.groupStacksByDrive()
|
|
var drives []string
|
|
var skipped []string
|
|
for d := range groups {
|
|
if m.settings != nil && (m.settings.IsDisconnected(d) || m.settings.IsDecommissioned(d)) {
|
|
skipped = append(skipped, d)
|
|
continue
|
|
}
|
|
drives = append(drives, d)
|
|
}
|
|
sort.Strings(drives)
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] activeDrives: %d active (%s), %d skipped (disconnected/decommissioned)",
|
|
len(drives), strings.Join(drives, ", "), len(skipped))
|
|
}
|
|
return drives
|
|
}
|
|
|
|
// RunDBDumps discovers and dumps all databases to per-drive, per-app paths.
|
|
func (m *Manager) RunDBDumps(ctx context.Context) error {
|
|
if err := m.acquireRunning(); err != nil {
|
|
return err
|
|
}
|
|
defer m.releaseRunning()
|
|
return m.runDBDumpsInternal(ctx)
|
|
}
|
|
|
|
// runDBDumpsInternal is the implementation of RunDBDumps. Caller must hold the running flag.
|
|
func (m *Manager) runDBDumpsInternal(ctx context.Context) error {
|
|
start := time.Now()
|
|
m.logger.Printf("[INFO] [backup] Starting database dump run")
|
|
|
|
dbs, err := DiscoverDatabases(ctx, m.logger, m.isDebug())
|
|
if err != nil {
|
|
m.logger.Printf("[ERROR] [backup] Database discovery failed: %v", err)
|
|
return err
|
|
}
|
|
|
|
if len(dbs) == 0 {
|
|
m.logger.Printf("[INFO] [backup] No database containers found")
|
|
m.mu.Lock()
|
|
m.lastDBDump = &DBDumpStatus{
|
|
LastRun: time.Now(),
|
|
Success: true,
|
|
Duration: time.Since(start),
|
|
}
|
|
m.mu.Unlock()
|
|
return nil
|
|
}
|
|
|
|
m.logger.Printf("[INFO] [backup] Discovered %d database(s): %s", len(dbs), dbNames(dbs))
|
|
|
|
// Dump each DB to its app's drive path
|
|
var results []DumpResult
|
|
allOK := true
|
|
var summary []string
|
|
var totalSize int64
|
|
|
|
for _, db := range dbs {
|
|
drivePath := m.GetAppDrivePath(db.StackName)
|
|
|
|
// Skip if drive is disconnected or decommissioned
|
|
if m.settings != nil && m.settings.IsDisconnected(drivePath) {
|
|
m.logger.Printf("[WARN] [backup] Skipping DB dump for %s — drive disconnected: %s", db.StackName, drivePath)
|
|
summary = append(summary, fmt.Sprintf("SKIP %s (drive disconnected)", db.ContainerName))
|
|
continue
|
|
}
|
|
if m.settings != nil && m.settings.IsDecommissioned(drivePath) {
|
|
m.logger.Printf("[WARN] [backup] Skipping DB dump for %s — drive decommissioned: %s", db.StackName, drivePath)
|
|
summary = append(summary, fmt.Sprintf("SKIP %s (drive decommissioned)", db.ContainerName))
|
|
continue
|
|
}
|
|
|
|
dumpDir := AppDBDumpPath(drivePath, db.StackName)
|
|
|
|
result := DumpOne(ctx, db, dumpDir, m.logger, m.isDebug())
|
|
results = append(results, result)
|
|
|
|
if result.Error != nil {
|
|
allOK = false
|
|
summary = append(summary, fmt.Sprintf("FAIL %s: %v", result.DB.ContainerName, result.Error))
|
|
m.logger.Printf("[ERROR] [backup] DB dump failed for %s: %v", result.DB.ContainerName, result.Error)
|
|
} else {
|
|
totalSize += result.Size
|
|
summary = append(summary, fmt.Sprintf("OK %s (%s)", result.DB.ContainerName, humanizeBytes(result.Size)))
|
|
|
|
// Persist validation result to settings.json
|
|
if m.settings != nil && result.FilePath != "" {
|
|
filename := filepath.Base(result.FilePath)
|
|
cache := settings.DBValidationCache{
|
|
ValidatedAt: time.Now().Format(time.RFC3339),
|
|
TableCount: result.Validation.TableCount,
|
|
HasHeader: result.Validation.Valid,
|
|
}
|
|
if !result.Validation.Valid {
|
|
cache.Error = result.Validation.Error
|
|
}
|
|
if err := m.settings.SetDBValidation(filename, cache); err != nil {
|
|
m.logger.Printf("[WARN] [backup] Failed to cache validation for %s: %v", filename, err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
duration := time.Since(start)
|
|
m.mu.Lock()
|
|
m.lastDBDump = &DBDumpStatus{
|
|
LastRun: time.Now(),
|
|
Results: results,
|
|
Success: allOK,
|
|
Duration: duration,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
// Ping healthcheck
|
|
uuid := m.cfg.Monitoring.PingUUIDs.DBDump
|
|
body := fmt.Sprintf("DB dump: %d databases, %s total\n%s",
|
|
len(results), humanizeBytes(totalSize), strings.Join(summary, "\n"))
|
|
|
|
if allOK {
|
|
m.pinger.Ping(uuid, body)
|
|
m.logger.Printf("[INFO] [backup] DB dump completed: %d databases, %s total (%s)",
|
|
len(results), humanizeBytes(totalSize), duration.Round(time.Millisecond))
|
|
} else {
|
|
m.pinger.Fail(uuid, body)
|
|
return fmt.Errorf("some database dumps failed")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// RunBackup runs per-drive restic backup snapshots.
|
|
func (m *Manager) RunBackup(ctx context.Context) error {
|
|
if err := m.acquireRunning(); err != nil {
|
|
return err
|
|
}
|
|
defer m.releaseRunning()
|
|
return m.runBackupInternal(ctx)
|
|
}
|
|
|
|
// runBackupInternal is the implementation of RunBackup. Caller must hold the running flag.
|
|
func (m *Manager) runBackupInternal(ctx context.Context) error {
|
|
// Skip if a full drive migration is in progress
|
|
if m.MigrationActiveCheck != nil && m.MigrationActiveCheck() {
|
|
m.logger.Printf("[WARN] [backup] Skipping nightly backup — drive migration in progress")
|
|
return nil
|
|
}
|
|
|
|
start := time.Now()
|
|
m.logger.Printf("[INFO] [backup] Starting restic backup (per-drive)")
|
|
|
|
driveStacks := m.groupStacksByDrive()
|
|
if len(driveStacks) == 0 {
|
|
m.logger.Printf("[INFO] [backup] No deployed stacks — skipping backup")
|
|
return nil
|
|
}
|
|
|
|
// Infrastructure paths included in every drive's primary repo
|
|
infraPaths := []string{
|
|
m.cfg.Paths.StacksDir,
|
|
"/opt/docker/felhom-controller/controller.yaml",
|
|
}
|
|
|
|
var lastResult *SnapshotResult
|
|
var anyErr error
|
|
driveCount := 0
|
|
|
|
for drivePath, stacks := range driveStacks {
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] runBackupInternal: processing drive %s (%d stacks)", drivePath, len(stacks))
|
|
}
|
|
result, err := m.backupDrive(ctx, drivePath, stacks, infraPaths)
|
|
if err != nil {
|
|
anyErr = err
|
|
continue
|
|
}
|
|
if result != nil {
|
|
lastResult = result
|
|
driveCount++
|
|
}
|
|
}
|
|
|
|
duration := time.Since(start)
|
|
|
|
if anyErr != nil && driveCount == 0 {
|
|
// All drives failed
|
|
m.pinger.Fail(m.cfg.Monitoring.PingUUIDs.Backup, fmt.Sprintf("Backup failed: %v", anyErr))
|
|
m.mu.Lock()
|
|
m.lastBackup = &BackupStatus{
|
|
LastRun: time.Now(),
|
|
Success: false,
|
|
Duration: duration,
|
|
}
|
|
m.mu.Unlock()
|
|
return anyErr
|
|
}
|
|
|
|
// Get aggregated stats
|
|
stats := m.aggregateRepoStats()
|
|
|
|
m.mu.Lock()
|
|
m.lastBackup = &BackupStatus{
|
|
LastRun: time.Now(),
|
|
Snapshot: lastResult,
|
|
Success: anyErr == nil,
|
|
Duration: duration,
|
|
RepoStats: stats,
|
|
}
|
|
if lastResult != nil {
|
|
m.appendSnapshotRecord(SnapshotRecord{
|
|
SnapshotID: lastResult.SnapshotID,
|
|
Time: time.Now(),
|
|
FilesNew: lastResult.FilesNew,
|
|
FilesChanged: lastResult.FilesChanged,
|
|
DataAdded: lastResult.DataAdded,
|
|
Duration: duration,
|
|
Success: true,
|
|
HasStats: true,
|
|
})
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
if lastResult != nil {
|
|
body := fmt.Sprintf("Backup OK (%d drives)\nSnapshot: %s\nNew files: %d, Changed: %d\nData added: %s\nDuration: %s",
|
|
driveCount, lastResult.SnapshotID, lastResult.FilesNew, lastResult.FilesChanged, lastResult.DataAdded,
|
|
duration.Round(time.Second))
|
|
m.pinger.Ping(m.cfg.Monitoring.PingUUIDs.Backup, body)
|
|
|
|
m.logger.Printf("[INFO] [backup] Restic backup completed: %d drives, snapshot %s, %d new, %d changed, %s added (%s)",
|
|
driveCount, lastResult.SnapshotID, lastResult.FilesNew, lastResult.FilesChanged, lastResult.DataAdded,
|
|
duration.Round(time.Millisecond))
|
|
}
|
|
|
|
// Refresh cache so the page shows updated data immediately
|
|
if m.AfterBackup != nil {
|
|
m.AfterBackup()
|
|
}
|
|
|
|
return anyErr
|
|
}
|
|
|
|
// backupDrive runs restic backup for a single drive. Returns nil result if skipped.
|
|
// Caller must hold the running flag.
|
|
func (m *Manager) backupDrive(ctx context.Context, drivePath string, stacks []StackSummary, infraPaths []string) (*SnapshotResult, error) {
|
|
// Skip disconnected or decommissioned drives
|
|
if m.settings != nil && m.settings.IsDisconnected(drivePath) {
|
|
m.logger.Printf("[WARN] [backup] Skipping backup for drive %s — disconnected", drivePath)
|
|
return nil, nil
|
|
}
|
|
if m.settings != nil && m.settings.IsDecommissioned(drivePath) {
|
|
m.logger.Printf("[WARN] [backup] Skipping backup for drive %s — decommissioned", drivePath)
|
|
return nil, nil
|
|
}
|
|
|
|
repoPath := PrimaryResticRepoPath(drivePath)
|
|
|
|
// Ensure repo is initialized
|
|
if err := m.restic.EnsureInitialized(repoPath); err != nil {
|
|
m.logger.Printf("[ERROR] [backup] Restic init failed for %s: %v", repoPath, err)
|
|
return nil, err
|
|
}
|
|
|
|
// Build paths for this drive
|
|
var paths []string
|
|
paths = append(paths, infraPaths...)
|
|
|
|
for _, stack := range stacks {
|
|
// App data (appdata/<stack>/)
|
|
appData := AppDataDir(drivePath, stack.Name)
|
|
if _, err := os.Stat(appData); err == nil {
|
|
paths = append(paths, appData)
|
|
}
|
|
// HDD mounts (for apps with custom mount points)
|
|
if m.stackProvider != nil {
|
|
for _, mount := range m.stackProvider.GetStackHDDMounts(stack.Name) {
|
|
if _, err := os.Stat(mount); err == nil {
|
|
paths = append(paths, mount)
|
|
}
|
|
}
|
|
}
|
|
// DB dumps for this stack
|
|
dumpDir := AppDBDumpPath(drivePath, stack.Name)
|
|
if _, err := os.Stat(dumpDir); err == nil {
|
|
paths = append(paths, dumpDir)
|
|
}
|
|
// Docker volume dumps for this stack
|
|
volDumpDir := AppVolumeDumpPath(drivePath, stack.Name)
|
|
if _, err := os.Stat(volDumpDir); err == nil {
|
|
paths = append(paths, volDumpDir)
|
|
}
|
|
}
|
|
|
|
// Deduplicate paths
|
|
paths = dedup(paths)
|
|
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] backupDrive %s: repo=%s, %d include paths:", drivePath, repoPath, len(paths))
|
|
for _, p := range paths {
|
|
m.logger.Printf("[DEBUG] %s", p)
|
|
}
|
|
}
|
|
|
|
tags := []string{"felhom", m.cfg.Customer.ID, filepath.Base(drivePath)}
|
|
m.logger.Printf("[INFO] [backup] Backing up drive %s (%d apps, %d paths)", drivePath, len(stacks), len(paths))
|
|
|
|
result, err := m.restic.Snapshot(repoPath, paths, tags)
|
|
if err != nil {
|
|
m.logger.Printf("[ERROR] [backup] Restic backup failed for drive %s: %v", drivePath, err)
|
|
return nil, err
|
|
}
|
|
|
|
// Prune check (weekly — Sunday)
|
|
if shouldPrune(m.cfg.Backup.PruneSchedule) {
|
|
m.logger.Printf("[INFO] [backup] Running weekly prune for %s", repoPath)
|
|
if err := m.restic.Prune(repoPath, m.cfg.Backup.Retention); err != nil {
|
|
m.logger.Printf("[WARN] [backup] Restic prune failed for %s: %v", repoPath, err)
|
|
}
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
// tryAcquireRunning attempts to set the running flag without blocking.
|
|
// Returns true if acquired, false if already running.
|
|
func (m *Manager) tryAcquireRunning() bool {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
if m.running {
|
|
return false
|
|
}
|
|
m.running = true
|
|
return true
|
|
}
|
|
|
|
// TryRunDriveBackup runs a backup for a single drive if no other backup is in progress.
|
|
// Returns error if the backup lock cannot be acquired or if backup fails.
|
|
func (m *Manager) TryRunDriveBackup(ctx context.Context, drivePath string) error {
|
|
if !m.tryAcquireRunning() {
|
|
return fmt.Errorf("backup already in progress")
|
|
}
|
|
defer m.releaseRunning()
|
|
|
|
driveStacks := m.groupStacksByDrive()
|
|
stacks, ok := driveStacks[drivePath]
|
|
if !ok || len(stacks) == 0 {
|
|
m.logger.Printf("[INFO] [backup] No deployed stacks on drive %s — skipping backup", drivePath)
|
|
return nil
|
|
}
|
|
|
|
infraPaths := []string{
|
|
m.cfg.Paths.StacksDir,
|
|
"/opt/docker/felhom-controller/controller.yaml",
|
|
}
|
|
|
|
result, err := m.backupDrive(ctx, drivePath, stacks, infraPaths)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if result != nil {
|
|
m.logger.Printf("[INFO] [backup] Single-drive backup for %s: snapshot %s, %d new, %d changed, %s added",
|
|
drivePath, result.SnapshotID, result.FilesNew, result.FilesChanged, result.DataAdded)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// RunIntegrityCheck runs restic check on all primary repos and pings healthchecks.
|
|
func (m *Manager) RunIntegrityCheck(ctx context.Context) error {
|
|
m.logger.Printf("[INFO] [backup] Starting restic integrity check")
|
|
start := time.Now()
|
|
|
|
drives := m.activeDrives()
|
|
if len(drives) == 0 {
|
|
m.logger.Printf("[INFO] [backup] No active drives — skipping integrity check")
|
|
return nil
|
|
}
|
|
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RunIntegrityCheck: checking %d drives", len(drives))
|
|
}
|
|
|
|
var checkErr error
|
|
for _, drive := range drives {
|
|
repoPath := PrimaryResticRepoPath(drive)
|
|
if !m.restic.RepoExists(repoPath) {
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RunIntegrityCheck: skipping %s (repo does not exist)", repoPath)
|
|
}
|
|
continue
|
|
}
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RunIntegrityCheck: checking repo %s", repoPath)
|
|
}
|
|
if err := m.restic.Check(repoPath); err != nil {
|
|
m.logger.Printf("[ERROR] [backup] Restic check failed for %s: %v", repoPath, err)
|
|
checkErr = err
|
|
} else if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RunIntegrityCheck: repo %s OK", repoPath)
|
|
}
|
|
}
|
|
|
|
duration := time.Since(start)
|
|
uuid := m.cfg.Monitoring.PingUUIDs.BackupIntegrity
|
|
|
|
m.mu.Lock()
|
|
m.lastCheckTime = time.Now()
|
|
m.lastCheckOK = checkErr == nil
|
|
m.mu.Unlock()
|
|
|
|
if checkErr != nil {
|
|
m.logger.Printf("[ERROR] [backup] Restic integrity check failed (%s): %v", duration.Round(time.Second), checkErr)
|
|
m.pinger.Fail(uuid, fmt.Sprintf("restic check failed: %v", checkErr))
|
|
return checkErr
|
|
}
|
|
|
|
m.logger.Printf("[INFO] [backup] Restic integrity check passed (%d repos, %s)", len(drives), duration.Round(time.Second))
|
|
m.pinger.Ping(uuid, fmt.Sprintf("restic check passed (%d repos, %s)", len(drives), duration.Round(time.Second)))
|
|
return nil
|
|
}
|
|
|
|
// DumpAppVolumes exports Docker named volumes to tar files for the given stack.
|
|
// Tars are written to AppVolumeDumpPath(drivePath, stackName)/.
|
|
// Uses "docker run alpine tar" (same pattern as appexport).
|
|
func (m *Manager) DumpAppVolumes(stackName string) error {
|
|
if m.stackProvider == nil {
|
|
return nil
|
|
}
|
|
|
|
volumes := m.stackProvider.GetDockerVolumes(stackName)
|
|
if len(volumes) == 0 {
|
|
return nil
|
|
}
|
|
|
|
drivePath := m.GetAppDrivePath(stackName)
|
|
if drivePath == "" {
|
|
return fmt.Errorf("cannot determine drive path for %s", stackName)
|
|
}
|
|
|
|
dumpDir := AppVolumeDumpPath(drivePath, stackName)
|
|
if err := os.MkdirAll(dumpDir, 0755); err != nil {
|
|
return fmt.Errorf("creating volume dump dir: %w", err)
|
|
}
|
|
|
|
var dumpErrors []string
|
|
for _, volName := range volumes {
|
|
tarPath := filepath.Join(dumpDir, volName+".tar")
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] [backup] Dumping volume %s for %s", volName, stackName)
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
|
|
cmd := exec.CommandContext(ctx, "docker", "run", "--rm",
|
|
"-v", volName+":/vol:ro",
|
|
"-v", dumpDir+":/out",
|
|
"alpine", "tar", "cf", "/out/"+volName+".tar", "-C", "/vol", ".")
|
|
out, err := cmd.CombinedOutput()
|
|
cancel()
|
|
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] [backup] Volume dump failed for %s/%s: %s — %v",
|
|
stackName, volName, strings.TrimSpace(string(out)), err)
|
|
os.Remove(tarPath)
|
|
dumpErrors = append(dumpErrors, volName)
|
|
continue
|
|
}
|
|
|
|
if info, _ := os.Stat(tarPath); info != nil {
|
|
m.logger.Printf("[INFO] [backup] Volume dump: %s/%s → %s", stackName, volName, humanizeBytes(info.Size()))
|
|
}
|
|
}
|
|
|
|
// Clean up tars for volumes that no longer exist
|
|
entries, _ := os.ReadDir(dumpDir)
|
|
activeVols := make(map[string]bool)
|
|
for _, v := range volumes {
|
|
activeVols[v+".tar"] = true
|
|
}
|
|
for _, e := range entries {
|
|
if !activeVols[e.Name()] && strings.HasSuffix(e.Name(), ".tar") {
|
|
os.Remove(filepath.Join(dumpDir, e.Name()))
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] [backup] Removed stale volume dump: %s/%s", stackName, e.Name())
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(dumpErrors) > 0 {
|
|
return fmt.Errorf("volume dump failed for: %s", strings.Join(dumpErrors, ", "))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// runVolumeDumpsInternal dumps Docker named volumes for all deployed apps.
|
|
func (m *Manager) runVolumeDumpsInternal(ctx context.Context) error {
|
|
if m.stackProvider == nil {
|
|
return nil
|
|
}
|
|
|
|
stacks := m.stackProvider.ListDeployedStacks()
|
|
var dumped, failed int
|
|
for _, stack := range stacks {
|
|
if !stack.HasVolumes {
|
|
continue
|
|
}
|
|
if ctx.Err() != nil {
|
|
return ctx.Err()
|
|
}
|
|
if err := m.DumpAppVolumes(stack.Name); err != nil {
|
|
m.logger.Printf("[WARN] [backup] Volume dump error for %s: %v", stack.Name, err)
|
|
failed++
|
|
} else {
|
|
dumped++
|
|
}
|
|
}
|
|
if dumped > 0 || failed > 0 {
|
|
m.logger.Printf("[INFO] [backup] Volume dumps completed: %d ok, %d failed", dumped, failed)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// RunFullBackup runs DB dumps, volume dumps, then restic backup.
|
|
func (m *Manager) RunFullBackup(ctx context.Context) error {
|
|
if err := m.acquireRunning(); err != nil {
|
|
return err
|
|
}
|
|
defer m.releaseRunning()
|
|
|
|
if m.isDebug() {
|
|
drives := m.activeDrives()
|
|
driveStacks := m.groupStacksByDrive()
|
|
totalStacks := 0
|
|
for _, s := range driveStacks {
|
|
totalStacks += len(s)
|
|
}
|
|
m.logger.Printf("[DEBUG] RunFullBackup: starting full backup — %d active drives, %d stacks", len(drives), totalStacks)
|
|
}
|
|
|
|
// Step 1: DB dumps
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RunFullBackup: phase 1a — database dumps")
|
|
}
|
|
if err := m.runDBDumpsInternal(ctx); err != nil {
|
|
m.logger.Printf("[WARN] [backup] DB dump had errors, continuing with backup anyway")
|
|
}
|
|
|
|
// Step 2: Volume dumps
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RunFullBackup: phase 1b — Docker volume dumps")
|
|
}
|
|
if err := m.runVolumeDumpsInternal(ctx); err != nil {
|
|
m.logger.Printf("[WARN] [backup] Volume dump had errors, continuing with backup anyway")
|
|
}
|
|
|
|
// Step 3: Restic backup
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RunFullBackup: phase 2 — restic snapshots")
|
|
}
|
|
return m.runBackupInternal(ctx)
|
|
}
|
|
|
|
// GetStatus returns the current backup status.
|
|
func (m *Manager) GetStatus() (*DBDumpStatus, *BackupStatus) {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
return m.lastDBDump, m.lastBackup
|
|
}
|
|
|
|
// GetRepoStats returns aggregated repository statistics across all primary repos.
|
|
func (m *Manager) GetRepoStats() (*RepoStats, error) {
|
|
stats := m.aggregateRepoStats()
|
|
if stats.SnapshotCount == 0 && stats.TotalSize == "" {
|
|
return stats, fmt.Errorf("no repos available")
|
|
}
|
|
return stats, nil
|
|
}
|
|
|
|
// IsRunning returns whether a backup or restore is currently in progress.
|
|
func (m *Manager) IsRunning() bool {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
return m.running
|
|
}
|
|
|
|
// acquireRunning atomically sets the running flag. Returns error if already running.
|
|
func (m *Manager) acquireRunning() error {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
if m.running {
|
|
return fmt.Errorf("backup already in progress")
|
|
}
|
|
m.running = true
|
|
return nil
|
|
}
|
|
|
|
// releaseRunning clears the running flag.
|
|
func (m *Manager) releaseRunning() {
|
|
m.mu.Lock()
|
|
m.running = false
|
|
m.mu.Unlock()
|
|
}
|
|
|
|
// GetResticPassword returns the restic repository encryption password.
|
|
func (m *Manager) GetResticPassword() (string, error) {
|
|
return m.restic.GetPassword()
|
|
}
|
|
|
|
// ListSnapshots returns snapshots from all primary restic repositories, merged and sorted.
|
|
func (m *Manager) ListSnapshots(limit int) ([]SnapshotInfo, error) {
|
|
drives := m.activeDrives()
|
|
var allSnapshots []SnapshotInfo
|
|
for _, drive := range drives {
|
|
repoPath := PrimaryResticRepoPath(drive)
|
|
if !m.restic.RepoExists(repoPath) {
|
|
continue
|
|
}
|
|
snapshots, err := m.restic.ListSnapshots(repoPath, 0)
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] [backup] Could not list snapshots from %s: %v", repoPath, err)
|
|
continue
|
|
}
|
|
for i := range snapshots {
|
|
snapshots[i].RepoPath = repoPath
|
|
}
|
|
allSnapshots = append(allSnapshots, snapshots...)
|
|
}
|
|
// Sort newest first
|
|
sort.Slice(allSnapshots, func(i, j int) bool {
|
|
return allSnapshots[i].Time.After(allSnapshots[j].Time)
|
|
})
|
|
if limit > 0 && len(allSnapshots) > limit {
|
|
allSnapshots = allSnapshots[:limit]
|
|
}
|
|
return allSnapshots, nil
|
|
}
|
|
|
|
// ListAllSnapshots returns snapshots from primary restic repos across all active drives.
|
|
// All snapshots get Tier=1.
|
|
func (m *Manager) ListAllSnapshots(limit int) ([]SnapshotInfo, error) {
|
|
drives := m.activeDrives()
|
|
var allSnapshots []SnapshotInfo
|
|
|
|
for _, drive := range drives {
|
|
repoPath := PrimaryResticRepoPath(drive)
|
|
if !m.restic.RepoExists(repoPath) {
|
|
continue
|
|
}
|
|
snapshots, err := m.restic.ListSnapshots(repoPath, 0)
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] [backup] Could not list snapshots from %s: %v", repoPath, err)
|
|
continue
|
|
}
|
|
for i := range snapshots {
|
|
snapshots[i].RepoPath = repoPath
|
|
snapshots[i].Tier = 1
|
|
}
|
|
allSnapshots = append(allSnapshots, snapshots...)
|
|
}
|
|
|
|
// Sort newest first
|
|
sort.Slice(allSnapshots, func(i, j int) bool {
|
|
return allSnapshots[i].Time.After(allSnapshots[j].Time)
|
|
})
|
|
if limit > 0 && len(allSnapshots) > limit {
|
|
allSnapshots = allSnapshots[:limit]
|
|
}
|
|
return allSnapshots, nil
|
|
}
|
|
|
|
// ListSnapshotsForApp returns snapshots only from the app's home drive primary repo.
|
|
// This prevents showing irrelevant snapshots from other drives (e.g. a 544 KB SYS_DRIVE
|
|
// snapshot appearing for Immich because it contains the shared stacks directory).
|
|
func (m *Manager) ListSnapshotsForApp(stackName string, limit int) ([]SnapshotInfo, error) {
|
|
drivePath := m.GetAppDrivePath(stackName)
|
|
if drivePath == "" {
|
|
return []SnapshotInfo{}, nil
|
|
}
|
|
repoPath := PrimaryResticRepoPath(drivePath)
|
|
|
|
if !m.restic.RepoExists(repoPath) {
|
|
return []SnapshotInfo{}, nil
|
|
}
|
|
|
|
snapshots, err := m.restic.ListSnapshots(repoPath, limit)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
for i := range snapshots {
|
|
snapshots[i].RepoPath = repoPath
|
|
snapshots[i].Tier = 1
|
|
snapshots[i].Source = "restic"
|
|
}
|
|
|
|
// Sort newest first
|
|
sort.Slice(snapshots, func(i, j int) bool {
|
|
return snapshots[i].Time.After(snapshots[j].Time)
|
|
})
|
|
if limit > 0 && len(snapshots) > limit {
|
|
snapshots = snapshots[:limit]
|
|
}
|
|
return snapshots, nil
|
|
}
|
|
|
|
// SetStackProvider sets the stack data provider for app data discovery.
|
|
// C3: Write is protected by mutex since stackProvider is read by concurrent goroutines.
|
|
func (m *Manager) SetStackProvider(provider StackDataProvider) {
|
|
m.mu.Lock()
|
|
m.stackProvider = provider
|
|
m.mu.Unlock()
|
|
}
|
|
|
|
// UnlockRepo runs restic unlock on the given repo path.
|
|
func (m *Manager) UnlockRepo(ctx context.Context, repoPath string) error {
|
|
if !m.restic.RepoExists(repoPath) {
|
|
return nil // no repo to unlock
|
|
}
|
|
cmd := m.restic.UnlockCommand(ctx, repoPath)
|
|
out, err := cmd.CombinedOutput()
|
|
if err != nil {
|
|
return fmt.Errorf("restic unlock: %v (%s)", err, strings.TrimSpace(string(out)))
|
|
}
|
|
m.logger.Printf("[INFO] [backup] Restic repo unlocked: %s", repoPath)
|
|
return nil
|
|
}
|
|
|
|
// GetStackHDDMounts returns HDD mount paths for the named stack via the stack provider.
|
|
func (m *Manager) GetStackHDDMounts(name string) []string {
|
|
if m.stackProvider == nil {
|
|
return nil
|
|
}
|
|
return m.stackProvider.GetStackHDDMounts(name)
|
|
}
|
|
|
|
// DumpStackDB runs a database dump for containers belonging to a specific stack.
|
|
// Dumps to the stack's home drive: <drive>/backups/primary/<stack>/db-dumps/.
|
|
func (m *Manager) DumpStackDB(ctx context.Context, stackName string) error {
|
|
dbs, err := DiscoverDatabases(ctx, m.logger, m.isDebug())
|
|
if err != nil {
|
|
return fmt.Errorf("database discovery failed: %w", err)
|
|
}
|
|
|
|
var stackDBs []DiscoveredDB
|
|
for _, db := range dbs {
|
|
if db.StackName == stackName {
|
|
stackDBs = append(stackDBs, db)
|
|
}
|
|
}
|
|
if len(stackDBs) == 0 {
|
|
m.logger.Printf("[DEBUG] No databases found for stack %s — skipping pre-backup dump", stackName)
|
|
return nil
|
|
}
|
|
|
|
drivePath := m.GetAppDrivePath(stackName)
|
|
if drivePath == "" || !filepath.IsAbs(drivePath) {
|
|
return fmt.Errorf("cannot determine absolute drive path for %s (systemDataPath not configured?)", stackName)
|
|
}
|
|
dumpDir := AppDBDumpPath(drivePath, stackName)
|
|
|
|
m.logger.Printf("[INFO] [backup] Running pre-backup DB dump for %s (%d database(s)) → %s", stackName, len(stackDBs), dumpDir)
|
|
|
|
for _, db := range stackDBs {
|
|
result := DumpOne(ctx, db, dumpDir, m.logger, m.isDebug())
|
|
if result.Error != nil {
|
|
return fmt.Errorf("DB dump failed for %s: %w", result.DB.ContainerName, result.Error)
|
|
}
|
|
m.logger.Printf("[INFO] [backup] Pre-backup DB dump OK: %s (%s)", result.DB.ContainerName, humanizeBytes(result.Size))
|
|
|
|
// Persist validation to settings
|
|
if m.settings != nil && result.FilePath != "" {
|
|
filename := filepath.Base(result.FilePath)
|
|
cache := settings.DBValidationCache{
|
|
ValidatedAt: time.Now().Format(time.RFC3339),
|
|
TableCount: result.Validation.TableCount,
|
|
HasHeader: result.Validation.Valid,
|
|
}
|
|
if !result.Validation.Valid {
|
|
cache.Error = result.Validation.Error
|
|
}
|
|
_ = m.settings.SetDBValidation(filename, cache)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// perDriveRepoStats returns per-drive restic repository statistics for the Részletek section.
|
|
func (m *Manager) perDriveRepoStats() []DriveRepoInfo {
|
|
drives := m.activeDrives()
|
|
var infos []DriveRepoInfo
|
|
for _, drive := range drives {
|
|
repoPath := PrimaryResticRepoPath(drive)
|
|
if !m.restic.RepoExists(repoPath) {
|
|
continue
|
|
}
|
|
stats, err := m.restic.Stats(repoPath)
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] [backup] perDriveRepoStats: failed to get stats for %s: %v", drive, err)
|
|
continue
|
|
}
|
|
infos = append(infos, DriveRepoInfo{
|
|
DrivePath: drive,
|
|
TotalSize: stats.TotalSize,
|
|
TotalSizeBytes: stats.TotalSizeBytes,
|
|
SnapshotCount: stats.SnapshotCount,
|
|
LatestSnapshot: stats.LatestSnapshot,
|
|
})
|
|
}
|
|
m.logger.Printf("[INFO] [backup] perDriveRepoStats: collected stats for %d drives", len(infos))
|
|
return infos
|
|
}
|
|
|
|
// aggregateFromDriveStats derives aggregate stats from already-computed per-drive stats,
|
|
// avoiding a second round of restic subprocess calls.
|
|
func aggregateFromDriveStats(drives []DriveRepoInfo, m *Manager) *RepoStats {
|
|
agg := &RepoStats{}
|
|
var totalBytes int64
|
|
for _, d := range drives {
|
|
agg.SnapshotCount += d.SnapshotCount
|
|
totalBytes += d.TotalSizeBytes
|
|
if d.LatestSnapshot != nil {
|
|
if agg.LatestSnapshot == nil || d.LatestSnapshot.Time.After(agg.LatestSnapshot.Time) {
|
|
agg.LatestSnapshot = d.LatestSnapshot
|
|
}
|
|
}
|
|
}
|
|
agg.TotalSizeBytes = totalBytes
|
|
if totalBytes > 0 {
|
|
agg.TotalSize = humanizeBytes(totalBytes)
|
|
}
|
|
return agg
|
|
}
|
|
|
|
// aggregateRepoStats combines stats from all primary restic repos.
|
|
func (m *Manager) aggregateRepoStats() *RepoStats {
|
|
drives := m.activeDrives()
|
|
agg := &RepoStats{}
|
|
var totalBytes int64
|
|
|
|
for _, drive := range drives {
|
|
repoPath := PrimaryResticRepoPath(drive)
|
|
if !m.restic.RepoExists(repoPath) {
|
|
continue
|
|
}
|
|
stats, err := m.restic.Stats(repoPath)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
agg.SnapshotCount += stats.SnapshotCount
|
|
totalBytes += stats.TotalSizeBytes
|
|
if stats.LatestSnapshot != nil {
|
|
if agg.LatestSnapshot == nil || stats.LatestSnapshot.Time.After(agg.LatestSnapshot.Time) {
|
|
agg.LatestSnapshot = stats.LatestSnapshot
|
|
}
|
|
}
|
|
}
|
|
|
|
agg.TotalSizeBytes = totalBytes
|
|
if totalBytes > 0 {
|
|
agg.TotalSize = humanizeBytes(totalBytes)
|
|
}
|
|
m.logger.Printf("[INFO] [backup] Aggregated repo stats: %d snapshots, total size %s", agg.SnapshotCount, agg.TotalSize)
|
|
return agg
|
|
}
|
|
|
|
// listAllDumpFiles scans per-drive per-stack DB dump directories.
|
|
func (m *Manager) listAllDumpFiles() []DumpFileInfo {
|
|
var allFiles []DumpFileInfo
|
|
for drive, stacks := range m.groupStacksByDrive() {
|
|
for _, stack := range stacks {
|
|
dumpDir := AppDBDumpPath(drive, stack.Name)
|
|
if files, err := ListDumpFiles(dumpDir); err == nil {
|
|
allFiles = append(allFiles, files...)
|
|
}
|
|
}
|
|
}
|
|
m.logger.Printf("[INFO] [backup] Found %d DB dump files across drives", len(allFiles))
|
|
return allFiles
|
|
}
|
|
|
|
// shouldPrune reports whether pruning is due right now for the given schedule.
// The schedule is matched case-insensitively: "daily" is always due, while
// "weekly" and any other value are due only on Sundays. The weekday is taken
// in the Europe/Budapest time zone, falling back to UTC when that zone cannot
// be loaded.
func shouldPrune(schedule string) bool {
	zone, err := time.LoadLocation("Europe/Budapest")
	if err != nil {
		zone = time.UTC
	}
	isSunday := time.Now().In(zone).Weekday() == time.Sunday

	if strings.ToLower(schedule) == "daily" {
		return true
	}
	// "weekly" and unrecognised schedules share the Sunday rule.
	return isSunday
}
|
|
|
|
// appendSnapshotRecord adds a record to the ring buffer (max 20). Caller must hold m.mu.
|
|
func (m *Manager) appendSnapshotRecord(rec SnapshotRecord) {
|
|
m.snapshotHistory = append(m.snapshotHistory, rec)
|
|
if len(m.snapshotHistory) > 20 {
|
|
m.snapshotHistory = m.snapshotHistory[len(m.snapshotHistory)-20:]
|
|
}
|
|
m.saveSnapshotHistory()
|
|
}
|
|
|
|
// saveSnapshotHistory persists the current snapshotHistory to disk as JSON.
|
|
// Caller must hold m.mu. Writes atomically (tmp file + rename).
|
|
func (m *Manager) saveSnapshotHistory() {
|
|
if m.snapshotHistoryFile == "" {
|
|
return
|
|
}
|
|
data, err := json.Marshal(m.snapshotHistory)
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] [backup] Could not marshal snapshot history: %v", err)
|
|
return
|
|
}
|
|
tmp := m.snapshotHistoryFile + ".tmp"
|
|
if err := os.WriteFile(tmp, data, 0644); err != nil {
|
|
m.logger.Printf("[WARN] [backup] Could not write snapshot history tmp file: %v", err)
|
|
return
|
|
}
|
|
if err := os.Rename(tmp, m.snapshotHistoryFile); err != nil {
|
|
m.logger.Printf("[WARN] [backup] Could not rename snapshot history file: %v", err)
|
|
return
|
|
}
|
|
m.logger.Printf("[INFO] [backup] Saved snapshot history (%d entries)", len(m.snapshotHistory))
|
|
}
|
|
|
|
// loadSnapshotHistoryFromFile reads the persisted snapshot history from disk.
|
|
// Returns nil if the file does not exist or cannot be read.
|
|
func (m *Manager) loadSnapshotHistoryFromFile() []SnapshotRecord {
|
|
if m.snapshotHistoryFile == "" {
|
|
return nil
|
|
}
|
|
data, err := os.ReadFile(m.snapshotHistoryFile)
|
|
if err != nil {
|
|
if !os.IsNotExist(err) {
|
|
m.logger.Printf("[WARN] [backup] Could not read snapshot history file: %v", err)
|
|
}
|
|
return nil
|
|
}
|
|
var records []SnapshotRecord
|
|
if err := json.Unmarshal(data, &records); err != nil {
|
|
m.logger.Printf("[WARN] [backup] Could not parse snapshot history file: %v", err)
|
|
return nil
|
|
}
|
|
return records
|
|
}
|
|
|
|
// LoadSnapshotHistory populates the snapshot history on startup.
|
|
// First tries to load persisted history (with delta stats) from disk.
|
|
// Merges with restic repo snapshots to pick up any entries not in the persisted file.
|
|
func (m *Manager) LoadSnapshotHistory() {
|
|
// Try loading persisted records (contains delta stats from actual backup runs)
|
|
persisted := m.loadSnapshotHistoryFromFile()
|
|
|
|
// Build a lookup map of persisted records by SnapshotID
|
|
persistedByID := make(map[string]SnapshotRecord, len(persisted))
|
|
for _, r := range persisted {
|
|
persistedByID[r.SnapshotID] = r
|
|
}
|
|
|
|
// Query restic repos for any snapshots not in the persisted file
|
|
drives := m.activeDrives()
|
|
var allSnapshots []SnapshotInfo
|
|
|
|
for _, drive := range drives {
|
|
repoPath := PrimaryResticRepoPath(drive)
|
|
if !m.restic.RepoExists(repoPath) {
|
|
continue
|
|
}
|
|
snapshots, err := m.restic.ListSnapshots(repoPath, 20)
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] [backup] Could not load snapshot history from %s: %v", repoPath, err)
|
|
continue
|
|
}
|
|
allSnapshots = append(allSnapshots, snapshots...)
|
|
}
|
|
|
|
// Sort by time (oldest first for ring buffer)
|
|
sort.Slice(allSnapshots, func(i, j int) bool {
|
|
return allSnapshots[i].Time.Before(allSnapshots[j].Time)
|
|
})
|
|
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
if len(persisted) > 0 {
|
|
// Start from persisted records, add any restic snapshots not already there
|
|
m.snapshotHistory = persisted
|
|
for _, s := range allSnapshots {
|
|
if _, found := persistedByID[s.ID]; !found {
|
|
m.snapshotHistory = append(m.snapshotHistory, SnapshotRecord{
|
|
SnapshotID: s.ID,
|
|
Time: s.Time,
|
|
HasStats: false,
|
|
Success: true,
|
|
})
|
|
}
|
|
}
|
|
// Re-sort by time after merge (oldest first for ring buffer)
|
|
sort.Slice(m.snapshotHistory, func(i, j int) bool {
|
|
return m.snapshotHistory[i].Time.Before(m.snapshotHistory[j].Time)
|
|
})
|
|
m.logger.Printf("[INFO] [backup] Loaded %d snapshots from persisted history (merged with %d restic entries)", len(persisted), len(allSnapshots))
|
|
} else {
|
|
// No persisted file — fall back to restic-only loading (first run)
|
|
for _, s := range allSnapshots {
|
|
m.snapshotHistory = append(m.snapshotHistory, SnapshotRecord{
|
|
SnapshotID: s.ID,
|
|
Time: s.Time,
|
|
HasStats: false,
|
|
Success: true,
|
|
})
|
|
}
|
|
m.logger.Printf("[INFO] [backup] Loaded %d historical snapshots from %d restic repos (no persisted history)", len(m.snapshotHistory), len(drives))
|
|
}
|
|
|
|
if len(m.snapshotHistory) > 20 {
|
|
m.snapshotHistory = m.snapshotHistory[len(m.snapshotHistory)-20:]
|
|
}
|
|
}
|
|
|
|
// RefreshCache updates the cached full status. Called by scheduler every 5 minutes
|
|
// and after each backup run.
|
|
func (m *Manager) RefreshCache(nextDBDump, nextBackup time.Time) {
|
|
status := &FullBackupStatus{
|
|
Enabled: m.cfg.Backup.Enabled,
|
|
|
|
DBDumpSchedule: m.cfg.Backup.DBDumpSchedule,
|
|
ResticSchedule: m.cfg.Backup.ResticSchedule,
|
|
PruneSchedule: m.cfg.Backup.PruneSchedule,
|
|
NextDBDump: nextDBDump,
|
|
NextBackup: nextBackup,
|
|
Retention: m.cfg.Backup.Retention,
|
|
}
|
|
|
|
// Expensive calls (outside lock) — compute per-drive stats once, derive aggregate
|
|
status.PerDriveRepoStats = m.perDriveRepoStats()
|
|
status.RepoStats = aggregateFromDriveStats(status.PerDriveRepoStats, m)
|
|
|
|
// Scan dump files from per-drive per-stack paths
|
|
files := m.listAllDumpFiles()
|
|
status.DumpFiles = files
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
|
defer cancel()
|
|
if dbs, err := DiscoverDatabases(ctx, m.logger, m.isDebug()); err == nil {
|
|
status.DiscoveredDBs = dbs
|
|
}
|
|
|
|
// Discover app data — all deployed stacks, backup is mandatory
|
|
if m.stackProvider != nil {
|
|
status.AppDataInfo = DiscoverAppData(m.stackProvider, status.DiscoveredDBs)
|
|
}
|
|
|
|
// Fill in dynamic fields under lock.
|
|
// C1: lastDBDump mutation also happens here to prevent data races with GetFullStatus.
|
|
// C2: snapshot history reversal happens before cachedStatus assignment (inside lock).
|
|
m.mu.Lock()
|
|
status.Running = m.running
|
|
status.LastDBDump = m.lastDBDump
|
|
status.LastBackup = m.lastBackup
|
|
status.LastCheckTime = m.lastCheckTime
|
|
status.LastCheckOK = m.lastCheckOK
|
|
status.SnapshotHistory = make([]SnapshotRecord, len(m.snapshotHistory))
|
|
copy(status.SnapshotHistory, m.snapshotHistory)
|
|
|
|
// C1: Cross-check lastDBDump results inside lock to prevent torn writes.
|
|
if m.lastDBDump != nil && len(files) > 0 {
|
|
fileValidation := make(map[string]DumpValidation) // keyed by filename
|
|
for _, f := range files {
|
|
fileValidation[f.FileName] = f.Validation
|
|
}
|
|
for i, r := range m.lastDBDump.Results {
|
|
if !r.Validation.Valid && r.Validation.Error == "" && r.FilePath != "" {
|
|
filename := filepath.Base(r.FilePath)
|
|
if fv, ok := fileValidation[filename]; ok {
|
|
m.lastDBDump.Results[i].Validation = fv
|
|
m.logger.Printf("[INFO] [backup] Re-validated %s from disk: valid=%v tables=%d",
|
|
filename, fv.Valid, fv.TableCount)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// C2: Reverse snapshot history before assigning to cachedStatus (inside lock).
|
|
for i, j := 0, len(status.SnapshotHistory)-1; i < j; i, j = i+1, j-1 {
|
|
status.SnapshotHistory[i], status.SnapshotHistory[j] = status.SnapshotHistory[j], status.SnapshotHistory[i]
|
|
}
|
|
|
|
m.cachedStatus = status
|
|
m.cacheTime = time.Now()
|
|
m.mu.Unlock()
|
|
|
|
m.logger.Printf("[INFO] [backup] Backup status cache refreshed")
|
|
}
|
|
|
|
// GetFullStatus returns the cached backup status for page rendering.
|
|
// Returns instantly — no subprocess calls.
|
|
// Returns a deep copy so callers can safely append to slice fields without
|
|
// polluting the cache (which would cause duplicate entries on repeated calls).
|
|
func (m *Manager) GetFullStatus(nextDBDump, nextBackup time.Time) *FullBackupStatus {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
if m.cachedStatus != nil {
|
|
// Deep copy — callers (backupsHandler) append to CrossDriveSummary,
|
|
// UnconfiguredApps, and CrossDriveWarnings. If we returned the cache
|
|
// pointer directly, every page load would accumulate more entries.
|
|
status := *m.cachedStatus
|
|
status.AppDataInfo = make([]AppBackupInfo, len(m.cachedStatus.AppDataInfo))
|
|
copy(status.AppDataInfo, m.cachedStatus.AppDataInfo)
|
|
status.PerDriveRepoStats = make([]DriveRepoInfo, len(m.cachedStatus.PerDriveRepoStats))
|
|
copy(status.PerDriveRepoStats, m.cachedStatus.PerDriveRepoStats)
|
|
// These three slices are assembled by the handler from AppDataInfo + settings;
|
|
// they must always start empty so the handler builds them fresh.
|
|
status.CrossDriveSummary = nil
|
|
status.UnconfiguredApps = nil
|
|
status.CrossDriveWarnings = nil
|
|
|
|
// Update dynamic fields that don't need subprocess calls
|
|
status.Running = m.running
|
|
status.NextDBDump = nextDBDump
|
|
status.NextBackup = nextBackup
|
|
// C4: Deep-copy lastDBDump and lastBackup so callers cannot mutate shared state.
|
|
if m.lastDBDump != nil {
|
|
copyDump := *m.lastDBDump
|
|
if len(m.lastDBDump.Results) > 0 {
|
|
copyDump.Results = make([]DumpResult, len(m.lastDBDump.Results))
|
|
copy(copyDump.Results, m.lastDBDump.Results)
|
|
}
|
|
status.LastDBDump = ©Dump
|
|
}
|
|
if m.lastBackup != nil {
|
|
copyBackup := *m.lastBackup
|
|
status.LastBackup = ©Backup
|
|
}
|
|
// Update snapshot history
|
|
status.SnapshotHistory = make([]SnapshotRecord, len(m.snapshotHistory))
|
|
copy(status.SnapshotHistory, m.snapshotHistory)
|
|
// Reverse so newest first
|
|
for i, j := 0, len(status.SnapshotHistory)-1; i < j; i, j = i+1, j-1 {
|
|
status.SnapshotHistory[i], status.SnapshotHistory[j] = status.SnapshotHistory[j], status.SnapshotHistory[i]
|
|
}
|
|
|
|
// Synthesize LastBackup from snapshot history if not in memory (e.g., after restart)
|
|
if status.LastBackup == nil && len(status.SnapshotHistory) > 0 {
|
|
latest := status.SnapshotHistory[0] // already reversed, newest first
|
|
status.LastBackup = &BackupStatus{
|
|
LastRun: latest.Time,
|
|
Success: latest.Success,
|
|
Snapshot: &SnapshotResult{
|
|
SnapshotID: latest.SnapshotID,
|
|
},
|
|
}
|
|
}
|
|
|
|
// Synthesize LastDBDump from DumpFiles on disk if not in memory
|
|
if status.LastDBDump == nil && len(status.DumpFiles) > 0 {
|
|
var results []DumpResult
|
|
var latestTime time.Time
|
|
for _, f := range status.DumpFiles {
|
|
results = append(results, DumpResult{
|
|
DB: DiscoveredDB{StackName: f.StackName, DBType: f.DBType, ContainerName: f.StackName},
|
|
FilePath: f.FileName,
|
|
Size: f.Size,
|
|
Validation: f.Validation,
|
|
})
|
|
if f.ModTime.After(latestTime) {
|
|
latestTime = f.ModTime
|
|
}
|
|
}
|
|
status.LastDBDump = &DBDumpStatus{
|
|
LastRun: latestTime,
|
|
Results: results,
|
|
Success: true,
|
|
}
|
|
}
|
|
|
|
return &status
|
|
}
|
|
|
|
// No cache yet — return a minimal status (first page load before cache is populated)
|
|
// Deep-copy lastDBDump and lastBackup to prevent callers from mutating shared state.
|
|
status := &FullBackupStatus{
|
|
Enabled: m.cfg.Backup.Enabled,
|
|
Running: m.running,
|
|
DBDumpSchedule: m.cfg.Backup.DBDumpSchedule,
|
|
ResticSchedule: m.cfg.Backup.ResticSchedule,
|
|
PruneSchedule: m.cfg.Backup.PruneSchedule,
|
|
NextDBDump: nextDBDump,
|
|
NextBackup: nextBackup,
|
|
Retention: m.cfg.Backup.Retention,
|
|
LastCheckTime: m.lastCheckTime,
|
|
LastCheckOK: m.lastCheckOK,
|
|
}
|
|
if m.lastDBDump != nil {
|
|
copyDump := *m.lastDBDump
|
|
if len(m.lastDBDump.Results) > 0 {
|
|
copyDump.Results = make([]DumpResult, len(m.lastDBDump.Results))
|
|
copy(copyDump.Results, m.lastDBDump.Results)
|
|
}
|
|
status.LastDBDump = ©Dump
|
|
}
|
|
if m.lastBackup != nil {
|
|
copyBackup := *m.lastBackup
|
|
status.LastBackup = ©Backup
|
|
}
|
|
return status
|
|
}
|
|
|
|
// isDebug returns true if logging level is "debug".
|
|
func (m *Manager) isDebug() bool {
|
|
return m.cfg.Logging.Level == "debug"
|
|
}
|
|
|
|
func dbNames(dbs []DiscoveredDB) string {
|
|
var names []string
|
|
for _, db := range dbs {
|
|
names = append(names, fmt.Sprintf("%s(%s)", db.ContainerName, db.DBType))
|
|
}
|
|
return strings.Join(names, ", ")
|
|
}
|
|
|
|
// dedup returns the distinct strings of items in order of first appearance.
// The result is nil when items is empty.
func dedup(items []string) []string {
	var unique []string
	seen := make(map[string]struct{})
	for _, s := range items {
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		unique = append(unique, s)
	}
	return unique
}
|