v0.14.0: Per-drive backup architecture + storage path overhaul
Major refactor of backup and storage paths:
- Per-drive restic repos at <drive>/backups/primary/restic/
- Per-app DB dumps at <drive>/backups/primary/<app>/db-dumps/
- Remove global BackupDir, DBDumpDir, ResticRepo config fields
- Add SystemDataPath config (fallback for apps without HDD)
- New backup/paths.go with pure path computation helpers
- Add GetStackHDDPath to StackDataProvider interface
- Restic methods now accept repoPath as parameter
- Cross-drive backup uses new secondary path structure
- Rename storage/ to appdata/ in scripts and compose templates
- Update protected HDD paths (storage → appdata + backups)
- Simplify backup UI (remove global path displays)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -6,6 +6,7 @@ import (
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -17,12 +18,13 @@ import (
|
||||
|
||||
// Manager orchestrates database dumps and restic backups.
|
||||
type Manager struct {
|
||||
cfg *config.Config
|
||||
restic *ResticManager
|
||||
logger *log.Logger
|
||||
pinger *monitor.Pinger
|
||||
settings *settings.Settings
|
||||
stackProvider StackDataProvider
|
||||
cfg *config.Config
|
||||
restic *ResticManager
|
||||
logger *log.Logger
|
||||
pinger *monitor.Pinger
|
||||
settings *settings.Settings
|
||||
stackProvider StackDataProvider
|
||||
systemDataPath string // fallback drive for SSD-only apps
|
||||
|
||||
mu sync.Mutex
|
||||
lastDBDump *DBDumpStatus
|
||||
@@ -92,8 +94,6 @@ type FullBackupStatus struct {
|
||||
Retention config.RetentionConfig
|
||||
|
||||
// Repository health
|
||||
RepoPath string
|
||||
BackupPaths []string
|
||||
LastCheckTime time.Time
|
||||
LastCheckOK bool
|
||||
|
||||
@@ -133,15 +133,51 @@ type BackupStatus struct {
|
||||
// NewManager creates a new backup manager.
|
||||
func NewManager(cfg *config.Config, pinger *monitor.Pinger, sett *settings.Settings, logger *log.Logger) *Manager {
|
||||
return &Manager{
|
||||
cfg: cfg,
|
||||
restic: NewResticManager(cfg, logger),
|
||||
logger: logger,
|
||||
pinger: pinger,
|
||||
settings: sett,
|
||||
cfg: cfg,
|
||||
restic: NewResticManager(cfg, logger),
|
||||
logger: logger,
|
||||
pinger: pinger,
|
||||
settings: sett,
|
||||
systemDataPath: cfg.Paths.SystemDataPath,
|
||||
}
|
||||
}
|
||||
|
||||
// RunDBDumps discovers and dumps all databases.
|
||||
// GetAppDrivePath returns the drive path for an app.
|
||||
// Uses HDD_PATH from app.yaml if set, otherwise falls back to system data path.
|
||||
func (m *Manager) GetAppDrivePath(stackName string) string {
|
||||
if m.stackProvider != nil {
|
||||
if hddPath := m.stackProvider.GetStackHDDPath(stackName); hddPath != "" {
|
||||
return hddPath
|
||||
}
|
||||
}
|
||||
return m.systemDataPath
|
||||
}
|
||||
|
||||
// groupStacksByDrive groups deployed stacks by their home drive path.
|
||||
func (m *Manager) groupStacksByDrive() map[string][]StackSummary {
|
||||
if m.stackProvider == nil {
|
||||
return nil
|
||||
}
|
||||
result := make(map[string][]StackSummary)
|
||||
for _, stack := range m.stackProvider.ListDeployedStacks() {
|
||||
drive := m.GetAppDrivePath(stack.Name)
|
||||
result[drive] = append(result[drive], stack)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// activeDrives returns sorted list of drives that have deployed apps.
|
||||
func (m *Manager) activeDrives() []string {
|
||||
groups := m.groupStacksByDrive()
|
||||
var drives []string
|
||||
for d := range groups {
|
||||
drives = append(drives, d)
|
||||
}
|
||||
sort.Strings(drives)
|
||||
return drives
|
||||
}
|
||||
|
||||
// RunDBDumps discovers and dumps all databases to per-drive, per-app paths.
|
||||
func (m *Manager) RunDBDumps(ctx context.Context) error {
|
||||
start := time.Now()
|
||||
m.logger.Printf("[INFO] Starting database dump run")
|
||||
@@ -166,31 +202,37 @@ func (m *Manager) RunDBDumps(ctx context.Context) error {
|
||||
|
||||
m.logger.Printf("[INFO] Discovered %d database(s): %s", len(dbs), dbNames(dbs))
|
||||
|
||||
results := DumpAll(ctx, dbs, m.cfg.Paths.DBDumpDir, m.logger)
|
||||
|
||||
// Check results and persist validations
|
||||
// Dump each DB to its app's drive path
|
||||
var results []DumpResult
|
||||
allOK := true
|
||||
var summary []string
|
||||
var totalSize int64
|
||||
for _, r := range results {
|
||||
if r.Error != nil {
|
||||
|
||||
for _, db := range dbs {
|
||||
drivePath := m.GetAppDrivePath(db.StackName)
|
||||
dumpDir := AppDBDumpPath(drivePath, db.StackName)
|
||||
|
||||
result := DumpOne(ctx, db, dumpDir, m.logger)
|
||||
results = append(results, result)
|
||||
|
||||
if result.Error != nil {
|
||||
allOK = false
|
||||
summary = append(summary, fmt.Sprintf("FAIL %s: %v", r.DB.ContainerName, r.Error))
|
||||
m.logger.Printf("[ERROR] DB dump failed for %s: %v", r.DB.ContainerName, r.Error)
|
||||
summary = append(summary, fmt.Sprintf("FAIL %s: %v", result.DB.ContainerName, result.Error))
|
||||
m.logger.Printf("[ERROR] DB dump failed for %s: %v", result.DB.ContainerName, result.Error)
|
||||
} else {
|
||||
totalSize += r.Size
|
||||
summary = append(summary, fmt.Sprintf("OK %s (%s)", r.DB.ContainerName, humanizeBytes(r.Size)))
|
||||
totalSize += result.Size
|
||||
summary = append(summary, fmt.Sprintf("OK %s (%s)", result.DB.ContainerName, humanizeBytes(result.Size)))
|
||||
|
||||
// Persist validation result to settings.json
|
||||
if m.settings != nil && r.FilePath != "" {
|
||||
filename := filepath.Base(r.FilePath)
|
||||
if m.settings != nil && result.FilePath != "" {
|
||||
filename := filepath.Base(result.FilePath)
|
||||
cache := settings.DBValidationCache{
|
||||
ValidatedAt: time.Now().Format(time.RFC3339),
|
||||
TableCount: r.Validation.TableCount,
|
||||
HasHeader: r.Validation.Valid,
|
||||
TableCount: result.Validation.TableCount,
|
||||
HasHeader: result.Validation.Valid,
|
||||
}
|
||||
if !r.Validation.Valid {
|
||||
cache.Error = r.Validation.Error
|
||||
if !result.Validation.Valid {
|
||||
cache.Error = result.Validation.Error
|
||||
}
|
||||
if err := m.settings.SetDBValidation(filename, cache); err != nil {
|
||||
m.logger.Printf("[WARN] Failed to cache validation for %s: %v", filename, err)
|
||||
@@ -226,132 +268,185 @@ func (m *Manager) RunDBDumps(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// RunBackup runs a restic backup snapshot.
|
||||
// RunBackup runs per-drive restic backup snapshots.
|
||||
func (m *Manager) RunBackup(ctx context.Context) error {
|
||||
start := time.Now()
|
||||
m.logger.Printf("[INFO] Starting restic backup")
|
||||
m.logger.Printf("[INFO] Starting restic backup (per-drive)")
|
||||
|
||||
// Ensure repo is initialized
|
||||
if err := m.restic.EnsureInitialized(); err != nil {
|
||||
m.logger.Printf("[ERROR] Restic init failed: %v", err)
|
||||
m.pinger.Fail(m.cfg.Monitoring.PingUUIDs.Backup, fmt.Sprintf("Restic init failed: %v", err))
|
||||
return err
|
||||
driveStacks := m.groupStacksByDrive()
|
||||
if len(driveStacks) == 0 {
|
||||
m.logger.Printf("[INFO] No deployed stacks — skipping backup")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Backup paths: base + dynamic app data
|
||||
paths := []string{
|
||||
// Infrastructure paths included in every drive's primary repo
|
||||
infraPaths := []string{
|
||||
m.cfg.Paths.StacksDir,
|
||||
m.cfg.Paths.DBDumpDir,
|
||||
"/opt/docker/felhom-controller/controller.yaml",
|
||||
}
|
||||
appPaths := m.resolveAppBackupPaths()
|
||||
if len(appPaths) > 0 {
|
||||
paths = append(paths, appPaths...)
|
||||
m.logger.Printf("[INFO] Backup paths (%d total, %d app data): %v", len(paths), len(appPaths), paths)
|
||||
|
||||
var lastResult *SnapshotResult
|
||||
var anyErr error
|
||||
driveCount := 0
|
||||
|
||||
for drivePath, stacks := range driveStacks {
|
||||
repoPath := PrimaryResticRepoPath(drivePath)
|
||||
|
||||
// Ensure repo is initialized
|
||||
if err := m.restic.EnsureInitialized(repoPath); err != nil {
|
||||
m.logger.Printf("[ERROR] Restic init failed for %s: %v", repoPath, err)
|
||||
anyErr = err
|
||||
continue
|
||||
}
|
||||
|
||||
// Build paths for this drive
|
||||
var paths []string
|
||||
paths = append(paths, infraPaths...)
|
||||
|
||||
for _, stack := range stacks {
|
||||
// App data (appdata/<stack>/)
|
||||
appData := AppDataDir(drivePath, stack.Name)
|
||||
if _, err := os.Stat(appData); err == nil {
|
||||
paths = append(paths, appData)
|
||||
}
|
||||
// HDD mounts (for apps with custom mount points)
|
||||
if m.stackProvider != nil {
|
||||
for _, mount := range m.stackProvider.GetStackHDDMounts(stack.Name) {
|
||||
if _, err := os.Stat(mount); err == nil {
|
||||
paths = append(paths, mount)
|
||||
}
|
||||
}
|
||||
}
|
||||
// DB dumps for this stack
|
||||
dumpDir := AppDBDumpPath(drivePath, stack.Name)
|
||||
if _, err := os.Stat(dumpDir); err == nil {
|
||||
paths = append(paths, dumpDir)
|
||||
}
|
||||
}
|
||||
|
||||
// Deduplicate paths
|
||||
paths = dedup(paths)
|
||||
|
||||
tags := []string{"felhom", m.cfg.Customer.ID, filepath.Base(drivePath)}
|
||||
m.logger.Printf("[INFO] Backing up drive %s (%d apps, %d paths)", drivePath, len(stacks), len(paths))
|
||||
|
||||
result, err := m.restic.Snapshot(repoPath, paths, tags)
|
||||
if err != nil {
|
||||
m.logger.Printf("[ERROR] Restic backup failed for drive %s: %v", drivePath, err)
|
||||
anyErr = err
|
||||
continue
|
||||
}
|
||||
|
||||
lastResult = result
|
||||
driveCount++
|
||||
|
||||
// Prune check (weekly — Sunday)
|
||||
if shouldPrune(m.cfg.Backup.PruneSchedule) {
|
||||
m.logger.Printf("[INFO] Running weekly prune for %s", repoPath)
|
||||
if err := m.restic.Prune(repoPath, m.cfg.Backup.Retention); err != nil {
|
||||
m.logger.Printf("[WARN] Restic prune failed for %s: %v", repoPath, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
tags := []string{"felhom", m.cfg.Customer.ID}
|
||||
|
||||
result, err := m.restic.Snapshot(paths, tags)
|
||||
if err != nil {
|
||||
m.logger.Printf("[ERROR] Restic backup failed: %v", err)
|
||||
m.pinger.Fail(m.cfg.Monitoring.PingUUIDs.Backup, fmt.Sprintf("Backup failed: %v", err))
|
||||
duration := time.Since(start)
|
||||
|
||||
if anyErr != nil && driveCount == 0 {
|
||||
// All drives failed
|
||||
m.pinger.Fail(m.cfg.Monitoring.PingUUIDs.Backup, fmt.Sprintf("Backup failed: %v", anyErr))
|
||||
m.mu.Lock()
|
||||
m.lastBackup = &BackupStatus{
|
||||
LastRun: time.Now(),
|
||||
Success: false,
|
||||
Duration: time.Since(start),
|
||||
Duration: duration,
|
||||
}
|
||||
m.mu.Unlock()
|
||||
return err
|
||||
return anyErr
|
||||
}
|
||||
|
||||
// Prune check (weekly — Sunday)
|
||||
if shouldPrune(m.cfg.Backup.PruneSchedule) {
|
||||
m.logger.Printf("[INFO] Running weekly prune")
|
||||
if err := m.restic.Prune(m.cfg.Backup.Retention); err != nil {
|
||||
m.logger.Printf("[WARN] Restic prune failed: %v", err)
|
||||
}
|
||||
checkErr := m.restic.Check()
|
||||
if checkErr != nil {
|
||||
m.logger.Printf("[WARN] Restic check failed: %v", checkErr)
|
||||
}
|
||||
m.mu.Lock()
|
||||
m.lastCheckTime = time.Now()
|
||||
m.lastCheckOK = checkErr == nil
|
||||
m.mu.Unlock()
|
||||
}
|
||||
// Get aggregated stats
|
||||
stats := m.aggregateRepoStats()
|
||||
|
||||
// Get stats
|
||||
stats, _ := m.restic.Stats()
|
||||
|
||||
duration := time.Since(start)
|
||||
m.mu.Lock()
|
||||
m.lastBackup = &BackupStatus{
|
||||
LastRun: time.Now(),
|
||||
Snapshot: result,
|
||||
Success: true,
|
||||
Snapshot: lastResult,
|
||||
Success: anyErr == nil,
|
||||
Duration: duration,
|
||||
RepoStats: stats,
|
||||
}
|
||||
// Append to snapshot history
|
||||
m.appendSnapshotRecord(SnapshotRecord{
|
||||
SnapshotID: result.SnapshotID,
|
||||
Time: time.Now(),
|
||||
FilesNew: result.FilesNew,
|
||||
FilesChanged: result.FilesChanged,
|
||||
DataAdded: result.DataAdded,
|
||||
Duration: duration,
|
||||
Success: true,
|
||||
HasStats: true,
|
||||
})
|
||||
if lastResult != nil {
|
||||
m.appendSnapshotRecord(SnapshotRecord{
|
||||
SnapshotID: lastResult.SnapshotID,
|
||||
Time: time.Now(),
|
||||
FilesNew: lastResult.FilesNew,
|
||||
FilesChanged: lastResult.FilesChanged,
|
||||
DataAdded: lastResult.DataAdded,
|
||||
Duration: duration,
|
||||
Success: true,
|
||||
HasStats: true,
|
||||
})
|
||||
}
|
||||
m.mu.Unlock()
|
||||
|
||||
body := fmt.Sprintf("Backup OK\nSnapshot: %s\nNew files: %d, Changed: %d\nData added: %s\nDuration: %s",
|
||||
result.SnapshotID, result.FilesNew, result.FilesChanged, result.DataAdded,
|
||||
duration.Round(time.Second))
|
||||
m.pinger.Ping(m.cfg.Monitoring.PingUUIDs.Backup, body)
|
||||
if lastResult != nil {
|
||||
body := fmt.Sprintf("Backup OK (%d drives)\nSnapshot: %s\nNew files: %d, Changed: %d\nData added: %s\nDuration: %s",
|
||||
driveCount, lastResult.SnapshotID, lastResult.FilesNew, lastResult.FilesChanged, lastResult.DataAdded,
|
||||
duration.Round(time.Second))
|
||||
m.pinger.Ping(m.cfg.Monitoring.PingUUIDs.Backup, body)
|
||||
|
||||
m.logger.Printf("[INFO] Restic backup completed: snapshot %s, %d new, %d changed, %s added (%s)",
|
||||
result.SnapshotID, result.FilesNew, result.FilesChanged, result.DataAdded,
|
||||
duration.Round(time.Millisecond))
|
||||
m.logger.Printf("[INFO] Restic backup completed: %d drives, snapshot %s, %d new, %d changed, %s added (%s)",
|
||||
driveCount, lastResult.SnapshotID, lastResult.FilesNew, lastResult.FilesChanged, lastResult.DataAdded,
|
||||
duration.Round(time.Millisecond))
|
||||
}
|
||||
|
||||
// Refresh cache so the page shows updated data immediately
|
||||
if m.AfterBackup != nil {
|
||||
m.AfterBackup()
|
||||
}
|
||||
|
||||
return nil
|
||||
return anyErr
|
||||
}
|
||||
|
||||
// RunIntegrityCheck runs restic check and pings healthchecks with the result.
|
||||
// RunIntegrityCheck runs restic check on all primary repos and pings healthchecks.
|
||||
func (m *Manager) RunIntegrityCheck(ctx context.Context) error {
|
||||
m.logger.Printf("[INFO] Starting restic integrity check")
|
||||
start := time.Now()
|
||||
|
||||
if err := m.restic.EnsureInitialized(); err != nil {
|
||||
m.logger.Printf("[ERROR] Restic init failed for integrity check: %v", err)
|
||||
return err
|
||||
drives := m.activeDrives()
|
||||
if len(drives) == 0 {
|
||||
m.logger.Printf("[INFO] No active drives — skipping integrity check")
|
||||
return nil
|
||||
}
|
||||
|
||||
err := m.restic.Check()
|
||||
duration := time.Since(start)
|
||||
var checkErr error
|
||||
for _, drive := range drives {
|
||||
repoPath := PrimaryResticRepoPath(drive)
|
||||
if !m.restic.RepoExists(repoPath) {
|
||||
continue
|
||||
}
|
||||
if err := m.restic.Check(repoPath); err != nil {
|
||||
m.logger.Printf("[ERROR] Restic check failed for %s: %v", repoPath, err)
|
||||
checkErr = err
|
||||
}
|
||||
}
|
||||
|
||||
duration := time.Since(start)
|
||||
uuid := m.cfg.Monitoring.PingUUIDs.BackupIntegrity
|
||||
|
||||
m.mu.Lock()
|
||||
m.lastCheckTime = time.Now()
|
||||
m.lastCheckOK = err == nil
|
||||
m.lastCheckOK = checkErr == nil
|
||||
m.mu.Unlock()
|
||||
|
||||
if err != nil {
|
||||
m.logger.Printf("[ERROR] Restic integrity check failed (%s): %v", duration.Round(time.Second), err)
|
||||
m.pinger.Fail(uuid, fmt.Sprintf("restic check failed: %v", err))
|
||||
return err
|
||||
if checkErr != nil {
|
||||
m.logger.Printf("[ERROR] Restic integrity check failed (%s): %v", duration.Round(time.Second), checkErr)
|
||||
m.pinger.Fail(uuid, fmt.Sprintf("restic check failed: %v", checkErr))
|
||||
return checkErr
|
||||
}
|
||||
|
||||
m.logger.Printf("[INFO] Restic integrity check passed (%s)", duration.Round(time.Second))
|
||||
m.pinger.Ping(uuid, fmt.Sprintf("restic check passed (%s)", duration.Round(time.Second)))
|
||||
m.logger.Printf("[INFO] Restic integrity check passed (%d repos, %s)", len(drives), duration.Round(time.Second))
|
||||
m.pinger.Ping(uuid, fmt.Sprintf("restic check passed (%d repos, %s)", len(drives), duration.Round(time.Second)))
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -387,9 +482,13 @@ func (m *Manager) GetStatus() (*DBDumpStatus, *BackupStatus) {
|
||||
return m.lastDBDump, m.lastBackup
|
||||
}
|
||||
|
||||
// GetRepoStats returns repository statistics.
|
||||
// GetRepoStats returns aggregated repository statistics across all primary repos.
|
||||
func (m *Manager) GetRepoStats() (*RepoStats, error) {
|
||||
return m.restic.Stats()
|
||||
stats := m.aggregateRepoStats()
|
||||
if stats.SnapshotCount == 0 && stats.TotalSize == "" {
|
||||
return stats, fmt.Errorf("no repos available")
|
||||
}
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// IsRunning returns whether a backup or restore is currently in progress.
|
||||
@@ -404,9 +503,33 @@ func (m *Manager) GetResticPassword() (string, error) {
|
||||
return m.restic.GetPassword()
|
||||
}
|
||||
|
||||
// ListSnapshots returns snapshots from the restic repository.
|
||||
// ListSnapshots returns snapshots from all primary restic repositories, merged and sorted.
|
||||
func (m *Manager) ListSnapshots(limit int) ([]SnapshotInfo, error) {
|
||||
return m.restic.ListSnapshots(limit)
|
||||
drives := m.activeDrives()
|
||||
var allSnapshots []SnapshotInfo
|
||||
for _, drive := range drives {
|
||||
repoPath := PrimaryResticRepoPath(drive)
|
||||
if !m.restic.RepoExists(repoPath) {
|
||||
continue
|
||||
}
|
||||
snapshots, err := m.restic.ListSnapshots(repoPath, 0)
|
||||
if err != nil {
|
||||
m.logger.Printf("[WARN] Could not list snapshots from %s: %v", repoPath, err)
|
||||
continue
|
||||
}
|
||||
for i := range snapshots {
|
||||
snapshots[i].RepoPath = repoPath
|
||||
}
|
||||
allSnapshots = append(allSnapshots, snapshots...)
|
||||
}
|
||||
// Sort newest first
|
||||
sort.Slice(allSnapshots, func(i, j int) bool {
|
||||
return allSnapshots[i].Time.After(allSnapshots[j].Time)
|
||||
})
|
||||
if limit > 0 && len(allSnapshots) > limit {
|
||||
allSnapshots = allSnapshots[:limit]
|
||||
}
|
||||
return allSnapshots, nil
|
||||
}
|
||||
|
||||
// SetStackProvider sets the stack data provider for app data discovery.
|
||||
@@ -425,34 +548,8 @@ func (m *Manager) GetStackHDDMounts(name string) []string {
|
||||
return m.stackProvider.GetStackHDDMounts(name)
|
||||
}
|
||||
|
||||
// resolveAppBackupPaths returns HDD paths for ALL deployed apps.
|
||||
// User data backup is mandatory — every app with HDD mounts is included.
|
||||
func (m *Manager) resolveAppBackupPaths() []string {
|
||||
if m.stackProvider == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
var paths []string
|
||||
seen := make(map[string]bool)
|
||||
|
||||
for _, stack := range m.stackProvider.ListDeployedStacks() {
|
||||
hddMounts := m.stackProvider.GetStackHDDMounts(stack.Name)
|
||||
for _, mount := range hddMounts {
|
||||
if seen[mount] {
|
||||
continue
|
||||
}
|
||||
if _, err := os.Stat(mount); err == nil {
|
||||
paths = append(paths, mount)
|
||||
seen[mount] = true
|
||||
m.logger.Printf("[DEBUG] Including app data: %s (from %s)", mount, stack.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
return paths
|
||||
}
|
||||
|
||||
// DumpStackDB runs a database dump for containers belonging to a specific stack.
|
||||
// Used by cross-drive backup to ensure DB state matches user data.
|
||||
// Dumps to the stack's home drive: <drive>/backups/primary/<stack>/db-dumps/.
|
||||
func (m *Manager) DumpStackDB(ctx context.Context, stackName string) error {
|
||||
dbs, err := DiscoverDatabases(ctx, m.logger)
|
||||
if err != nil {
|
||||
@@ -470,25 +567,28 @@ func (m *Manager) DumpStackDB(ctx context.Context, stackName string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
m.logger.Printf("[INFO] Running pre-backup DB dump for %s (%d database(s))", stackName, len(stackDBs))
|
||||
results := DumpAll(ctx, stackDBs, m.cfg.Paths.DBDumpDir, m.logger)
|
||||
drivePath := m.GetAppDrivePath(stackName)
|
||||
dumpDir := AppDBDumpPath(drivePath, stackName)
|
||||
|
||||
for _, r := range results {
|
||||
if r.Error != nil {
|
||||
return fmt.Errorf("DB dump failed for %s: %w", r.DB.ContainerName, r.Error)
|
||||
m.logger.Printf("[INFO] Running pre-backup DB dump for %s (%d database(s)) → %s", stackName, len(stackDBs), dumpDir)
|
||||
|
||||
for _, db := range stackDBs {
|
||||
result := DumpOne(ctx, db, dumpDir, m.logger)
|
||||
if result.Error != nil {
|
||||
return fmt.Errorf("DB dump failed for %s: %w", result.DB.ContainerName, result.Error)
|
||||
}
|
||||
m.logger.Printf("[INFO] Pre-backup DB dump OK: %s (%s)", r.DB.ContainerName, humanizeBytes(r.Size))
|
||||
m.logger.Printf("[INFO] Pre-backup DB dump OK: %s (%s)", result.DB.ContainerName, humanizeBytes(result.Size))
|
||||
|
||||
// Persist validation to settings
|
||||
if m.settings != nil && r.FilePath != "" {
|
||||
filename := filepath.Base(r.FilePath)
|
||||
if m.settings != nil && result.FilePath != "" {
|
||||
filename := filepath.Base(result.FilePath)
|
||||
cache := settings.DBValidationCache{
|
||||
ValidatedAt: time.Now().Format(time.RFC3339),
|
||||
TableCount: r.Validation.TableCount,
|
||||
HasHeader: r.Validation.Valid,
|
||||
TableCount: result.Validation.TableCount,
|
||||
HasHeader: result.Validation.Valid,
|
||||
}
|
||||
if !r.Validation.Valid {
|
||||
cache.Error = r.Validation.Error
|
||||
if !result.Validation.Valid {
|
||||
cache.Error = result.Validation.Error
|
||||
}
|
||||
_ = m.settings.SetDBValidation(filename, cache)
|
||||
}
|
||||
@@ -496,6 +596,51 @@ func (m *Manager) DumpStackDB(ctx context.Context, stackName string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// aggregateRepoStats combines stats from all primary restic repos.
|
||||
func (m *Manager) aggregateRepoStats() *RepoStats {
|
||||
drives := m.activeDrives()
|
||||
agg := &RepoStats{}
|
||||
var totalBytes int64
|
||||
|
||||
for _, drive := range drives {
|
||||
repoPath := PrimaryResticRepoPath(drive)
|
||||
if !m.restic.RepoExists(repoPath) {
|
||||
continue
|
||||
}
|
||||
stats, err := m.restic.Stats(repoPath)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
agg.SnapshotCount += stats.SnapshotCount
|
||||
totalBytes += stats.TotalSizeBytes
|
||||
if stats.LatestSnapshot != nil {
|
||||
if agg.LatestSnapshot == nil || stats.LatestSnapshot.Time.After(agg.LatestSnapshot.Time) {
|
||||
agg.LatestSnapshot = stats.LatestSnapshot
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
agg.TotalSizeBytes = totalBytes
|
||||
if totalBytes > 0 {
|
||||
agg.TotalSize = humanizeBytes(totalBytes)
|
||||
}
|
||||
return agg
|
||||
}
|
||||
|
||||
// listAllDumpFiles scans per-drive per-stack DB dump directories.
|
||||
func (m *Manager) listAllDumpFiles() []DumpFileInfo {
|
||||
var allFiles []DumpFileInfo
|
||||
for drive, stacks := range m.groupStacksByDrive() {
|
||||
for _, stack := range stacks {
|
||||
dumpDir := AppDBDumpPath(drive, stack.Name)
|
||||
if files, err := ListDumpFiles(dumpDir); err == nil {
|
||||
allFiles = append(allFiles, files...)
|
||||
}
|
||||
}
|
||||
}
|
||||
return allFiles
|
||||
}
|
||||
|
||||
func shouldPrune(schedule string) bool {
|
||||
loc, err := time.LoadLocation("Europe/Budapest")
|
||||
if err != nil {
|
||||
@@ -521,18 +666,33 @@ func (m *Manager) appendSnapshotRecord(rec SnapshotRecord) {
|
||||
}
|
||||
}
|
||||
|
||||
// LoadSnapshotHistory populates the snapshot history from restic on startup.
|
||||
// LoadSnapshotHistory populates the snapshot history from all primary restic repos on startup.
|
||||
func (m *Manager) LoadSnapshotHistory() {
|
||||
snapshots, err := m.restic.ListSnapshots(20)
|
||||
if err != nil {
|
||||
m.logger.Printf("[WARN] Could not load snapshot history: %v", err)
|
||||
return
|
||||
drives := m.activeDrives()
|
||||
var allSnapshots []SnapshotInfo
|
||||
|
||||
for _, drive := range drives {
|
||||
repoPath := PrimaryResticRepoPath(drive)
|
||||
if !m.restic.RepoExists(repoPath) {
|
||||
continue
|
||||
}
|
||||
snapshots, err := m.restic.ListSnapshots(repoPath, 20)
|
||||
if err != nil {
|
||||
m.logger.Printf("[WARN] Could not load snapshot history from %s: %v", repoPath, err)
|
||||
continue
|
||||
}
|
||||
allSnapshots = append(allSnapshots, snapshots...)
|
||||
}
|
||||
|
||||
// Sort by time (oldest first for ring buffer)
|
||||
sort.Slice(allSnapshots, func(i, j int) bool {
|
||||
return allSnapshots[i].Time.Before(allSnapshots[j].Time)
|
||||
})
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
for _, s := range snapshots {
|
||||
for _, s := range allSnapshots {
|
||||
m.snapshotHistory = append(m.snapshotHistory, SnapshotRecord{
|
||||
SnapshotID: s.ID,
|
||||
Time: s.Time,
|
||||
@@ -543,7 +703,7 @@ func (m *Manager) LoadSnapshotHistory() {
|
||||
if len(m.snapshotHistory) > 20 {
|
||||
m.snapshotHistory = m.snapshotHistory[len(m.snapshotHistory)-20:]
|
||||
}
|
||||
m.logger.Printf("[INFO] Loaded %d historical snapshots", len(m.snapshotHistory))
|
||||
m.logger.Printf("[INFO] Loaded %d historical snapshots from %d repos", len(m.snapshotHistory), len(drives))
|
||||
}
|
||||
|
||||
// RefreshCache updates the cached full status. Called by scheduler every 5 minutes
|
||||
@@ -558,23 +718,15 @@ func (m *Manager) RefreshCache(nextDBDump, nextBackup time.Time) {
|
||||
NextDBDump: nextDBDump,
|
||||
NextBackup: nextBackup,
|
||||
Retention: m.cfg.Backup.Retention,
|
||||
|
||||
RepoPath: m.cfg.Backup.ResticRepo,
|
||||
BackupPaths: []string{
|
||||
m.cfg.Paths.StacksDir,
|
||||
m.cfg.Paths.DBDumpDir,
|
||||
"/opt/docker/felhom-controller/controller.yaml",
|
||||
},
|
||||
}
|
||||
|
||||
// Expensive calls (outside lock)
|
||||
if stats, err := m.restic.Stats(); err == nil {
|
||||
status.RepoStats = stats
|
||||
}
|
||||
files, filesErr := ListDumpFiles(m.cfg.Paths.DBDumpDir)
|
||||
if filesErr == nil {
|
||||
status.DumpFiles = files
|
||||
}
|
||||
status.RepoStats = m.aggregateRepoStats()
|
||||
|
||||
// Scan dump files from per-drive per-stack paths
|
||||
files := m.listAllDumpFiles()
|
||||
status.DumpFiles = files
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
if dbs, err := DiscoverDatabases(ctx, m.logger); err == nil {
|
||||
@@ -584,12 +736,6 @@ func (m *Manager) RefreshCache(nextDBDump, nextBackup time.Time) {
|
||||
// Discover app data — all deployed stacks, backup is mandatory
|
||||
if m.stackProvider != nil {
|
||||
status.AppDataInfo = DiscoverAppData(m.stackProvider, status.DiscoveredDBs)
|
||||
|
||||
// Include enabled app backup paths in the displayed BackupPaths
|
||||
appPaths := m.resolveAppBackupPaths()
|
||||
if len(appPaths) > 0 {
|
||||
status.BackupPaths = append(status.BackupPaths, appPaths...)
|
||||
}
|
||||
}
|
||||
|
||||
// Fill in dynamic fields under lock.
|
||||
@@ -605,7 +751,7 @@ func (m *Manager) RefreshCache(nextDBDump, nextBackup time.Time) {
|
||||
copy(status.SnapshotHistory, m.snapshotHistory)
|
||||
|
||||
// C1: Cross-check lastDBDump results inside lock to prevent torn writes.
|
||||
if m.lastDBDump != nil && filesErr == nil {
|
||||
if m.lastDBDump != nil && len(files) > 0 {
|
||||
fileValidation := make(map[string]DumpValidation) // keyed by filename
|
||||
for _, f := range files {
|
||||
fileValidation[f.FileName] = f.Validation
|
||||
@@ -728,14 +874,8 @@ func (m *Manager) GetFullStatus(nextDBDump, nextBackup time.Time) *FullBackupSta
|
||||
NextDBDump: nextDBDump,
|
||||
NextBackup: nextBackup,
|
||||
Retention: m.cfg.Backup.Retention,
|
||||
RepoPath: m.cfg.Backup.ResticRepo,
|
||||
LastCheckTime: m.lastCheckTime,
|
||||
LastCheckOK: m.lastCheckOK,
|
||||
BackupPaths: []string{
|
||||
m.cfg.Paths.StacksDir,
|
||||
m.cfg.Paths.DBDumpDir,
|
||||
"/opt/docker/felhom-controller/controller.yaml",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -746,3 +886,16 @@ func dbNames(dbs []DiscoveredDB) string {
|
||||
}
|
||||
return strings.Join(names, ", ")
|
||||
}
|
||||
|
||||
// dedup returns the distinct strings of items in order of first appearance.
// The result is nil when items is empty.
func dedup(items []string) []string {
	var unique []string
	visited := map[string]struct{}{}
	for _, s := range items {
		if _, dup := visited[s]; dup {
			continue
		}
		visited[s] = struct{}{}
		unique = append(unique, s)
	}
	return unique
}
|
||||
|
||||
Reference in New Issue
Block a user