Files
deploy-felhom-compose/controller/internal/backup/backup.go
T
admin bdbe170a54 feat: storage watchdog — USB disconnect detection, auto-stop, safe eject, auto-reconnect (v0.17.0)
New storage watchdog monitors registered storage paths every 5s. On disconnect
(3 consecutive probe failures), auto-stops affected apps, lazy-unmounts stale
VFS entries, fires alerts/notifications/hub report. On reconnect (UUID detected),
auto-remounts via fstab, cleans stale restic locks, offers app restart.

Safe disconnect UI for USB drives: confirmation dialog, stop apps, sync, unmount.
Disconnected state visible across all pages (dashboard, settings, backups, monitoring)
with hatched red bars and badges. Backup guards skip disconnected drives.

22 files changed (1 new: monitor/watchdog.go), ~1500 lines added.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-19 19:42:26 +01:00

1147 lines
35 KiB
Go

package backup
import (
"context"
"encoding/json"
"fmt"
"log"
"os"
"path/filepath"
"sort"
"strings"
"sync"
"time"
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
"gitea.dooplex.hu/admin/felhom-controller/internal/monitor"
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
)
// Manager orchestrates database dumps and restic backups.
//
// The fields from mu downwards (last-run results, running flag, snapshot
// history, integrity-check result and the cached page status) are accessed
// with mu held by the methods in this file.
type Manager struct {
// cfg supplies paths, schedules, retention and healthcheck ping UUIDs.
cfg *config.Config
// restic performs the repository operations (init, snapshot, prune, check, stats).
restic *ResticManager
logger *log.Logger
// pinger reports success/failure of runs to the monitoring healthchecks.
pinger *monitor.Pinger
// settings may be nil; every use in this file is nil-guarded.
settings *settings.Settings
// stackProvider is nil until SetStackProvider is called.
stackProvider StackDataProvider
systemDataPath string // fallback drive for SSD-only apps
mu sync.Mutex
lastDBDump *DBDumpStatus
lastBackup *BackupStatus
// running is set by acquireRunning and cleared by releaseRunning.
running bool
snapshotHistory []SnapshotRecord // ring buffer, last 20 entries
snapshotHistoryFile string // path to persist snapshot history JSON
lastCheckTime time.Time
lastCheckOK bool
// Cached status for page rendering (refreshed periodically)
cachedStatus *FullBackupStatus
cacheTime time.Time
// AfterBackup is called after a backup completes to refresh the cache.
// Set by main.go to avoid circular import with scheduler.
AfterBackup func()
}
// SnapshotRecord combines restic snapshot metadata with our run stats.
// Records are serialized as JSON to the snapshot history file.
type SnapshotRecord struct {
SnapshotID string `json:"snapshot_id"`
Time time.Time `json:"time"`
FilesNew int `json:"files_new"`
FilesChanged int `json:"files_changed"`
DataAdded string `json:"data_added"`
// Duration is the wall-clock duration of the backup run that produced the record.
Duration time.Duration `json:"duration"`
Success bool `json:"success"`
HasStats bool `json:"has_stats"` // false for historical entries loaded from restic
}
// DriveRepoInfo holds per-drive restic repository statistics for the
// details ("Részletek") section of the backup page.
type DriveRepoInfo struct {
// DrivePath is the drive whose primary restic repository was inspected.
DrivePath string
DriveLabel string // filled by handler from settings
// TotalSize is the human-readable form of TotalSizeBytes as reported by restic stats.
TotalSize string
TotalSizeBytes int64
SnapshotCount int
}
// CrossDriveSummaryItem holds display data for one app's cross-drive backup.
// Items are not populated in this file; GetFullStatus always hands them out
// empty so the handler can build them.
type CrossDriveSummaryItem struct {
StackName string
DisplayName string
Method string // "rsync" or "restic"
MethodLabel string // "Egyszerű másolat" or "Restic"
DestPath string
DestLabel string // storage path label
Schedule string
ScheduleLabel string // "Naponta" or "Hetente" or "Kézi"
LastStatus string // "ok", "error", "running", ""
LastRunShort string // formatted short time e.g. "03:15"
SizeHuman string
}
// FullBackupStatus contains everything the backup page needs.
// It is assembled by RefreshCache and handed out as a copy by GetFullStatus.
type FullBackupStatus struct {
// Enabled mirrors cfg.Backup.Enabled.
Enabled bool
// Running reports whether a run currently holds the manager's running flag.
Running bool
// DB Dumps
LastDBDump *DBDumpStatus
DumpFiles []DumpFileInfo
DiscoveredDBs []DiscoveredDB
// Restic
LastBackup *BackupStatus
// SnapshotHistory is ordered newest first in the status handed to callers.
SnapshotHistory []SnapshotRecord
RepoStats *RepoStats
PerDriveRepoStats []DriveRepoInfo // per-drive Tier 1 restic stats
// Schedule
DBDumpSchedule string
ResticSchedule string
PruneSchedule string
NextDBDump time.Time
NextBackup time.Time
Retention config.RetentionConfig
// Repository health
LastCheckTime time.Time
LastCheckOK bool
// Remote (placeholder); not set anywhere in this file.
RemoteEnabled bool
// App data backup
AppDataInfo []AppBackupInfo
// Cross-drive backup summary
CrossDriveSummary []CrossDriveSummaryItem
UnconfiguredApps []CrossDriveSummaryItem // apps with HDD data but no cross-drive config
CrossDriveWarnings []string // destination health warnings
// Flash messages (set by handlers, passed through redirect)
FlashSuccess string
FlashError string
}
// DBDumpStatus holds the last DB dump result.
type DBDumpStatus struct {
LastRun time.Time
// Results has one entry per database that was actually dumped
// (databases skipped because of a disconnected drive are not included).
Results []DumpResult
// Success is true only when no dump in the run returned an error.
Success bool
Duration time.Duration
}
// BackupStatus holds the last backup result.
type BackupStatus struct {
LastRun time.Time
// Snapshot is the result of the last drive snapshot taken during the run;
// it is nil when no drive produced a snapshot.
Snapshot *SnapshotResult
Success bool
Duration time.Duration
// RepoStats is the aggregate over all primary repositories after the run.
RepoStats *RepoStats
}
// NewManager creates a new backup manager.
//
// A warning is logged when cfg.Paths.SystemDataPath is empty, because that
// path is the fallback drive for apps without an HDD path. The snapshot
// history file lives in cfg.Paths.DataDir, or in the built-in default data
// directory when DataDir is unset.
func NewManager(cfg *config.Config, pinger *monitor.Pinger, sett *settings.Settings, logger *log.Logger) *Manager {
	systemPath := cfg.Paths.SystemDataPath
	if systemPath == "" {
		logger.Printf("[WARN] SystemDataPath is empty in config — SSD-only apps will not have correct backup paths")
	}

	historyDir := "/opt/docker/felhom-controller/data"
	if configured := cfg.Paths.DataDir; configured != "" {
		historyDir = configured
	}

	mgr := &Manager{
		cfg:            cfg,
		restic:         NewResticManager(cfg, logger),
		logger:         logger,
		pinger:         pinger,
		settings:       sett,
		systemDataPath: systemPath,
	}
	mgr.snapshotHistoryFile = filepath.Join(historyDir, "snapshot-history.json")
	return mgr
}
// GetAppDrivePath returns the drive path for an app.
// The stack provider's HDD path wins when it is non-empty; otherwise the
// system data path is returned (and an error is logged if that is empty too).
func (m *Manager) GetAppDrivePath(stackName string) string {
	if m.stackProvider != nil {
		hdd := m.stackProvider.GetStackHDDPath(stackName)
		if hdd != "" {
			return hdd
		}
	}

	fallback := m.systemDataPath
	if fallback == "" {
		m.logger.Printf("[ERROR] systemDataPath is empty — cannot determine drive for %s", stackName)
	}
	return fallback
}
// groupStacksByDrive groups deployed stacks by their home drive path.
// Returns nil when no stack provider has been set.
func (m *Manager) groupStacksByDrive() map[string][]StackSummary {
	if m.stackProvider == nil {
		return nil
	}

	byDrive := map[string][]StackSummary{}
	deployed := m.stackProvider.ListDeployedStacks()
	for _, entry := range deployed {
		home := m.GetAppDrivePath(entry.Name)
		byDrive[home] = append(byDrive[home], entry)
	}
	return byDrive
}
// activeDrives returns the drive paths that host at least one deployed
// stack, in ascending lexical order.
func (m *Manager) activeDrives() []string {
	var paths []string
	for drivePath := range m.groupStacksByDrive() {
		paths = append(paths, drivePath)
	}
	sort.Sort(sort.StringSlice(paths))
	return paths
}
// RunDBDumps discovers and dumps all databases to per-drive, per-app paths.
// It fails immediately if another run already holds the running flag.
func (m *Manager) RunDBDumps(ctx context.Context) error {
	busyErr := m.acquireRunning()
	if busyErr != nil {
		return busyErr
	}
	defer m.releaseRunning()

	return m.runDBDumpsInternal(ctx)
}
// runDBDumpsInternal is the implementation of RunDBDumps. Caller must hold the running flag.
//
// Each discovered database is dumped into the dump directory of its stack's
// home drive. Databases on disconnected drives are skipped. The outcome is
// stored as the last DB dump status and reported to the DB-dump healthcheck.
// Returns an error when discovery fails or when at least one dump failed.
func (m *Manager) runDBDumpsInternal(ctx context.Context) error {
	startedAt := time.Now()
	m.logger.Printf("[INFO] Starting database dump run")

	discovered, err := DiscoverDatabases(ctx, m.logger)
	if err != nil {
		m.logger.Printf("[ERROR] Database discovery failed: %v", err)
		return err
	}

	if len(discovered) == 0 {
		m.logger.Printf("[INFO] No database containers found")
		m.mu.Lock()
		m.lastDBDump = &DBDumpStatus{
			LastRun:  time.Now(),
			Success:  true,
			Duration: time.Since(startedAt),
		}
		m.mu.Unlock()
		return nil
	}
	m.logger.Printf("[INFO] Discovered %d database(s): %s", len(discovered), dbNames(discovered))

	var (
		results     []DumpResult
		reportLines []string
		dumpedBytes int64
		anyFailed   bool
	)
	for _, db := range discovered {
		drive := m.GetAppDrivePath(db.StackName)

		// Never write dumps to a drive that is currently disconnected.
		if m.settings != nil && m.settings.IsDisconnected(drive) {
			m.logger.Printf("[WARN] Skipping DB dump for %s — drive disconnected: %s", db.StackName, drive)
			reportLines = append(reportLines, fmt.Sprintf("SKIP %s (drive disconnected)", db.ContainerName))
			continue
		}

		res := DumpOne(ctx, db, AppDBDumpPath(drive, db.StackName), m.logger)
		results = append(results, res)

		if res.Error != nil {
			anyFailed = true
			reportLines = append(reportLines, fmt.Sprintf("FAIL %s: %v", res.DB.ContainerName, res.Error))
			m.logger.Printf("[ERROR] DB dump failed for %s: %v", res.DB.ContainerName, res.Error)
			continue
		}

		dumpedBytes += res.Size
		reportLines = append(reportLines, fmt.Sprintf("OK %s (%s)", res.DB.ContainerName, humanizeBytes(res.Size)))

		// Persist validation result to settings.json
		if m.settings == nil || res.FilePath == "" {
			continue
		}
		dumpName := filepath.Base(res.FilePath)
		entry := settings.DBValidationCache{
			ValidatedAt: time.Now().Format(time.RFC3339),
			TableCount:  res.Validation.TableCount,
			HasHeader:   res.Validation.Valid,
		}
		if !res.Validation.Valid {
			entry.Error = res.Validation.Error
		}
		if cacheErr := m.settings.SetDBValidation(dumpName, entry); cacheErr != nil {
			m.logger.Printf("[WARN] Failed to cache validation for %s: %v", dumpName, cacheErr)
		}
	}

	elapsed := time.Since(startedAt)
	m.mu.Lock()
	m.lastDBDump = &DBDumpStatus{
		LastRun:  time.Now(),
		Results:  results,
		Success:  !anyFailed,
		Duration: elapsed,
	}
	m.mu.Unlock()

	// Ping healthcheck
	pingID := m.cfg.Monitoring.PingUUIDs.DBDump
	report := fmt.Sprintf("DB dump: %d databases, %s total\n%s",
		len(results), humanizeBytes(dumpedBytes), strings.Join(reportLines, "\n"))
	if anyFailed {
		m.pinger.Fail(pingID, report)
		return fmt.Errorf("some database dumps failed")
	}

	m.pinger.Ping(pingID, report)
	m.logger.Printf("[INFO] DB dump completed: %d databases, %s total (%s)",
		len(results), humanizeBytes(dumpedBytes), elapsed.Round(time.Millisecond))
	return nil
}
// RunBackup runs per-drive restic backup snapshots.
// It fails immediately if another run already holds the running flag.
func (m *Manager) RunBackup(ctx context.Context) error {
	busyErr := m.acquireRunning()
	if busyErr != nil {
		return busyErr
	}
	defer m.releaseRunning()

	return m.runBackupInternal(ctx)
}
// runBackupInternal is the implementation of RunBackup. Caller must hold the running flag.
//
// For every drive that hosts deployed stacks (disconnected drives are
// skipped) it makes sure the primary restic repo exists, snapshots the
// infrastructure paths plus each stack's existing app data, HDD mounts and
// DB dump directories, and prunes the repo when shouldPrune says so.
//
// Healthcheck reporting:
//   - every attempted drive failed: Fail ping, error returned;
//   - some drives failed, some succeeded: Fail ping describing the partial
//     failure, error returned (previously a success ping was sent here,
//     hiding the failed drive from monitoring);
//   - all attempted drives succeeded: success ping, nil returned.
//
// The returned error is the last per-drive error encountered, or nil.
func (m *Manager) runBackupInternal(ctx context.Context) error {
	start := time.Now()
	m.logger.Printf("[INFO] Starting restic backup (per-drive)")

	driveStacks := m.groupStacksByDrive()
	if len(driveStacks) == 0 {
		m.logger.Printf("[INFO] No deployed stacks — skipping backup")
		return nil
	}

	// Infrastructure paths included in every drive's primary repo
	infraPaths := []string{
		m.cfg.Paths.StacksDir,
		"/opt/docker/felhom-controller/controller.yaml",
	}

	var lastResult *SnapshotResult
	var anyErr error
	driveCount := 0  // drives with a successful snapshot
	failedCount := 0 // drives whose init or snapshot failed
	for drivePath, stacks := range driveStacks {
		// Skip disconnected drives
		if m.settings != nil && m.settings.IsDisconnected(drivePath) {
			m.logger.Printf("[WARN] Skipping backup for drive %s — disconnected", drivePath)
			continue
		}

		repoPath := PrimaryResticRepoPath(drivePath)

		// Ensure repo is initialized
		if err := m.restic.EnsureInitialized(repoPath); err != nil {
			m.logger.Printf("[ERROR] Restic init failed for %s: %v", repoPath, err)
			anyErr = err
			failedCount++
			continue
		}

		// Build paths for this drive; only paths that exist on disk are included.
		var paths []string
		paths = append(paths, infraPaths...)
		for _, stack := range stacks {
			// App data (appdata/<stack>/)
			appData := AppDataDir(drivePath, stack.Name)
			if _, err := os.Stat(appData); err == nil {
				paths = append(paths, appData)
			}

			// HDD mounts (for apps with custom mount points)
			if m.stackProvider != nil {
				for _, mount := range m.stackProvider.GetStackHDDMounts(stack.Name) {
					if _, err := os.Stat(mount); err == nil {
						paths = append(paths, mount)
					}
				}
			}

			// DB dumps for this stack
			dumpDir := AppDBDumpPath(drivePath, stack.Name)
			if _, err := os.Stat(dumpDir); err == nil {
				paths = append(paths, dumpDir)
			}
		}

		// Deduplicate paths
		paths = dedup(paths)

		tags := []string{"felhom", m.cfg.Customer.ID, filepath.Base(drivePath)}
		m.logger.Printf("[INFO] Backing up drive %s (%d apps, %d paths)", drivePath, len(stacks), len(paths))

		result, err := m.restic.Snapshot(repoPath, paths, tags)
		if err != nil {
			m.logger.Printf("[ERROR] Restic backup failed for drive %s: %v", drivePath, err)
			anyErr = err
			failedCount++
			continue
		}
		lastResult = result
		driveCount++

		// Prune check (weekly — Sunday); a prune failure does not fail the backup.
		if shouldPrune(m.cfg.Backup.PruneSchedule) {
			m.logger.Printf("[INFO] Running weekly prune for %s", repoPath)
			if err := m.restic.Prune(repoPath, m.cfg.Backup.Retention); err != nil {
				m.logger.Printf("[WARN] Restic prune failed for %s: %v", repoPath, err)
			}
		}
	}

	duration := time.Since(start)
	uuid := m.cfg.Monitoring.PingUUIDs.Backup

	if anyErr != nil && driveCount == 0 {
		// All drives failed
		m.pinger.Fail(uuid, fmt.Sprintf("Backup failed: %v", anyErr))
		m.mu.Lock()
		m.lastBackup = &BackupStatus{
			LastRun:  time.Now(),
			Success:  false,
			Duration: duration,
		}
		m.mu.Unlock()
		return anyErr
	}

	// Get aggregated stats
	stats := m.aggregateRepoStats()

	m.mu.Lock()
	m.lastBackup = &BackupStatus{
		LastRun:   time.Now(),
		Snapshot:  lastResult,
		Success:   anyErr == nil,
		Duration:  duration,
		RepoStats: stats,
	}
	if lastResult != nil {
		m.appendSnapshotRecord(SnapshotRecord{
			SnapshotID:   lastResult.SnapshotID,
			Time:         time.Now(),
			FilesNew:     lastResult.FilesNew,
			FilesChanged: lastResult.FilesChanged,
			DataAdded:    lastResult.DataAdded,
			Duration:     duration,
			Success:      true,
			HasStats:     true,
		})
	}
	m.mu.Unlock()

	switch {
	case anyErr != nil:
		// Partial failure: at least one drive succeeded and at least one failed.
		// Monitoring must see this as a failure, matching the DB dump run which
		// signals Fail when any single dump fails.
		body := fmt.Sprintf("Backup partially failed (%d drives OK, %d failed)\nLast error: %v\nDuration: %s",
			driveCount, failedCount, anyErr, duration.Round(time.Second))
		m.pinger.Fail(uuid, body)
		m.logger.Printf("[ERROR] Restic backup partially failed: %d drives OK, %d failed (%s): %v",
			driveCount, failedCount, duration.Round(time.Millisecond), anyErr)
	case lastResult != nil:
		body := fmt.Sprintf("Backup OK (%d drives)\nSnapshot: %s\nNew files: %d, Changed: %d\nData added: %s\nDuration: %s",
			driveCount, lastResult.SnapshotID, lastResult.FilesNew, lastResult.FilesChanged, lastResult.DataAdded,
			duration.Round(time.Second))
		m.pinger.Ping(uuid, body)
		m.logger.Printf("[INFO] Restic backup completed: %d drives, snapshot %s, %d new, %d changed, %s added (%s)",
			driveCount, lastResult.SnapshotID, lastResult.FilesNew, lastResult.FilesChanged, lastResult.DataAdded,
			duration.Round(time.Millisecond))
	}

	// Refresh cache so the page shows updated data immediately
	if m.AfterBackup != nil {
		m.AfterBackup()
	}

	return anyErr
}
// RunIntegrityCheck runs restic check on all primary repos and pings healthchecks.
//
// Drives without an existing primary repo are skipped. The result (time and
// pass/fail) is stored for the status page. On failure the last check error
// is returned and a Fail ping is sent; on success the ping and log message
// report the number of repositories that were actually checked (not the
// number of active drives, which may include drives without a repo).
func (m *Manager) RunIntegrityCheck(ctx context.Context) error {
	m.logger.Printf("[INFO] Starting restic integrity check")
	start := time.Now()

	drives := m.activeDrives()
	if len(drives) == 0 {
		m.logger.Printf("[INFO] No active drives — skipping integrity check")
		return nil
	}

	var checkErr error
	checked := 0
	for _, drive := range drives {
		repoPath := PrimaryResticRepoPath(drive)
		if !m.restic.RepoExists(repoPath) {
			continue
		}
		checked++
		if err := m.restic.Check(repoPath); err != nil {
			m.logger.Printf("[ERROR] Restic check failed for %s: %v", repoPath, err)
			checkErr = err
		}
	}

	duration := time.Since(start)
	uuid := m.cfg.Monitoring.PingUUIDs.BackupIntegrity

	m.mu.Lock()
	m.lastCheckTime = time.Now()
	m.lastCheckOK = checkErr == nil
	m.mu.Unlock()

	if checkErr != nil {
		m.logger.Printf("[ERROR] Restic integrity check failed (%s): %v", duration.Round(time.Second), checkErr)
		m.pinger.Fail(uuid, fmt.Sprintf("restic check failed: %v", checkErr))
		return checkErr
	}

	m.logger.Printf("[INFO] Restic integrity check passed (%d repos, %s)", checked, duration.Round(time.Second))
	m.pinger.Ping(uuid, fmt.Sprintf("restic check passed (%d repos, %s)", checked, duration.Round(time.Second)))
	return nil
}
// RunFullBackup runs DB dumps followed by restic backup, both under a single
// acquisition of the running flag. A failing dump step is only logged; the
// returned error is that of the restic backup step.
func (m *Manager) RunFullBackup(ctx context.Context) error {
	busyErr := m.acquireRunning()
	if busyErr != nil {
		return busyErr
	}
	defer m.releaseRunning()

	// Step 1: DB dumps
	dumpErr := m.runDBDumpsInternal(ctx)
	if dumpErr != nil {
		m.logger.Printf("[WARN] DB dump had errors, continuing with backup anyway")
	}

	// Step 2: Restic backup
	return m.runBackupInternal(ctx)
}
// GetStatus returns the last DB dump status and the last backup status
// (either may be nil), read under the manager mutex.
func (m *Manager) GetStatus() (*DBDumpStatus, *BackupStatus) {
	m.mu.Lock()
	dump, backup := m.lastDBDump, m.lastBackup
	m.mu.Unlock()
	return dump, backup
}
// GetRepoStats returns aggregated repository statistics across all primary repos.
// When the aggregate has neither snapshots nor a total size, the (empty)
// stats are returned together with a "no repos available" error.
func (m *Manager) GetRepoStats() (*RepoStats, error) {
	agg := m.aggregateRepoStats()
	if agg.SnapshotCount != 0 || agg.TotalSize != "" {
		return agg, nil
	}
	return agg, fmt.Errorf("no repos available")
}
// IsRunning returns whether a backup or restore is currently in progress,
// i.e. whether the running flag is set.
func (m *Manager) IsRunning() bool {
	m.mu.Lock()
	active := m.running
	m.mu.Unlock()
	return active
}
// acquireRunning sets the running flag under the mutex.
// Returns an error, leaving the flag untouched, if it is already set.
func (m *Manager) acquireRunning() error {
	m.mu.Lock()
	defer m.mu.Unlock()

	if !m.running {
		m.running = true
		return nil
	}
	return fmt.Errorf("backup already in progress")
}
// releaseRunning clears the running flag under the mutex.
func (m *Manager) releaseRunning() {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.running = false
}
// GetResticPassword returns the restic repository encryption password as
// provided by the underlying restic manager.
func (m *Manager) GetResticPassword() (string, error) {
	password, err := m.restic.GetPassword()
	return password, err
}
// ListSnapshots returns snapshots from all primary restic repositories,
// merged and sorted newest first. Each entry is stamped with the repo it came
// from. Repos that cannot be listed are logged and skipped. A positive limit
// caps the number of returned entries. The error result is always nil.
func (m *Manager) ListSnapshots(limit int) ([]SnapshotInfo, error) {
	var merged []SnapshotInfo
	for _, drivePath := range m.activeDrives() {
		repo := PrimaryResticRepoPath(drivePath)
		if !m.restic.RepoExists(repo) {
			continue
		}

		found, err := m.restic.ListSnapshots(repo, 0)
		if err != nil {
			m.logger.Printf("[WARN] Could not list snapshots from %s: %v", repo, err)
			continue
		}
		for idx := range found {
			found[idx].RepoPath = repo
		}
		merged = append(merged, found...)
	}

	// Sort newest first
	sort.Slice(merged, func(a, b int) bool {
		return merged[a].Time.After(merged[b].Time)
	})

	if limit > 0 && limit < len(merged) {
		merged = merged[:limit]
	}
	return merged, nil
}
// ListAllSnapshots returns snapshots from both primary and secondary restic repos.
// Primary snapshots get Tier=1, secondary snapshots get Tier=2. Secondary
// repos are looked up on the destination paths of all cross-drive configs
// whose method is "restic". Results are sorted newest first and capped by a
// positive limit. Unlistable repos are logged and skipped; the error result
// is always nil.
func (m *Manager) ListAllSnapshots(limit int) ([]SnapshotInfo, error) {
	var merged []SnapshotInfo

	// Tier 1: primary repos (same as ListSnapshots)
	for _, drivePath := range m.activeDrives() {
		repo := PrimaryResticRepoPath(drivePath)
		if !m.restic.RepoExists(repo) {
			continue
		}
		found, err := m.restic.ListSnapshots(repo, 0)
		if err != nil {
			m.logger.Printf("[WARN] Could not list snapshots from %s: %v", repo, err)
			continue
		}
		for idx := range found {
			found[idx].RepoPath = repo
			found[idx].Tier = 1
		}
		merged = append(merged, found...)
	}

	// Tier 2: secondary restic repos on cross-drive destinations
	if m.settings != nil {
		// Collect each destination once, even if several apps share it.
		destinations := make(map[string]bool)
		for _, cross := range m.settings.GetAllCrossDriveConfigs() {
			if cross == nil || cross.Method != "restic" || cross.DestinationPath == "" {
				continue
			}
			destinations[cross.DestinationPath] = true
		}

		for dest := range destinations {
			repo := SecondaryResticRepoPath(dest)
			if !m.restic.RepoExists(repo) {
				continue
			}
			found, err := m.restic.ListSnapshots(repo, 0)
			if err != nil {
				m.logger.Printf("[WARN] Could not list secondary snapshots from %s: %v", repo, err)
				continue
			}
			for idx := range found {
				found[idx].RepoPath = repo
				found[idx].Tier = 2
			}
			merged = append(merged, found...)
		}
	}

	// Sort newest first
	sort.Slice(merged, func(a, b int) bool {
		return merged[a].Time.After(merged[b].Time)
	})

	if limit > 0 && limit < len(merged) {
		merged = merged[:limit]
	}
	return merged, nil
}
// SetStackProvider sets the stack data provider for app data discovery.
// C3: the write is done with the mutex held because stackProvider is read by
// concurrent goroutines.
func (m *Manager) SetStackProvider(provider StackDataProvider) {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.stackProvider = provider
}
// UnlockRepo runs restic unlock on the given repo path.
//
// A missing repository is not an error: there is nothing to unlock, so nil is
// returned. When the command fails, the returned error wraps the command
// error (so callers can inspect it with errors.Is / errors.As) and includes
// the trimmed combined output of the command.
func (m *Manager) UnlockRepo(ctx context.Context, repoPath string) error {
	if !m.restic.RepoExists(repoPath) {
		return nil // no repo to unlock
	}

	cmd := m.restic.UnlockCommand(ctx, repoPath)
	out, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("restic unlock: %w (%s)", err, strings.TrimSpace(string(out)))
	}

	m.logger.Printf("[INFO] Restic repo unlocked: %s", repoPath)
	return nil
}
// GetStackHDDMounts returns HDD mount paths for the named stack via the
// stack provider, or nil when no provider has been set.
func (m *Manager) GetStackHDDMounts(name string) []string {
	if m.stackProvider != nil {
		return m.stackProvider.GetStackHDDMounts(name)
	}
	return nil
}
// DumpStackDB runs a database dump for containers belonging to a specific stack.
// Dumps to the stack's home drive: <drive>/backups/primary/<stack>/db-dumps/.
//
// Returns nil without doing anything when the stack has no databases. Returns
// an error when database discovery fails, when no absolute drive path can be
// determined for the stack, or on the first dump that fails (remaining
// databases are then not dumped). Validation results are cached in settings
// when available; a failure to cache is logged as a warning, the same way the
// scheduled dump run handles it, and does not fail the dump.
func (m *Manager) DumpStackDB(ctx context.Context, stackName string) error {
	dbs, err := DiscoverDatabases(ctx, m.logger)
	if err != nil {
		return fmt.Errorf("database discovery failed: %w", err)
	}

	var stackDBs []DiscoveredDB
	for _, db := range dbs {
		if db.StackName == stackName {
			stackDBs = append(stackDBs, db)
		}
	}
	if len(stackDBs) == 0 {
		m.logger.Printf("[DEBUG] No databases found for stack %s — skipping pre-backup dump", stackName)
		return nil
	}

	drivePath := m.GetAppDrivePath(stackName)
	if drivePath == "" || !filepath.IsAbs(drivePath) {
		return fmt.Errorf("cannot determine absolute drive path for %s (systemDataPath not configured?)", stackName)
	}

	dumpDir := AppDBDumpPath(drivePath, stackName)
	m.logger.Printf("[INFO] Running pre-backup DB dump for %s (%d database(s)) → %s", stackName, len(stackDBs), dumpDir)

	for _, db := range stackDBs {
		result := DumpOne(ctx, db, dumpDir, m.logger)
		if result.Error != nil {
			return fmt.Errorf("DB dump failed for %s: %w", result.DB.ContainerName, result.Error)
		}
		m.logger.Printf("[INFO] Pre-backup DB dump OK: %s (%s)", result.DB.ContainerName, humanizeBytes(result.Size))

		// Persist validation to settings
		if m.settings == nil || result.FilePath == "" {
			continue
		}
		filename := filepath.Base(result.FilePath)
		cache := settings.DBValidationCache{
			ValidatedAt: time.Now().Format(time.RFC3339),
			TableCount:  result.Validation.TableCount,
			HasHeader:   result.Validation.Valid,
		}
		if !result.Validation.Valid {
			cache.Error = result.Validation.Error
		}
		// Best effort: the dump itself succeeded, so only warn.
		if err := m.settings.SetDBValidation(filename, cache); err != nil {
			m.logger.Printf("[WARN] Failed to cache validation for %s: %v", filename, err)
		}
	}
	return nil
}
// perDriveRepoStats returns restic repository statistics for each active
// drive, for the details ("Részletek") section. Drives without a primary repo,
// or whose stats cannot be read, are left out. DriveLabel is not filled here.
func (m *Manager) perDriveRepoStats() []DriveRepoInfo {
	var infos []DriveRepoInfo
	for _, drivePath := range m.activeDrives() {
		repo := PrimaryResticRepoPath(drivePath)
		if !m.restic.RepoExists(repo) {
			continue
		}

		repoStats, err := m.restic.Stats(repo)
		if err != nil {
			continue
		}

		entry := DriveRepoInfo{DrivePath: drivePath}
		entry.TotalSize = repoStats.TotalSize
		entry.TotalSizeBytes = repoStats.TotalSizeBytes
		entry.SnapshotCount = repoStats.SnapshotCount
		infos = append(infos, entry)
	}
	return infos
}
// aggregateRepoStats combines stats from all primary restic repos: snapshot
// counts and byte sizes are summed and the most recent latest-snapshot wins.
// Repos that are missing or whose stats fail are ignored. The result is never
// nil; TotalSize stays empty when the summed size is zero.
func (m *Manager) aggregateRepoStats() *RepoStats {
	agg := &RepoStats{}
	for _, drivePath := range m.activeDrives() {
		repo := PrimaryResticRepoPath(drivePath)
		if !m.restic.RepoExists(repo) {
			continue
		}
		repoStats, err := m.restic.Stats(repo)
		if err != nil {
			continue
		}

		agg.SnapshotCount += repoStats.SnapshotCount
		agg.TotalSizeBytes += repoStats.TotalSizeBytes

		latest := repoStats.LatestSnapshot
		if latest == nil {
			continue
		}
		if agg.LatestSnapshot == nil || latest.Time.After(agg.LatestSnapshot.Time) {
			agg.LatestSnapshot = latest
		}
	}

	if agg.TotalSizeBytes > 0 {
		agg.TotalSize = humanizeBytes(agg.TotalSizeBytes)
	}
	return agg
}
// listAllDumpFiles scans per-drive per-stack DB dump directories and returns
// all dump files found. Directories that cannot be listed are skipped.
func (m *Manager) listAllDumpFiles() []DumpFileInfo {
	var collected []DumpFileInfo
	byDrive := m.groupStacksByDrive()
	for drivePath, stacks := range byDrive {
		for i := range stacks {
			files, err := ListDumpFiles(AppDBDumpPath(drivePath, stacks[i].Name))
			if err != nil {
				continue
			}
			collected = append(collected, files...)
		}
	}
	return collected
}
// shouldPrune reports whether a prune should run now for the given schedule.
// "daily" (case-insensitive) always prunes; "weekly" and any other value
// prune only on Sundays, evaluated in the Europe/Budapest time zone (UTC if
// that zone cannot be loaded).
func shouldPrune(schedule string) bool {
	if strings.ToLower(schedule) == "daily" {
		return true
	}

	zone, err := time.LoadLocation("Europe/Budapest")
	if err != nil {
		zone = time.UTC
	}
	return time.Now().In(zone).Weekday() == time.Sunday
}
// appendSnapshotRecord adds a record to the snapshot history, keeps only the
// 20 most recent entries, and persists the history. Caller must hold m.mu.
func (m *Manager) appendSnapshotRecord(rec SnapshotRecord) {
	const maxHistory = 20

	history := append(m.snapshotHistory, rec)
	if overflow := len(history) - maxHistory; overflow > 0 {
		history = history[overflow:]
	}
	m.snapshotHistory = history

	m.saveSnapshotHistory()
}
// saveSnapshotHistory persists the current snapshotHistory to disk as JSON.
// Caller must hold m.mu. The data is written to a ".tmp" sibling first and
// then renamed over the target. Failures are logged, not returned. Does
// nothing when no history file path is configured.
func (m *Manager) saveSnapshotHistory() {
	target := m.snapshotHistoryFile
	if target == "" {
		return
	}

	payload, err := json.Marshal(m.snapshotHistory)
	if err != nil {
		m.logger.Printf("[WARN] Could not marshal snapshot history: %v", err)
		return
	}

	staging := target + ".tmp"
	if err = os.WriteFile(staging, payload, 0644); err != nil {
		m.logger.Printf("[WARN] Could not write snapshot history tmp file: %v", err)
		return
	}

	if err = os.Rename(staging, target); err != nil {
		m.logger.Printf("[WARN] Could not rename snapshot history file: %v", err)
	}
}
// loadSnapshotHistoryFromFile reads the persisted snapshot history from disk.
// Returns nil if no path is configured, or if the file does not exist, cannot
// be read, or cannot be parsed. Read and parse problems other than a missing
// file are logged as warnings.
func (m *Manager) loadSnapshotHistoryFromFile() []SnapshotRecord {
	source := m.snapshotHistoryFile
	if source == "" {
		return nil
	}

	raw, err := os.ReadFile(source)
	switch {
	case err == nil:
		// fall through to parsing
	case os.IsNotExist(err):
		return nil
	default:
		m.logger.Printf("[WARN] Could not read snapshot history file: %v", err)
		return nil
	}

	var history []SnapshotRecord
	if parseErr := json.Unmarshal(raw, &history); parseErr != nil {
		m.logger.Printf("[WARN] Could not parse snapshot history file: %v", parseErr)
		return nil
	}
	return history
}
// LoadSnapshotHistory populates the snapshot history on startup.
// Persisted records (which carry delta stats from real backup runs) are the
// starting point. Snapshots listed from the primary restic repos whose ID is
// not among the persisted records are added as stat-less entries. Without
// persisted records the history is built from the restic listing alone. The
// history is kept oldest-first and trimmed to the 20 newest entries.
func (m *Manager) LoadSnapshotHistory() {
	// Persisted records contain delta stats from actual backup runs.
	persisted := m.loadSnapshotHistoryFromFile()

	// IDs already covered by the persisted file.
	known := make(map[string]struct{}, len(persisted))
	for i := range persisted {
		known[persisted[i].SnapshotID] = struct{}{}
	}

	// Query restic repos for any snapshots not in the persisted file.
	drives := m.activeDrives()
	var fromRestic []SnapshotInfo
	for _, drivePath := range drives {
		repo := PrimaryResticRepoPath(drivePath)
		if !m.restic.RepoExists(repo) {
			continue
		}
		listed, err := m.restic.ListSnapshots(repo, 20)
		if err != nil {
			m.logger.Printf("[WARN] Could not load snapshot history from %s: %v", repo, err)
			continue
		}
		fromRestic = append(fromRestic, listed...)
	}

	// Oldest first, matching the ring buffer order.
	sort.Slice(fromRestic, func(a, b int) bool {
		return fromRestic[a].Time.Before(fromRestic[b].Time)
	})

	m.mu.Lock()
	defer m.mu.Unlock()

	hadPersisted := len(persisted) > 0
	if hadPersisted {
		m.snapshotHistory = persisted
	}

	// With no persisted records "known" is empty, so every restic snapshot is added.
	for _, snap := range fromRestic {
		if _, covered := known[snap.ID]; covered {
			continue
		}
		m.snapshotHistory = append(m.snapshotHistory, SnapshotRecord{
			SnapshotID: snap.ID,
			Time:       snap.Time,
			HasStats:   false,
			Success:    true,
		})
	}

	if hadPersisted {
		// Re-sort by time after merge (oldest first for ring buffer)
		sort.Slice(m.snapshotHistory, func(a, b int) bool {
			return m.snapshotHistory[a].Time.Before(m.snapshotHistory[b].Time)
		})
		m.logger.Printf("[INFO] Loaded %d snapshots from persisted history (merged with %d restic entries)", len(persisted), len(fromRestic))
	} else {
		m.logger.Printf("[INFO] Loaded %d historical snapshots from %d restic repos (no persisted history)", len(m.snapshotHistory), len(drives))
	}

	if excess := len(m.snapshotHistory) - 20; excess > 0 {
		m.snapshotHistory = m.snapshotHistory[excess:]
	}
}
// RefreshCache updates the cached full status. Called by scheduler every 5 minutes
// and after each backup run.
//
// The function works in two phases. First, without holding m.mu, it gathers
// everything that may be slow: aggregate and per-drive repo stats, dump files
// on disk, database discovery (bounded by a 10 second timeout) and app data
// discovery. Then, with m.mu held, it copies the in-memory run state into the
// new status, patches missing validation data in lastDBDump from the files
// found on disk, and swaps the new status into the cache.
//
// nextDBDump and nextBackup are stored as the next scheduled run times.
func (m *Manager) RefreshCache(nextDBDump, nextBackup time.Time) {
// Static, config-derived fields.
status := &FullBackupStatus{
Enabled: m.cfg.Backup.Enabled,
DBDumpSchedule: m.cfg.Backup.DBDumpSchedule,
ResticSchedule: m.cfg.Backup.ResticSchedule,
PruneSchedule: m.cfg.Backup.PruneSchedule,
NextDBDump: nextDBDump,
NextBackup: nextBackup,
Retention: m.cfg.Backup.Retention,
}
// Expensive calls (outside lock)
status.RepoStats = m.aggregateRepoStats()
status.PerDriveRepoStats = m.perDriveRepoStats()
// Scan dump files from per-drive per-stack paths
files := m.listAllDumpFiles()
status.DumpFiles = files
// Database discovery gets its own bounded context; a discovery error just
// leaves DiscoveredDBs empty.
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
if dbs, err := DiscoverDatabases(ctx, m.logger); err == nil {
status.DiscoveredDBs = dbs
}
// Discover app data — all deployed stacks, backup is mandatory
if m.stackProvider != nil {
status.AppDataInfo = DiscoverAppData(m.stackProvider, status.DiscoveredDBs)
}
// Fill in dynamic fields under lock.
// C1: lastDBDump mutation also happens here to prevent data races with GetFullStatus.
// C2: snapshot history reversal happens before cachedStatus assignment (inside lock).
m.mu.Lock()
status.Running = m.running
status.LastDBDump = m.lastDBDump
status.LastBackup = m.lastBackup
status.LastCheckTime = m.lastCheckTime
status.LastCheckOK = m.lastCheckOK
// Copy the history so the reversal below does not reorder m.snapshotHistory.
status.SnapshotHistory = make([]SnapshotRecord, len(m.snapshotHistory))
copy(status.SnapshotHistory, m.snapshotHistory)
// C1: Cross-check lastDBDump results inside lock to prevent torn writes.
if m.lastDBDump != nil && len(files) > 0 {
fileValidation := make(map[string]DumpValidation) // keyed by filename
for _, f := range files {
fileValidation[f.FileName] = f.Validation
}
// A result that is invalid but carries no error message is replaced by
// the validation recorded for the same-named dump file on disk.
for i, r := range m.lastDBDump.Results {
if !r.Validation.Valid && r.Validation.Error == "" && r.FilePath != "" {
filename := filepath.Base(r.FilePath)
if fv, ok := fileValidation[filename]; ok {
m.lastDBDump.Results[i].Validation = fv
m.logger.Printf("[INFO] Re-validated %s from disk: valid=%v tables=%d",
filename, fv.Valid, fv.TableCount)
}
}
}
}
// C2: Reverse snapshot history before assigning to cachedStatus (inside lock).
// m.snapshotHistory is kept oldest first; the page wants newest first.
for i, j := 0, len(status.SnapshotHistory)-1; i < j; i, j = i+1, j-1 {
status.SnapshotHistory[i], status.SnapshotHistory[j] = status.SnapshotHistory[j], status.SnapshotHistory[i]
}
m.cachedStatus = status
m.cacheTime = time.Now()
m.mu.Unlock()
m.logger.Printf("[INFO] Backup status cache refreshed")
}
// GetFullStatus returns the cached backup status for page rendering.
// Returns instantly — no subprocess calls.
// Returns a deep copy so callers can safely append to slice fields without
// polluting the cache (which would cause duplicate entries on repeated calls).
//
// nextDBDump and nextBackup overwrite the cached next-run times. The whole
// function runs with m.mu held. When no cache has been built yet, a minimal
// status made of config values and the current in-memory run state is
// returned; in that path LastDBDump and LastBackup are the manager's own
// pointers, not copies.
func (m *Manager) GetFullStatus(nextDBDump, nextBackup time.Time) *FullBackupStatus {
m.mu.Lock()
defer m.mu.Unlock()
if m.cachedStatus != nil {
// Deep copy — callers (backupsHandler) append to CrossDriveSummary,
// UnconfiguredApps, and CrossDriveWarnings. If we returned the cache
// pointer directly, every page load would accumulate more entries.
status := *m.cachedStatus
status.AppDataInfo = make([]AppBackupInfo, len(m.cachedStatus.AppDataInfo))
copy(status.AppDataInfo, m.cachedStatus.AppDataInfo)
status.PerDriveRepoStats = make([]DriveRepoInfo, len(m.cachedStatus.PerDriveRepoStats))
copy(status.PerDriveRepoStats, m.cachedStatus.PerDriveRepoStats)
// These three slices are assembled by the handler from AppDataInfo + settings;
// they must always start empty so the handler builds them fresh.
status.CrossDriveSummary = nil
status.UnconfiguredApps = nil
status.CrossDriveWarnings = nil
// Update dynamic fields that don't need subprocess calls
status.Running = m.running
status.NextDBDump = nextDBDump
status.NextBackup = nextBackup
// C4: Deep-copy lastDBDump and lastBackup so callers cannot mutate shared state.
// The live values replace whatever was captured at the last cache refresh.
if m.lastDBDump != nil {
copyDump := *m.lastDBDump
if len(m.lastDBDump.Results) > 0 {
copyDump.Results = make([]DumpResult, len(m.lastDBDump.Results))
copy(copyDump.Results, m.lastDBDump.Results)
}
status.LastDBDump = &copyDump
}
if m.lastBackup != nil {
// Shallow struct copy: the Snapshot and RepoStats pointers are shared.
copyBackup := *m.lastBackup
status.LastBackup = &copyBackup
}
// Update snapshot history
status.SnapshotHistory = make([]SnapshotRecord, len(m.snapshotHistory))
copy(status.SnapshotHistory, m.snapshotHistory)
// Reverse so newest first
for i, j := 0, len(status.SnapshotHistory)-1; i < j; i, j = i+1, j-1 {
status.SnapshotHistory[i], status.SnapshotHistory[j] = status.SnapshotHistory[j], status.SnapshotHistory[i]
}
// Synthesize LastBackup from snapshot history if not in memory (e.g., after restart)
if status.LastBackup == nil && len(status.SnapshotHistory) > 0 {
latest := status.SnapshotHistory[0] // already reversed, newest first
status.LastBackup = &BackupStatus{
LastRun: latest.Time,
Success: latest.Success,
Snapshot: &SnapshotResult{
SnapshotID: latest.SnapshotID,
},
}
}
// Synthesize LastDBDump from DumpFiles on disk if not in memory
if status.LastDBDump == nil && len(status.DumpFiles) > 0 {
var results []DumpResult
var latestTime time.Time
for _, f := range status.DumpFiles {
// The container name is not known from the file; the stack name is used in its place.
results = append(results, DumpResult{
DB: DiscoveredDB{StackName: f.StackName, DBType: f.DBType, ContainerName: f.StackName},
FilePath: f.FileName,
Size: f.Size,
Validation: f.Validation,
})
// The newest file modification time stands in for the last run time.
if f.ModTime.After(latestTime) {
latestTime = f.ModTime
}
}
status.LastDBDump = &DBDumpStatus{
LastRun: latestTime,
Results: results,
Success: true,
}
}
return &status
}
// No cache yet — return a minimal status (first page load before cache is populated)
return &FullBackupStatus{
Enabled: m.cfg.Backup.Enabled,
Running: m.running,
LastDBDump: m.lastDBDump,
LastBackup: m.lastBackup,
DBDumpSchedule: m.cfg.Backup.DBDumpSchedule,
ResticSchedule: m.cfg.Backup.ResticSchedule,
PruneSchedule: m.cfg.Backup.PruneSchedule,
NextDBDump: nextDBDump,
NextBackup: nextBackup,
Retention: m.cfg.Backup.Retention,
LastCheckTime: m.lastCheckTime,
LastCheckOK: m.lastCheckOK,
}
}
// dbNames renders the discovered databases as a comma-separated list of
// "container(type)" entries, in input order.
func dbNames(dbs []DiscoveredDB) string {
	labels := make([]string, 0, len(dbs))
	for i := range dbs {
		labels = append(labels, fmt.Sprintf("%s(%s)", dbs[i].ContainerName, dbs[i].DBType))
	}
	return strings.Join(labels, ", ")
}
// dedup returns the distinct strings of items in order of first appearance.
// The result is nil when items is empty.
func dedup(items []string) []string {
	var unique []string
	known := make(map[string]struct{}, len(items))
	for i := range items {
		candidate := items[i]
		if _, dup := known[candidate]; dup {
			continue
		}
		known[candidate] = struct{}{}
		unique = append(unique, candidate)
	}
	return unique
}