bdbe170a54
New storage watchdog monitors registered storage paths every 5s. On disconnect (3 consecutive probe failures), auto-stops affected apps, lazy-unmounts stale VFS entries, fires alerts/notifications/hub report. On reconnect (UUID detected), auto-remounts via fstab, cleans stale restic locks, offers app restart. Safe disconnect UI for USB drives: confirmation dialog, stop apps, sync, unmount. Disconnected state visible across all pages (dashboard, settings, backups, monitoring) with hatched red bars and badges. Backup guards skip disconnected drives. 22 files changed (1 new: monitor/watchdog.go), ~1500 lines added. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1147 lines
35 KiB
Go
1147 lines
35 KiB
Go
package backup
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"path/filepath"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
|
|
"gitea.dooplex.hu/admin/felhom-controller/internal/monitor"
|
|
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
|
|
)
|
|
|
|
// Manager orchestrates database dumps and restic backups.
|
|
type Manager struct {
|
|
cfg *config.Config
|
|
restic *ResticManager
|
|
logger *log.Logger
|
|
pinger *monitor.Pinger
|
|
settings *settings.Settings
|
|
stackProvider StackDataProvider
|
|
systemDataPath string // fallback drive for SSD-only apps
|
|
|
|
mu sync.Mutex
|
|
lastDBDump *DBDumpStatus
|
|
lastBackup *BackupStatus
|
|
running bool
|
|
snapshotHistory []SnapshotRecord // ring buffer, last 20 entries
|
|
snapshotHistoryFile string // path to persist snapshot history JSON
|
|
lastCheckTime time.Time
|
|
lastCheckOK bool
|
|
|
|
// Cached status for page rendering (refreshed periodically)
|
|
cachedStatus *FullBackupStatus
|
|
cacheTime time.Time
|
|
|
|
// AfterBackup is called after a backup completes to refresh the cache.
|
|
// Set by main.go to avoid circular import with scheduler.
|
|
AfterBackup func()
|
|
}
|
|
|
|
// SnapshotRecord is one entry of the snapshot history: the identity of a
// restic snapshot together with the statistics of the backup run that
// produced it. It is serialized to JSON when the history is persisted.
type SnapshotRecord struct {
	SnapshotID   string        `json:"snapshot_id"`
	Time         time.Time     `json:"time"`
	FilesNew     int           `json:"files_new"`
	FilesChanged int           `json:"files_changed"`
	DataAdded    string        `json:"data_added"`
	Duration     time.Duration `json:"duration"`
	Success      bool          `json:"success"`

	// HasStats is false for historical entries that were reconstructed
	// from a restic snapshot listing and therefore carry no run statistics.
	HasStats bool `json:"has_stats"`
}
|
|
|
|
// DriveRepoInfo carries the restic repository statistics of a single drive,
// as shown in the "Részletek" (details) section.
type DriveRepoInfo struct {
	DrivePath string

	// DriveLabel is not set by this package; the handler fills it in
	// from settings.
	DriveLabel string

	TotalSize      string
	TotalSizeBytes int64
	SnapshotCount  int
}
|
|
|
|
// CrossDriveSummaryItem is the display model for one app's cross-drive
// backup row.
type CrossDriveSummaryItem struct {
	StackName   string
	DisplayName string

	// Method is "rsync" or "restic".
	Method string
	// MethodLabel is the display text for Method: "Egyszerű másolat"
	// (simple copy) or "Restic".
	MethodLabel string

	DestPath string
	// DestLabel is the label of the destination storage path.
	DestLabel string

	Schedule string
	// ScheduleLabel is the display text for Schedule: "Naponta" (daily),
	// "Hetente" (weekly) or "Kézi" (manual).
	ScheduleLabel string

	// LastStatus is "ok", "error", "running" or empty.
	LastStatus string
	// LastRunShort is a short formatted time, e.g. "03:15".
	LastRunShort string

	SizeHuman string
}
|
|
|
|
// FullBackupStatus contains everything the backup page needs.
|
|
type FullBackupStatus struct {
|
|
Enabled bool
|
|
Running bool
|
|
|
|
// DB Dumps
|
|
LastDBDump *DBDumpStatus
|
|
DumpFiles []DumpFileInfo
|
|
DiscoveredDBs []DiscoveredDB
|
|
|
|
// Restic
|
|
LastBackup *BackupStatus
|
|
SnapshotHistory []SnapshotRecord
|
|
RepoStats *RepoStats
|
|
PerDriveRepoStats []DriveRepoInfo // per-drive Tier 1 restic stats
|
|
|
|
// Schedule
|
|
DBDumpSchedule string
|
|
ResticSchedule string
|
|
PruneSchedule string
|
|
NextDBDump time.Time
|
|
NextBackup time.Time
|
|
Retention config.RetentionConfig
|
|
|
|
// Repository health
|
|
LastCheckTime time.Time
|
|
LastCheckOK bool
|
|
|
|
// Remote (placeholder)
|
|
RemoteEnabled bool
|
|
|
|
// App data backup
|
|
AppDataInfo []AppBackupInfo
|
|
|
|
// Cross-drive backup summary
|
|
CrossDriveSummary []CrossDriveSummaryItem
|
|
UnconfiguredApps []CrossDriveSummaryItem // apps with HDD data but no cross-drive config
|
|
CrossDriveWarnings []string // destination health warnings
|
|
|
|
// Flash messages (set by handlers, passed through redirect)
|
|
FlashSuccess string
|
|
FlashError string
|
|
}
|
|
|
|
// DBDumpStatus holds the last DB dump result.
|
|
type DBDumpStatus struct {
|
|
LastRun time.Time
|
|
Results []DumpResult
|
|
Success bool
|
|
Duration time.Duration
|
|
}
|
|
|
|
// BackupStatus holds the last backup result.
|
|
type BackupStatus struct {
|
|
LastRun time.Time
|
|
Snapshot *SnapshotResult
|
|
Success bool
|
|
Duration time.Duration
|
|
RepoStats *RepoStats
|
|
}
|
|
|
|
// NewManager creates a new backup manager.
|
|
func NewManager(cfg *config.Config, pinger *monitor.Pinger, sett *settings.Settings, logger *log.Logger) *Manager {
|
|
if cfg.Paths.SystemDataPath == "" {
|
|
logger.Printf("[WARN] SystemDataPath is empty in config — SSD-only apps will not have correct backup paths")
|
|
}
|
|
dataDir := cfg.Paths.DataDir
|
|
if dataDir == "" {
|
|
dataDir = "/opt/docker/felhom-controller/data"
|
|
}
|
|
return &Manager{
|
|
cfg: cfg,
|
|
restic: NewResticManager(cfg, logger),
|
|
logger: logger,
|
|
pinger: pinger,
|
|
settings: sett,
|
|
systemDataPath: cfg.Paths.SystemDataPath,
|
|
snapshotHistoryFile: filepath.Join(dataDir, "snapshot-history.json"),
|
|
}
|
|
}
|
|
|
|
// GetAppDrivePath returns the drive path for an app.
|
|
// Uses HDD_PATH from app.yaml if set, otherwise falls back to system data path.
|
|
func (m *Manager) GetAppDrivePath(stackName string) string {
|
|
if m.stackProvider != nil {
|
|
if hddPath := m.stackProvider.GetStackHDDPath(stackName); hddPath != "" {
|
|
return hddPath
|
|
}
|
|
}
|
|
if m.systemDataPath == "" {
|
|
m.logger.Printf("[ERROR] systemDataPath is empty — cannot determine drive for %s", stackName)
|
|
}
|
|
return m.systemDataPath
|
|
}
|
|
|
|
// groupStacksByDrive groups deployed stacks by their home drive path.
|
|
func (m *Manager) groupStacksByDrive() map[string][]StackSummary {
|
|
if m.stackProvider == nil {
|
|
return nil
|
|
}
|
|
result := make(map[string][]StackSummary)
|
|
for _, stack := range m.stackProvider.ListDeployedStacks() {
|
|
drive := m.GetAppDrivePath(stack.Name)
|
|
result[drive] = append(result[drive], stack)
|
|
}
|
|
return result
|
|
}
|
|
|
|
// activeDrives returns sorted list of drives that have deployed apps.
|
|
func (m *Manager) activeDrives() []string {
|
|
groups := m.groupStacksByDrive()
|
|
var drives []string
|
|
for d := range groups {
|
|
drives = append(drives, d)
|
|
}
|
|
sort.Strings(drives)
|
|
return drives
|
|
}
|
|
|
|
// RunDBDumps discovers and dumps all databases to per-drive, per-app paths.
|
|
func (m *Manager) RunDBDumps(ctx context.Context) error {
|
|
if err := m.acquireRunning(); err != nil {
|
|
return err
|
|
}
|
|
defer m.releaseRunning()
|
|
return m.runDBDumpsInternal(ctx)
|
|
}
|
|
|
|
// runDBDumpsInternal is the implementation of RunDBDumps. Caller must hold the running flag.
|
|
func (m *Manager) runDBDumpsInternal(ctx context.Context) error {
|
|
start := time.Now()
|
|
m.logger.Printf("[INFO] Starting database dump run")
|
|
|
|
dbs, err := DiscoverDatabases(ctx, m.logger)
|
|
if err != nil {
|
|
m.logger.Printf("[ERROR] Database discovery failed: %v", err)
|
|
return err
|
|
}
|
|
|
|
if len(dbs) == 0 {
|
|
m.logger.Printf("[INFO] No database containers found")
|
|
m.mu.Lock()
|
|
m.lastDBDump = &DBDumpStatus{
|
|
LastRun: time.Now(),
|
|
Success: true,
|
|
Duration: time.Since(start),
|
|
}
|
|
m.mu.Unlock()
|
|
return nil
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Discovered %d database(s): %s", len(dbs), dbNames(dbs))
|
|
|
|
// Dump each DB to its app's drive path
|
|
var results []DumpResult
|
|
allOK := true
|
|
var summary []string
|
|
var totalSize int64
|
|
|
|
for _, db := range dbs {
|
|
drivePath := m.GetAppDrivePath(db.StackName)
|
|
|
|
// Skip if drive is disconnected
|
|
if m.settings != nil && m.settings.IsDisconnected(drivePath) {
|
|
m.logger.Printf("[WARN] Skipping DB dump for %s — drive disconnected: %s", db.StackName, drivePath)
|
|
summary = append(summary, fmt.Sprintf("SKIP %s (drive disconnected)", db.ContainerName))
|
|
continue
|
|
}
|
|
|
|
dumpDir := AppDBDumpPath(drivePath, db.StackName)
|
|
|
|
result := DumpOne(ctx, db, dumpDir, m.logger)
|
|
results = append(results, result)
|
|
|
|
if result.Error != nil {
|
|
allOK = false
|
|
summary = append(summary, fmt.Sprintf("FAIL %s: %v", result.DB.ContainerName, result.Error))
|
|
m.logger.Printf("[ERROR] DB dump failed for %s: %v", result.DB.ContainerName, result.Error)
|
|
} else {
|
|
totalSize += result.Size
|
|
summary = append(summary, fmt.Sprintf("OK %s (%s)", result.DB.ContainerName, humanizeBytes(result.Size)))
|
|
|
|
// Persist validation result to settings.json
|
|
if m.settings != nil && result.FilePath != "" {
|
|
filename := filepath.Base(result.FilePath)
|
|
cache := settings.DBValidationCache{
|
|
ValidatedAt: time.Now().Format(time.RFC3339),
|
|
TableCount: result.Validation.TableCount,
|
|
HasHeader: result.Validation.Valid,
|
|
}
|
|
if !result.Validation.Valid {
|
|
cache.Error = result.Validation.Error
|
|
}
|
|
if err := m.settings.SetDBValidation(filename, cache); err != nil {
|
|
m.logger.Printf("[WARN] Failed to cache validation for %s: %v", filename, err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
duration := time.Since(start)
|
|
m.mu.Lock()
|
|
m.lastDBDump = &DBDumpStatus{
|
|
LastRun: time.Now(),
|
|
Results: results,
|
|
Success: allOK,
|
|
Duration: duration,
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
// Ping healthcheck
|
|
uuid := m.cfg.Monitoring.PingUUIDs.DBDump
|
|
body := fmt.Sprintf("DB dump: %d databases, %s total\n%s",
|
|
len(results), humanizeBytes(totalSize), strings.Join(summary, "\n"))
|
|
|
|
if allOK {
|
|
m.pinger.Ping(uuid, body)
|
|
m.logger.Printf("[INFO] DB dump completed: %d databases, %s total (%s)",
|
|
len(results), humanizeBytes(totalSize), duration.Round(time.Millisecond))
|
|
} else {
|
|
m.pinger.Fail(uuid, body)
|
|
return fmt.Errorf("some database dumps failed")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// RunBackup runs per-drive restic backup snapshots.
|
|
func (m *Manager) RunBackup(ctx context.Context) error {
|
|
if err := m.acquireRunning(); err != nil {
|
|
return err
|
|
}
|
|
defer m.releaseRunning()
|
|
return m.runBackupInternal(ctx)
|
|
}
|
|
|
|
// runBackupInternal is the implementation of RunBackup. Caller must hold the running flag.
|
|
func (m *Manager) runBackupInternal(ctx context.Context) error {
|
|
start := time.Now()
|
|
m.logger.Printf("[INFO] Starting restic backup (per-drive)")
|
|
|
|
driveStacks := m.groupStacksByDrive()
|
|
if len(driveStacks) == 0 {
|
|
m.logger.Printf("[INFO] No deployed stacks — skipping backup")
|
|
return nil
|
|
}
|
|
|
|
// Infrastructure paths included in every drive's primary repo
|
|
infraPaths := []string{
|
|
m.cfg.Paths.StacksDir,
|
|
"/opt/docker/felhom-controller/controller.yaml",
|
|
}
|
|
|
|
var lastResult *SnapshotResult
|
|
var anyErr error
|
|
driveCount := 0
|
|
|
|
for drivePath, stacks := range driveStacks {
|
|
// Skip disconnected drives
|
|
if m.settings != nil && m.settings.IsDisconnected(drivePath) {
|
|
m.logger.Printf("[WARN] Skipping backup for drive %s — disconnected", drivePath)
|
|
continue
|
|
}
|
|
|
|
repoPath := PrimaryResticRepoPath(drivePath)
|
|
|
|
// Ensure repo is initialized
|
|
if err := m.restic.EnsureInitialized(repoPath); err != nil {
|
|
m.logger.Printf("[ERROR] Restic init failed for %s: %v", repoPath, err)
|
|
anyErr = err
|
|
continue
|
|
}
|
|
|
|
// Build paths for this drive
|
|
var paths []string
|
|
paths = append(paths, infraPaths...)
|
|
|
|
for _, stack := range stacks {
|
|
// App data (appdata/<stack>/)
|
|
appData := AppDataDir(drivePath, stack.Name)
|
|
if _, err := os.Stat(appData); err == nil {
|
|
paths = append(paths, appData)
|
|
}
|
|
// HDD mounts (for apps with custom mount points)
|
|
if m.stackProvider != nil {
|
|
for _, mount := range m.stackProvider.GetStackHDDMounts(stack.Name) {
|
|
if _, err := os.Stat(mount); err == nil {
|
|
paths = append(paths, mount)
|
|
}
|
|
}
|
|
}
|
|
// DB dumps for this stack
|
|
dumpDir := AppDBDumpPath(drivePath, stack.Name)
|
|
if _, err := os.Stat(dumpDir); err == nil {
|
|
paths = append(paths, dumpDir)
|
|
}
|
|
}
|
|
|
|
// Deduplicate paths
|
|
paths = dedup(paths)
|
|
|
|
tags := []string{"felhom", m.cfg.Customer.ID, filepath.Base(drivePath)}
|
|
m.logger.Printf("[INFO] Backing up drive %s (%d apps, %d paths)", drivePath, len(stacks), len(paths))
|
|
|
|
result, err := m.restic.Snapshot(repoPath, paths, tags)
|
|
if err != nil {
|
|
m.logger.Printf("[ERROR] Restic backup failed for drive %s: %v", drivePath, err)
|
|
anyErr = err
|
|
continue
|
|
}
|
|
|
|
lastResult = result
|
|
driveCount++
|
|
|
|
// Prune check (weekly — Sunday)
|
|
if shouldPrune(m.cfg.Backup.PruneSchedule) {
|
|
m.logger.Printf("[INFO] Running weekly prune for %s", repoPath)
|
|
if err := m.restic.Prune(repoPath, m.cfg.Backup.Retention); err != nil {
|
|
m.logger.Printf("[WARN] Restic prune failed for %s: %v", repoPath, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
duration := time.Since(start)
|
|
|
|
if anyErr != nil && driveCount == 0 {
|
|
// All drives failed
|
|
m.pinger.Fail(m.cfg.Monitoring.PingUUIDs.Backup, fmt.Sprintf("Backup failed: %v", anyErr))
|
|
m.mu.Lock()
|
|
m.lastBackup = &BackupStatus{
|
|
LastRun: time.Now(),
|
|
Success: false,
|
|
Duration: duration,
|
|
}
|
|
m.mu.Unlock()
|
|
return anyErr
|
|
}
|
|
|
|
// Get aggregated stats
|
|
stats := m.aggregateRepoStats()
|
|
|
|
m.mu.Lock()
|
|
m.lastBackup = &BackupStatus{
|
|
LastRun: time.Now(),
|
|
Snapshot: lastResult,
|
|
Success: anyErr == nil,
|
|
Duration: duration,
|
|
RepoStats: stats,
|
|
}
|
|
if lastResult != nil {
|
|
m.appendSnapshotRecord(SnapshotRecord{
|
|
SnapshotID: lastResult.SnapshotID,
|
|
Time: time.Now(),
|
|
FilesNew: lastResult.FilesNew,
|
|
FilesChanged: lastResult.FilesChanged,
|
|
DataAdded: lastResult.DataAdded,
|
|
Duration: duration,
|
|
Success: true,
|
|
HasStats: true,
|
|
})
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
if lastResult != nil {
|
|
body := fmt.Sprintf("Backup OK (%d drives)\nSnapshot: %s\nNew files: %d, Changed: %d\nData added: %s\nDuration: %s",
|
|
driveCount, lastResult.SnapshotID, lastResult.FilesNew, lastResult.FilesChanged, lastResult.DataAdded,
|
|
duration.Round(time.Second))
|
|
m.pinger.Ping(m.cfg.Monitoring.PingUUIDs.Backup, body)
|
|
|
|
m.logger.Printf("[INFO] Restic backup completed: %d drives, snapshot %s, %d new, %d changed, %s added (%s)",
|
|
driveCount, lastResult.SnapshotID, lastResult.FilesNew, lastResult.FilesChanged, lastResult.DataAdded,
|
|
duration.Round(time.Millisecond))
|
|
}
|
|
|
|
// Refresh cache so the page shows updated data immediately
|
|
if m.AfterBackup != nil {
|
|
m.AfterBackup()
|
|
}
|
|
|
|
return anyErr
|
|
}
|
|
|
|
// RunIntegrityCheck runs restic check on all primary repos and pings healthchecks.
|
|
func (m *Manager) RunIntegrityCheck(ctx context.Context) error {
|
|
m.logger.Printf("[INFO] Starting restic integrity check")
|
|
start := time.Now()
|
|
|
|
drives := m.activeDrives()
|
|
if len(drives) == 0 {
|
|
m.logger.Printf("[INFO] No active drives — skipping integrity check")
|
|
return nil
|
|
}
|
|
|
|
var checkErr error
|
|
for _, drive := range drives {
|
|
repoPath := PrimaryResticRepoPath(drive)
|
|
if !m.restic.RepoExists(repoPath) {
|
|
continue
|
|
}
|
|
if err := m.restic.Check(repoPath); err != nil {
|
|
m.logger.Printf("[ERROR] Restic check failed for %s: %v", repoPath, err)
|
|
checkErr = err
|
|
}
|
|
}
|
|
|
|
duration := time.Since(start)
|
|
uuid := m.cfg.Monitoring.PingUUIDs.BackupIntegrity
|
|
|
|
m.mu.Lock()
|
|
m.lastCheckTime = time.Now()
|
|
m.lastCheckOK = checkErr == nil
|
|
m.mu.Unlock()
|
|
|
|
if checkErr != nil {
|
|
m.logger.Printf("[ERROR] Restic integrity check failed (%s): %v", duration.Round(time.Second), checkErr)
|
|
m.pinger.Fail(uuid, fmt.Sprintf("restic check failed: %v", checkErr))
|
|
return checkErr
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Restic integrity check passed (%d repos, %s)", len(drives), duration.Round(time.Second))
|
|
m.pinger.Ping(uuid, fmt.Sprintf("restic check passed (%d repos, %s)", len(drives), duration.Round(time.Second)))
|
|
return nil
|
|
}
|
|
|
|
// RunFullBackup runs DB dumps followed by restic backup.
|
|
func (m *Manager) RunFullBackup(ctx context.Context) error {
|
|
if err := m.acquireRunning(); err != nil {
|
|
return err
|
|
}
|
|
defer m.releaseRunning()
|
|
|
|
// Step 1: DB dumps
|
|
if err := m.runDBDumpsInternal(ctx); err != nil {
|
|
m.logger.Printf("[WARN] DB dump had errors, continuing with backup anyway")
|
|
}
|
|
|
|
// Step 2: Restic backup
|
|
return m.runBackupInternal(ctx)
|
|
}
|
|
|
|
// GetStatus returns the current backup status.
|
|
func (m *Manager) GetStatus() (*DBDumpStatus, *BackupStatus) {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
return m.lastDBDump, m.lastBackup
|
|
}
|
|
|
|
// GetRepoStats returns aggregated repository statistics across all primary repos.
|
|
func (m *Manager) GetRepoStats() (*RepoStats, error) {
|
|
stats := m.aggregateRepoStats()
|
|
if stats.SnapshotCount == 0 && stats.TotalSize == "" {
|
|
return stats, fmt.Errorf("no repos available")
|
|
}
|
|
return stats, nil
|
|
}
|
|
|
|
// IsRunning returns whether a backup or restore is currently in progress.
|
|
func (m *Manager) IsRunning() bool {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
return m.running
|
|
}
|
|
|
|
// acquireRunning atomically sets the running flag. Returns error if already running.
|
|
func (m *Manager) acquireRunning() error {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
if m.running {
|
|
return fmt.Errorf("backup already in progress")
|
|
}
|
|
m.running = true
|
|
return nil
|
|
}
|
|
|
|
// releaseRunning clears the running flag.
|
|
func (m *Manager) releaseRunning() {
|
|
m.mu.Lock()
|
|
m.running = false
|
|
m.mu.Unlock()
|
|
}
|
|
|
|
// GetResticPassword returns the restic repository encryption password.
|
|
func (m *Manager) GetResticPassword() (string, error) {
|
|
return m.restic.GetPassword()
|
|
}
|
|
|
|
// ListSnapshots returns snapshots from all primary restic repositories, merged and sorted.
|
|
func (m *Manager) ListSnapshots(limit int) ([]SnapshotInfo, error) {
|
|
drives := m.activeDrives()
|
|
var allSnapshots []SnapshotInfo
|
|
for _, drive := range drives {
|
|
repoPath := PrimaryResticRepoPath(drive)
|
|
if !m.restic.RepoExists(repoPath) {
|
|
continue
|
|
}
|
|
snapshots, err := m.restic.ListSnapshots(repoPath, 0)
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] Could not list snapshots from %s: %v", repoPath, err)
|
|
continue
|
|
}
|
|
for i := range snapshots {
|
|
snapshots[i].RepoPath = repoPath
|
|
}
|
|
allSnapshots = append(allSnapshots, snapshots...)
|
|
}
|
|
// Sort newest first
|
|
sort.Slice(allSnapshots, func(i, j int) bool {
|
|
return allSnapshots[i].Time.After(allSnapshots[j].Time)
|
|
})
|
|
if limit > 0 && len(allSnapshots) > limit {
|
|
allSnapshots = allSnapshots[:limit]
|
|
}
|
|
return allSnapshots, nil
|
|
}
|
|
|
|
// ListAllSnapshots returns snapshots from both primary and secondary restic repos.
|
|
// Primary snapshots get Tier=1, secondary snapshots get Tier=2.
|
|
func (m *Manager) ListAllSnapshots(limit int) ([]SnapshotInfo, error) {
|
|
drives := m.activeDrives()
|
|
var allSnapshots []SnapshotInfo
|
|
|
|
// Tier 1: primary repos (same as ListSnapshots)
|
|
for _, drive := range drives {
|
|
repoPath := PrimaryResticRepoPath(drive)
|
|
if !m.restic.RepoExists(repoPath) {
|
|
continue
|
|
}
|
|
snapshots, err := m.restic.ListSnapshots(repoPath, 0)
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] Could not list snapshots from %s: %v", repoPath, err)
|
|
continue
|
|
}
|
|
for i := range snapshots {
|
|
snapshots[i].RepoPath = repoPath
|
|
snapshots[i].Tier = 1
|
|
}
|
|
allSnapshots = append(allSnapshots, snapshots...)
|
|
}
|
|
|
|
// Tier 2: secondary restic repos on cross-drive destinations
|
|
if m.settings != nil {
|
|
destPaths := make(map[string]bool)
|
|
for _, cfg := range m.settings.GetAllCrossDriveConfigs() {
|
|
if cfg != nil && cfg.Method == "restic" && cfg.DestinationPath != "" {
|
|
destPaths[cfg.DestinationPath] = true
|
|
}
|
|
}
|
|
for destPath := range destPaths {
|
|
repoPath := SecondaryResticRepoPath(destPath)
|
|
if !m.restic.RepoExists(repoPath) {
|
|
continue
|
|
}
|
|
snapshots, err := m.restic.ListSnapshots(repoPath, 0)
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] Could not list secondary snapshots from %s: %v", repoPath, err)
|
|
continue
|
|
}
|
|
for i := range snapshots {
|
|
snapshots[i].RepoPath = repoPath
|
|
snapshots[i].Tier = 2
|
|
}
|
|
allSnapshots = append(allSnapshots, snapshots...)
|
|
}
|
|
}
|
|
|
|
// Sort newest first
|
|
sort.Slice(allSnapshots, func(i, j int) bool {
|
|
return allSnapshots[i].Time.After(allSnapshots[j].Time)
|
|
})
|
|
if limit > 0 && len(allSnapshots) > limit {
|
|
allSnapshots = allSnapshots[:limit]
|
|
}
|
|
return allSnapshots, nil
|
|
}
|
|
|
|
// SetStackProvider sets the stack data provider for app data discovery.
|
|
// C3: Write is protected by mutex since stackProvider is read by concurrent goroutines.
|
|
func (m *Manager) SetStackProvider(provider StackDataProvider) {
|
|
m.mu.Lock()
|
|
m.stackProvider = provider
|
|
m.mu.Unlock()
|
|
}
|
|
|
|
// UnlockRepo runs restic unlock on the given repo path.
|
|
func (m *Manager) UnlockRepo(ctx context.Context, repoPath string) error {
|
|
if !m.restic.RepoExists(repoPath) {
|
|
return nil // no repo to unlock
|
|
}
|
|
cmd := m.restic.UnlockCommand(ctx, repoPath)
|
|
out, err := cmd.CombinedOutput()
|
|
if err != nil {
|
|
return fmt.Errorf("restic unlock: %v (%s)", err, strings.TrimSpace(string(out)))
|
|
}
|
|
m.logger.Printf("[INFO] Restic repo unlocked: %s", repoPath)
|
|
return nil
|
|
}
|
|
|
|
// GetStackHDDMounts returns HDD mount paths for the named stack via the stack provider.
|
|
func (m *Manager) GetStackHDDMounts(name string) []string {
|
|
if m.stackProvider == nil {
|
|
return nil
|
|
}
|
|
return m.stackProvider.GetStackHDDMounts(name)
|
|
}
|
|
|
|
// DumpStackDB runs a database dump for containers belonging to a specific stack.
|
|
// Dumps to the stack's home drive: <drive>/backups/primary/<stack>/db-dumps/.
|
|
func (m *Manager) DumpStackDB(ctx context.Context, stackName string) error {
|
|
dbs, err := DiscoverDatabases(ctx, m.logger)
|
|
if err != nil {
|
|
return fmt.Errorf("database discovery failed: %w", err)
|
|
}
|
|
|
|
var stackDBs []DiscoveredDB
|
|
for _, db := range dbs {
|
|
if db.StackName == stackName {
|
|
stackDBs = append(stackDBs, db)
|
|
}
|
|
}
|
|
if len(stackDBs) == 0 {
|
|
m.logger.Printf("[DEBUG] No databases found for stack %s — skipping pre-backup dump", stackName)
|
|
return nil
|
|
}
|
|
|
|
drivePath := m.GetAppDrivePath(stackName)
|
|
if drivePath == "" || !filepath.IsAbs(drivePath) {
|
|
return fmt.Errorf("cannot determine absolute drive path for %s (systemDataPath not configured?)", stackName)
|
|
}
|
|
dumpDir := AppDBDumpPath(drivePath, stackName)
|
|
|
|
m.logger.Printf("[INFO] Running pre-backup DB dump for %s (%d database(s)) → %s", stackName, len(stackDBs), dumpDir)
|
|
|
|
for _, db := range stackDBs {
|
|
result := DumpOne(ctx, db, dumpDir, m.logger)
|
|
if result.Error != nil {
|
|
return fmt.Errorf("DB dump failed for %s: %w", result.DB.ContainerName, result.Error)
|
|
}
|
|
m.logger.Printf("[INFO] Pre-backup DB dump OK: %s (%s)", result.DB.ContainerName, humanizeBytes(result.Size))
|
|
|
|
// Persist validation to settings
|
|
if m.settings != nil && result.FilePath != "" {
|
|
filename := filepath.Base(result.FilePath)
|
|
cache := settings.DBValidationCache{
|
|
ValidatedAt: time.Now().Format(time.RFC3339),
|
|
TableCount: result.Validation.TableCount,
|
|
HasHeader: result.Validation.Valid,
|
|
}
|
|
if !result.Validation.Valid {
|
|
cache.Error = result.Validation.Error
|
|
}
|
|
_ = m.settings.SetDBValidation(filename, cache)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// perDriveRepoStats returns per-drive restic repository statistics for the Részletek section.
|
|
func (m *Manager) perDriveRepoStats() []DriveRepoInfo {
|
|
drives := m.activeDrives()
|
|
var infos []DriveRepoInfo
|
|
for _, drive := range drives {
|
|
repoPath := PrimaryResticRepoPath(drive)
|
|
if !m.restic.RepoExists(repoPath) {
|
|
continue
|
|
}
|
|
stats, err := m.restic.Stats(repoPath)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
infos = append(infos, DriveRepoInfo{
|
|
DrivePath: drive,
|
|
TotalSize: stats.TotalSize,
|
|
TotalSizeBytes: stats.TotalSizeBytes,
|
|
SnapshotCount: stats.SnapshotCount,
|
|
})
|
|
}
|
|
return infos
|
|
}
|
|
|
|
// aggregateRepoStats combines stats from all primary restic repos.
|
|
func (m *Manager) aggregateRepoStats() *RepoStats {
|
|
drives := m.activeDrives()
|
|
agg := &RepoStats{}
|
|
var totalBytes int64
|
|
|
|
for _, drive := range drives {
|
|
repoPath := PrimaryResticRepoPath(drive)
|
|
if !m.restic.RepoExists(repoPath) {
|
|
continue
|
|
}
|
|
stats, err := m.restic.Stats(repoPath)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
agg.SnapshotCount += stats.SnapshotCount
|
|
totalBytes += stats.TotalSizeBytes
|
|
if stats.LatestSnapshot != nil {
|
|
if agg.LatestSnapshot == nil || stats.LatestSnapshot.Time.After(agg.LatestSnapshot.Time) {
|
|
agg.LatestSnapshot = stats.LatestSnapshot
|
|
}
|
|
}
|
|
}
|
|
|
|
agg.TotalSizeBytes = totalBytes
|
|
if totalBytes > 0 {
|
|
agg.TotalSize = humanizeBytes(totalBytes)
|
|
}
|
|
return agg
|
|
}
|
|
|
|
// listAllDumpFiles scans per-drive per-stack DB dump directories.
|
|
func (m *Manager) listAllDumpFiles() []DumpFileInfo {
|
|
var allFiles []DumpFileInfo
|
|
for drive, stacks := range m.groupStacksByDrive() {
|
|
for _, stack := range stacks {
|
|
dumpDir := AppDBDumpPath(drive, stack.Name)
|
|
if files, err := ListDumpFiles(dumpDir); err == nil {
|
|
allFiles = append(allFiles, files...)
|
|
}
|
|
}
|
|
}
|
|
return allFiles
|
|
}
|
|
|
|
// shouldPrune decides whether a prune is due right now for the given
// schedule name (compared case-insensitively).
//
// "daily" is always due. "weekly" and any other value, including the empty
// string, are due on Sundays, evaluated in the Europe/Budapest time zone (UTC
// when that zone cannot be loaded).
func shouldPrune(schedule string) bool {
	if strings.ToLower(schedule) == "daily" {
		return true
	}

	// Everything else is treated as the weekly schedule.
	zone, err := time.LoadLocation("Europe/Budapest")
	if err != nil {
		zone = time.UTC
	}
	return time.Now().In(zone).Weekday() == time.Sunday
}
|
|
|
|
// appendSnapshotRecord adds a record to the ring buffer (max 20). Caller must hold m.mu.
|
|
func (m *Manager) appendSnapshotRecord(rec SnapshotRecord) {
|
|
m.snapshotHistory = append(m.snapshotHistory, rec)
|
|
if len(m.snapshotHistory) > 20 {
|
|
m.snapshotHistory = m.snapshotHistory[len(m.snapshotHistory)-20:]
|
|
}
|
|
m.saveSnapshotHistory()
|
|
}
|
|
|
|
// saveSnapshotHistory persists the current snapshotHistory to disk as JSON.
|
|
// Caller must hold m.mu. Writes atomically (tmp file + rename).
|
|
func (m *Manager) saveSnapshotHistory() {
|
|
if m.snapshotHistoryFile == "" {
|
|
return
|
|
}
|
|
data, err := json.Marshal(m.snapshotHistory)
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] Could not marshal snapshot history: %v", err)
|
|
return
|
|
}
|
|
tmp := m.snapshotHistoryFile + ".tmp"
|
|
if err := os.WriteFile(tmp, data, 0644); err != nil {
|
|
m.logger.Printf("[WARN] Could not write snapshot history tmp file: %v", err)
|
|
return
|
|
}
|
|
if err := os.Rename(tmp, m.snapshotHistoryFile); err != nil {
|
|
m.logger.Printf("[WARN] Could not rename snapshot history file: %v", err)
|
|
}
|
|
}
|
|
|
|
// loadSnapshotHistoryFromFile reads the persisted snapshot history from disk.
|
|
// Returns nil if the file does not exist or cannot be read.
|
|
func (m *Manager) loadSnapshotHistoryFromFile() []SnapshotRecord {
|
|
if m.snapshotHistoryFile == "" {
|
|
return nil
|
|
}
|
|
data, err := os.ReadFile(m.snapshotHistoryFile)
|
|
if err != nil {
|
|
if !os.IsNotExist(err) {
|
|
m.logger.Printf("[WARN] Could not read snapshot history file: %v", err)
|
|
}
|
|
return nil
|
|
}
|
|
var records []SnapshotRecord
|
|
if err := json.Unmarshal(data, &records); err != nil {
|
|
m.logger.Printf("[WARN] Could not parse snapshot history file: %v", err)
|
|
return nil
|
|
}
|
|
return records
|
|
}
|
|
|
|
// LoadSnapshotHistory populates the snapshot history on startup.
|
|
// First tries to load persisted history (with delta stats) from disk.
|
|
// Merges with restic repo snapshots to pick up any entries not in the persisted file.
|
|
func (m *Manager) LoadSnapshotHistory() {
|
|
// Try loading persisted records (contains delta stats from actual backup runs)
|
|
persisted := m.loadSnapshotHistoryFromFile()
|
|
|
|
// Build a lookup map of persisted records by SnapshotID
|
|
persistedByID := make(map[string]SnapshotRecord, len(persisted))
|
|
for _, r := range persisted {
|
|
persistedByID[r.SnapshotID] = r
|
|
}
|
|
|
|
// Query restic repos for any snapshots not in the persisted file
|
|
drives := m.activeDrives()
|
|
var allSnapshots []SnapshotInfo
|
|
|
|
for _, drive := range drives {
|
|
repoPath := PrimaryResticRepoPath(drive)
|
|
if !m.restic.RepoExists(repoPath) {
|
|
continue
|
|
}
|
|
snapshots, err := m.restic.ListSnapshots(repoPath, 20)
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] Could not load snapshot history from %s: %v", repoPath, err)
|
|
continue
|
|
}
|
|
allSnapshots = append(allSnapshots, snapshots...)
|
|
}
|
|
|
|
// Sort by time (oldest first for ring buffer)
|
|
sort.Slice(allSnapshots, func(i, j int) bool {
|
|
return allSnapshots[i].Time.Before(allSnapshots[j].Time)
|
|
})
|
|
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
if len(persisted) > 0 {
|
|
// Start from persisted records, add any restic snapshots not already there
|
|
m.snapshotHistory = persisted
|
|
for _, s := range allSnapshots {
|
|
if _, found := persistedByID[s.ID]; !found {
|
|
m.snapshotHistory = append(m.snapshotHistory, SnapshotRecord{
|
|
SnapshotID: s.ID,
|
|
Time: s.Time,
|
|
HasStats: false,
|
|
Success: true,
|
|
})
|
|
}
|
|
}
|
|
// Re-sort by time after merge (oldest first for ring buffer)
|
|
sort.Slice(m.snapshotHistory, func(i, j int) bool {
|
|
return m.snapshotHistory[i].Time.Before(m.snapshotHistory[j].Time)
|
|
})
|
|
m.logger.Printf("[INFO] Loaded %d snapshots from persisted history (merged with %d restic entries)", len(persisted), len(allSnapshots))
|
|
} else {
|
|
// No persisted file — fall back to restic-only loading (first run)
|
|
for _, s := range allSnapshots {
|
|
m.snapshotHistory = append(m.snapshotHistory, SnapshotRecord{
|
|
SnapshotID: s.ID,
|
|
Time: s.Time,
|
|
HasStats: false,
|
|
Success: true,
|
|
})
|
|
}
|
|
m.logger.Printf("[INFO] Loaded %d historical snapshots from %d restic repos (no persisted history)", len(m.snapshotHistory), len(drives))
|
|
}
|
|
|
|
if len(m.snapshotHistory) > 20 {
|
|
m.snapshotHistory = m.snapshotHistory[len(m.snapshotHistory)-20:]
|
|
}
|
|
}
|
|
|
|
// RefreshCache updates the cached full status. Called by scheduler every 5 minutes
|
|
// and after each backup run.
|
|
func (m *Manager) RefreshCache(nextDBDump, nextBackup time.Time) {
|
|
status := &FullBackupStatus{
|
|
Enabled: m.cfg.Backup.Enabled,
|
|
|
|
DBDumpSchedule: m.cfg.Backup.DBDumpSchedule,
|
|
ResticSchedule: m.cfg.Backup.ResticSchedule,
|
|
PruneSchedule: m.cfg.Backup.PruneSchedule,
|
|
NextDBDump: nextDBDump,
|
|
NextBackup: nextBackup,
|
|
Retention: m.cfg.Backup.Retention,
|
|
}
|
|
|
|
// Expensive calls (outside lock)
|
|
status.RepoStats = m.aggregateRepoStats()
|
|
status.PerDriveRepoStats = m.perDriveRepoStats()
|
|
|
|
// Scan dump files from per-drive per-stack paths
|
|
files := m.listAllDumpFiles()
|
|
status.DumpFiles = files
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
|
defer cancel()
|
|
if dbs, err := DiscoverDatabases(ctx, m.logger); err == nil {
|
|
status.DiscoveredDBs = dbs
|
|
}
|
|
|
|
// Discover app data — all deployed stacks, backup is mandatory
|
|
if m.stackProvider != nil {
|
|
status.AppDataInfo = DiscoverAppData(m.stackProvider, status.DiscoveredDBs)
|
|
}
|
|
|
|
// Fill in dynamic fields under lock.
|
|
// C1: lastDBDump mutation also happens here to prevent data races with GetFullStatus.
|
|
// C2: snapshot history reversal happens before cachedStatus assignment (inside lock).
|
|
m.mu.Lock()
|
|
status.Running = m.running
|
|
status.LastDBDump = m.lastDBDump
|
|
status.LastBackup = m.lastBackup
|
|
status.LastCheckTime = m.lastCheckTime
|
|
status.LastCheckOK = m.lastCheckOK
|
|
status.SnapshotHistory = make([]SnapshotRecord, len(m.snapshotHistory))
|
|
copy(status.SnapshotHistory, m.snapshotHistory)
|
|
|
|
// C1: Cross-check lastDBDump results inside lock to prevent torn writes.
|
|
if m.lastDBDump != nil && len(files) > 0 {
|
|
fileValidation := make(map[string]DumpValidation) // keyed by filename
|
|
for _, f := range files {
|
|
fileValidation[f.FileName] = f.Validation
|
|
}
|
|
for i, r := range m.lastDBDump.Results {
|
|
if !r.Validation.Valid && r.Validation.Error == "" && r.FilePath != "" {
|
|
filename := filepath.Base(r.FilePath)
|
|
if fv, ok := fileValidation[filename]; ok {
|
|
m.lastDBDump.Results[i].Validation = fv
|
|
m.logger.Printf("[INFO] Re-validated %s from disk: valid=%v tables=%d",
|
|
filename, fv.Valid, fv.TableCount)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// C2: Reverse snapshot history before assigning to cachedStatus (inside lock).
|
|
for i, j := 0, len(status.SnapshotHistory)-1; i < j; i, j = i+1, j-1 {
|
|
status.SnapshotHistory[i], status.SnapshotHistory[j] = status.SnapshotHistory[j], status.SnapshotHistory[i]
|
|
}
|
|
|
|
m.cachedStatus = status
|
|
m.cacheTime = time.Now()
|
|
m.mu.Unlock()
|
|
|
|
m.logger.Printf("[INFO] Backup status cache refreshed")
|
|
}
|
|
|
|
// GetFullStatus returns the cached backup status for page rendering.
|
|
// Returns instantly — no subprocess calls.
|
|
// Returns a deep copy so callers can safely append to slice fields without
|
|
// polluting the cache (which would cause duplicate entries on repeated calls).
|
|
func (m *Manager) GetFullStatus(nextDBDump, nextBackup time.Time) *FullBackupStatus {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
if m.cachedStatus != nil {
|
|
// Deep copy — callers (backupsHandler) append to CrossDriveSummary,
|
|
// UnconfiguredApps, and CrossDriveWarnings. If we returned the cache
|
|
// pointer directly, every page load would accumulate more entries.
|
|
status := *m.cachedStatus
|
|
status.AppDataInfo = make([]AppBackupInfo, len(m.cachedStatus.AppDataInfo))
|
|
copy(status.AppDataInfo, m.cachedStatus.AppDataInfo)
|
|
status.PerDriveRepoStats = make([]DriveRepoInfo, len(m.cachedStatus.PerDriveRepoStats))
|
|
copy(status.PerDriveRepoStats, m.cachedStatus.PerDriveRepoStats)
|
|
// These three slices are assembled by the handler from AppDataInfo + settings;
|
|
// they must always start empty so the handler builds them fresh.
|
|
status.CrossDriveSummary = nil
|
|
status.UnconfiguredApps = nil
|
|
status.CrossDriveWarnings = nil
|
|
|
|
// Update dynamic fields that don't need subprocess calls
|
|
status.Running = m.running
|
|
status.NextDBDump = nextDBDump
|
|
status.NextBackup = nextBackup
|
|
// C4: Deep-copy lastDBDump and lastBackup so callers cannot mutate shared state.
|
|
if m.lastDBDump != nil {
|
|
copyDump := *m.lastDBDump
|
|
if len(m.lastDBDump.Results) > 0 {
|
|
copyDump.Results = make([]DumpResult, len(m.lastDBDump.Results))
|
|
copy(copyDump.Results, m.lastDBDump.Results)
|
|
}
|
|
status.LastDBDump = ©Dump
|
|
}
|
|
if m.lastBackup != nil {
|
|
copyBackup := *m.lastBackup
|
|
status.LastBackup = ©Backup
|
|
}
|
|
// Update snapshot history
|
|
status.SnapshotHistory = make([]SnapshotRecord, len(m.snapshotHistory))
|
|
copy(status.SnapshotHistory, m.snapshotHistory)
|
|
// Reverse so newest first
|
|
for i, j := 0, len(status.SnapshotHistory)-1; i < j; i, j = i+1, j-1 {
|
|
status.SnapshotHistory[i], status.SnapshotHistory[j] = status.SnapshotHistory[j], status.SnapshotHistory[i]
|
|
}
|
|
|
|
// Synthesize LastBackup from snapshot history if not in memory (e.g., after restart)
|
|
if status.LastBackup == nil && len(status.SnapshotHistory) > 0 {
|
|
latest := status.SnapshotHistory[0] // already reversed, newest first
|
|
status.LastBackup = &BackupStatus{
|
|
LastRun: latest.Time,
|
|
Success: latest.Success,
|
|
Snapshot: &SnapshotResult{
|
|
SnapshotID: latest.SnapshotID,
|
|
},
|
|
}
|
|
}
|
|
|
|
// Synthesize LastDBDump from DumpFiles on disk if not in memory
|
|
if status.LastDBDump == nil && len(status.DumpFiles) > 0 {
|
|
var results []DumpResult
|
|
var latestTime time.Time
|
|
for _, f := range status.DumpFiles {
|
|
results = append(results, DumpResult{
|
|
DB: DiscoveredDB{StackName: f.StackName, DBType: f.DBType, ContainerName: f.StackName},
|
|
FilePath: f.FileName,
|
|
Size: f.Size,
|
|
Validation: f.Validation,
|
|
})
|
|
if f.ModTime.After(latestTime) {
|
|
latestTime = f.ModTime
|
|
}
|
|
}
|
|
status.LastDBDump = &DBDumpStatus{
|
|
LastRun: latestTime,
|
|
Results: results,
|
|
Success: true,
|
|
}
|
|
}
|
|
|
|
return &status
|
|
}
|
|
|
|
// No cache yet — return a minimal status (first page load before cache is populated)
|
|
return &FullBackupStatus{
|
|
Enabled: m.cfg.Backup.Enabled,
|
|
Running: m.running,
|
|
LastDBDump: m.lastDBDump,
|
|
LastBackup: m.lastBackup,
|
|
DBDumpSchedule: m.cfg.Backup.DBDumpSchedule,
|
|
ResticSchedule: m.cfg.Backup.ResticSchedule,
|
|
PruneSchedule: m.cfg.Backup.PruneSchedule,
|
|
NextDBDump: nextDBDump,
|
|
NextBackup: nextBackup,
|
|
Retention: m.cfg.Backup.Retention,
|
|
LastCheckTime: m.lastCheckTime,
|
|
LastCheckOK: m.lastCheckOK,
|
|
}
|
|
}
|
|
|
|
func dbNames(dbs []DiscoveredDB) string {
|
|
var names []string
|
|
for _, db := range dbs {
|
|
names = append(names, fmt.Sprintf("%s(%s)", db.ContainerName, db.DBType))
|
|
}
|
|
return strings.Join(names, ", ")
|
|
}
|
|
|
|
// dedup returns the distinct strings of items, keeping the first occurrence of
// each and the original relative order. The result is nil when items is empty.
func dedup(items []string) []string {
	var unique []string
	seen := make(map[string]struct{}, len(items))
	for _, s := range items {
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		unique = append(unique, s)
	}
	return unique
}
|