From 70d503a90213d45e9057f1f9ab38be9acbab2f21 Mon Sep 17 00:00:00 2001 From: kisfenyo Date: Wed, 18 Feb 2026 19:46:16 +0100 Subject: [PATCH] fix(backup): 4 bug fixes from v0.14.1 code review (v0.14.2) Bug 1 (HIGH): add --exclude _* to rsync --delete so _db/ and _config/ directories are never deleted between backup runs (crossdrive.go) Bug 2 (MEDIUM): refactor RunDBDumps/RunBackup/RunFullBackup to use acquireRunning/releaseRunning helpers; extract runDBDumpsInternal and runBackupInternal so all three public entry points set m.running and RunFullBackup no longer deadlocks calling the public methods (backup.go) Bug 3 (MEDIUM): log [WARN] when GetDiskUsage returns nil in ValidateDestination instead of silently skipping space checks (crossdrive.go) Bug 4 (MEDIUM): add [WARN] on empty SystemDataPath in NewManager; add [ERROR] in GetAppDrivePath; guard DumpStackDB against empty/relative paths (backup.go) Co-Authored-By: Claude Sonnet 4.6 --- controller/internal/backup/backup.go | 64 ++++++++++++++++++------ controller/internal/backup/crossdrive.go | 34 +++++++------ 2 files changed, 70 insertions(+), 28 deletions(-) diff --git a/controller/internal/backup/backup.go b/controller/internal/backup/backup.go index 03cb40b..a4f1dfd 100644 --- a/controller/internal/backup/backup.go +++ b/controller/internal/backup/backup.go @@ -132,6 +132,9 @@ type BackupStatus struct { // NewManager creates a new backup manager. func NewManager(cfg *config.Config, pinger *monitor.Pinger, sett *settings.Settings, logger *log.Logger) *Manager { + if cfg.Paths.SystemDataPath == "" { + logger.Printf("[WARN] SystemDataPath is empty in config — SSD-only apps will not have correct backup paths") + } return &Manager{ cfg: cfg, restic: NewResticManager(cfg, logger), @@ -150,6 +153,9 @@ func (m *Manager) GetAppDrivePath(stackName string) string { return hddPath } } + if m.systemDataPath == "" { + m.logger.Printf("[ERROR] systemDataPath is empty — cannot determine drive for %s", stackName) + } return m.systemDataPath } @@ -179,6 +185,15 @@ func (m *Manager) activeDrives() []string { // RunDBDumps discovers and dumps all databases to per-drive, per-app paths. func (m *Manager) RunDBDumps(ctx context.Context) error { + if err := m.acquireRunning(); err != nil { + return err + } + defer m.releaseRunning() + return m.runDBDumpsInternal(ctx) +} + +// runDBDumpsInternal is the implementation of RunDBDumps. Caller must hold the running flag. +func (m *Manager) runDBDumpsInternal(ctx context.Context) error { start := time.Now() m.logger.Printf("[INFO] Starting database dump run") @@ -270,6 +285,15 @@ func (m *Manager) RunDBDumps(ctx context.Context) error { // RunBackup runs per-drive restic backup snapshots. func (m *Manager) RunBackup(ctx context.Context) error { + if err := m.acquireRunning(); err != nil { + return err + } + defer m.releaseRunning() + return m.runBackupInternal(ctx) +} + +// runBackupInternal is the implementation of RunBackup. Caller must hold the running flag. +func (m *Manager) runBackupInternal(ctx context.Context) error { start := time.Now() m.logger.Printf("[INFO] Starting restic backup (per-drive)") @@ -452,27 +476,18 @@ func (m *Manager) RunIntegrityCheck(ctx context.Context) error { // RunFullBackup runs DB dumps followed by restic backup. func (m *Manager) RunFullBackup(ctx context.Context) error { - m.mu.Lock() - if m.running { - m.mu.Unlock() - return fmt.Errorf("backup already in progress") + if err := m.acquireRunning(); err != nil { + return err } - m.running = true - m.mu.Unlock() - - defer func() { - m.mu.Lock() - m.running = false - m.mu.Unlock() - }() + defer m.releaseRunning() // Step 1: DB dumps - if err := m.RunDBDumps(ctx); err != nil { + if err := m.runDBDumpsInternal(ctx); err != nil { m.logger.Printf("[WARN] DB dump had errors, continuing with backup anyway") } // Step 2: Restic backup - return m.RunBackup(ctx) + return m.runBackupInternal(ctx) } // GetStatus returns the current backup status. @@ -498,6 +513,24 @@ func (m *Manager) IsRunning() bool { return m.running } +// acquireRunning atomically sets the running flag. Returns error if already running. +func (m *Manager) acquireRunning() error { + m.mu.Lock() + defer m.mu.Unlock() + if m.running { + return fmt.Errorf("backup already in progress") + } + m.running = true + return nil +} + +// releaseRunning clears the running flag. +func (m *Manager) releaseRunning() { + m.mu.Lock() + m.running = false + m.mu.Unlock() +} + // GetResticPassword returns the restic repository encryption password. func (m *Manager) GetResticPassword() (string, error) { return m.restic.GetPassword() @@ -568,6 +601,9 @@ func (m *Manager) DumpStackDB(ctx context.Context, stackName string) error { } drivePath := m.GetAppDrivePath(stackName) + if drivePath == "" || !filepath.IsAbs(drivePath) { + return fmt.Errorf("cannot determine absolute drive path for %s (systemDataPath not configured?)", stackName) + } dumpDir := AppDBDumpPath(drivePath, stackName) m.logger.Printf("[INFO] Running pre-backup DB dump for %s (%d database(s)) → %s", stackName, len(stackDBs), dumpDir) diff --git a/controller/internal/backup/crossdrive.go b/controller/internal/backup/crossdrive.go index e897ca0..2b83a2e 100644 --- a/controller/internal/backup/crossdrive.go +++ b/controller/internal/backup/crossdrive.go @@ -212,20 +212,23 @@ func (r *CrossDriveRunner) ValidateDestination(path string) error { if !system.IsWritable(path) { return fmt.Errorf("destination %s is not writable", path) } - if di := system.GetDiskUsage(path); di != nil { - if onSystemDrive { - // System drive: protect OS stability — require ≥10 GB free and <90% used - if di.AvailGB < 10 { - return fmt.Errorf("destination %s is on the system drive with only %.1f GB free — at least 10 GB required to protect OS stability", path, di.AvailGB) - } - if di.UsedPercent >= 90 { - return fmt.Errorf("destination %s is on the system drive at %.0f%% capacity — maximum 90%% allowed", path, di.UsedPercent) - } - } else { - // External drive: just ensure it's not completely full - if di.AvailGB < 0.1 { - return fmt.Errorf("destination %s has insufficient free space (%.1f GB free)", path, di.AvailGB) - } + di := system.GetDiskUsage(path) + if di == nil { + r.logger.Printf("[WARN] Cannot determine disk usage for %s — proceeding without space verification", path) + return nil + } + if onSystemDrive { + // System drive: protect OS stability — require ≥10 GB free and <90% used + if di.AvailGB < 10 { + return fmt.Errorf("destination %s is on the system drive with only %.1f GB free — at least 10 GB required to protect OS stability", path, di.AvailGB) + } + if di.UsedPercent >= 90 { + return fmt.Errorf("destination %s is on the system drive at %.0f%% capacity — maximum 90%% allowed", path, di.UsedPercent) + } + } else { + // External drive: just ensure it's not completely full + if di.AvailGB < 0.1 { + return fmt.Errorf("destination %s has insufficient free space (%.1f GB free)", path, di.AvailGB) } } return nil @@ -263,8 +266,11 @@ func (r *CrossDriveRunner) runRsyncBackup(ctx context.Context, stackName, destBa src := strings.TrimRight(srcMount, "/") + "/" dst := strings.TrimRight(dstPath, "/") + "/" + // Exclude controller-managed directories (underscore prefix) to prevent --delete from removing + // _db/ and _config/ that were created by previous backup runs. // Exclude app-internal DB dump files — the controller handles DB backups via pg_dump separately. cmd := exec.CommandContext(ctx, "rsync", "-a", "--delete", + "--exclude", "_*", "--exclude", "backups/*.sql.gz", "--exclude", "backups/*.sql", "--exclude", "backups/*.dump",