Files
deploy-felhom-compose/controller/internal/backup/crossdrive.go
T
admin bdbe170a54 feat: storage watchdog — USB disconnect detection, auto-stop, safe eject, auto-reconnect (v0.17.0)
New storage watchdog monitors registered storage paths every 5s. On disconnect
(3 consecutive probe failures), auto-stops affected apps, lazy-unmounts stale
VFS entries, fires alerts/notifications/hub report. On reconnect (UUID detected),
auto-remounts via fstab, cleans stale restic locks, offers app restart.

Safe disconnect UI for USB drives: confirmation dialog, stop apps, sync, unmount.
Disconnected state visible across all pages (dashboard, settings, backups, monitoring)
with hatched red bars and badges. Backup guards skip disconnected drives.

22 files changed (1 new: monitor/watchdog.go), ~1500 lines added.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-19 19:42:26 +01:00

597 lines
20 KiB
Go

package backup
import (
"context"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"time"
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
)
// DBDumper can run a database dump for a specific stack.
// CrossDriveRunner calls it right before a stack's cross-drive backup so the
// dump files picked up by that backup are fresh; a dump failure is treated as
// non-fatal by the runner.
type DBDumper interface {
// DumpStackDB dumps the database(s) of stackName and returns an error if
// the dump could not be produced.
DumpStackDB(ctx context.Context, stackName string) error
}
// CrossDriveRunner handles per-app backup to secondary storage.
// It supports an rsync mirror and a restic repository as backup methods and
// tracks which apps currently have a backup in progress so the same app is
// never backed up twice concurrently.
type CrossDriveRunner struct {
// sett provides cross-drive configs, status persistence, storage paths and
// drive connection state.
sett *settings.Settings
// stackProvider resolves per-stack data: HDD path, HDD mounts, compose path
// and the list of deployed stacks.
stackProvider StackDataProvider
// dbDumper is optional (nil until SetDBDumper is called); when set it is
// invoked before each app backup.
dbDumper DBDumper
systemDataPath string // fallback drive for SSD-only apps
stacksDir string // path to stacks dir (for infra backup)
controllerYAMLPath string // path to controller.yaml (for infra backup)
// logger receives all [DEBUG]/[INFO]/[WARN]/[ERROR] messages of the runner.
logger *log.Logger
// mu guards running.
mu sync.Mutex
running map[string]bool // per-app running state
}
// NewCrossDriveRunner builds a CrossDriveRunner around the given settings
// store and stack data provider.
//
// systemDataPath is the drive used for apps that have no HDD path, stacksDir
// is the stacks directory included in infrastructure backups, and logger
// receives all runner output. The controller.yaml location is a hard-coded
// path. No DB dumper is attached; call SetDBDumper to add one.
func NewCrossDriveRunner(sett *settings.Settings, provider StackDataProvider, systemDataPath, stacksDir string, logger *log.Logger) *CrossDriveRunner {
	const controllerYAML = "/opt/docker/felhom-controller/controller.yaml"

	runner := new(CrossDriveRunner)
	runner.sett = sett
	runner.stackProvider = provider
	runner.systemDataPath = systemDataPath
	runner.stacksDir = stacksDir
	runner.controllerYAMLPath = controllerYAML
	runner.logger = logger
	runner.running = map[string]bool{}
	return runner
}
// SetDBDumper sets the DB dumper for pre-backup database dumps.
// Called after backup manager is initialized (avoids circular init dependency).
// The field is assigned without taking r.mu.
// NOTE(review): presumably this is only called during startup, before any
// backup can run — confirm against the caller.
func (r *CrossDriveRunner) SetDBDumper(d DBDumper) {
r.dbDumper = d
}
// getAppDrivePath resolves the "home" drive of a stack: the HDD path reported
// by the stack provider, or the system data path when the provider reports
// none.
func (r *CrossDriveRunner) getAppDrivePath(stackName string) string {
	hdd := r.stackProvider.GetStackHDDPath(stackName)
	if hdd == "" {
		return r.systemDataPath
	}
	return hdd
}
// RunAppBackup runs cross-drive backup for a single app.
//
// Steps, in order:
//  1. Reject apps without an enabled cross-drive config.
//  2. Claim the per-app running slot (only one backup per app at a time).
//  3. Skip when the source or destination drive is marked disconnected.
//  4. Record "running" in settings, request a fresh DB dump (non-fatal on
//     failure) and validate the destination.
//  5. Refuse to run when the destination overlaps any source mount.
//  6. Execute the configured method ("rsync" or "restic") and persist the
//     outcome (status, error text, duration and — for rsync — backup size).
//
// It returns an error when the backup was not configured, was already running,
// was skipped because of a disconnected drive, or failed. A nil return means
// the backup completed and its status was stored as "ok".
func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) error {
	cfg := r.sett.GetCrossDriveConfig(stackName)
	if cfg == nil || !cfg.Enabled {
		return fmt.Errorf("cross-drive backup not configured or disabled for %s", stackName)
	}

	// Prevent concurrent runs for the same app.
	r.mu.Lock()
	if r.running[stackName] {
		r.mu.Unlock()
		return fmt.Errorf("cross-drive backup already running for %s", stackName)
	}
	r.running[stackName] = true
	r.mu.Unlock()
	// Single release point for the running slot: every return below goes
	// through this defer, so no path needs to reset the flag by hand.
	defer func() {
		r.mu.Lock()
		delete(r.running, stackName)
		r.mu.Unlock()
	}()

	// Check if source or destination drive is disconnected. These are skips,
	// not failures, so the stored status is deliberately left untouched.
	if srcDrive := r.stackProvider.GetStackHDDPath(stackName); srcDrive != "" && r.sett.IsDisconnected(srcDrive) {
		return fmt.Errorf("source drive disconnected: %s", srcDrive)
	}
	if r.sett.IsDisconnected(cfg.DestinationPath) {
		return fmt.Errorf("destination drive disconnected: %s", cfg.DestinationPath)
	}

	// Mark as running in settings. The status is informational only, so a
	// failure to persist it must not prevent the backup itself.
	_ = r.sett.UpdateCrossDriveStatus(stackName, func(c *settings.CrossDriveBackup) {
		c.LastStatus = "running"
	})

	start := time.Now()
	r.logger.Printf("[INFO] Cross-drive backup starting: %s → %s (method: %s)",
		stackName, cfg.DestinationPath, cfg.Method)

	// Trigger fresh DB dump for this app before cross-drive backup.
	if r.dbDumper != nil {
		if err := r.dbDumper.DumpStackDB(ctx, stackName); err != nil {
			// Non-fatal: user data backup is still valuable without fresh dump.
			r.logger.Printf("[WARN] Pre-backup DB dump failed for %s: %v — proceeding with user data backup", stackName, err)
		}
	}

	if err := r.ValidateDestination(cfg.DestinationPath); err != nil {
		r.updateStatus(stackName, "error", err.Error(), time.Since(start), "")
		return fmt.Errorf("destination validation failed: %w", err)
	}

	// Resolve HDD mounts for this app (may be empty for config-only apps).
	mounts := r.stackProvider.GetStackHDDMounts(stackName)

	// Safety: destination must not overlap with any source, otherwise the
	// backup would copy into (or delete from) its own source tree.
	for _, m := range mounts {
		if system.PathsOverlap(cfg.DestinationPath, m) {
			msg := fmt.Sprintf("destination %s overlaps with source %s — aborted", cfg.DestinationPath, m)
			r.updateStatus(stackName, "error", msg, time.Since(start), "")
			return fmt.Errorf("%s", msg)
		}
	}

	var runErr error
	switch cfg.Method {
	case "rsync":
		runErr = r.runRsyncBackup(ctx, stackName, cfg.DestinationPath, mounts)
	case "restic":
		runErr = r.runResticBackup(ctx, stackName, cfg.DestinationPath, mounts)
	default:
		runErr = fmt.Errorf("unknown backup method: %s", cfg.Method)
	}

	duration := time.Since(start)
	if runErr != nil {
		r.logger.Printf("[ERROR] Cross-drive backup failed: %s: %v", stackName, runErr)
		r.updateStatus(stackName, "error", runErr.Error(), duration, "")
		return runErr
	}

	// Calculate backup size. Only the rsync mirror has a per-app directory
	// that can be measured; for restic the previously stored size is kept.
	var sizeHuman string
	if cfg.Method == "rsync" {
		destDir := AppSecondaryRsyncPath(cfg.DestinationPath, stackName)
		if sz, err := dirSizeBytes(destDir); err == nil {
			sizeHuman = humanizeBytes(sz)
		}
	}

	r.logger.Printf("[INFO] Cross-drive backup completed: %s (%s)", stackName, duration.Round(time.Second))
	r.updateStatus(stackName, "ok", "", duration, sizeHuman)
	return nil
}
// RunAllScheduled runs the cross-drive backup of every enabled app whose
// configured schedule equals schedule.
//
// Before any app is processed it auto-enables backups for small apps and syncs
// the infrastructure config to the secondary destinations. Apps are then backed
// up one after another (the work is disk I/O bound). Per-app failures do not
// stop the loop; they are collected and returned as one combined error. If ctx
// is cancelled between apps, the context error is returned immediately.
func (r *CrossDriveRunner) RunAllScheduled(ctx context.Context, schedule string) error {
	r.AutoEnableSmallApps()
	r.syncInfraConfig(ctx)

	all := r.sett.GetAllCrossDriveConfigs()
	if len(all) == 0 {
		return nil
	}

	var failures []string
	for name, appCfg := range all {
		if !appCfg.Enabled || appCfg.Schedule != schedule {
			continue
		}
		// Stop scheduling further apps once the caller has cancelled.
		if err := ctx.Err(); err != nil {
			return err
		}
		if err := r.RunAppBackup(ctx, name); err != nil {
			failures = append(failures, fmt.Sprintf("%s: %v", name, err))
		}
	}

	if len(failures) == 0 {
		return nil
	}
	return fmt.Errorf("cross-drive backup errors: %s", strings.Join(failures, "; "))
}
// IsRunning reports whether a cross-drive backup for stackName is in progress
// right now. Unknown stacks report false.
func (r *CrossDriveRunner) IsRunning(stackName string) bool {
	r.mu.Lock()
	active := r.running[stackName]
	r.mu.Unlock()
	return active
}
// ValidateDestination checks that the destination path exists, is an
// accessible directory, is writable, and has sufficient free space.
// System-drive destinations get stricter limits (≥10 GB free, <90% used) to
// protect OS stability; external drives just need ≥100 MB. Non-mount-point
// destinations are allowed with a logged warning.
//
// It returns nil when the destination is usable and a descriptive error
// otherwise. When disk usage cannot be determined the space check is skipped
// with a warning rather than failing the backup.
func (r *CrossDriveRunner) ValidateDestination(path string) error {
	if path == "" {
		return fmt.Errorf("destination path is empty")
	}

	// Any stat failure must stop the backup, not only "not exist": a stale
	// mount of an unplugged drive typically answers with an I/O or permission
	// error, and continuing would report a misleading cause later on.
	info, err := os.Stat(path)
	switch {
	case os.IsNotExist(err):
		return fmt.Errorf("destination %s does not exist", path)
	case err != nil:
		return fmt.Errorf("destination %s is not accessible: %w", path, err)
	case !info.IsDir():
		return fmt.Errorf("destination %s is not a directory", path)
	}

	onSystemDrive := !system.IsMountPoint(path)
	if onSystemDrive {
		r.logger.Printf("[WARN] Destination %s is not a separate mount point (system drive) — backup will proceed but data is not protected against drive failure", path)
	}

	if !system.IsWritable(path) {
		return fmt.Errorf("destination %s is not writable", path)
	}

	di := system.GetDiskUsage(path)
	if di == nil {
		r.logger.Printf("[WARN] Cannot determine disk usage for %s — proceeding without space verification", path)
		return nil
	}

	if onSystemDrive {
		// System drive: protect OS stability — require ≥10 GB free and <90% used.
		if di.AvailGB < 10 {
			return fmt.Errorf("destination %s is on the system drive with only %.1f GB free — at least 10 GB required to protect OS stability", path, di.AvailGB)
		}
		if di.UsedPercent >= 90 {
			return fmt.Errorf("destination %s is on the system drive at %.0f%% capacity — maximum 90%% allowed", path, di.UsedPercent)
		}
		return nil
	}

	// External drive: just ensure it's not completely full.
	if di.AvailGB < 0.1 {
		return fmt.Errorf("destination %s has insufficient free space (%.1f GB free)", path, di.AvailGB)
	}
	return nil
}
// --- rsync ---
// runRsyncBackup mirrors one app onto the secondary drive with rsync. Under
// AppSecondaryRsyncPath(destBase, stackName) it maintains:
//   - the contents of every entry in mounts — directly in the app folder when
//     there is exactly one mount, otherwise in one subfolder per mount,
//   - _db/ holding copies of the app's DB dump files,
//   - _config/ holding a mirror of the directory that contains the compose file.
//
// Failing to create a directory or to rsync a data mount aborts the backup and
// is returned. DB dump copy and config rsync failures are only logged, because
// user data is the primary concern.
func (r *CrossDriveRunner) runRsyncBackup(ctx context.Context, stackName, destBase string, mounts []string) error {
	destDir := AppSecondaryRsyncPath(destBase, stackName)
	if err := os.MkdirAll(destDir, 0755); err != nil {
		return fmt.Errorf("creating rsync dest dir: %w", err)
	}

	subdirs := rsyncMountSubdirs(mounts)
	for i, srcMount := range mounts {
		// Single mount: rsync directly into the stack folder (no extra nesting).
		dstPath := destDir
		if subdirs[i] != "" {
			dstPath = filepath.Join(destDir, subdirs[i])
		}
		if err := os.MkdirAll(dstPath, 0755); err != nil {
			return fmt.Errorf("creating rsync destination: %w", err)
		}

		// Ensure trailing slash on source for rsync semantics (copy contents, not the dir itself).
		src := strings.TrimRight(srcMount, "/") + "/"
		dst := strings.TrimRight(dstPath, "/") + "/"

		// Exclude controller-managed directories (underscore prefix) so that
		// --delete does not remove _db/ and _config/ created by previous runs.
		// The pattern is anchored with a leading "/" so it only applies at the
		// root of the transfer: an unanchored "_*" would also drop every user
		// file or folder whose name starts with an underscore, at any depth.
		// Exclude app-internal DB dump files — the controller handles DB
		// backups via pg_dump separately.
		cmd := exec.CommandContext(ctx, "rsync", "-a", "--delete",
			"--exclude", "/_*",
			"--exclude", "backups/*.sql.gz",
			"--exclude", "backups/*.sql",
			"--exclude", "backups/*.dump",
			src, dst)
		r.logger.Printf("[DEBUG] rsync: %s → %s", src, dst)
		if out, err := cmd.CombinedOutput(); err != nil {
			return fmt.Errorf("rsync failed for %s: %v (%s)", srcMount, err, strings.TrimSpace(string(out)))
		}
	}

	// --- Copy DB dumps for this stack from its home drive ---
	dbDestDir := filepath.Join(destDir, "_db")
	if err := os.MkdirAll(dbDestDir, 0755); err != nil {
		return fmt.Errorf("creating DB dump dest dir: %w", err)
	}
	if err := r.copyStackDBDumps(stackName, dbDestDir); err != nil {
		// Non-fatal: user data is the primary concern.
		r.logger.Printf("[WARN] Cross-drive DB dump copy failed for %s: %v", stackName, err)
	}

	// --- Rsync app config (compose dir) ---
	if composePath, ok := r.stackProvider.GetStackComposePath(stackName); ok {
		configDestDir := filepath.Join(destDir, "_config")
		if err := os.MkdirAll(configDestDir, 0755); err != nil {
			return fmt.Errorf("creating config dest dir: %w", err)
		}
		src := strings.TrimRight(filepath.Dir(composePath), "/") + "/"
		dst := strings.TrimRight(configDestDir, "/") + "/"
		cmd := exec.CommandContext(ctx, "rsync", "-a", "--delete", src, dst)
		r.logger.Printf("[DEBUG] rsync config: %s → %s", src, dst)
		if out, err := cmd.CombinedOutput(); err != nil {
			// Non-fatal.
			r.logger.Printf("[WARN] Cross-drive config rsync failed for %s: %v (%s)", stackName, err, strings.TrimSpace(string(out)))
		}
	}
	return nil
}

// rsyncMountSubdirs returns, for each source mount, the subfolder name used
// below the app's rsync directory. With zero or one mount every entry is ""
// (sync straight into the app folder). With several mounts the leaf directory
// name is used, and a leaf already taken by an earlier mount in the same list
// gets an "_<index>" suffix (e.g. two mounts named "data" → "data", "data_1").
//
// The mapping depends only on the mount list, never on what already exists on
// disk, so repeated runs always target the same folders. Probing the
// destination instead would make every run after the first mistake a mount's
// own previous backup for a name clash and divert it to a second copy.
func rsyncMountSubdirs(mounts []string) []string {
	names := make([]string, len(mounts))
	if len(mounts) <= 1 {
		return names
	}
	used := make(map[string]bool, len(mounts))
	for i, m := range mounts {
		leaf := filepath.Base(m)
		name := leaf
		if used[name] {
			name = fmt.Sprintf("%s_%d", leaf, i)
		}
		used[name] = true
		names[i] = name
	}
	return names
}
// --- restic ---
// runResticBackup creates one restic snapshot for an app in the secondary
// restic repository located via SecondaryResticRepoPath(destBase).
//
// The snapshot is tagged with the stack name and "cross-drive" and contains:
// the app's HDD mounts, the directory holding its compose file (if known), its
// DB dump directory (if present), the stacks directory, and controller.yaml
// (if present). The repository is initialized on first use, and old snapshots
// are pruned after a successful backup.
//
// The repository password is handed to restic through a temporary file that is
// removed when the function returns.
func (r *CrossDriveRunner) runResticBackup(ctx context.Context, stackName, destBase string, mounts []string) error {
	repoPath := SecondaryResticRepoPath(destBase)

	// Get or create the cross-drive restic password.
	password, err := r.sett.GetOrCreateCrossDrivePassword()
	if err != nil {
		return fmt.Errorf("getting restic password: %w", err)
	}

	// H6: Write password to temp file. The removal is deferred right away and
	// the file is always closed before any return, so cleanup order stays
	// close → remove on every path.
	pwFile, err := os.CreateTemp("", "felhom-crossdrive-pw-*")
	if err != nil {
		return fmt.Errorf("creating password file: %w", err)
	}
	pwPath := pwFile.Name()
	defer os.Remove(pwPath)

	_, writeErr := pwFile.WriteString(password)
	closeErr := pwFile.Close()
	if writeErr != nil {
		return fmt.Errorf("writing password file: %w", writeErr)
	}
	// A failed close can mean the password never fully reached the file;
	// running restic (or worse, "restic init") with it would be unsafe.
	if closeErr != nil {
		return fmt.Errorf("closing password file: %w", closeErr)
	}

	// Ensure repo is initialized.
	if err := r.ensureResticRepo(ctx, repoPath, pwPath); err != nil {
		return err
	}

	// Run restic backup.
	args := []string{
		"backup", "--repo", repoPath,
		"--password-file", pwPath,
		"--tag", stackName,
		"--tag", "cross-drive",
	}

	// Include user data (HDD mounts).
	args = append(args, mounts...)

	// Include app config dir (compose + app.yaml + .felhom.yml).
	if composePath, ok := r.stackProvider.GetStackComposePath(stackName); ok {
		args = append(args, filepath.Dir(composePath))
	}

	// Include DB dump dir for this app (from its home drive).
	dumpDir := AppDBDumpPath(r.getAppDrivePath(stackName), stackName)
	if _, err := os.Stat(dumpDir); err == nil {
		args = append(args, dumpDir)
	}

	// Include infrastructure paths (same as primary restic).
	args = append(args, r.stacksDir)
	if _, err := os.Stat(r.controllerYAMLPath); err == nil {
		args = append(args, r.controllerYAMLPath)
	}

	cmd := exec.CommandContext(ctx, "restic", args...)
	r.logger.Printf("[DEBUG] restic backup: %v", args)
	if out, err := cmd.CombinedOutput(); err != nil {
		return fmt.Errorf("restic backup failed: %v (%s)", err, strings.TrimSpace(string(out)))
	}

	// H5: Prune old snapshots to prevent unbounded accumulation.
	return r.pruneResticRepo(ctx, repoPath, pwPath)
}
// pruneResticRepo applies the retention policy to a cross-drive restic repo:
// it runs "restic forget" keeping 7 daily and 4 weekly snapshots and prunes
// the data that is no longer referenced.
//
// Pruning is best-effort: a failure is logged as a warning and nil is always
// returned, so it can never turn a finished backup into a failed one.
func (r *CrossDriveRunner) pruneResticRepo(ctx context.Context, repoPath, pwPath string) error {
	forget := exec.CommandContext(ctx, "restic",
		"forget", "--repo", repoPath,
		"--password-file", pwPath,
		"--keep-daily", "7",
		"--keep-weekly", "4",
		"--prune",
	)
	r.logger.Printf("[DEBUG] restic forget (prune): %s", repoPath)

	output, runErr := forget.CombinedOutput()
	if runErr == nil {
		return nil
	}
	// Non-fatal: log warning but don't fail the backup.
	r.logger.Printf("[WARN] restic forget failed for %s: %v (%s)", repoPath, runErr, strings.TrimSpace(string(output)))
	return nil
}
// ensureResticRepo makes sure a restic repository exists at repoPath.
// A repository counts as initialized when its "config" file can be stat'ed;
// otherwise the directory is created and "restic init" is run with the
// password read from pwFile. It returns an error when the directory cannot be
// created or the init command fails.
func (r *CrossDriveRunner) ensureResticRepo(ctx context.Context, repoPath, pwFile string) error {
	marker := filepath.Join(repoPath, "config")
	_, statErr := os.Stat(marker)
	if statErr == nil {
		// Repo config present — nothing to do.
		return nil
	}

	if mkErr := os.MkdirAll(repoPath, 0755); mkErr != nil {
		return fmt.Errorf("creating restic repo dir: %w", mkErr)
	}

	initCmd := exec.CommandContext(ctx, "restic", "init", "--repo", repoPath, "--password-file", pwFile)
	r.logger.Printf("[INFO] Initializing cross-drive restic repo at %s", repoPath)
	output, runErr := initCmd.CombinedOutput()
	if runErr != nil {
		return fmt.Errorf("restic init failed: %v (%s)", runErr, strings.TrimSpace(string(output)))
	}
	return nil
}
// copyStackDBDumps copies DB dump files for the given stack from its home drive.
// DB dumps are at <drive>/backups/primary/<stack>/db-dumps/<stack>_<dbtype>.sql.
//
// Every non-directory entry of the dump directory is copied into destDir under
// the same name, replacing an existing file. A missing dump directory is not an
// error (the app simply has no dumps). The first read or write failure aborts
// the copy and is returned.
func (r *CrossDriveRunner) copyStackDBDumps(stackName, destDir string) error {
	dumpDir := AppDBDumpPath(r.getAppDrivePath(stackName), stackName)

	entries, err := os.ReadDir(dumpDir)
	if err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return fmt.Errorf("reading DB dump dir: %w", err)
	}

	copied := 0
	for _, e := range entries {
		if e.IsDir() {
			continue
		}
		name := e.Name()
		if err := streamCopyDBDump(filepath.Join(dumpDir, name), filepath.Join(destDir, name)); err != nil {
			return err
		}
		copied++
	}

	if copied > 0 {
		r.logger.Printf("[DEBUG] Copied %d DB dump file(s) to %s", copied, destDir)
	}
	return nil
}

// streamCopyDBDump copies the file src to dst (created or truncated, mode 0644)
// without loading it into memory — database dumps can be far larger than what
// is reasonable to buffer in RAM. Errors are prefixed with "reading" or
// "writing" and the file's base name.
func streamCopyDBDump(src, dst string) error {
	name := filepath.Base(src)

	in, err := os.Open(src)
	if err != nil {
		return fmt.Errorf("reading %s: %w", name, err)
	}
	defer in.Close()

	out, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		return fmt.Errorf("writing %s: %w", name, err)
	}

	// (*os.File).ReadFrom streams src into dst until EOF.
	if _, err := out.ReadFrom(in); err != nil {
		out.Close()
		return fmt.Errorf("writing %s: %w", name, err)
	}
	// A close error can signal data that never reached the disk.
	if err := out.Close(); err != nil {
		return fmt.Errorf("writing %s: %w", name, err)
	}
	return nil
}
// --- infra backup ---
// syncInfraConfig rsyncs infrastructure config (stacks dir + controller.yaml) to all
// secondary backup destinations. Runs once per RunAllScheduled cycle, before per-app backups.
//
// The destinations are the distinct DestinationPath values of all enabled
// cross-drive configs. For each one the stacks directory is mirrored into
// <infra>/stacks/ and controller.yaml (if present) is copied to
// <infra>/controller.yaml, where <infra> is SecondaryInfraPath(dest).
//
// The sync is best-effort: problems are logged as warnings and never returned.
// Destinations on a disconnected drive are skipped, matching the guard in
// RunAppBackup — otherwise the files would be written onto the bare mount
// point of the unplugged drive. The sync stops early when ctx is cancelled.
func (r *CrossDriveRunner) syncInfraConfig(ctx context.Context) {
	// Collect unique destination drives from enabled cross-drive configs.
	destDrives := make(map[string]bool)
	for _, cfg := range r.sett.GetAllCrossDriveConfigs() {
		if cfg.Enabled && cfg.DestinationPath != "" {
			destDrives[cfg.DestinationPath] = true
		}
	}

	for dest := range destDrives {
		if ctx.Err() != nil {
			return
		}
		if r.sett.IsDisconnected(dest) {
			r.logger.Printf("[WARN] Skipping infra config sync to %s: drive disconnected", dest)
			continue
		}

		infraDir := SecondaryInfraPath(dest)
		if err := os.MkdirAll(infraDir, 0755); err != nil {
			r.logger.Printf("[WARN] Cannot create infra backup dir %s: %v", infraDir, err)
			continue
		}
		synced := true // cleared by any failing step so success is not reported falsely

		// Rsync stacks dir → _infra/stacks/
		stacksDest := filepath.Join(infraDir, "stacks") + "/"
		if err := os.MkdirAll(stacksDest, 0755); err != nil {
			r.logger.Printf("[WARN] Cannot create infra backup dir %s: %v", stacksDest, err)
			synced = false
		} else {
			stacksSrc := strings.TrimRight(r.stacksDir, "/") + "/"
			cmd := exec.CommandContext(ctx, "rsync", "-a", "--delete", stacksSrc, stacksDest)
			if out, err := cmd.CombinedOutput(); err != nil {
				r.logger.Printf("[WARN] Infra rsync (stacks) failed for %s: %v (%s)", dest, err, strings.TrimSpace(string(out)))
				synced = false
			}
		}

		// Copy controller.yaml → _infra/controller.yaml
		if _, err := os.Stat(r.controllerYAMLPath); err == nil {
			yamlDest := filepath.Join(infraDir, "controller.yaml")
			data, err := os.ReadFile(r.controllerYAMLPath)
			if err != nil {
				r.logger.Printf("[WARN] Cannot read controller.yaml for infra backup: %v", err)
				synced = false
			} else if err := os.WriteFile(yamlDest, data, 0644); err != nil {
				r.logger.Printf("[WARN] Cannot write controller.yaml to %s: %v", yamlDest, err)
				synced = false
			}
		}

		if synced {
			r.logger.Printf("[INFO] Infrastructure config synced to %s", infraDir)
		}
	}
}
// --- auto-enable ---
// AutoEnableSmallApps creates a default cross-drive backup config (enabled,
// daily rsync) for every deployed app that has no HDD mounts and no
// cross-drive config yet. It does nothing unless at least two storage paths
// are registered.
//
// An app that already has a config entry — enabled or not — is left alone.
// The destination is the first registered storage path that is not the app's
// own home drive; apps for which no such path exists are skipped. Failures to
// store a config are logged and do not stop the remaining apps.
func (r *CrossDriveRunner) AutoEnableSmallApps() {
	drives := r.sett.GetStoragePaths()
	if len(drives) < 2 {
		// No secondary drive available.
		return
	}

	stacks := r.stackProvider.ListDeployedStacks()
	known := r.sett.GetAllCrossDriveConfigs()

	for _, st := range stacks {
		name := st.Name

		// An existing entry means the user (or an earlier run) already decided.
		if _, configured := known[name]; configured {
			continue
		}
		// Apps with HDD mounts hold large user data and need manual config.
		if len(r.stackProvider.GetStackHDDMounts(name)) > 0 {
			continue
		}

		// Pick the first storage path other than the app's home drive.
		home := r.getAppDrivePath(name)
		target := ""
		for _, d := range drives {
			if d.Path != home {
				target = d.Path
				break
			}
		}
		if target == "" {
			continue
		}

		err := r.sett.SetCrossDriveConfig(name, &settings.CrossDriveBackup{
			Enabled:         true,
			Method:          "rsync",
			DestinationPath: target,
			Schedule:        "daily",
		})
		if err != nil {
			r.logger.Printf("[WARN] Auto-enable Tier 2 failed for %s: %v", name, err)
			continue
		}
		r.logger.Printf("[INFO] Auto-enabled Tier 2 backup for %s → %s (no HDD mounts, daily rsync)", name, target)
	}
}
// --- helpers ---
// updateStatus persists the outcome of a backup run for stackName: the run
// timestamp (UTC, RFC 3339), status, error message and duration rounded to
// seconds. The stored size is only overwritten when sizeHuman is non-empty.
// Persisting is best-effort; an error from the settings store is ignored.
func (r *CrossDriveRunner) updateStatus(stackName, status, errMsg string, duration time.Duration, sizeHuman string) {
	rounded := duration.Round(time.Second).String()

	apply := func(entry *settings.CrossDriveBackup) {
		entry.LastRun = time.Now().UTC().Format(time.RFC3339)
		entry.LastStatus = status
		entry.LastError = errMsg
		entry.LastDuration = rounded
		if sizeHuman == "" {
			return // keep the previously recorded size
		}
		entry.LastSizeHuman = sizeHuman
	}

	_ = r.sett.UpdateCrossDriveStatus(stackName, apply)
}
// dirSizeBytes sums the sizes of all non-directory entries found below path.
// H7: any error reported during the walk (permission, I/O, missing path) stops
// the walk and is returned together with the total accumulated so far.
func dirSizeBytes(path string) (int64, error) {
	var sum int64

	visit := func(_ string, fi os.FileInfo, walkErr error) error {
		if walkErr != nil {
			// Propagate instead of silently under-reporting the size.
			return walkErr
		}
		if fi.IsDir() {
			return nil
		}
		sum += fi.Size()
		return nil
	}

	if err := filepath.Walk(path, visit); err != nil {
		return sum, err
	}
	return sum, nil
}