Files
deploy-felhom-compose/controller/internal/backup/crossdrive.go
T
admin db83db383c fix: deep bug hunt II — concurrency, security & optimization (25 files)
Critical: watchdog mutex panic safety, SetGeoAppOverride nil guard,
SSD-only app DB restore fallback.

High: double deploy race (atomic Deploying flag), delete/remove during
deploy guard, ScanStacks overwrite protection, FileBrowser mount mutex,
PushEvent history, PushOnce error handling, DB dump sync+close before
rename, restic retry fresh context, encrypt failure logging, cross-backup
path traversal validation, deepCopyStack completeness.

Security: constant-time API key comparison, login rate limiting (5/min),
git credential masking in logs, storage path prefix traversal fix.

Concurrency: MigrateEncryption lock ordering, SubdomainInUse I/O outside
lock, scheduler late-registered jobs, SQLite WAL verification, metrics
shutdown context, telemetry scan error logging, asset sync lock scope.

Optimization: streaming file copy for DB dumps, restic stats dedup,
atomic infra config copy.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 14:21:09 +01:00

666 lines
21 KiB
Go

package backup
import (
"context"
"fmt"
"io"
"log"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"time"
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
"gitea.dooplex.hu/admin/felhom-controller/internal/util"
)
// DBDumper can run a database dump for a specific stack.
//
// CrossDriveRunner invokes it right before a cross-drive backup when a dumper
// has been attached via SetDBDumper; an error returned from the dump is logged
// by the runner and the backup continues.
type DBDumper interface {
// DumpStackDB runs the dump for stackName. ctx is the context the caller
// received for the backup run.
DumpStackDB(ctx context.Context, stackName string) error
}
// CrossDriveRunner handles per-app backup to secondary storage.
//
// For each app it rsyncs the app's HDD mounts, copies its DB dump files and
// rsyncs its compose directory into a per-app folder under the configured
// destination path. It also tracks which apps currently have a backup running.
type CrossDriveRunner struct {
// sett is the source of cross-drive configs, storage paths and the
// disconnected/decommissioned drive flags; backup status is written back to it.
sett *settings.Settings
// stackProvider resolves per-stack HDD paths, HDD mounts, compose paths and
// the list of deployed stacks.
stackProvider StackDataProvider
// dbDumper is optional (nil until SetDBDumper is called).
dbDumper DBDumper
systemDataPath string // fallback drive for SSD-only apps
stacksDir string // path to stacks dir (for infra backup)
controllerYAMLPath string // path to controller.yaml (for infra backup)
logger *log.Logger
// debug gates the verbose "[DEBUG] ..." progress logging in most methods.
debug bool
// mu guards running.
mu sync.Mutex
running map[string]bool // per-app running state
}
// NewCrossDriveRunner builds a CrossDriveRunner with an empty running-state map.
//
// The controller.yaml location used for infra backups is fixed to
// /opt/docker/felhom-controller/controller.yaml. No DB dumper is attached;
// use SetDBDumper for that.
func NewCrossDriveRunner(sett *settings.Settings, provider StackDataProvider, systemDataPath, stacksDir string, logger *log.Logger, debug bool) *CrossDriveRunner {
	runner := new(CrossDriveRunner)

	// Collaborators.
	runner.sett = sett
	runner.stackProvider = provider
	runner.logger = logger
	runner.debug = debug

	// Filesystem locations.
	runner.systemDataPath = systemDataPath
	runner.stacksDir = stacksDir
	runner.controllerYAMLPath = "/opt/docker/felhom-controller/controller.yaml"

	// Per-app running flags start out empty.
	runner.running = map[string]bool{}
	return runner
}
// SetDBDumper sets the DB dumper for pre-backup database dumps.
// Called after backup manager is initialized (avoids circular init dependency).
//
// The field is assigned without taking r.mu. While the dumper is nil,
// RunAppBackup skips the pre-backup dump step.
func (r *CrossDriveRunner) SetDBDumper(d DBDumper) {
r.dbDumper = d
}
// GetAppDrivePath reports which drive an app lives on: the HDD path reported
// by the stack provider, or the system data path when the provider returns an
// empty string for the stack.
func (r *CrossDriveRunner) GetAppDrivePath(stackName string) string {
	drive := r.stackProvider.GetStackHDDPath(stackName)
	if drive == "" {
		return r.systemDataPath
	}
	return drive
}
// RunAppBackup runs cross-drive backup for a single app.
//
// Sequence:
//  1. Reject the call when the app has no enabled cross-drive config or when a
//     backup for the same app is already in progress.
//  2. Refuse to run when the source HDD or the destination is flagged as
//     disconnected in settings.
//  3. Mark the app as "running" in settings, trigger a fresh DB dump if a
//     dumper is attached (a dump failure is logged, not fatal).
//  4. Validate the destination and make sure it does not overlap any source mount.
//  5. Run the rsync-based backup and record status, duration and (on success)
//     the size of the app's folder at the destination.
//
// The returned error describes the first fatal problem; for failures after
// step 3 the same message is also stored as the app's last error in settings.
func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) error {
	cfg := r.sett.GetCrossDriveConfig(stackName)
	if cfg == nil || !cfg.Enabled {
		return fmt.Errorf("cross-drive backup not configured or disabled for %s", stackName)
	}
	if r.debug {
		r.logger.Printf("[DEBUG] RunAppBackup: starting for %s, dest=%s, schedule=%s, method=%s",
			stackName, cfg.DestinationPath, cfg.Schedule, cfg.Method)
	}

	// Prevent concurrent runs for the same app
	r.mu.Lock()
	if r.running[stackName] {
		r.mu.Unlock()
		return fmt.Errorf("cross-drive backup already running for %s", stackName)
	}
	r.running[stackName] = true
	r.mu.Unlock()
	// This deferred reset is the ONLY place the flag is cleared. Clearing it
	// earlier on an error path and again here would open a window in which
	// another goroutine claims the flag and this defer then wipes it while that
	// other backup is still running. The key is deleted (not set to false) so
	// the map does not accumulate entries for stacks that are idle.
	defer func() {
		r.mu.Lock()
		delete(r.running, stackName)
		r.mu.Unlock()
	}()

	// Check if source or destination drive is disconnected
	srcDrive := r.stackProvider.GetStackHDDPath(stackName)
	if srcDrive != "" && r.sett.IsDisconnected(srcDrive) {
		if r.debug {
			r.logger.Printf("[DEBUG] RunAppBackup: source drive disconnected for %s: %s", stackName, srcDrive)
		}
		return fmt.Errorf("source drive disconnected: %s", srcDrive)
	}
	if r.sett.IsDisconnected(cfg.DestinationPath) {
		if r.debug {
			r.logger.Printf("[DEBUG] RunAppBackup: destination drive disconnected for %s: %s", stackName, cfg.DestinationPath)
		}
		return fmt.Errorf("destination drive disconnected: %s", cfg.DestinationPath)
	}

	// Mark as running in settings. The result is deliberately discarded: the
	// final status written by updateStatus supersedes this intermediate state.
	_ = r.sett.UpdateCrossDriveStatus(stackName, func(c *settings.CrossDriveBackup) {
		c.LastStatus = "running"
	})

	start := time.Now()
	r.logger.Printf("[INFO] Cross-drive backup starting: %s → %s (rsync)",
		stackName, cfg.DestinationPath)

	// Trigger fresh DB dump for this app before cross-drive backup
	if r.dbDumper != nil {
		if r.debug {
			r.logger.Printf("[DEBUG] RunAppBackup: triggering pre-backup DB dump for %s", stackName)
		}
		if err := r.dbDumper.DumpStackDB(ctx, stackName); err != nil {
			r.logger.Printf("[WARN] Pre-backup DB dump failed for %s: %v — proceeding with user data backup", stackName, err)
			// Non-fatal: user data backup is still valuable without fresh dump
		}
	}

	if err := r.ValidateDestination(cfg.DestinationPath); err != nil {
		r.updateStatus(stackName, "error", err.Error(), time.Since(start), "")
		return fmt.Errorf("destination validation failed: %w", err)
	}

	// Resolve HDD mounts for this app (may be empty for config-only apps)
	mounts := r.stackProvider.GetStackHDDMounts(stackName)
	if r.debug {
		r.logger.Printf("[DEBUG] RunAppBackup: %s has %d HDD mount(s): %v", stackName, len(mounts), mounts)
	}

	// Safety: destination must not overlap with any source
	for _, m := range mounts {
		if system.PathsOverlap(cfg.DestinationPath, m) {
			msg := fmt.Sprintf("destination %s overlaps with source %s — aborted", cfg.DestinationPath, m)
			r.updateStatus(stackName, "error", msg, time.Since(start), "")
			return fmt.Errorf("%s", msg)
		}
	}

	runErr := r.runRsyncBackup(ctx, stackName, cfg.DestinationPath, mounts)
	duration := time.Since(start)
	if runErr != nil {
		r.logger.Printf("[ERROR] Cross-drive backup failed: %s: %v", stackName, runErr)
		r.updateStatus(stackName, "error", runErr.Error(), duration, "")
		return runErr
	}

	// Calculate backup size; a failure to measure just leaves the size empty.
	var sizeHuman string
	destDir := AppSecondaryRsyncPath(cfg.DestinationPath, stackName)
	if sz, err := dirSizeBytes(destDir); err == nil {
		sizeHuman = humanizeBytes(sz)
		if r.debug {
			r.logger.Printf("[DEBUG] RunAppBackup: %s backup size at destination: %s", stackName, sizeHuman)
		}
	}

	r.logger.Printf("[INFO] Cross-drive backup completed: %s (%s)", stackName, duration.Round(time.Second))
	r.updateStatus(stackName, "ok", "", duration, sizeHuman)
	return nil
}
// RunAllScheduled backs up every enabled app whose configured schedule equals
// the given one. Apps are processed one after another (disk I/O bound).
//
// Before the per-app loop it auto-enables backup for small apps and syncs the
// infrastructure config. Per-app failures are collected and returned as one
// combined error; a cancelled context aborts the loop and returns ctx.Err().
func (r *CrossDriveRunner) RunAllScheduled(ctx context.Context, schedule string) error {
	if r.debug {
		r.logger.Printf("[DEBUG] RunAllScheduled: starting for schedule=%s", schedule)
	}

	// Auto-enable Tier 2 for small apps, then push infra config to the
	// secondary destinations, before any per-app work starts.
	r.AutoEnableSmallApps()
	r.syncInfraConfig(ctx)

	configs := r.sett.GetAllCrossDriveConfigs()
	if len(configs) == 0 {
		if r.debug {
			r.logger.Printf("[DEBUG] RunAllScheduled: no cross-drive configs found")
		}
		return nil
	}
	if r.debug {
		r.logger.Printf("[DEBUG] RunAllScheduled: %d total cross-drive config(s) found", len(configs))
	}

	var (
		failures                     []string
		queued, disabled, mismatched int
	)
	for name, cfg := range configs {
		switch {
		case !cfg.Enabled:
			if r.debug {
				r.logger.Printf("[DEBUG] RunAllScheduled: skipping %s — disabled", name)
			}
			disabled++
			continue
		case cfg.Schedule != schedule:
			if r.debug {
				r.logger.Printf("[DEBUG] RunAllScheduled: skipping %s — schedule mismatch (has=%s, want=%s)", name, cfg.Schedule, schedule)
			}
			mismatched++
			continue
		}

		if r.debug {
			r.logger.Printf("[DEBUG] RunAllScheduled: queuing %s for backup (dest=%s)", name, cfg.DestinationPath)
		}
		queued++

		// Stop between apps once the caller has cancelled.
		if cancelErr := ctx.Err(); cancelErr != nil {
			return cancelErr
		}

		if runErr := r.RunAppBackup(ctx, name); runErr != nil {
			failures = append(failures, fmt.Sprintf("%s: %v", name, runErr))
		}
	}

	if r.debug {
		r.logger.Printf("[DEBUG] RunAllScheduled: done — %d scheduled, %d disabled, %d wrong schedule, %d errors",
			queued, disabled, mismatched, len(failures))
	}
	if len(failures) == 0 {
		return nil
	}
	return fmt.Errorf("cross-drive backup errors: %s", strings.Join(failures, "; "))
}
// RunAllConfigured backs up every enabled app regardless of its schedule.
// Used by the debug page to trigger all backups at once.
//
// Like RunAllScheduled it first auto-enables small apps and syncs the infra
// config. Per-app failures are collected into one combined error; a cancelled
// context aborts the loop and returns ctx.Err().
func (r *CrossDriveRunner) RunAllConfigured(ctx context.Context) error {
	if r.debug {
		r.logger.Printf("[DEBUG] RunAllConfigured: starting for all enabled apps")
	}
	r.AutoEnableSmallApps()
	r.syncInfraConfig(ctx)

	all := r.sett.GetAllCrossDriveConfigs()
	if len(all) == 0 {
		return nil
	}

	var failures []string
	attempted := 0
	for name, cfg := range all {
		if cfg.Enabled {
			// Stop between apps once the caller has cancelled.
			if cancelErr := ctx.Err(); cancelErr != nil {
				return cancelErr
			}
			attempted++
			if runErr := r.RunAppBackup(ctx, name); runErr != nil {
				failures = append(failures, fmt.Sprintf("%s: %v", name, runErr))
			}
		}
	}

	if r.debug {
		r.logger.Printf("[DEBUG] RunAllConfigured: done — %d ran, %d errors", attempted, len(failures))
	}
	if len(failures) == 0 {
		return nil
	}
	return fmt.Errorf("cross-drive errors: %s", strings.Join(failures, "; "))
}
// IsRunning reports whether a cross-drive backup for stackName is in progress.
// The running map is read under r.mu; an app with no entry reports false.
func (r *CrossDriveRunner) IsRunning(stackName string) bool {
	r.mu.Lock()
	active := r.running[stackName]
	r.mu.Unlock()
	return active
}
// AnyRunning reports whether at least one app currently has a cross-drive
// backup in progress. The running map is scanned under r.mu.
func (r *CrossDriveRunner) AnyRunning() bool {
	r.mu.Lock()
	defer r.mu.Unlock()

	for name := range r.running {
		if r.running[name] {
			return true
		}
	}
	return false
}
// ValidateDestination verifies that path can be used as a backup destination.
//
// Checks, in order: path is non-empty, not decommissioned in settings, exists,
// and is writable. Free space is then checked with limits that depend on where
// the path lives:
//   - not a mount point (treated as the system drive): at least 10 GB free and
//     less than 90% used, to protect OS stability; a warning is logged because
//     such a backup does not protect against drive failure;
//   - separate mount point: at least 0.1 GB free.
//
// When disk usage cannot be determined a warning is logged and the path is accepted.
func (r *CrossDriveRunner) ValidateDestination(path string) error {
	if r.debug {
		r.logger.Printf("[DEBUG] ValidateDestination: checking path=%s", path)
	}

	switch {
	case path == "":
		return fmt.Errorf("destination path is empty")
	case r.sett.IsDecommissioned(path):
		return fmt.Errorf("destination %s is decommissioned — choose an active drive", path)
	}
	if _, statErr := os.Stat(path); os.IsNotExist(statErr) {
		return fmt.Errorf("destination %s does not exist", path)
	}

	isMount := system.IsMountPoint(path)
	onSystemDrive := !isMount
	if r.debug {
		r.logger.Printf("[DEBUG] ValidateDestination: path=%s, isMountPoint=%v", path, isMount)
	}
	if onSystemDrive {
		r.logger.Printf("[WARN] Destination %s is not a separate mount point (system drive) — backup will proceed but data is not protected against drive failure", path)
	}

	if !system.IsWritable(path) {
		return fmt.Errorf("destination %s is not writable", path)
	}

	usage := system.GetDiskUsage(path)
	if usage == nil {
		r.logger.Printf("[WARN] Cannot determine disk usage for %s — proceeding without space verification", path)
		return nil
	}
	if r.debug {
		r.logger.Printf("[DEBUG] ValidateDestination: path=%s, availGB=%.1f, usedPct=%.0f%%, onSystemDrive=%v",
			path, usage.AvailGB, usage.UsedPercent, onSystemDrive)
	}

	switch {
	case onSystemDrive && usage.AvailGB < 10:
		// System drive: keep headroom for the OS.
		return fmt.Errorf("destination %s is on the system drive with only %.1f GB free — at least 10 GB required to protect OS stability", path, usage.AvailGB)
	case onSystemDrive && usage.UsedPercent >= 90:
		return fmt.Errorf("destination %s is on the system drive at %.0f%% capacity — maximum 90%% allowed", path, usage.UsedPercent)
	case !onSystemDrive && usage.AvailGB < 0.1:
		// External drive: only reject when it is essentially full.
		return fmt.Errorf("destination %s has insufficient free space (%.1f GB free)", path, usage.AvailGB)
	}

	if r.debug {
		r.logger.Printf("[DEBUG] ValidateDestination: path=%s passed all checks", path)
	}
	return nil
}
// --- rsync ---
// runRsyncBackup mirrors one app's data into its per-app folder on the
// secondary drive.
//
// Layout under AppSecondaryRsyncPath(destBase, stackName):
//   - a single HDD mount is synced straight into the folder; with several
//     mounts each one gets a subfolder named after the mount's leaf directory
//     (suffixed _2, _3, … when leaf names repeat);
//   - _db/ receives the stack's DB dump files;
//   - _config/ receives the directory that holds the stack's compose file.
//
// A directory-creation failure or a failing mount rsync aborts with an error.
// Failures while copying DB dumps or the config directory are only logged.
func (r *CrossDriveRunner) runRsyncBackup(ctx context.Context, stackName, destBase string, mounts []string) error {
	destDir := AppSecondaryRsyncPath(destBase, stackName)
	if r.debug {
		r.logger.Printf("[DEBUG] runRsyncBackup: stack=%s, destBase=%s, destDir=%s, %d mount(s)", stackName, destBase, destDir, len(mounts))
	}
	if err := os.MkdirAll(destDir, 0755); err != nil {
		return fmt.Errorf("creating rsync dest dir: %w", err)
	}

	seen := make(map[string]bool)
	for _, srcMount := range mounts {
		var dstPath string
		if len(mounts) == 1 {
			// Single mount: rsync directly into the stack folder (no extra nesting)
			dstPath = destDir
		} else {
			// Multiple mounts: use the leaf directory name as subfolder
			leaf := filepath.Base(srcMount)
			if seen[leaf] {
				// Disambiguate duplicate leaf names (e.g. two mounts both named "data")
				for j := 2; ; j++ {
					candidate := fmt.Sprintf("%s_%d", leaf, j)
					if !seen[candidate] {
						leaf = candidate
						break
					}
				}
			}
			seen[leaf] = true
			dstPath = filepath.Join(destDir, leaf)
		}
		if err := os.MkdirAll(dstPath, 0755); err != nil {
			return fmt.Errorf("creating rsync destination: %w", err)
		}

		// Ensure trailing slash on source for rsync semantics (copy contents, not the dir itself)
		src := strings.TrimRight(srcMount, "/") + "/"
		dst := strings.TrimRight(dstPath, "/") + "/"

		// Exclude controller-managed directories (underscore prefix) to prevent --delete from
		// removing _db/ and _config/ that were created by previous backup runs. The pattern is
		// anchored with a leading "/" so it only applies at the root of the transfer: an
		// unanchored "_*" would match the final path component at ANY depth and silently
		// drop user files such as "photos/_raw/" or "__init__.py" from the backup.
		// Exclude app-internal DB dump files — the controller handles DB backups via pg_dump separately.
		cmd := exec.CommandContext(ctx, "rsync", "-a", "--delete",
			"--exclude", "/_*",
			"--exclude", "backups/*.sql.gz",
			"--exclude", "backups/*.sql",
			"--exclude", "backups/*.dump",
			src, dst)
		r.logger.Printf("[DEBUG] rsync: %s → %s", src, dst)
		out, err := cmd.CombinedOutput()
		if err != nil {
			if r.debug {
				r.logger.Printf("[DEBUG] runRsyncBackup: rsync failed for %s: %s", srcMount, util.TruncateStr(strings.TrimSpace(string(out)), 500))
			}
			return fmt.Errorf("rsync failed for %s: %v (%s)", srcMount, err, strings.TrimSpace(string(out)))
		}
		if r.debug {
			r.logger.Printf("[DEBUG] runRsyncBackup: rsync OK for mount %s → %s", src, dst)
		}
	}

	// --- Copy DB dumps for this stack from its home drive ---
	dbDestDir := filepath.Join(destDir, "_db")
	if err := os.MkdirAll(dbDestDir, 0755); err != nil {
		return fmt.Errorf("creating DB dump dest dir: %w", err)
	}
	if err := r.copyStackDBDumps(stackName, dbDestDir); err != nil {
		r.logger.Printf("[WARN] Cross-drive DB dump copy failed for %s: %v", stackName, err)
		// Non-fatal: user data is the primary concern
	}

	// --- Rsync app config (compose dir) ---
	if composePath, ok := r.stackProvider.GetStackComposePath(stackName); ok {
		configSrcDir := filepath.Dir(composePath)
		configDestDir := filepath.Join(destDir, "_config")
		if err := os.MkdirAll(configDestDir, 0755); err != nil {
			return fmt.Errorf("creating config dest dir: %w", err)
		}
		src := strings.TrimRight(configSrcDir, "/") + "/"
		dst := strings.TrimRight(configDestDir, "/") + "/"
		cmd := exec.CommandContext(ctx, "rsync", "-a", "--delete", src, dst)
		r.logger.Printf("[DEBUG] rsync config: %s → %s", src, dst)
		if out, err := cmd.CombinedOutput(); err != nil {
			r.logger.Printf("[WARN] Cross-drive config rsync failed for %s: %v (%s)", stackName, err, strings.TrimSpace(string(out)))
			// Non-fatal
		}
	}
	return nil
}
// copyStackDBDumps copies every regular entry of the stack's DB dump directory
// into destDir. The dump directory is AppDBDumpPath(<app drive>, stackName),
// where the app drive comes from GetAppDrivePath.
//
// A missing dump directory is not an error (nothing to copy). Subdirectories
// are skipped. The first file that fails to copy aborts the run with an error.
func (r *CrossDriveRunner) copyStackDBDumps(stackName, destDir string) error {
	dumpDir := AppDBDumpPath(r.GetAppDrivePath(stackName), stackName)

	entries, err := os.ReadDir(dumpDir)
	switch {
	case err == nil:
		// fall through to the copy loop
	case os.IsNotExist(err):
		return nil
	default:
		return fmt.Errorf("reading DB dump dir: %w", err)
	}

	var copied int
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		name := entry.Name()
		if copyErr := copyFile(filepath.Join(dumpDir, name), filepath.Join(destDir, name)); copyErr != nil {
			return fmt.Errorf("copying %s: %w", name, copyErr)
		}
		copied++
	}

	if copied == 0 {
		return nil
	}
	r.logger.Printf("[DEBUG] Copied %d DB dump file(s) to %s", copied, destDir)
	return nil
}
// --- infra backup ---
// syncInfraConfig rsyncs infrastructure config (stacks dir + controller.yaml) to all
// secondary backup destinations. Runs once per RunAllScheduled cycle, before per-app backups.
//
// Destinations are the unique, non-empty DestinationPath values of all enabled
// cross-drive configs. A destination flagged as disconnected or decommissioned
// in settings is skipped — the same guard RunAppBackup applies — so that
// nothing is written into the bare mount-point directory of an absent drive.
// Every failure is logged as a warning and never returned; the "synced" INFO
// line is only emitted for a destination where every step succeeded. The loop
// stops early when ctx is cancelled.
func (r *CrossDriveRunner) syncInfraConfig(ctx context.Context) {
	// Collect unique destination drives from enabled cross-drive configs
	destDrives := make(map[string]bool)
	for _, cfg := range r.sett.GetAllCrossDriveConfigs() {
		if cfg.Enabled && cfg.DestinationPath != "" {
			destDrives[cfg.DestinationPath] = true
		}
	}
	if len(destDrives) == 0 {
		return
	}

	for dest := range destDrives {
		if err := ctx.Err(); err != nil {
			r.logger.Printf("[WARN] Infra config sync interrupted: %v", err)
			return
		}
		if r.sett.IsDisconnected(dest) || r.sett.IsDecommissioned(dest) {
			r.logger.Printf("[WARN] Skipping infra config sync to %s — drive is disconnected or decommissioned", dest)
			continue
		}

		infraDir := SecondaryInfraPath(dest)
		if err := os.MkdirAll(infraDir, 0755); err != nil {
			r.logger.Printf("[WARN] Cannot create infra backup dir %s: %v", infraDir, err)
			continue
		}
		synced := true

		// Rsync stacks dir → _infra/stacks/
		stacksDest := filepath.Join(infraDir, "stacks") + "/"
		if err := os.MkdirAll(stacksDest, 0755); err != nil {
			r.logger.Printf("[WARN] Cannot create infra stacks dir %s: %v", stacksDest, err)
			synced = false
		} else {
			stacksSrc := strings.TrimRight(r.stacksDir, "/") + "/"
			cmd := exec.CommandContext(ctx, "rsync", "-a", "--delete", stacksSrc, stacksDest)
			if out, err := cmd.CombinedOutput(); err != nil {
				r.logger.Printf("[WARN] Infra rsync (stacks) failed for %s: %v (%s)", dest, err, strings.TrimSpace(string(out)))
				synced = false
			}
		}

		// Copy controller.yaml → _infra/controller.yaml (atomic via copyFile).
		// A controller.yaml that cannot be stat'ed is skipped without a warning.
		if _, err := os.Stat(r.controllerYAMLPath); err == nil {
			yamlDest := filepath.Join(infraDir, "controller.yaml")
			if err := copyFile(r.controllerYAMLPath, yamlDest); err != nil {
				r.logger.Printf("[WARN] Cannot copy controller.yaml to %s: %v", yamlDest, err)
				synced = false
			}
		}

		if synced {
			r.logger.Printf("[INFO] Infrastructure config synced to %s", infraDir)
		}
	}
}
// --- auto-enable ---
// AutoEnableSmallApps creates a daily rsync cross-drive config for every
// deployed app that has no HDD mounts and no cross-drive config yet.
//
// It does nothing unless at least two storage paths are registered. An app
// that already has a config entry (enabled or not) is left untouched. The
// destination is the first storage path that is neither the app's own drive
// nor flagged disconnected/decommissioned; apps without such a path are skipped.
func (r *CrossDriveRunner) AutoEnableSmallApps() {
	storagePaths := r.sett.GetStoragePaths()
	if n := len(storagePaths); n < 2 {
		// No secondary drive available.
		if r.debug {
			r.logger.Printf("[DEBUG] AutoEnableSmallApps: fewer than 2 storage paths (%d) — skipping", n)
		}
		return
	}

	deployed := r.stackProvider.ListDeployedStacks()
	existingConfigs := r.sett.GetAllCrossDriveConfigs()
	if r.debug {
		r.logger.Printf("[DEBUG] AutoEnableSmallApps: %d deployed stacks, %d existing configs, %d storage paths",
			len(deployed), len(existingConfigs), len(storagePaths))
	}

	// pickDestination returns the first usable storage path other than
	// homeDrive, or "" when there is none.
	pickDestination := func(homeDrive string) string {
		for _, candidate := range storagePaths {
			if candidate.Path == homeDrive || candidate.Disconnected || candidate.Decommissioned {
				continue
			}
			return candidate.Path
		}
		return ""
	}

	enabledCount := 0
	for _, stack := range deployed {
		// An existing entry means the user has touched it — never override.
		if _, configured := existingConfigs[stack.Name]; configured {
			if r.debug {
				r.logger.Printf("[DEBUG] AutoEnableSmallApps: skipping %s — already has cross-drive config", stack.Name)
			}
			continue
		}

		// Apps with HDD mounts carry large user data and need manual config.
		hddMounts := r.stackProvider.GetStackHDDMounts(stack.Name)
		if len(hddMounts) > 0 {
			if r.debug {
				r.logger.Printf("[DEBUG] AutoEnableSmallApps: skipping %s — has %d HDD mount(s)", stack.Name, len(hddMounts))
			}
			continue
		}

		target := pickDestination(r.GetAppDrivePath(stack.Name))
		if target == "" {
			if r.debug {
				r.logger.Printf("[DEBUG] AutoEnableSmallApps: skipping %s — no suitable destination found", stack.Name)
			}
			continue
		}

		// Auto-configure daily rsync.
		err := r.sett.SetCrossDriveConfig(stack.Name, &settings.CrossDriveBackup{
			Enabled:         true,
			Method:          "rsync",
			DestinationPath: target,
			Schedule:        "daily",
		})
		if err != nil {
			r.logger.Printf("[WARN] Auto-enable Tier 2 failed for %s: %v", stack.Name, err)
			continue
		}
		enabledCount++
		r.logger.Printf("[INFO] Auto-enabled Tier 2 backup for %s → %s (no HDD mounts, daily rsync)", stack.Name, target)
	}

	if enabledCount > 0 && r.debug {
		r.logger.Printf("[DEBUG] AutoEnableSmallApps: auto-enabled %d app(s)", enabledCount)
	}
}
// --- helpers ---
// updateStatus records the outcome of a backup run in the app's cross-drive
// settings entry: run time (UTC, RFC 3339), status, error message and the
// duration rounded to whole seconds. The stored size is only overwritten when
// sizeHuman is non-empty. The result of the settings update is discarded.
func (r *CrossDriveRunner) updateStatus(stackName, status, errMsg string, duration time.Duration, sizeHuman string) {
	record := func(entry *settings.CrossDriveBackup) {
		entry.LastRun = time.Now().UTC().Format(time.RFC3339)
		entry.LastStatus, entry.LastError = status, errMsg
		entry.LastDuration = duration.Round(time.Second).String()
		if sizeHuman == "" {
			// Keep whatever size was recorded by an earlier run.
			return
		}
		entry.LastSizeHuman = sizeHuman
	}
	_ = r.sett.UpdateCrossDriveStatus(stackName, record)
}
// copyFile copies src to dst using streaming I/O (no full-file memory allocation).
//
// The data is first written to dst+".tmp", flushed to stable storage, and then
// renamed over dst, so a reader of dst sees either the previous content or the
// complete new content. On every failure after the temp file has been created
// — including a failing rename — the temp file is removed again.
//
// Returns the first error from opening src, creating the temp file, copying,
// syncing, closing or renaming.
func copyFile(src, dst string) error {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close()

	tmp := dst + ".tmp"
	out, err := os.Create(tmp)
	if err != nil {
		return err
	}
	if _, err := io.Copy(out, in); err != nil {
		out.Close()
		os.Remove(tmp)
		return err
	}
	// Flush file data to disk before the rename publishes it; otherwise a crash
	// shortly after the rename could leave dst truncated or empty.
	if err := out.Sync(); err != nil {
		out.Close()
		os.Remove(tmp)
		return err
	}
	if err := out.Close(); err != nil {
		os.Remove(tmp)
		return err
	}
	if err := os.Rename(tmp, dst); err != nil {
		// Do not leave the orphaned temp file next to the destination.
		os.Remove(tmp)
		return err
	}
	return nil
}
// dirSizeBytes sums the sizes of all non-directory entries below path.
// Any error reported during the walk (permission, I/O, missing path) stops the
// walk and is returned together with the total accumulated so far.
func dirSizeBytes(path string) (int64, error) {
	var sum int64
	visit := func(_ string, fi os.FileInfo, walkErr error) error {
		if walkErr != nil {
			// Propagate instead of skipping the entry.
			return walkErr
		}
		if fi.IsDir() {
			return nil
		}
		sum += fi.Size()
		return nil
	}
	if err := filepath.Walk(path, visit); err != nil {
		return sum, err
	}
	return sum, nil
}