Files
deploy-felhom-compose/controller/internal/backup/crossdrive.go
T
admin f7518c0529 v0.14.1: Auto Tier 2 for small apps + infra config in cross-drive backup
- Auto-enable daily rsync Tier 2 for apps without HDD mounts when ≥2
  storage paths exist (AutoEnableSmallApps)
- Sync infrastructure config (stacks dir + controller.yaml) to all
  secondary destinations via _infra/ directory (syncInfraConfig)
- Include infra paths in cross-drive restic snapshots
- Add SecondaryInfraPath() helper to paths.go

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-18 19:11:57 +01:00

576 lines
19 KiB
Go

package backup
import (
"context"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"time"
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
)
// DBDumper can run a database dump for a specific stack.
// CrossDriveRunner invokes it right before a cross-drive backup so the dump
// that gets copied is fresh; a failing dump is logged there and treated as
// non-fatal.
type DBDumper interface {
// DumpStackDB dumps the database(s) belonging to stackName and returns a
// non-nil error when the dump could not be produced.
DumpStackDB(ctx context.Context, stackName string) error
}
// CrossDriveRunner handles per-app backup to secondary storage.
// It supports the "rsync" and "restic" methods and keeps a per-app running
// flag so the same app is never backed up twice at the same time.
type CrossDriveRunner struct {
// sett supplies cross-drive configs, storage paths and the restic password,
// and is where run status is recorded.
sett *settings.Settings
// stackProvider resolves HDD paths/mounts, compose paths and the list of
// deployed stacks.
stackProvider StackDataProvider
// dbDumper is optional; it stays nil until SetDBDumper is called, in which
// case the pre-backup DB dump is skipped.
dbDumper DBDumper
systemDataPath string // fallback drive for SSD-only apps
stacksDir string // path to stacks dir (for infra backup)
controllerYAMLPath string // path to controller.yaml (for infra backup)
// logger receives all [INFO]/[WARN]/[ERROR]/[DEBUG] messages of the runner.
logger *log.Logger
// mu guards running.
mu sync.Mutex
running map[string]bool // per-app running state
}
// NewCrossDriveRunner builds a CrossDriveRunner wired to the given settings
// store, stack data provider and logger.
//
// systemDataPath is the drive used for apps that have no HDD path of their
// own; stacksDir is the stacks directory included in infrastructure backups.
// The controller.yaml location is fixed to the controller's install path.
// No DB dumper is attached here — see SetDBDumper.
func NewCrossDriveRunner(sett *settings.Settings, provider StackDataProvider, systemDataPath, stacksDir string, logger *log.Logger) *CrossDriveRunner {
	runner := new(CrossDriveRunner)
	runner.sett = sett
	runner.stackProvider = provider
	runner.systemDataPath = systemDataPath
	runner.stacksDir = stacksDir
	runner.controllerYAMLPath = "/opt/docker/felhom-controller/controller.yaml"
	runner.logger = logger
	runner.running = make(map[string]bool)
	return runner
}
// SetDBDumper sets the DB dumper for pre-backup database dumps.
// Called after backup manager is initialized (avoids circular init dependency).
// Until it is called the runner skips the pre-backup dump step entirely.
// The field is assigned without taking the runner's mutex.
func (r *CrossDriveRunner) SetDBDumper(d DBDumper) {
r.dbDumper = d
}
// getAppDrivePath reports which drive hosts the given app: the HDD path
// reported by the stack provider, or the system data path when the provider
// returns an empty string for the stack.
func (r *CrossDriveRunner) getAppDrivePath(stackName string) string {
	drive := r.stackProvider.GetStackHDDPath(stackName)
	if drive == "" {
		drive = r.systemDataPath
	}
	return drive
}
// RunAppBackup performs one cross-drive backup of stackName.
//
// Steps, in order:
//  1. load the app's cross-drive config (error if missing or disabled);
//  2. claim the per-app running slot (error if a run is already active);
//  3. mark the status as "running" and trigger a fresh DB dump (dump failure
//     is only logged);
//  4. validate the destination and make sure it does not overlap any source
//     mount;
//  5. dispatch to the rsync or restic implementation;
//  6. record the outcome (status, error text, duration and — for rsync —
//     the human-readable size of the destination directory).
//
// The returned error is nil only when the selected backup method succeeded.
func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) error {
	cfg := r.sett.GetCrossDriveConfig(stackName)
	if cfg == nil || !cfg.Enabled {
		return fmt.Errorf("cross-drive backup not configured or disabled for %s", stackName)
	}

	// Claim the per-app slot; a second caller for the same app is rejected.
	r.mu.Lock()
	busy := r.running[stackName]
	if !busy {
		r.running[stackName] = true
	}
	r.mu.Unlock()
	if busy {
		return fmt.Errorf("cross-drive backup already running for %s", stackName)
	}
	defer func() {
		r.mu.Lock()
		defer r.mu.Unlock()
		r.running[stackName] = false
	}()

	// Persist the in-progress state so observers of the settings see it.
	_ = r.sett.UpdateCrossDriveStatus(stackName, func(c *settings.CrossDriveBackup) {
		c.LastStatus = "running"
	})

	startedAt := time.Now()
	r.logger.Printf("[INFO] Cross-drive backup starting: %s → %s (method: %s)",
		stackName, cfg.DestinationPath, cfg.Method)

	// A fresh dump is nice to have; the user data backup proceeds without it.
	if r.dbDumper != nil {
		dumpErr := r.dbDumper.DumpStackDB(ctx, stackName)
		if dumpErr != nil {
			r.logger.Printf("[WARN] Pre-backup DB dump failed for %s: %v — proceeding with user data backup", stackName, dumpErr)
		}
	}

	if valErr := r.ValidateDestination(cfg.DestinationPath); valErr != nil {
		r.updateStatus(stackName, "error", valErr.Error(), time.Since(startedAt), "")
		return fmt.Errorf("destination validation failed: %w", valErr)
	}

	// HDD mounts may be empty for config-only apps.
	mounts := r.stackProvider.GetStackHDDMounts(stackName)

	// Refuse to run when the destination overlaps one of the sources.
	for _, srcMount := range mounts {
		if !system.PathsOverlap(cfg.DestinationPath, srcMount) {
			continue
		}
		msg := fmt.Sprintf("destination %s overlaps with source %s — aborted", cfg.DestinationPath, srcMount)
		r.updateStatus(stackName, "error", msg, time.Since(startedAt), "")
		return fmt.Errorf("%s", msg)
	}

	var runErr error
	switch method := cfg.Method; method {
	case "rsync":
		runErr = r.runRsyncBackup(ctx, stackName, cfg.DestinationPath, mounts)
	case "restic":
		runErr = r.runResticBackup(ctx, stackName, cfg.DestinationPath, mounts)
	default:
		runErr = fmt.Errorf("unknown backup method: %s", method)
	}

	elapsed := time.Since(startedAt)
	if runErr != nil {
		r.logger.Printf("[ERROR] Cross-drive backup failed: %s: %v", stackName, runErr)
		r.updateStatus(stackName, "error", runErr.Error(), elapsed, "")
		return runErr
	}

	// Only the rsync layout has a per-app directory whose size can be measured.
	sizeHuman := ""
	if cfg.Method == "rsync" {
		sz, szErr := dirSizeBytes(AppSecondaryRsyncPath(cfg.DestinationPath, stackName))
		if szErr == nil {
			sizeHuman = humanizeBytes(sz)
		}
	}

	r.logger.Printf("[INFO] Cross-drive backup completed: %s (%s)", stackName, elapsed.Round(time.Second))
	r.updateStatus(stackName, "ok", "", elapsed, sizeHuman)
	return nil
}
// RunAllScheduled backs up every enabled app whose configured schedule equals
// schedule, one after another (the work is disk I/O bound).
//
// Before the per-app runs it auto-enables backups for small apps and syncs
// the infrastructure config to the secondary destinations. If ctx is done
// before an app is started, ctx.Err() is returned immediately. Otherwise the
// individual failures are collected and returned as one combined error; nil
// means every matching app succeeded (or none matched).
func (r *CrossDriveRunner) RunAllScheduled(ctx context.Context, schedule string) error {
	r.AutoEnableSmallApps()
	r.syncInfraConfig(ctx)

	var failures []string
	for name, cfg := range r.sett.GetAllCrossDriveConfigs() {
		if !cfg.Enabled || cfg.Schedule != schedule {
			continue
		}
		// Stop scheduling further apps once the context is cancelled.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return ctxErr
		}
		if runErr := r.RunAppBackup(ctx, name); runErr != nil {
			failures = append(failures, fmt.Sprintf("%s: %v", name, runErr))
		}
	}

	if len(failures) == 0 {
		return nil
	}
	return fmt.Errorf("cross-drive backup errors: %s", strings.Join(failures, "; "))
}
// IsRunning reports whether a cross-drive backup for stackName is in progress
// right now. Apps that were never backed up report false.
func (r *CrossDriveRunner) IsRunning(stackName string) bool {
	r.mu.Lock()
	active := r.running[stackName]
	r.mu.Unlock()
	return active
}
// ValidateDestination checks that the destination path exists, is a directory,
// is writable, and has sufficient free space.
//
// System-drive destinations (paths that are not their own mount point) get
// stricter limits — at least 10 GB free and less than 90% used — to protect OS
// stability; they are allowed, but a warning is logged because such a backup
// does not protect against drive failure. Destinations on a separate mount
// just need at least 0.1 GB (~100 MB) free. When disk usage cannot be
// determined the space checks are skipped.
//
// It returns nil when the destination is usable, otherwise an error describing
// the first failed check. Stat failures other than "not exist" (for example
// permission or I/O errors) are reported instead of being ignored.
func (r *CrossDriveRunner) ValidateDestination(path string) error {
	if path == "" {
		return fmt.Errorf("destination path is empty")
	}

	info, err := os.Stat(path)
	if err != nil {
		if os.IsNotExist(err) {
			return fmt.Errorf("destination %s does not exist", path)
		}
		return fmt.Errorf("checking destination %s: %w", path, err)
	}
	if !info.IsDir() {
		return fmt.Errorf("destination %s is not a directory", path)
	}

	onSystemDrive := !system.IsMountPoint(path)
	if onSystemDrive {
		r.logger.Printf("[WARN] Destination %s is not a separate mount point (system drive) — backup will proceed but data is not protected against drive failure", path)
	}

	if !system.IsWritable(path) {
		return fmt.Errorf("destination %s is not writable", path)
	}

	di := system.GetDiskUsage(path)
	if di == nil {
		// Usage unknown: nothing more can be verified.
		return nil
	}

	if onSystemDrive {
		// System drive: protect OS stability — require ≥10 GB free and <90% used.
		if di.AvailGB < 10 {
			return fmt.Errorf("destination %s is on the system drive with only %.1f GB free — at least 10 GB required to protect OS stability", path, di.AvailGB)
		}
		if di.UsedPercent >= 90 {
			return fmt.Errorf("destination %s is on the system drive at %.0f%% capacity — maximum 90%% allowed", path, di.UsedPercent)
		}
		return nil
	}

	// External drive: just ensure it's not completely full.
	if di.AvailGB < 0.1 {
		return fmt.Errorf("destination %s has insufficient free space (%.1f GB free)", path, di.AvailGB)
	}
	return nil
}
// --- rsync ---
// runRsyncBackup mirrors an app's data to <destBase>'s per-app rsync directory.
//
// Layout of the destination directory:
//   - a single mount is mirrored directly into the app directory;
//   - several mounts are each mirrored into a subfolder named after the
//     mount's leaf directory; a leaf name already used by an earlier mount in
//     this run gets an "_<index>" suffix;
//   - "_db" receives the app's DB dump files (copy failure is only logged);
//   - "_config" receives the compose directory (rsync failure is only logged).
//
// Subfolder names are derived purely from the mount list, never from what is
// already on disk, so every run maps a given mount to the same destination.
// (Probing the destination with os.Stat would see the directory left by the
// previous run and redirect the mount to a second, duplicate copy.)
//
// A failure to create a directory or to rsync a data mount aborts the backup
// and is returned; mounts may be empty for config-only apps.
func (r *CrossDriveRunner) runRsyncBackup(ctx context.Context, stackName, destBase string, mounts []string) error {
	destDir := AppSecondaryRsyncPath(destBase, stackName)
	if err := os.MkdirAll(destDir, 0755); err != nil {
		return fmt.Errorf("creating rsync dest dir: %w", err)
	}

	// Subfolder names handed out so far in this run (multi-mount layout only).
	usedNames := make(map[string]bool, len(mounts))

	for i, srcMount := range mounts {
		// Single mount: rsync directly into the stack folder (no extra nesting).
		dstPath := destDir
		if len(mounts) > 1 {
			// Multiple mounts: use the leaf directory name as subfolder and
			// disambiguate duplicates (e.g. two mounts both named "data").
			leaf := filepath.Base(srcMount)
			name := leaf
			for n := i; usedNames[name]; n++ {
				name = fmt.Sprintf("%s_%d", leaf, n)
			}
			usedNames[name] = true
			dstPath = filepath.Join(destDir, name)
		}

		if err := os.MkdirAll(dstPath, 0755); err != nil {
			return fmt.Errorf("creating rsync destination: %w", err)
		}

		// Trailing slash on the source makes rsync copy the directory's
		// contents rather than the directory itself.
		src := strings.TrimRight(srcMount, "/") + "/"
		dst := strings.TrimRight(dstPath, "/") + "/"

		// Exclude app-internal DB dump files — the controller handles DB
		// backups via pg_dump separately.
		cmd := exec.CommandContext(ctx, "rsync", "-a", "--delete",
			"--exclude", "backups/*.sql.gz",
			"--exclude", "backups/*.sql",
			"--exclude", "backups/*.dump",
			src, dst)
		r.logger.Printf("[DEBUG] rsync: %s → %s", src, dst)
		if out, err := cmd.CombinedOutput(); err != nil {
			return fmt.Errorf("rsync failed for %s: %v (%s)", srcMount, err, strings.TrimSpace(string(out)))
		}
	}

	// --- Copy DB dumps for this stack from its home drive ---
	dbDestDir := filepath.Join(destDir, "_db")
	if err := os.MkdirAll(dbDestDir, 0755); err != nil {
		return fmt.Errorf("creating DB dump dest dir: %w", err)
	}
	if err := r.copyStackDBDumps(stackName, dbDestDir); err != nil {
		// Non-fatal: user data is the primary concern.
		r.logger.Printf("[WARN] Cross-drive DB dump copy failed for %s: %v", stackName, err)
	}

	// --- Rsync app config (compose dir) ---
	if composePath, ok := r.stackProvider.GetStackComposePath(stackName); ok {
		configDestDir := filepath.Join(destDir, "_config")
		if err := os.MkdirAll(configDestDir, 0755); err != nil {
			return fmt.Errorf("creating config dest dir: %w", err)
		}
		src := strings.TrimRight(filepath.Dir(composePath), "/") + "/"
		dst := strings.TrimRight(configDestDir, "/") + "/"
		cmd := exec.CommandContext(ctx, "rsync", "-a", "--delete", src, dst)
		r.logger.Printf("[DEBUG] rsync config: %s → %s", src, dst)
		if out, err := cmd.CombinedOutput(); err != nil {
			// Non-fatal.
			r.logger.Printf("[WARN] Cross-drive config rsync failed for %s: %v (%s)", stackName, err, strings.TrimSpace(string(out)))
		}
	}
	return nil
}
// --- restic ---
// runResticBackup snapshots an app into the restic repository located under
// destBase, initializing the repository on first use.
//
// The snapshot is tagged with the stack name and "cross-drive" and contains:
// the app's HDD mounts, its compose directory (when known), its DB dump
// directory (when present), the stacks directory (when configured) and
// controller.yaml (when present).
//
// The repository password is handed to restic through a temporary file that
// is removed when the function returns. After a successful backup old
// snapshots are pruned; pruning problems are logged by pruneResticRepo and do
// not fail the backup.
func (r *CrossDriveRunner) runResticBackup(ctx context.Context, stackName, destBase string, mounts []string) error {
	repoPath := SecondaryResticRepoPath(destBase)

	// Get or create the cross-drive restic password.
	password, err := r.sett.GetOrCreateCrossDrivePassword()
	if err != nil {
		return fmt.Errorf("getting restic password: %w", err)
	}

	// Write the password to a temp file. The removal is registered right away
	// so no return path can leave the secret on disk; every path closes the
	// file before returning, i.e. before the deferred removal runs.
	pwFile, err := os.CreateTemp("", "felhom-crossdrive-pw-*")
	if err != nil {
		return fmt.Errorf("creating password file: %w", err)
	}
	pwPath := pwFile.Name()
	defer os.Remove(pwPath)
	if _, err := pwFile.WriteString(password); err != nil {
		pwFile.Close()
		return fmt.Errorf("writing password file: %w", err)
	}
	// A failed close can mean the password never reached the disk intact.
	if err := pwFile.Close(); err != nil {
		return fmt.Errorf("closing password file: %w", err)
	}

	// Ensure repo is initialized.
	if err := r.ensureResticRepo(ctx, repoPath, pwPath); err != nil {
		return err
	}

	args := []string{
		"backup", "--repo", repoPath,
		"--password-file", pwPath,
		"--tag", stackName,
		"--tag", "cross-drive",
	}

	// User data (HDD mounts).
	args = append(args, mounts...)

	// App config dir (compose + app.yaml + .felhom.yml).
	if composePath, ok := r.stackProvider.GetStackComposePath(stackName); ok {
		args = append(args, filepath.Dir(composePath))
	}

	// DB dump dir for this app (from its home drive).
	dumpDir := AppDBDumpPath(r.getAppDrivePath(stackName), stackName)
	if _, err := os.Stat(dumpDir); err == nil {
		args = append(args, dumpDir)
	}

	// Infrastructure paths (same as primary restic). An unset stacks dir is
	// skipped rather than passed to restic as an empty path argument.
	if r.stacksDir != "" {
		args = append(args, r.stacksDir)
	}
	if _, err := os.Stat(r.controllerYAMLPath); err == nil {
		args = append(args, r.controllerYAMLPath)
	}

	cmd := exec.CommandContext(ctx, "restic", args...)
	r.logger.Printf("[DEBUG] restic backup: %v", args)
	if out, err := cmd.CombinedOutput(); err != nil {
		return fmt.Errorf("restic backup failed: %v (%s)", err, strings.TrimSpace(string(out)))
	}

	// Prune old snapshots to prevent unbounded accumulation.
	return r.pruneResticRepo(ctx, repoPath, pwPath)
}
// pruneResticRepo runs "restic forget --prune" on the cross-drive repository,
// keeping 7 daily and 4 weekly snapshots.
//
// Pruning is best-effort housekeeping: a failure is logged as a warning and
// the function still returns nil, so it never turns a successful backup into
// a failed one.
func (r *CrossDriveRunner) pruneResticRepo(ctx context.Context, repoPath, pwPath string) error {
	forget := exec.CommandContext(ctx, "restic",
		"forget", "--repo", repoPath,
		"--password-file", pwPath,
		"--keep-daily", "7",
		"--keep-weekly", "4",
		"--prune",
	)
	r.logger.Printf("[DEBUG] restic forget (prune): %s", repoPath)

	output, runErr := forget.CombinedOutput()
	if runErr != nil {
		// Deliberately swallowed after logging — see the doc comment.
		r.logger.Printf("[WARN] restic forget failed for %s: %v (%s)", repoPath, runErr, strings.TrimSpace(string(output)))
	}
	return nil
}
// ensureResticRepo makes sure a restic repository exists at repoPath.
//
// The presence of the repository's "config" file is taken as proof that it is
// already initialized. Otherwise the directory is created and "restic init"
// is run with the password read from pwFile. An error is returned when the
// directory cannot be created or the init command fails.
func (r *CrossDriveRunner) ensureResticRepo(ctx context.Context, repoPath, pwFile string) error {
	marker := filepath.Join(repoPath, "config")
	if _, statErr := os.Stat(marker); statErr == nil {
		// Already initialized — nothing to do.
		return nil
	}

	if mkErr := os.MkdirAll(repoPath, 0755); mkErr != nil {
		return fmt.Errorf("creating restic repo dir: %w", mkErr)
	}

	initCmd := exec.CommandContext(ctx, "restic", "init", "--repo", repoPath, "--password-file", pwFile)
	r.logger.Printf("[INFO] Initializing cross-drive restic repo at %s", repoPath)
	output, runErr := initCmd.CombinedOutput()
	if runErr != nil {
		return fmt.Errorf("restic init failed: %v (%s)", runErr, strings.TrimSpace(string(output)))
	}
	return nil
}
// copyStackDBDumps copies DB dump files for the given stack from its home drive
// into destDir.
// DB dumps are at <drive>/backups/primary/<stack>/db-dumps/<stack>_<dbtype>.sql.
//
// Only the regular entries directly inside the dump directory are copied;
// subdirectories are skipped. Each file is streamed from source to
// destination instead of being loaded into memory, because dumps can be much
// larger than is reasonable to buffer. Destination files are created (or
// truncated) with mode 0644.
//
// A missing dump directory is not an error (the app simply has no dumps).
// The first read or write failure aborts the copy and is returned.
func (r *CrossDriveRunner) copyStackDBDumps(stackName, destDir string) error {
	dumpDir := AppDBDumpPath(r.getAppDrivePath(stackName), stackName)

	entries, err := os.ReadDir(dumpDir)
	if err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return fmt.Errorf("reading DB dump dir: %w", err)
	}

	// streamFile copies one dump without buffering it whole. (*os.File).ReadFrom
	// streams the data and lets the OS use an in-kernel copy where available.
	streamFile := func(name string) error {
		in, err := os.Open(filepath.Join(dumpDir, name))
		if err != nil {
			return fmt.Errorf("reading %s: %w", name, err)
		}
		defer in.Close()

		out, err := os.OpenFile(filepath.Join(destDir, name), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
		if err != nil {
			return fmt.Errorf("writing %s: %w", name, err)
		}
		if _, err := out.ReadFrom(in); err != nil {
			out.Close()
			return fmt.Errorf("copying %s: %w", name, err)
		}
		// Close can surface a deferred write error, so it must be checked.
		if err := out.Close(); err != nil {
			return fmt.Errorf("writing %s: %w", name, err)
		}
		return nil
	}

	copied := 0
	for _, e := range entries {
		if e.IsDir() {
			continue
		}
		if err := streamFile(e.Name()); err != nil {
			return err
		}
		copied++
	}

	if copied > 0 {
		r.logger.Printf("[DEBUG] Copied %d DB dump file(s) to %s", copied, destDir)
	}
	return nil
}
// --- infra backup ---
// syncInfraConfig rsyncs infrastructure config (stacks dir + controller.yaml) to all
// secondary backup destinations. Runs once per RunAllScheduled cycle, before per-app backups.
//
// Destinations are the distinct, non-empty DestinationPath values of all
// enabled cross-drive configs. For each one the stacks directory is mirrored
// into <infra>/stacks/ and controller.yaml (when it exists) is copied to
// <infra>/controller.yaml.
//
// The whole operation is best-effort: nothing is returned, and every failed
// step is logged as a warning. A destination is reported as synced only when
// all of its steps succeeded; otherwise a partial-sync warning is logged.
// Remaining destinations are skipped once ctx is cancelled. If no stacks
// directory is configured the stacks step is skipped — an empty path must
// never be turned into the rsync source "/".
func (r *CrossDriveRunner) syncInfraConfig(ctx context.Context) {
	// Collect unique destination drives from enabled cross-drive configs.
	destDrives := make(map[string]bool)
	for _, cfg := range r.sett.GetAllCrossDriveConfigs() {
		if cfg.Enabled && cfg.DestinationPath != "" {
			destDrives[cfg.DestinationPath] = true
		}
	}
	if len(destDrives) == 0 {
		return
	}

	haveStacksDir := r.stacksDir != ""
	if !haveStacksDir {
		r.logger.Printf("[WARN] Stacks dir is not configured — skipping stacks in infra backup")
	}

	for dest := range destDrives {
		if ctx.Err() != nil {
			// Cancelled: the rsync below would be killed immediately anyway.
			return
		}

		infraDir := SecondaryInfraPath(dest)
		if err := os.MkdirAll(infraDir, 0755); err != nil {
			r.logger.Printf("[WARN] Cannot create infra backup dir %s: %v", infraDir, err)
			continue
		}

		complete := haveStacksDir

		// Rsync stacks dir → _infra/stacks/
		if haveStacksDir {
			stacksDest := filepath.Join(infraDir, "stacks") + "/"
			if err := os.MkdirAll(stacksDest, 0755); err != nil {
				r.logger.Printf("[WARN] Cannot create infra stacks dir %s: %v", stacksDest, err)
				complete = false
			} else {
				stacksSrc := strings.TrimRight(r.stacksDir, "/") + "/"
				cmd := exec.CommandContext(ctx, "rsync", "-a", "--delete", stacksSrc, stacksDest)
				if out, err := cmd.CombinedOutput(); err != nil {
					r.logger.Printf("[WARN] Infra rsync (stacks) failed for %s: %v (%s)", dest, err, strings.TrimSpace(string(out)))
					complete = false
				}
			}
		}

		// Copy controller.yaml → _infra/controller.yaml (absent file is fine).
		if _, err := os.Stat(r.controllerYAMLPath); err == nil {
			yamlDest := filepath.Join(infraDir, "controller.yaml")
			data, err := os.ReadFile(r.controllerYAMLPath)
			if err != nil {
				r.logger.Printf("[WARN] Cannot read controller.yaml for infra backup: %v", err)
				complete = false
			} else if err := os.WriteFile(yamlDest, data, 0644); err != nil {
				r.logger.Printf("[WARN] Cannot write controller.yaml to %s: %v", yamlDest, err)
				complete = false
			}
		}

		if complete {
			r.logger.Printf("[INFO] Infrastructure config synced to %s", infraDir)
		} else {
			r.logger.Printf("[WARN] Infrastructure config only partially synced to %s", infraDir)
		}
	}
}
// --- auto-enable ---
// AutoEnableSmallApps turns on a daily rsync cross-drive backup for every
// deployed app that (a) has no cross-drive config entry yet and (b) has no
// HDD mounts.
//
// It does nothing unless at least two storage paths are registered. The
// destination is the first registered storage path that differs from the
// app's home drive; apps for which no such path exists are skipped. An
// existing config entry — enabled or not — is never touched, so a user's
// explicit choice always wins. Failures to store a config are logged and do
// not stop processing of the remaining apps.
func (r *CrossDriveRunner) AutoEnableSmallApps() {
	storagePaths := r.sett.GetStoragePaths()
	if len(storagePaths) < 2 {
		// No secondary drive available.
		return
	}

	deployed := r.stackProvider.ListDeployedStacks()
	existingConfigs := r.sett.GetAllCrossDriveConfigs()

	for _, stack := range deployed {
		name := stack.Name

		// The user (or a previous auto-enable) already configured this app.
		if _, configured := existingConfigs[name]; configured {
			continue
		}
		// Apps with HDD mounts hold large user data and need manual config.
		if len(r.stackProvider.GetStackHDDMounts(name)) > 0 {
			continue
		}

		// Pick the first storage path that is not the app's own drive.
		homeDrive := r.getAppDrivePath(name)
		destPath := ""
		for _, candidate := range storagePaths {
			if candidate.Path == homeDrive {
				continue
			}
			destPath = candidate.Path
			break
		}
		if destPath == "" {
			continue
		}

		setErr := r.sett.SetCrossDriveConfig(name, &settings.CrossDriveBackup{
			Enabled:         true,
			Method:          "rsync",
			DestinationPath: destPath,
			Schedule:        "daily",
		})
		if setErr != nil {
			r.logger.Printf("[WARN] Auto-enable Tier 2 failed for %s: %v", name, setErr)
			continue
		}
		r.logger.Printf("[INFO] Auto-enabled Tier 2 backup for %s → %s (no HDD mounts, daily rsync)", name, destPath)
	}
}
// --- helpers ---
// updateStatus records the outcome of a backup run in the app's cross-drive
// settings entry: the current UTC time (RFC 3339) as last run, the status
// string, the error message, and the duration rounded to whole seconds.
// The stored size is only overwritten when sizeHuman is non-empty, so a run
// that could not measure its size keeps the previously recorded value.
// The result of the settings update itself is intentionally discarded.
func (r *CrossDriveRunner) updateStatus(stackName, status, errMsg string, duration time.Duration, sizeHuman string) {
	took := duration.Round(time.Second).String()
	record := func(c *settings.CrossDriveBackup) {
		c.LastRun = time.Now().UTC().Format(time.RFC3339)
		c.LastStatus = status
		c.LastError = errMsg
		c.LastDuration = took
		if sizeHuman == "" {
			return
		}
		c.LastSizeHuman = sizeHuman
	}
	_ = r.sett.UpdateCrossDriveStatus(stackName, record)
}
// dirSizeBytes sums the sizes of all non-directory entries found under path
// (recursively).
//
// Any error encountered while walking — a missing root, a permission problem,
// an I/O failure — stops the walk and is returned together with the total
// accumulated up to that point.
func dirSizeBytes(path string) (int64, error) {
	var sum int64
	visit := func(_ string, fi os.FileInfo, walkErr error) error {
		if walkErr != nil {
			// Propagate instead of silently under-counting.
			return walkErr
		}
		if fi.IsDir() {
			return nil
		}
		sum += fi.Size()
		return nil
	}
	walkErr := filepath.Walk(path, visit)
	return sum, walkErr
}