slice 8C Phase B.2 + C.1/C.2: retire disk subsystem + rewire disk mgmt to agent

Retired (~12.3k LOC): internal/storage/* (scan/format/attach/migrate/safety),
backup restic/crossdrive/restore_drives/disk_layout/local_infra/restore_scan/
paths + restore_app, report/infra_backup*/infra_pull, setup/scanner,
monitor/watchdog+pinger, web/storage_handlers+handler_restore. Surgically split
backup.Manager to app-data only (DB dumps + volume tars + app restore; dropped
restic + cross-drive + snapshot history). Fixed router/main/web wiring.
Added agent-backed disk API (web/agent_disk_handlers.go): /api/disks list/
assign/eject/format proxying agentapi; data-bearing format refusal -> HTTP 409
'operator authorization required'. report/config_pull.go keeps the setup
fresh-install config download. go build + go test green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-10 13:57:27 +02:00
parent 0294513906
commit abe4e8e619
47 changed files with 404 additions and 12317 deletions
File diff suppressed because it is too large Load Diff
-734
View File
@@ -1,734 +0,0 @@
package backup
import (
"context"
"fmt"
"io"
"log"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"time"
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
"gitea.dooplex.hu/admin/felhom-controller/internal/util"
)
// DBDumper can run a database dump for a specific stack.
// The cross-drive runner calls it to refresh a stack's DB dump right before
// that stack's data is copied to secondary storage.
type DBDumper interface {
	// DumpStackDB dumps the database(s) of the named stack. The runner treats
	// a returned error as non-fatal: it is logged and the backup continues.
	DumpStackDB(ctx context.Context, stackName string) error
}
// VolumeDumper can dump Docker named volumes for a specific stack.
type VolumeDumper interface {
	// DumpAppVolumes dumps the named stack's volumes.
	DumpAppVolumes(stackName string) error
	// DumpAppVolumesSafe is the variant the cross-drive runner uses before a backup.
	DumpAppVolumesSafe(stackName string) error // stops stack before dump, restarts after
}
// CrossDriveRunner handles per-app backup to secondary storage.
// It rsyncs an app's HDD mounts, DB dumps, volume dumps and compose directory
// to a configured destination drive and records the outcome in settings.
// The mu/running pair guards against two backups of the same app overlapping.
type CrossDriveRunner struct {
	sett *settings.Settings // cross-drive configs, storage-path state, status updates
	stackProvider StackDataProvider // resolves a stack's HDD path, HDD mounts and compose path
	dbDumper DBDumper // optional; set via SetDBDumper, nil disables the pre-backup DB dump
	volDumper VolumeDumper // optional; set via SetVolumeDumper, nil disables the pre-backup volume dump
	systemDataPath string // fallback drive for SSD-only apps
	stacksDir string // path to stacks dir (for infra backup)
	controllerYAMLPath string // path to controller.yaml (for infra backup)
	logger *log.Logger
	debug bool // enables the verbose [DEBUG] log lines
	mu sync.Mutex // protects running
	running map[string]bool // per-app running state
}
// NewCrossDriveRunner builds a CrossDriveRunner wired to the given settings
// store and stack data provider. The DB and volume dumpers are left unset and
// must be attached afterwards with SetDBDumper / SetVolumeDumper. The
// controller.yaml location used for infra backups is fixed to the standard
// install path.
func NewCrossDriveRunner(sett *settings.Settings, provider StackDataProvider, systemDataPath, stacksDir string, logger *log.Logger, debug bool) *CrossDriveRunner {
	runner := new(CrossDriveRunner)
	runner.sett = sett
	runner.stackProvider = provider
	runner.systemDataPath = systemDataPath
	runner.stacksDir = stacksDir
	runner.controllerYAMLPath = "/opt/docker/felhom-controller/controller.yaml"
	runner.logger = logger
	runner.debug = debug
	runner.running = make(map[string]bool)
	return runner
}
// SetDBDumper sets the DB dumper for pre-backup database dumps.
// Called after backup manager is initialized (avoids circular init dependency).
// The field is assigned without taking r.mu.
func (r *CrossDriveRunner) SetDBDumper(d DBDumper) {
	r.dbDumper = d
}
// SetVolumeDumper sets the volume dumper for pre-backup Docker volume dumps.
// The field is assigned without taking r.mu.
func (r *CrossDriveRunner) SetVolumeDumper(d VolumeDumper) {
	r.volDumper = d
}
// GetAppDrivePath reports the drive an app lives on: the HDD path returned by
// the stack provider, or the system data path when the provider has none.
func (r *CrossDriveRunner) GetAppDrivePath(stackName string) string {
	hddPath := r.stackProvider.GetStackHDDPath(stackName)
	if hddPath == "" {
		return r.systemDataPath
	}
	return hddPath
}
// RunAppBackup runs cross-drive backup for a single app.
//
// It returns an error when the app has no enabled cross-drive config, when a
// backup for the same app is already running, when the destination fails
// validation or overlaps a source mount, or when the rsync step fails. It
// returns nil on success and also when the run is skipped because the source
// or destination drive is disconnected, unregistered or inactive.
//
// Destination validation and the source/destination overlap check run BEFORE
// the pre-backup DB and volume dumps: the volume dump stops and restarts the
// stack, so the app must not be taken down for a backup that is going to be
// rejected anyway.
func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) error {
	cfg := r.sett.GetCrossDriveConfig(stackName)
	if cfg == nil || !cfg.Enabled {
		return fmt.Errorf("cross-drive backup not configured or disabled for %s", stackName)
	}
	if r.debug {
		r.logger.Printf("[DEBUG] RunAppBackup: starting for %s, dest=%s, schedule=%s, method=%s",
			stackName, cfg.DestinationPath, cfg.Schedule, cfg.Method)
	}

	// Prevent concurrent runs for the same app.
	r.mu.Lock()
	if r.running[stackName] {
		r.mu.Unlock()
		return fmt.Errorf("cross-drive backup already running for %s", stackName)
	}
	r.running[stackName] = true
	r.mu.Unlock()
	defer func() {
		r.mu.Lock()
		r.running[stackName] = false
		r.mu.Unlock()
	}()

	// Check if source or destination drive is disconnected — skip silently (not an error).
	srcDrive := r.stackProvider.GetStackHDDPath(stackName)
	if srcDrive != "" && r.sett.IsDisconnected(srcDrive) {
		r.logger.Printf("[WARN] [backup] Cross-drive backup skipped for %s: source drive disconnected (%s)", stackName, srcDrive)
		return nil
	}
	if r.sett.IsDisconnected(cfg.DestinationPath) {
		r.logger.Printf("[WARN] [backup] Cross-drive backup skipped for %s: destination drive disconnected (%s)", stackName, cfg.DestinationPath)
		return nil
	}
	if !r.sett.IsStoragePathKnown(cfg.DestinationPath) {
		r.logger.Printf("[WARN] [backup] Cross-drive backup skipped for %s: destination not a registered storage (%s)", stackName, cfg.DestinationPath)
		return nil
	}
	if !r.sett.IsStoragePathSchedulable(cfg.DestinationPath) {
		r.logger.Printf("[WARN] [backup] Cross-drive backup skipped for %s: destination drive inactive (%s)", stackName, cfg.DestinationPath)
		return nil
	}

	// Mark as running in settings. Best-effort: a failed status write must not block the backup.
	_ = r.sett.UpdateCrossDriveStatus(stackName, func(c *settings.CrossDriveBackup) {
		c.LastStatus = "running"
	})
	start := time.Now()
	r.logger.Printf("[INFO] [backup] Cross-drive backup starting: %s → %s (rsync)",
		stackName, cfg.DestinationPath)

	// Cheap, non-disruptive checks first.
	if err := r.ValidateDestination(cfg.DestinationPath); err != nil {
		r.updateStatus(stackName, "error", err.Error(), time.Since(start), "")
		return fmt.Errorf("destination validation failed: %w", err)
	}

	// Resolve HDD mounts for this app (may be empty for config-only apps).
	mounts := r.stackProvider.GetStackHDDMounts(stackName)
	if r.debug {
		r.logger.Printf("[DEBUG] RunAppBackup: %s has %d HDD mount(s): %v", stackName, len(mounts), mounts)
	}

	// Safety: destination must not overlap with any source.
	for _, m := range mounts {
		if system.PathsOverlap(cfg.DestinationPath, m) {
			msg := fmt.Sprintf("destination %s overlaps with source %s — aborted", cfg.DestinationPath, m)
			r.updateStatus(stackName, "error", msg, time.Since(start), "")
			return fmt.Errorf("%s", msg)
		}
	}

	// Trigger fresh DB dump for this app before cross-drive backup.
	if r.dbDumper != nil {
		if r.debug {
			r.logger.Printf("[DEBUG] RunAppBackup: triggering pre-backup DB dump for %s", stackName)
		}
		if err := r.dbDumper.DumpStackDB(ctx, stackName); err != nil {
			r.logger.Printf("[WARN] [backup] Pre-backup DB dump failed for %s: %v — proceeding with user data backup", stackName, err)
		}
	}

	// Trigger fresh volume dump for this app before cross-drive backup.
	if r.volDumper != nil {
		if r.debug {
			r.logger.Printf("[DEBUG] RunAppBackup: triggering pre-backup volume dump for %s", stackName)
		}
		if err := r.volDumper.DumpAppVolumesSafe(stackName); err != nil {
			r.logger.Printf("[WARN] [backup] Pre-backup volume dump failed for %s: %v — proceeding with backup", stackName, err)
		}
	}

	runErr := r.runRsyncBackup(ctx, stackName, cfg.DestinationPath, mounts)
	duration := time.Since(start)
	if runErr != nil {
		r.logger.Printf("[ERROR] [backup] Cross-drive backup failed: %s: %v", stackName, runErr)
		r.updateStatus(stackName, "error", runErr.Error(), duration, "")
		return runErr
	}

	// Calculate backup size. A failed size walk only leaves the size blank;
	// it does not fail the backup.
	var sizeHuman string
	destDir := AppSecondaryRsyncPath(cfg.DestinationPath, stackName)
	if sz, err := dirSizeBytes(destDir); err == nil {
		sizeHuman = humanizeBytes(sz)
		if r.debug {
			r.logger.Printf("[DEBUG] RunAppBackup: %s backup size at destination: %s", stackName, sizeHuman)
		}
	}

	r.logger.Printf("[INFO] [backup] Cross-drive backup completed: %s (%s)", stackName, duration.Round(time.Second))
	r.updateStatus(stackName, "ok", "", duration, sizeHuman)
	return nil
}
// RunAllScheduled backs up every enabled app whose configured schedule equals
// the given one. Apps are processed one after another (disk I/O bound).
// Before the per-app runs it auto-enables small apps and syncs the infra
// config. Per-app failures are collected and returned as one combined error;
// a cancelled context aborts the loop and returns the context error.
func (r *CrossDriveRunner) RunAllScheduled(ctx context.Context, schedule string) error {
	if r.debug {
		r.logger.Printf("[DEBUG] RunAllScheduled: starting for schedule=%s", schedule)
	}

	// Auto-enable Tier 2 for small apps (no HDD mounts), then push the
	// infrastructure config to all secondary destinations.
	r.AutoEnableSmallApps()
	r.syncInfraConfig(ctx)

	configs := r.sett.GetAllCrossDriveConfigs()
	total := len(configs)
	if total == 0 {
		if r.debug {
			r.logger.Printf("[DEBUG] RunAllScheduled: no cross-drive configs found")
		}
		return nil
	}
	if r.debug {
		r.logger.Printf("[DEBUG] RunAllScheduled: %d total cross-drive config(s) found", total)
	}

	var (
		failures                         []string
		attempted, disabled, offSchedule int
	)
	r.logger.Printf("[INFO] [backup] Cross-drive backup: starting scheduled run for %d configured app(s), schedule=%s", total, schedule)

	for stackName, cfg := range configs {
		switch {
		case !cfg.Enabled:
			if r.debug {
				r.logger.Printf("[DEBUG] RunAllScheduled: skipping %s — disabled", stackName)
			}
			disabled++
			continue
		case cfg.Schedule != schedule:
			if r.debug {
				r.logger.Printf("[DEBUG] RunAllScheduled: skipping %s — schedule mismatch (has=%s, want=%s)", stackName, cfg.Schedule, schedule)
			}
			offSchedule++
			continue
		}

		if r.debug {
			r.logger.Printf("[DEBUG] RunAllScheduled: queuing %s for backup (dest=%s)", stackName, cfg.DestinationPath)
		}
		attempted++

		// Stop between apps once the caller has cancelled.
		if err := ctx.Err(); err != nil {
			return err
		}
		if err := r.RunAppBackup(ctx, stackName); err != nil {
			failures = append(failures, fmt.Sprintf("%s: %v", stackName, err))
		}
	}

	failed := len(failures)
	if r.debug {
		r.logger.Printf("[DEBUG] RunAllScheduled: done — %d scheduled, %d disabled, %d wrong schedule, %d errors",
			attempted, disabled, offSchedule, failed)
	}
	r.logger.Printf("[INFO] [backup] Cross-drive backup complete: %d succeeded, %d failed", attempted-failed, failed)
	if failed == 0 {
		return nil
	}
	return fmt.Errorf("cross-drive backup errors: %s", strings.Join(failures, "; "))
}
// RunAllConfigured backs up every enabled app regardless of its schedule.
// The debug page uses it to trigger all backups at once. Like
// RunAllScheduled, it first auto-enables small apps and syncs the infra
// config, collects per-app failures into one error, and stops early with the
// context error once the context is cancelled.
func (r *CrossDriveRunner) RunAllConfigured(ctx context.Context) error {
	if r.debug {
		r.logger.Printf("[DEBUG] RunAllConfigured: starting for all enabled apps")
	}
	r.AutoEnableSmallApps()
	r.syncInfraConfig(ctx)

	configs := r.sett.GetAllCrossDriveConfigs()
	total := len(configs)
	if total == 0 {
		return nil
	}

	var (
		failures  []string
		attempted int
	)
	r.logger.Printf("[INFO] [backup] Cross-drive backup: starting all configured app(s), %d total", total)
	for stackName, cfg := range configs {
		if !cfg.Enabled {
			continue
		}
		if err := ctx.Err(); err != nil {
			return err
		}
		attempted++
		if err := r.RunAppBackup(ctx, stackName); err != nil {
			failures = append(failures, fmt.Sprintf("%s: %v", stackName, err))
		}
	}

	failed := len(failures)
	if r.debug {
		r.logger.Printf("[DEBUG] RunAllConfigured: done — %d ran, %d errors", attempted, failed)
	}
	r.logger.Printf("[INFO] [backup] Cross-drive backup complete: %d succeeded, %d failed", attempted-failed, failed)
	if failed == 0 {
		return nil
	}
	return fmt.Errorf("cross-drive errors: %s", strings.Join(failures, "; "))
}
// IsRunning reports whether a backup of the given app is in progress right now.
func (r *CrossDriveRunner) IsRunning(stackName string) bool {
	r.mu.Lock()
	inProgress := r.running[stackName]
	r.mu.Unlock()
	return inProgress
}
// AnyRunning reports whether at least one app's cross-drive backup is in progress.
func (r *CrossDriveRunner) AnyRunning() bool {
	r.mu.Lock()
	defer r.mu.Unlock()

	active := false
	for _, inProgress := range r.running {
		if inProgress {
			active = true
			break
		}
	}
	return active
}
// ValidateDestination checks that the destination path exists, is an
// accessible directory, is writable, and has sufficient free space.
// System-drive destinations get stricter limits (≥10 GB free, <90% used) to
// protect OS stability; external drives just need ≥100 MB. Non-mount-point
// destinations are allowed with a logged warning.
//
// It returns nil when the destination is usable (or when disk usage cannot be
// determined, in which case a warning is logged), and a descriptive error
// otherwise.
func (r *CrossDriveRunner) ValidateDestination(path string) error {
	if r.debug {
		r.logger.Printf("[DEBUG] ValidateDestination: checking path=%s", path)
	}
	if path == "" {
		return fmt.Errorf("destination path is empty")
	}
	if r.sett.IsDecommissioned(path) {
		return fmt.Errorf("destination %s is decommissioned — choose an active drive", path)
	}

	// Report every stat failure explicitly instead of only "does not exist":
	// permission and I/O errors used to fall through to the later checks and
	// surface as a misleading message.
	info, err := os.Stat(path)
	switch {
	case os.IsNotExist(err):
		return fmt.Errorf("destination %s does not exist", path)
	case err != nil:
		return fmt.Errorf("destination %s is not accessible: %w", path, err)
	case !info.IsDir():
		return fmt.Errorf("destination %s is not a directory", path)
	}

	onSystemDrive := !system.IsMountPoint(path)
	if r.debug {
		r.logger.Printf("[DEBUG] ValidateDestination: path=%s, isMountPoint=%v", path, !onSystemDrive)
	}
	if onSystemDrive {
		r.logger.Printf("[WARN] [backup] Destination %s is not a separate mount point (system drive) — backup will proceed but data is not protected against drive failure", path)
	}
	if !system.IsWritable(path) {
		return fmt.Errorf("destination %s is not writable", path)
	}

	di := system.GetDiskUsage(path)
	if di == nil {
		// Unknown usage is not treated as a failure; the backup proceeds unverified.
		r.logger.Printf("[WARN] [backup] Cannot determine disk usage for %s — proceeding without space verification", path)
		return nil
	}
	if r.debug {
		r.logger.Printf("[DEBUG] ValidateDestination: path=%s, availGB=%.1f, usedPct=%.0f%%, onSystemDrive=%v",
			path, di.AvailGB, di.UsedPercent, onSystemDrive)
	}

	if onSystemDrive {
		// System drive: protect OS stability — require ≥10 GB free and <90% used.
		if di.AvailGB < 10 {
			return fmt.Errorf("destination %s is on the system drive with only %.1f GB free — at least 10 GB required to protect OS stability", path, di.AvailGB)
		}
		if di.UsedPercent >= 90 {
			return fmt.Errorf("destination %s is on the system drive at %.0f%% capacity — maximum 90%% allowed", path, di.UsedPercent)
		}
	} else if di.AvailGB < 0.1 {
		// External drive: just ensure it's not completely full.
		return fmt.Errorf("destination %s has insufficient free space (%.1f GB free)", path, di.AvailGB)
	}

	if r.debug {
		r.logger.Printf("[DEBUG] ValidateDestination: path=%s passed all checks", path)
	}
	return nil
}
// --- rsync ---
// runRsyncBackup mirrors one app to <destBase>'s per-app rsync directory.
//
// Steps, in order:
//  1. rsync every HDD mount into the app's destination directory (a failure
//     here aborts the backup and is returned);
//  2. copy the app's DB dumps into _db/ (failure is logged, non-fatal);
//  3. copy the app's volume dump tars into _volumes/ (logged, non-fatal);
//  4. rsync the compose directory into _config/ (logged, non-fatal).
//
// Failing to create any of the destination directories is returned as an error.
// The rsync processes are bound to ctx via exec.CommandContext.
func (r *CrossDriveRunner) runRsyncBackup(ctx context.Context, stackName, destBase string, mounts []string) error {
	destDir := AppSecondaryRsyncPath(destBase, stackName)
	if r.debug {
		r.logger.Printf("[DEBUG] runRsyncBackup: stack=%s, destBase=%s, destDir=%s, %d mount(s)", stackName, destBase, destDir, len(mounts))
	}
	if err := os.MkdirAll(destDir, 0755); err != nil {
		return fmt.Errorf("creating rsync dest dir: %w", err)
	}
	// seen tracks the subfolder names already handed out in the multi-mount case.
	seen := make(map[string]bool)
	for _, srcMount := range mounts {
		var dstPath string
		if len(mounts) == 1 {
			// Single mount: rsync directly into the stack folder (no extra nesting)
			dstPath = destDir
		} else {
			// Multiple mounts: use the leaf directory name as subfolder
			leaf := filepath.Base(srcMount)
			if seen[leaf] {
				// Disambiguate duplicate leaf names (e.g. two mounts both named "data")
				// by appending _2, _3, ... until an unused name is found.
				for j := 2; ; j++ {
					candidate := fmt.Sprintf("%s_%d", leaf, j)
					if !seen[candidate] {
						leaf = candidate
						break
					}
				}
			}
			seen[leaf] = true
			dstPath = filepath.Join(destDir, leaf)
		}
		if err := os.MkdirAll(dstPath, 0755); err != nil {
			return fmt.Errorf("creating rsync destination: %w", err)
		}
		// Ensure trailing slash on source for rsync semantics (copy contents, not the dir itself)
		src := strings.TrimRight(srcMount, "/") + "/"
		dst := strings.TrimRight(dstPath, "/") + "/"
		// Exclude controller-managed directories (underscore prefix) to prevent --delete from removing
		// _db/ and _config/ that were created by previous backup runs.
		// Exclude app-internal DB dump files — the controller handles DB backups via pg_dump separately.
		cmd := exec.CommandContext(ctx, "rsync", "-a", "--delete",
			"--exclude", "_*",
			"--exclude", "backups/*.sql.gz",
			"--exclude", "backups/*.sql",
			"--exclude", "backups/*.dump",
			src, dst)
		if r.debug {
			r.logger.Printf("[DEBUG] rsync: %s → %s", src, dst)
		}
		out, err := cmd.CombinedOutput()
		if err != nil {
			if r.debug {
				// The debug log gets a truncated copy of rsync's output; the returned error carries it in full.
				r.logger.Printf("[DEBUG] runRsyncBackup: rsync failed for %s: %s", srcMount, util.TruncateStr(strings.TrimSpace(string(out)), 500))
			}
			r.logger.Printf("[ERROR] [backup] Rsync backup for %s failed: %v", stackName, err)
			return fmt.Errorf("rsync failed for %s: %v (%s)", srcMount, err, strings.TrimSpace(string(out)))
		}
		if r.debug {
			r.logger.Printf("[DEBUG] runRsyncBackup: rsync OK for mount %s → %s", src, dst)
		}
	}
	// --- Copy DB dumps for this stack from its home drive ---
	dbDestDir := filepath.Join(destDir, "_db")
	if err := os.MkdirAll(dbDestDir, 0755); err != nil {
		return fmt.Errorf("creating DB dump dest dir: %w", err)
	}
	if err := r.copyStackDBDumps(stackName, dbDestDir); err != nil {
		r.logger.Printf("[WARN] [backup] Cross-drive DB dump copy failed for %s: %v", stackName, err)
		// Non-fatal: user data is the primary concern
	}
	// --- Copy volume dumps for this stack from its home drive ---
	volDestDir := filepath.Join(destDir, "_volumes")
	if err := os.MkdirAll(volDestDir, 0755); err != nil {
		return fmt.Errorf("creating volume dump dest dir: %w", err)
	}
	if err := r.copyStackVolumeDumps(stackName, volDestDir); err != nil {
		r.logger.Printf("[WARN] [backup] Cross-drive volume dump copy failed for %s: %v", stackName, err)
		// Non-fatal: user data is the primary concern
	}
	// --- Rsync app config (compose dir) ---
	// Skipped entirely when the provider reports no compose path for the stack.
	if composePath, ok := r.stackProvider.GetStackComposePath(stackName); ok {
		configSrcDir := filepath.Dir(composePath)
		configDestDir := filepath.Join(destDir, "_config")
		if err := os.MkdirAll(configDestDir, 0755); err != nil {
			return fmt.Errorf("creating config dest dir: %w", err)
		}
		src := strings.TrimRight(configSrcDir, "/") + "/"
		dst := strings.TrimRight(configDestDir, "/") + "/"
		cmd := exec.CommandContext(ctx, "rsync", "-a", "--delete", src, dst)
		if r.debug {
			r.logger.Printf("[DEBUG] rsync config: %s → %s", src, dst)
		}
		if out, err := cmd.CombinedOutput(); err != nil {
			r.logger.Printf("[WARN] [backup] Cross-drive config rsync failed for %s: %v (%s)", stackName, err, strings.TrimSpace(string(out)))
			// Non-fatal
		}
	}
	r.logger.Printf("[INFO] [backup] Rsync backup for %s to %s complete", stackName, destDir)
	return nil
}
// copyStackDBDumps copies every regular entry of the stack's DB dump
// directory (located on the app's home drive, layout
// <drive>/backups/primary/<stack>/db-dumps/<stack>_<dbtype>.sql) into destDir.
// A missing dump directory is not an error. The first failed copy aborts and
// is returned.
func (r *CrossDriveRunner) copyStackDBDumps(stackName, destDir string) error {
	dumpDir := AppDBDumpPath(r.GetAppDrivePath(stackName), stackName)

	entries, err := os.ReadDir(dumpDir)
	switch {
	case err == nil:
		// directory listed, carry on
	case os.IsNotExist(err):
		return nil
	default:
		return fmt.Errorf("reading DB dump dir: %w", err)
	}

	var copied int
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		name := entry.Name()
		if err := copyFile(filepath.Join(dumpDir, name), filepath.Join(destDir, name)); err != nil {
			return fmt.Errorf("copying %s: %w", name, err)
		}
		copied++
	}
	r.logger.Printf("[INFO] [backup] Copied %d DB dumps for %s", copied, stackName)
	return nil
}
// copyStackVolumeDumps copies the stack's Docker volume dump archives (files
// ending in ".tar") from the app's home drive into destDir. A missing dump
// directory is not an error. The first failed copy aborts and is returned.
// A log line is written only when at least one archive was copied.
func (r *CrossDriveRunner) copyStackVolumeDumps(stackName, destDir string) error {
	dumpDir := AppVolumeDumpPath(r.GetAppDrivePath(stackName), stackName)

	entries, err := os.ReadDir(dumpDir)
	switch {
	case err == nil:
		// directory listed, carry on
	case os.IsNotExist(err):
		return nil
	default:
		return fmt.Errorf("reading volume dump dir: %w", err)
	}

	var copied int
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		name := entry.Name()
		if !strings.HasSuffix(name, ".tar") {
			continue
		}
		if err := copyFile(filepath.Join(dumpDir, name), filepath.Join(destDir, name)); err != nil {
			return fmt.Errorf("copying %s: %w", name, err)
		}
		copied++
	}
	if copied == 0 {
		return nil
	}
	r.logger.Printf("[INFO] [backup] Copied %d volume dump(s) for %s", copied, stackName)
	return nil
}
// --- infra backup ---
// syncInfraConfig rsyncs infrastructure config (stacks dir + controller.yaml) to all
// secondary backup destinations. Runs once per RunAllScheduled cycle, before per-app backups.
//
// Destinations are the unique destination paths of all enabled cross-drive
// configs. A destination is skipped when its drive is disconnected, not a
// registered storage, or inactive — the same conditions under which
// RunAppBackup skips. This stops directories being created underneath the
// mount point of a drive that is not attached. All failures are logged and
// never returned; the sync is best-effort.
func (r *CrossDriveRunner) syncInfraConfig(ctx context.Context) {
	// Collect unique destination drives from enabled cross-drive configs.
	destDrives := make(map[string]bool)
	for _, cfg := range r.sett.GetAllCrossDriveConfigs() {
		if cfg.Enabled && cfg.DestinationPath != "" {
			destDrives[cfg.DestinationPath] = true
		}
	}
	if len(destDrives) == 0 {
		return
	}

	for dest := range destDrives {
		// Stop early once the caller has cancelled; rsync would be killed anyway.
		if ctx.Err() != nil {
			return
		}
		if r.sett.IsDisconnected(dest) || !r.sett.IsStoragePathKnown(dest) || !r.sett.IsStoragePathSchedulable(dest) {
			r.logger.Printf("[WARN] [backup] Infra config sync skipped for %s: destination drive disconnected, unregistered or inactive", dest)
			continue
		}

		infraDir := SecondaryInfraPath(dest)
		if err := os.MkdirAll(infraDir, 0755); err != nil {
			r.logger.Printf("[WARN] [backup] Cannot create infra backup dir %s: %v", infraDir, err)
			continue
		}
		synced := true

		// Rsync stacks dir → _infra/stacks/
		stacksDest := filepath.Join(infraDir, "stacks") + "/"
		if err := os.MkdirAll(stacksDest, 0755); err != nil {
			r.logger.Printf("[WARN] [backup] Cannot create infra stacks dir %s: %v", stacksDest, err)
			synced = false
		} else {
			stacksSrc := strings.TrimRight(r.stacksDir, "/") + "/"
			cmd := exec.CommandContext(ctx, "rsync", "-a", "--delete", stacksSrc, stacksDest)
			if out, err := cmd.CombinedOutput(); err != nil {
				r.logger.Printf("[WARN] [backup] Infra rsync (stacks) failed for %s: %v (%s)", dest, err, strings.TrimSpace(string(out)))
				synced = false
			}
		}

		// Copy controller.yaml → _infra/controller.yaml (atomic via copyFile).
		// A missing controller.yaml is not treated as a failure.
		if _, err := os.Stat(r.controllerYAMLPath); err == nil {
			yamlDest := filepath.Join(infraDir, "controller.yaml")
			if err := copyFile(r.controllerYAMLPath, yamlDest); err != nil {
				r.logger.Printf("[WARN] [backup] Cannot copy controller.yaml to %s: %v", yamlDest, err)
				synced = false
			}
		}

		// Only claim success when every step for this destination worked.
		if synced {
			r.logger.Printf("[INFO] [backup] Infrastructure config synced to %s", infraDir)
		}
	}
}
// --- auto-enable ---
// AutoEnableSmallApps creates a daily rsync cross-drive config for every
// deployed app that has no HDD mounts, provided at least two storage paths
// are registered. An app that already has a cross-drive config — enabled or
// not — is left untouched. The destination is the first storage path that is
// not the app's home drive and is neither disconnected nor decommissioned.
func (r *CrossDriveRunner) AutoEnableSmallApps() {
	storagePaths := r.sett.GetStoragePaths()
	if n := len(storagePaths); n < 2 {
		if r.debug {
			r.logger.Printf("[DEBUG] AutoEnableSmallApps: fewer than 2 storage paths (%d) — skipping", n)
		}
		return // no secondary drive available
	}

	deployed := r.stackProvider.ListDeployedStacks()
	existingConfigs := r.sett.GetAllCrossDriveConfigs()
	if r.debug {
		r.logger.Printf("[DEBUG] AutoEnableSmallApps: %d deployed stacks, %d existing configs, %d storage paths",
			len(deployed), len(existingConfigs), len(storagePaths))
	}

	enabledCount := 0
	for _, stack := range deployed {
		name := stack.Name

		// An existing entry means the user has already touched this app's config.
		if _, configured := existingConfigs[name]; configured {
			if r.debug {
				r.logger.Printf("[DEBUG] AutoEnableSmallApps: skipping %s — already has cross-drive config", name)
			}
			continue
		}

		// Apps with HDD mounts carry large user data and need manual configuration.
		if hddMounts := r.stackProvider.GetStackHDDMounts(name); len(hddMounts) != 0 {
			if r.debug {
				r.logger.Printf("[DEBUG] AutoEnableSmallApps: skipping %s — has %d HDD mount(s)", name, len(hddMounts))
			}
			continue
		}

		// Pick the first usable storage path other than the app's home drive.
		homeDrive := r.GetAppDrivePath(name)
		destPath := ""
		for _, candidate := range storagePaths {
			if candidate.Path == homeDrive || candidate.Disconnected || candidate.Decommissioned {
				continue
			}
			destPath = candidate.Path
			break
		}
		if destPath == "" {
			if r.debug {
				r.logger.Printf("[DEBUG] AutoEnableSmallApps: skipping %s — no suitable destination found", name)
			}
			continue
		}

		// Auto-configure daily rsync.
		err := r.sett.SetCrossDriveConfig(name, &settings.CrossDriveBackup{
			Enabled:         true,
			Method:          "rsync",
			DestinationPath: destPath,
			Schedule:        "daily",
		})
		if err != nil {
			r.logger.Printf("[WARN] [backup] Auto-enable Tier 2 failed for %s: %v", name, err)
			continue
		}
		enabledCount++
		r.logger.Printf("[INFO] [backup] Auto-enabled Tier 2 backup for %s → %s (no HDD mounts, daily rsync)", name, destPath)
	}

	if enabledCount > 0 && r.debug {
		r.logger.Printf("[DEBUG] AutoEnableSmallApps: auto-enabled %d app(s)", enabledCount)
	}
}
// --- helpers ---
// updateStatus records the outcome of a backup run in the app's cross-drive
// settings entry: run time (UTC, RFC 3339), status, error text and rounded
// duration. The stored size is overwritten only when sizeHuman is non-empty.
// A failure to persist the status is deliberately ignored.
func (r *CrossDriveRunner) updateStatus(stackName, status, errMsg string, duration time.Duration, sizeHuman string) {
	roundedDuration := duration.Round(time.Second).String()
	apply := func(entry *settings.CrossDriveBackup) {
		entry.LastRun = time.Now().UTC().Format(time.RFC3339)
		entry.LastStatus = status
		entry.LastError = errMsg
		entry.LastDuration = roundedDuration
		if sizeHuman == "" {
			return
		}
		entry.LastSizeHuman = sizeHuman
	}
	_ = r.sett.UpdateCrossDriveStatus(stackName, apply)
}
// copyFile copies src to dst using buffered streaming I/O (no full-file memory allocation).
//
// The data is first written to dst+".tmp" and then renamed over dst, so a
// reader never observes a half-written dst. On any failure — including a
// failed rename — the temporary file is removed so no stray ".tmp" files
// accumulate next to the destination.
func copyFile(src, dst string) error {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close()

	tmp := dst + ".tmp"
	out, err := os.Create(tmp)
	if err != nil {
		return err
	}
	if _, err := io.Copy(out, in); err != nil {
		out.Close()
		os.Remove(tmp)
		return err
	}
	// Close before rename so buffered write errors surface here.
	if err := out.Close(); err != nil {
		os.Remove(tmp)
		return err
	}
	if err := os.Rename(tmp, dst); err != nil {
		// Previously the temp file was leaked on this path.
		os.Remove(tmp)
		return err
	}
	return nil
}
// dirSizeBytes sums the sizes of all non-directory entries found below path.
// H7: an error reported by the walk (permission, I/O, missing path) stops the
// walk and is returned together with the total accumulated so far.
func dirSizeBytes(path string) (int64, error) {
	var sum int64
	visit := func(_ string, fi os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr // propagate permission/IO errors
		}
		if fi.IsDir() {
			return nil
		}
		sum += fi.Size()
		return nil
	}
	walkErr := filepath.Walk(path, visit)
	return sum, walkErr
}
-19
View File
@@ -1,19 +0,0 @@
package backup
// DiskLayout holds the fstab-derived mount topology for disaster recovery.
// It serializes to JSON as an object with a single "mounts" array.
type DiskLayout struct {
	Mounts []DiskMount `json:"mounts"` // one entry per recorded mount
}
// DiskMount represents a single mount entry from fstab.
// Field meanings below follow the field names and the inline examples;
// NOTE(review): confirm exact semantics against the code that populates them.
type DiskMount struct {
	UUID string `json:"uuid"` // presumably the filesystem UUID used in fstab
	Label string `json:"label"`
	MountPoint string `json:"mount_point"`
	FSType string `json:"fs_type"`
	SizeBytes int64 `json:"size_bytes"`
	FstabOptions string `json:"fstab_options"`
	Role string `json:"role"` // "system_data", "hdd_storage"
	BindSubdir string `json:"bind_subdir"` // e.g., "felhom_data"
	RawMount string `json:"raw_mount"` // e.g., "/mnt/.felhom-raw/hdd_1"
}
-368
View File
@@ -1,368 +0,0 @@
package backup
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"log"
"os"
"path/filepath"
"sort"
"strings"
"time"
)
// Limits for the local infra backup format and its on-drive history.
const (
	// MaxSchemaVersion is the newest infra backup schema version this
	// controller is able to read.
	MaxSchemaVersion = 1

	// maxLocalHistory is how many previous backup versions are retained on
	// each drive.
	maxLocalHistory = 5
)
// InfraMetadata is the lightweight metadata file written alongside backup.json.
// It identifies the backup and carries the checksum of the backup payload.
type InfraMetadata struct {
	SchemaVersion int `json:"schema_version"` // written as 1 by WriteLocalInfraBackup
	Timestamp string `json:"timestamp"` // parsed as RFC 3339 when the backup is rotated into history
	CustomerID string `json:"customer_id"`
	ControllerVersion string `json:"controller_version"`
	Checksum string `json:"checksum"` // SHA256 hex of backup.json
}
// WriteLocalInfraBackup stores the infra backup payload, together with a
// metadata file carrying its SHA-256 checksum, in the infra backup directory
// of every given drive. The operation is best-effort: a drive that cannot be
// written is logged and skipped, and nothing is returned to the caller.
func WriteLocalInfraBackup(backupJSON []byte, customerID, controllerVersion, timestamp string, drives []string, logger *log.Logger, debug bool) {
	total := len(drives)
	if total == 0 {
		logger.Printf("[DEBUG] No drives configured for local infra backup")
		return
	}
	if debug {
		logger.Printf("[DEBUG] WriteLocalInfraBackup: payload size=%d bytes, %d target drive(s): %v", len(backupJSON), total, drives)
	}

	// The checksum lets a reader verify backup.json against metadata.json.
	digest := sha256.Sum256(backupJSON)
	metaJSON, err := json.MarshalIndent(InfraMetadata{
		SchemaVersion:     1,
		Timestamp:         timestamp,
		CustomerID:        customerID,
		ControllerVersion: controllerVersion,
		Checksum:          hex.EncodeToString(digest[:]),
	}, "", " ")
	if err != nil {
		logger.Printf("[ERROR] Local infra backup: failed to marshal metadata: %v", err)
		return
	}

	succeeded := 0
	for _, drive := range drives {
		target := InfraBackupDir(drive)
		if debug {
			logger.Printf("[DEBUG] WriteLocalInfraBackup: writing to drive=%s, dir=%s", drive, target)
		}
		if writeErr := writeInfraToDir(target, backupJSON, metaJSON, logger); writeErr != nil {
			logger.Printf("[WARN] Local infra backup: failed to write to %s: %v", drive, writeErr)
			continue
		}
		if debug {
			logger.Printf("[DEBUG] WriteLocalInfraBackup: write OK to %s", drive)
		}
		succeeded++
	}
	logger.Printf("[INFO] Local infra backup written to %d/%d drive(s)", succeeded, total)
}
// writeInfraToDir installs a new backup in dir: it makes sure dir exists
// (mode 0700), pushes the current backup into history/ on a best-effort
// basis, then atomically writes backup.json followed by metadata.json
// (mode 0600 each). The first failing step is returned wrapped.
func writeInfraToDir(dir string, backupData, metaData []byte, logger *log.Logger) error {
	if mkErr := os.MkdirAll(dir, 0700); mkErr != nil {
		return fmt.Errorf("creating dir: %w", mkErr)
	}

	// History rotation never blocks the new write.
	rotateToHistory(dir, logger)

	if wErr := atomicWrite(filepath.Join(dir, "backup.json"), backupData, 0600); wErr != nil {
		return fmt.Errorf("writing backup.json: %w", wErr)
	}
	if wErr := atomicWrite(filepath.Join(dir, "metadata.json"), metaData, 0600); wErr != nil {
		return fmt.Errorf("writing metadata.json: %w", wErr)
	}
	return nil
}
// rotateToHistory copies the current backup.json + metadata.json into
// history/{timestamp}-backup.json and history/{timestamp}-metadata.json, then
// prunes the history down to maxLocalHistory versions.
//
// The files are copied rather than moved: the originals stay in place and are
// overwritten by the caller's subsequent atomic writes. The whole operation is
// best-effort — nothing is returned — but failures are now logged (when a
// logger is supplied) instead of being silently dropped, so an incomplete
// history entry can be diagnosed. If there is no readable metadata.json the
// function returns quietly, since there is nothing to rotate.
func rotateToHistory(dir string, logger *log.Logger) {
	metaPath := filepath.Join(dir, "metadata.json")
	backupPath := filepath.Join(dir, "backup.json")

	// Read current metadata to get timestamp.
	metaData, err := os.ReadFile(metaPath)
	if err != nil {
		return // no existing backup to rotate
	}
	var meta InfraMetadata
	if err := json.Unmarshal(metaData, &meta); err != nil {
		if logger != nil {
			logger.Printf("[WARN] Local infra history: cannot parse %s, skipping rotation: %v", metaPath, err)
		}
		return
	}

	// Parse timestamp, fall back to file mtime, then to the current time.
	ts := sanitizeTimestamp(meta.Timestamp)
	if ts == "" {
		if fi, err := os.Stat(metaPath); err == nil {
			ts = fi.ModTime().UTC().Format("20060102T150405Z")
		} else {
			ts = time.Now().UTC().Format("20060102T150405Z")
		}
	}

	histDir := filepath.Join(dir, "history")
	if err := os.MkdirAll(histDir, 0700); err != nil {
		if logger != nil {
			logger.Printf("[WARN] Local infra history: cannot create history dir: %v", err)
		}
		return
	}

	histBackup := filepath.Join(histDir, ts+"-backup.json")
	histMeta := filepath.Join(histDir, ts+"-metadata.json")

	// Copy rather than rename to avoid cross-device issues.
	data, err := os.ReadFile(backupPath)
	if err != nil {
		if logger != nil {
			logger.Printf("[WARN] Local infra history: cannot read %s: %v", backupPath, err)
		}
	} else if err := os.WriteFile(histBackup, data, 0600); err != nil {
		if logger != nil {
			logger.Printf("[WARN] Local infra history: cannot write %s: %v", histBackup, err)
		}
	}
	if err := os.WriteFile(histMeta, metaData, 0600); err != nil {
		if logger != nil {
			logger.Printf("[WARN] Local infra history: cannot write %s: %v", histMeta, err)
		}
	}

	// Prune old history entries.
	pruneLocalHistory(histDir, maxLocalHistory, logger)
}
// pruneLocalHistory trims histDir to at most maxKeep backup versions. A
// version is identified by the prefix of a "<prefix>-metadata.json" file; the
// lexically smallest prefixes (the oldest timestamps) are removed first,
// together with their "<prefix>-backup.json" counterpart. An unreadable
// directory is ignored. logger may be nil, in which case nothing is logged.
func pruneLocalHistory(histDir string, maxKeep int, logger *log.Logger) {
	entries, err := os.ReadDir(histDir)
	if err != nil {
		return
	}

	const metaSuffix = "-metadata.json"
	// File names within one directory are unique, so every metadata file
	// yields a distinct version prefix.
	stamps := make([]string, 0, len(entries))
	for _, entry := range entries {
		if name := entry.Name(); strings.HasSuffix(name, metaSuffix) {
			stamps = append(stamps, strings.TrimSuffix(name, metaSuffix))
		}
	}

	excess := len(stamps) - maxKeep
	if excess <= 0 {
		return
	}

	// Oldest first.
	sort.Strings(stamps)
	for i := 0; i < excess; i++ {
		stamp := stamps[i]
		os.Remove(filepath.Join(histDir, stamp+"-backup.json"))
		os.Remove(filepath.Join(histDir, stamp+metaSuffix))
		if logger != nil {
			logger.Printf("[DEBUG] Local infra history: pruned old version %s", stamp)
		}
	}
	if logger != nil {
		logger.Printf("[INFO] [backup] Pruning old backup versions: kept %d, removed %d", maxKeep, excess)
	}
}
// sanitizeTimestamp turns an RFC 3339 timestamp (with or without fractional
// seconds) into the filename-safe UTC form "20060102T150405Z". It returns the
// empty string when ts cannot be parsed.
func sanitizeTimestamp(ts string) string {
	for _, layout := range []string{time.RFC3339, time.RFC3339Nano} {
		if parsed, err := time.Parse(layout, ts); err == nil {
			return parsed.UTC().Format("20060102T150405Z")
		}
	}
	return ""
}
// atomicWrite stores data at path by first writing it to path+".tmp" with the
// given permissions and then renaming that file over path. If either step
// fails, the temporary file is removed and the error is returned.
func atomicWrite(path string, data []byte, perm os.FileMode) error {
	staging := path + ".tmp"

	err := os.WriteFile(staging, data, perm)
	if err == nil {
		err = os.Rename(staging, path)
	}
	if err != nil {
		// Never leave the staging file behind on failure.
		os.Remove(staging)
	}
	return err
}
// ReadLocalInfraBackup loads the current infra backup stored under the given
// mount point. It hands back whatever readInfraBackupFromDir yields for the
// mount's infra backup directory: raw backup JSON, its metadata, and an error.
func ReadLocalInfraBackup(mountPath string) ([]byte, *InfraMetadata, error) {
	return readInfraBackupFromDir(InfraBackupDir(mountPath))
}
// ReadLocalInfraBackupFromHistory loads one historical backup version from the
// mount point's history directory. historyPrefix is the timestamp prefix
// shared by that version's "-backup.json" and "-metadata.json" files.
// NOTE(review): historyPrefix is joined into a file path unchecked — confirm
// callers never pass a value containing path separators.
func ReadLocalInfraBackupFromHistory(mountPath, historyPrefix string) ([]byte, *InfraMetadata, error) {
	historyDir := InfraBackupHistoryDir(mountPath)
	locate := func(suffix string) string {
		return filepath.Join(historyDir, historyPrefix+suffix)
	}
	return readInfraBackupFromFiles(locate("-backup.json"), locate("-metadata.json"))
}
// LocalBackupVersion holds summary info for a historical backup version found on a drive.
// ReadLocalInfraHistory produces one value per "<prefix>-metadata.json" file in
// the history directory.
type LocalBackupVersion struct {
	Timestamp         string   `json:"timestamp"`              // copied from the version's metadata, when it could be parsed
	CustomerID        string   `json:"customer_id"`            // copied from the version's metadata, when it could be parsed
	ControllerVersion string   `json:"controller_version"`     // copied from the version's metadata, when it could be parsed
	IntegrityOK       bool     `json:"integrity_ok"`           // true only when reading and validating the pair returned no error
	Error             string   `json:"error,omitempty"`        // text of the read/validation error; empty when IntegrityOK
	StackCount        int      `json:"stack_count"`            // filled by ParseBackupCounts for intact versions
	StackNames        []string `json:"stack_names,omitempty"`  // filled by ParseBackupCounts for intact versions
	DiskCount         int      `json:"disk_count"`             // filled by ParseBackupCounts for intact versions
	HistoryFile       string   `json:"history_file,omitempty"` // empty = current, timestamp prefix for history
}
// ReadLocalInfraHistory lists every historical backup version stored in the
// history/ directory of mountPath, newest first (prefixes are sorted in
// descending string order). The current backup is NOT part of the result; use
// ReadLocalInfraBackup for that.
//
// A version that fails to read or validate is still listed, with IntegrityOK
// false and Error set. The result is nil when the history directory cannot be
// read or holds no metadata files.
func ReadLocalInfraHistory(mountPath string) []LocalBackupVersion {
	histDir := InfraBackupHistoryDir(mountPath)
	dirEntries, err := os.ReadDir(histDir)
	if err != nil {
		return nil
	}

	// Every "<prefix>-metadata.json" identifies one version; keep each prefix once.
	known := make(map[string]bool)
	var prefixes []string
	for _, entry := range dirEntries {
		fileName := entry.Name()
		if !strings.HasSuffix(fileName, "-metadata.json") {
			continue
		}
		prefix := strings.TrimSuffix(fileName, "-metadata.json")
		if known[prefix] {
			continue
		}
		known[prefix] = true
		prefixes = append(prefixes, prefix)
	}

	// Descending order puts the newest timestamp prefix first.
	sort.Sort(sort.Reverse(sort.StringSlice(prefixes)))

	var versions []LocalBackupVersion
	for _, prefix := range prefixes {
		raw, meta, readErr := readInfraBackupFromFiles(
			filepath.Join(histDir, prefix+"-backup.json"),
			filepath.Join(histDir, prefix+"-metadata.json"),
		)

		version := LocalBackupVersion{HistoryFile: prefix}
		// Metadata may be available even when validation failed later on.
		if meta != nil {
			version.Timestamp = meta.Timestamp
			version.CustomerID = meta.CustomerID
			version.ControllerVersion = meta.ControllerVersion
		}
		if readErr == nil {
			version.IntegrityOK = true
			ParseBackupCounts(raw, &version.StackCount, &version.StackNames, &version.DiskCount)
		} else {
			version.IntegrityOK = false
			version.Error = readErr.Error()
		}
		versions = append(versions, version)
	}
	return versions
}
// ParseBackupCounts extracts stack/disk counts from backup JSON (for display purposes).
//
// It reads "deployed_stacks" and "disk_layout.mounts" from backupJSON and
// stores the number of stacks in *stackCount, the number of mounts in
// *diskCount, and appends one name per stack to *stackNames (display_name when
// set, otherwise name). Any of the three output pointers may be nil, in which
// case that value is simply not reported. When backupJSON cannot be parsed the
// outputs are left untouched.
func ParseBackupCounts(backupJSON []byte, stackCount *int, stackNames *[]string, diskCount *int) {
	var parsed struct {
		DeployedStacks []struct {
			Name        string `json:"name"`
			DisplayName string `json:"display_name"`
		} `json:"deployed_stacks"`
		DiskLayout struct {
			Mounts []json.RawMessage `json:"mounts"`
		} `json:"disk_layout"`
	}
	if err := json.Unmarshal(backupJSON, &parsed); err != nil {
		return
	}

	if stackCount != nil {
		*stackCount = len(parsed.DeployedStacks)
	}
	if diskCount != nil {
		*diskCount = len(parsed.DiskLayout.Mounts)
	}
	if stackNames == nil {
		return
	}
	for _, s := range parsed.DeployedStacks {
		name := s.DisplayName
		if name == "" {
			name = s.Name
		}
		*stackNames = append(*stackNames, name)
	}
}
// readInfraBackupFromDir reads and validates the backup.json / metadata.json
// pair located directly inside dir.
func readInfraBackupFromDir(dir string) ([]byte, *InfraMetadata, error) {
	return readInfraBackupFromFiles(
		filepath.Join(dir, "backup.json"),
		filepath.Join(dir, "metadata.json"),
	)
}
// readInfraBackupFromFiles reads the metadata file at metaPath and the backup
// payload at backupPath and validates the pair.
//
// Validation order: the metadata must be readable JSON, its schema version must
// not exceed MaxSchemaVersion, the payload must be readable, and the payload's
// SHA-256 (hex) must equal the checksum recorded in the metadata.
//
// The payload is returned only on full success. The metadata pointer is
// returned alongside the error once the metadata itself has been parsed, so
// callers can still show who/when for a damaged backup.
func readInfraBackupFromFiles(backupPath, metaPath string) ([]byte, *InfraMetadata, error) {
	rawMeta, err := os.ReadFile(metaPath)
	if err != nil {
		return nil, nil, fmt.Errorf("reading metadata.json: %w", err)
	}

	meta := new(InfraMetadata)
	if err = json.Unmarshal(rawMeta, meta); err != nil {
		return nil, nil, fmt.Errorf("parsing metadata.json: %w", err)
	}

	// Refuse backups written by a newer controller than this one understands.
	if meta.SchemaVersion > MaxSchemaVersion {
		return nil, meta, fmt.Errorf("backup schema version %d is newer than supported version %d — upgrade the controller", meta.SchemaVersion, MaxSchemaVersion)
	}

	payload, err := os.ReadFile(backupPath)
	if err != nil {
		return nil, meta, fmt.Errorf("reading backup.json: %w", err)
	}

	digest := sha256.Sum256(payload)
	if got := hex.EncodeToString(digest[:]); got != meta.Checksum {
		return nil, meta, fmt.Errorf("checksum mismatch: expected %s, got %s", meta.Checksum, got)
	}
	return payload, meta, nil
}
@@ -1,163 +0,0 @@
package backup
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"log"
"os"
"path/filepath"
"testing"
)
func TestWriteAndReadLocalInfraBackup(t *testing.T) {
tmpDir := t.TempDir()
drive := filepath.Join(tmpDir, "mnt", "hdd_0")
if err := os.MkdirAll(drive, 0755); err != nil {
t.Fatal(err)
}
backupJSON := []byte(`{"customer_id":"test-123","domain":"test.hu","controller_version":"v0.21.0","timestamp":"2026-02-21T10:00:00Z"}`)
logger := testLogger(t)
WriteLocalInfraBackup(backupJSON, "test-123", "v0.21.0", "2026-02-21T10:00:00Z", []string{drive}, logger, false)
// Verify files exist
dir := InfraBackupDir(drive)
if _, err := os.Stat(filepath.Join(dir, "backup.json")); err != nil {
t.Fatalf("backup.json not found: %v", err)
}
if _, err := os.Stat(filepath.Join(dir, "metadata.json")); err != nil {
t.Fatalf("metadata.json not found: %v", err)
}
// Read back
data, meta, err := ReadLocalInfraBackup(drive)
if err != nil {
t.Fatalf("ReadLocalInfraBackup failed: %v", err)
}
if string(data) != string(backupJSON) {
t.Errorf("backup data mismatch: got %s", string(data))
}
if meta.SchemaVersion != 1 {
t.Errorf("expected schema version 1, got %d", meta.SchemaVersion)
}
if meta.CustomerID != "test-123" {
t.Errorf("expected customer_id test-123, got %s", meta.CustomerID)
}
if meta.ControllerVersion != "v0.21.0" {
t.Errorf("expected controller version v0.21.0, got %s", meta.ControllerVersion)
}
// Verify checksum
hash := sha256.Sum256(backupJSON)
expected := hex.EncodeToString(hash[:])
if meta.Checksum != expected {
t.Errorf("checksum mismatch: expected %s, got %s", expected, meta.Checksum)
}
}
// TestReadLocalInfraBackup_ChecksumMismatch verifies that a backup whose
// payload does not hash to the checksum in its metadata is rejected.
// Fixture errors are checked so that a broken setup cannot masquerade as the
// failure under test.
func TestReadLocalInfraBackup_ChecksumMismatch(t *testing.T) {
	tmpDir := t.TempDir()
	drive := filepath.Join(tmpDir, "mnt", "hdd_0")
	dir := InfraBackupDir(drive)
	if err := os.MkdirAll(dir, 0700); err != nil {
		t.Fatal(err)
	}

	// Write valid metadata with wrong checksum
	meta := InfraMetadata{SchemaVersion: 1, Checksum: "0000000000000000000000000000000000000000000000000000000000000000"}
	metaJSON, err := json.Marshal(meta)
	if err != nil {
		t.Fatalf("marshalling metadata: %v", err)
	}
	if err := os.WriteFile(filepath.Join(dir, "metadata.json"), metaJSON, 0600); err != nil {
		t.Fatalf("writing metadata.json: %v", err)
	}
	if err := os.WriteFile(filepath.Join(dir, "backup.json"), []byte(`{"test":true}`), 0600); err != nil {
		t.Fatalf("writing backup.json: %v", err)
	}

	_, _, err = ReadLocalInfraBackup(drive)
	if err == nil {
		t.Fatal("expected checksum mismatch error")
	}
	if got := err.Error(); !contains(got, "checksum mismatch") {
		t.Errorf("expected checksum mismatch error, got: %s", got)
	}
}
// TestReadLocalInfraBackup_SchemaVersionTooNew verifies that metadata carrying
// a schema version above the supported maximum is rejected with a
// "newer than supported" error. Fixture errors are checked so that a broken
// setup cannot masquerade as the failure under test.
func TestReadLocalInfraBackup_SchemaVersionTooNew(t *testing.T) {
	tmpDir := t.TempDir()
	drive := filepath.Join(tmpDir, "mnt", "hdd_0")
	dir := InfraBackupDir(drive)
	if err := os.MkdirAll(dir, 0700); err != nil {
		t.Fatal(err)
	}

	meta := InfraMetadata{SchemaVersion: 999}
	metaJSON, err := json.Marshal(meta)
	if err != nil {
		t.Fatalf("marshalling metadata: %v", err)
	}
	if err := os.WriteFile(filepath.Join(dir, "metadata.json"), metaJSON, 0600); err != nil {
		t.Fatalf("writing metadata.json: %v", err)
	}
	if err := os.WriteFile(filepath.Join(dir, "backup.json"), []byte(`{}`), 0600); err != nil {
		t.Fatalf("writing backup.json: %v", err)
	}

	_, _, err = ReadLocalInfraBackup(drive)
	if err == nil {
		t.Fatal("expected schema version error")
	}
	if got := err.Error(); !contains(got, "newer than supported") {
		t.Errorf("expected schema version error, got: %s", got)
	}
}
// TestReadLocalInfraBackup_MissingFiles verifies that reading from a mount
// point without any infra backup files reports an error.
func TestReadLocalInfraBackup_MissingFiles(t *testing.T) {
	emptyMount := t.TempDir()
	if _, _, err := ReadLocalInfraBackup(emptyMount); err == nil {
		t.Fatal("expected error for missing files")
	}
}
// TestWriteLocalInfraBackup_MultipleDrives writes one backup to three drives
// and verifies that each drive can be read back with identical content.
func TestWriteLocalInfraBackup_MultipleDrives(t *testing.T) {
	tmpDir := t.TempDir()
	drives := []string{
		filepath.Join(tmpDir, "drive1"),
		filepath.Join(tmpDir, "drive2"),
		filepath.Join(tmpDir, "drive3_fail"),
	}
	// All three directories are created up front (including "drive3_fail"), so
	// every write below is expected to succeed.
	for _, d := range drives {
		if err := os.MkdirAll(d, 0755); err != nil {
			t.Fatalf("creating drive dir %s: %v", d, err)
		}
	}

	backupJSON := []byte(`{"test":"multi"}`)
	logger := testLogger(t)
	WriteLocalInfraBackup(backupJSON, "multi-test", "v1.0", "2026-01-01T00:00:00Z", drives, logger, false)

	// All 3 should succeed
	for _, d := range drives {
		data, _, err := ReadLocalInfraBackup(d)
		if err != nil {
			t.Errorf("drive %s: read failed: %v", d, err)
			continue
		}
		if string(data) != string(backupJSON) {
			t.Errorf("drive %s: data mismatch", d)
		}
	}
}
func TestWriteLocalInfraBackup_NoDrives(t *testing.T) {
logger := testLogger(t)
// Should not panic
WriteLocalInfraBackup([]byte(`{}`), "test", "v1.0", "2026-01-01T00:00:00Z", nil, logger, false)
}
// contains reports whether substr occurs anywhere in s.
func contains(s, substr string) bool {
	if len(substr) > len(s) {
		return false
	}
	if s == substr {
		return true
	}
	return s != "" && containsStr(s, substr)
}

// containsStr is the scan behind contains: it compares substr against every
// window of the same length in s.
func containsStr(s, substr string) bool {
	width := len(substr)
	for start := 0; start+width <= len(s); start++ {
		if s[start:start+width] == substr {
			return true
		}
	}
	return false
}
// testLogger returns a logger for tests that writes to stderr with the
// "[test] " prefix and the standard date/time flags. The t argument is accepted
// but not used.
func testLogger(t *testing.T) *log.Logger {
	const prefix = "[test] "
	logger := log.New(os.Stderr, prefix, log.LstdFlags)
	return logger
}
-42
View File
@@ -1,42 +0,0 @@
package backup
import "path/filepath"
// Keep-side path helpers (FelhomDataDir, PrimaryBackupPath, AppDBDumpPath,
// AppVolumeDumpPath, AppDataDir) now live in internal/appbackup and are
// re-exposed here via aliases/forwarders in appbackup_bridge.go.
// PrimaryResticRepoPath returns the restic repository path inside the primary
// backup area of the drive mounted at drivePath.
func PrimaryResticRepoPath(drivePath string) string {
	primaryDir := filepath.Join(drivePath, FelhomDataDir, "backups", "primary")
	return filepath.Join(primaryDir, "restic")
}
// SecondaryBackupPath returns the root directory of the secondary backup area
// on the drive mounted at drivePath.
func SecondaryBackupPath(drivePath string) string {
	segments := []string{drivePath, FelhomDataDir, "backups", "secondary"}
	return filepath.Join(segments...)
}
// AppSecondaryRsyncPath returns the rsync destination directory for the
// secondary backup of stackName on the drive mounted at drivePath.
func AppSecondaryRsyncPath(drivePath, stackName string) string {
	secondaryRoot := filepath.Join(drivePath, FelhomDataDir, "backups", "secondary")
	return filepath.Join(secondaryRoot, stackName, "rsync")
}
// SecondaryResticRepoPath returns the restic repository path inside the
// secondary backup area of the drive mounted at drivePath.
func SecondaryResticRepoPath(drivePath string) string {
	secondaryRoot := filepath.Join(drivePath, FelhomDataDir, "backups", "secondary")
	return filepath.Join(secondaryRoot, "restic")
}
// SecondaryInfraPath returns the directory that mirrors the infrastructure
// config ("_infra") inside the secondary backup area of the drive mounted at
// drivePath.
func SecondaryInfraPath(drivePath string) string {
	secondaryRoot := filepath.Join(drivePath, FelhomDataDir, "backups", "secondary")
	return filepath.Join(secondaryRoot, "_infra")
}
// InfraBackupDir returns the hidden directory on the drive mounted at
// mountPath that holds the infra backup files.
func InfraBackupDir(mountPath string) string {
	const hiddenDirName = ".felhom-infra-backup"
	return filepath.Join(mountPath, hiddenDirName)
}
// InfraBackupHistoryDir returns the "history" subdirectory of the hidden infra
// backup directory on the drive mounted at mountPath, where versioned infra
// backups are kept.
func InfraBackupHistoryDir(mountPath string) string {
	infraDir := filepath.Join(mountPath, ".felhom-infra-backup")
	return filepath.Join(infraDir, "history")
}
-497
View File
@@ -1,497 +0,0 @@
package backup
import (
"context"
"crypto/rand"
"encoding/base64"
"encoding/json"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
)
// ResticManager handles restic backup operations.
// All methods accept repoPath as parameter to support per-drive repos.
type ResticManager struct {
	passwordFile string      // handed to restic as RESTIC_PASSWORD_FILE; created by EnsureInitialized when missing
	logger       *log.Logger // receives every [DEBUG]/[INFO]/[WARN]/[ERROR] line written by the manager
	customerID   string      // passed to "restic backup" as the --host value
	cacheDir     string      // handed to restic as RESTIC_CACHE_DIR
	debug        bool        // when true the methods emit additional [DEBUG] log lines
}
// SnapshotResult holds the outcome of a restic backup.
// Apart from Duration, the fields are taken from the "summary" message of
// restic's JSON output; they stay at their zero value if no summary was seen.
type SnapshotResult struct {
	SnapshotID   string        // snapshot_id from the summary
	FilesNew     int           // files_new from the summary
	FilesChanged int           // files_changed from the summary
	DataAdded    string        // data_added from the summary, formatted by humanizeBytes
	Duration     time.Duration // wall-clock time of the Snapshot call, including an unlock-and-retry if one happened
}
// SnapshotInfo holds information about a restic snapshot.
// Values are decoded from the output of "restic snapshots --json" through the
// json tags below; RepoPath is excluded from JSON, and per the field comments
// RepoPath and DriveLabel are filled in by callers.
type SnapshotInfo struct {
	ID         string    `json:"short_id"`    // restic's short snapshot ID
	Time       time.Time `json:"time"`        // snapshot timestamp
	Paths      []string  `json:"paths"`       // paths contained in the snapshot
	Tags       []string  `json:"tags"`        // tags attached to the snapshot
	RepoPath   string    `json:"-"`           // set by caller for multi-repo aggregation
	Tier       int       `json:"tier"`        // 1 = primary, 2 = secondary
	DriveLabel string    `json:"drive_label"` // filled by caller from settings
	Source     string    `json:"source"`      // "restic" or "rsync"
}
// RepoStats holds repository statistics.
// Stats leaves a field at its zero value when the restic command backing it
// failed or its output could not be parsed.
type RepoStats struct {
	TotalSize      string        // TotalSizeBytes formatted by humanizeBytes
	TotalSizeBytes int64         // total_size reported by "restic stats --json"
	SnapshotCount  int           // number of entries returned by "restic snapshots --json"
	LatestSnapshot *SnapshotInfo // last entry of that snapshot list; nil when the list is empty
}
// NewResticManager builds a ResticManager from cfg: the password file and
// customer ID come straight from the configuration, and the restic cache lives
// in "restic-cache" below the configured data directory. Debug logging starts
// disabled; use SetDebug to turn it on.
func NewResticManager(cfg *config.Config, logger *log.Logger) *ResticManager {
	mgr := new(ResticManager)
	mgr.logger = logger
	mgr.customerID = cfg.Customer.ID
	mgr.passwordFile = cfg.Backup.ResticPasswordFile
	mgr.cacheDir = filepath.Join(cfg.Paths.DataDir, "restic-cache")
	return mgr
}
// SetDebug enables or disables debug logging.
// The flag is a plain field write; no locking is performed here.
func (r *ResticManager) SetDebug(debug bool) {
	r.debug = debug
}
// EnsureInitialized checks if the restic repo exists and initializes it if not.
// Also auto-generates the password file if missing.
//
// Steps, in order: generate the password file when it does not exist, create
// the cache directory (best effort), return early when repoPath already
// contains a "config" file, otherwise create repoPath and run "restic init"
// with a two-minute timeout. The returned error wraps the underlying cause.
func (r *ResticManager) EnsureInitialized(repoPath string) error {
	if r.debug {
		r.logger.Printf("[DEBUG] [restic] EnsureInitialized: repoPath=%s, passwordFile=%s", repoPath, r.passwordFile)
	}

	// Ensure password file exists
	if _, err := os.Stat(r.passwordFile); os.IsNotExist(err) {
		if err := r.generatePassword(); err != nil {
			return fmt.Errorf("generating restic password: %w", err)
		}
	}

	// Ensure cache dir exists. This stays best-effort (initialization goes on),
	// but a failure is no longer silent.
	if err := os.MkdirAll(r.cacheDir, 0700); err != nil {
		r.logger.Printf("[WARN] [backup] Could not create restic cache dir %s: %v", r.cacheDir, err)
	}

	// Check if repo is already initialized
	configPath := filepath.Join(repoPath, "config")
	if _, err := os.Stat(configPath); err == nil {
		r.logger.Printf("[INFO] [backup] Restic repo already initialized at %s", repoPath)
		return nil
	}

	// Ensure repo directory exists
	if err := os.MkdirAll(repoPath, 0700); err != nil {
		return fmt.Errorf("creating repo dir: %w", err)
	}

	// Initialize repo
	r.logger.Printf("[INFO] [backup] Initializing restic repository at %s", repoPath)
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	cmd := r.command(ctx, repoPath, "init")
	out, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("restic init failed: %w — %s", err, truncate(string(out), 200))
	}

	r.logger.Printf("[INFO] [backup] Restic repository initialized successfully")
	return nil
}
// Snapshot creates a new backup snapshot of the given paths.
//
// Paths that do not exist are skipped with a warning; if none remain an error
// is returned. Every tag is passed as --tag and the customer ID as --host.
// When restic fails and its output mentions a lock, the repository is unlocked
// and the backup is retried exactly once with a fresh 30-minute timeout.
//
// On failure the returned error wraps the command error and carries a
// truncated copy of restic's stderr, matching how Prune, Check and
// EnsureInitialized report failures. On success the result is filled from the
// "summary" message of restic's JSON output.
func (r *ResticManager) Snapshot(repoPath string, paths []string, tags []string) (*SnapshotResult, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
	defer cancel()

	start := time.Now()
	if r.debug {
		r.logger.Printf("[DEBUG] [restic] Snapshot: repo=%s, paths=%v, tags=%v", repoPath, paths, tags)
	}

	args := []string{"backup", "--json"}
	for _, tag := range tags {
		args = append(args, "--tag", tag)
	}
	args = append(args, "--host", r.customerID)

	// Only include paths that exist
	var existingPaths []string
	for _, p := range paths {
		if _, err := os.Stat(p); err == nil {
			existingPaths = append(existingPaths, p)
		} else {
			r.logger.Printf("[WARN] [backup] Backup path does not exist, skipping: %s", p)
		}
	}
	if len(existingPaths) == 0 {
		return nil, fmt.Errorf("no backup paths exist")
	}
	if r.debug {
		r.logger.Printf("[DEBUG] [restic] Snapshot: %d/%d paths exist, backing up: %v", len(existingPaths), len(paths), existingPaths)
	}
	args = append(args, existingPaths...)

	// stderrOf extracts what restic printed on stderr. Output() stores it in
	// *exec.ExitError when the command's Stderr field is unset.
	stderrOf := func(err error) string {
		if exitErr, ok := err.(*exec.ExitError); ok {
			return string(exitErr.Stderr)
		}
		return ""
	}

	cmd := r.command(ctx, repoPath, args...)
	out, err := cmd.Output()
	if err != nil {
		// Check for stale lock — restic writes lock errors to stderr, not stdout.
		// "lock" also matches "locked", so a single substring test is enough.
		stderr := stderrOf(err)
		if !strings.Contains(string(out)+stderr, "lock") {
			return nil, fmt.Errorf("restic backup failed: %w — %s", err, truncate(stderr, 200))
		}

		r.logger.Printf("[WARN] [backup] Restic repo locked — attempting unlock")
		unlockCmd := r.command(ctx, repoPath, "unlock")
		if unlockErr := unlockCmd.Run(); unlockErr != nil {
			r.logger.Printf("[WARN] [backup] Restic unlock failed: %v", unlockErr)
		}

		// Retry once with a fresh context (H9 fix — original may be nearly expired).
		retryCtx, retryCancel := context.WithTimeout(context.Background(), 30*time.Minute)
		defer retryCancel()
		cmd = r.command(retryCtx, repoPath, args...)
		out, err = cmd.Output()
		if err != nil {
			return nil, fmt.Errorf("restic backup failed after unlock: %w — %s", err, truncate(stderrOf(err), 200))
		}
	}

	result := &SnapshotResult{
		Duration: time.Since(start),
	}

	// Parse JSON output — look for the summary line. Lines that are not valid
	// JSON are skipped.
	for _, line := range strings.Split(string(out), "\n") {
		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}
		var msg struct {
			MessageType  string `json:"message_type"`
			FilesNew     int    `json:"files_new"`
			FilesChanged int    `json:"files_changed"`
			DataAdded    int64  `json:"data_added"`
			SnapshotID   string `json:"snapshot_id"`
		}
		if err := json.Unmarshal([]byte(line), &msg); err != nil {
			continue
		}
		if msg.MessageType == "summary" {
			result.SnapshotID = msg.SnapshotID
			result.FilesNew = msg.FilesNew
			result.FilesChanged = msg.FilesChanged
			result.DataAdded = humanizeBytes(msg.DataAdded)
		}
	}

	if r.debug {
		r.logger.Printf("[DEBUG] [restic] Snapshot: completed in %s, snapshotID=%s, filesNew=%d, filesChanged=%d, dataAdded=%s",
			result.Duration, result.SnapshotID, result.FilesNew, result.FilesChanged, result.DataAdded)
	}
	r.logger.Printf("[INFO] [backup] Restic snapshot complete for %s", repoPath)
	return result, nil
}
// Prune runs "restic forget --prune" on repoPath, keeping the number of daily,
// weekly and monthly snapshots given by retention. The command is limited to
// 30 minutes; on failure the error carries a truncated copy of restic's output.
func (r *ResticManager) Prune(repoPath string, retention config.RetentionConfig) error {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
	defer cancel()

	if r.debug {
		r.logger.Printf("[DEBUG] [restic] Prune: repo=%s, keepDaily=%d, keepWeekly=%d, keepMonthly=%d",
			repoPath, retention.KeepDaily, retention.KeepWeekly, retention.KeepMonthly)
	}

	began := time.Now()

	forgetArgs := []string{"forget"}
	forgetArgs = append(forgetArgs, "--keep-daily", fmt.Sprintf("%d", retention.KeepDaily))
	forgetArgs = append(forgetArgs, "--keep-weekly", fmt.Sprintf("%d", retention.KeepWeekly))
	forgetArgs = append(forgetArgs, "--keep-monthly", fmt.Sprintf("%d", retention.KeepMonthly))
	forgetArgs = append(forgetArgs, "--prune")

	output, err := r.command(ctx, repoPath, forgetArgs...).CombinedOutput()
	if err != nil {
		return fmt.Errorf("restic forget/prune failed: %v — %s", err, truncate(string(output), 200))
	}

	if r.debug {
		r.logger.Printf("[DEBUG] [restic] Prune: completed in %s, output=%d bytes", time.Since(began), len(output))
	}
	r.logger.Printf("[INFO] [backup] Restic prune completed for %s", repoPath)
	return nil
}
// Check runs "restic check" against repoPath with a 30-minute timeout and
// returns an error, including a truncated copy of restic's output, when the
// command fails.
func (r *ResticManager) Check(repoPath string) error {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
	defer cancel()

	if r.debug {
		r.logger.Printf("[DEBUG] [restic] Check: repo=%s", repoPath)
	}

	began := time.Now()
	output, err := r.command(ctx, repoPath, "check").CombinedOutput()

	if err == nil {
		if r.debug {
			r.logger.Printf("[DEBUG] [restic] Check: repo=%s OK, completed in %s", repoPath, time.Since(began))
		}
		r.logger.Printf("[INFO] [backup] Restic check passed for repo %s", repoPath)
		return nil
	}

	if r.debug {
		r.logger.Printf("[DEBUG] [restic] Check: failed after %s, output=%s", time.Since(began), truncate(string(output), 300))
	}
	return fmt.Errorf("restic check failed: %v — %s", err, truncate(string(output), 200))
}
// ListSnapshots returns all snapshots, newest first, limited to N entries.
//
// The list reported by "restic snapshots --json" is reversed, then cut to
// limit entries when limit is positive; a limit of zero or less returns
// everything. The debug line reports both the total found and the number
// actually returned.
func (r *ResticManager) ListSnapshots(repoPath string, limit int) ([]SnapshotInfo, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	if r.debug {
		r.logger.Printf("[DEBUG] [restic] ListSnapshots: repo=%s, limit=%d", repoPath, limit)
	}

	cmd := r.command(ctx, repoPath, "snapshots", "--json")
	out, err := cmd.Output()
	if err != nil {
		return nil, fmt.Errorf("restic snapshots failed: %w", err)
	}

	var snapshots []SnapshotInfo
	if err := json.Unmarshal(out, &snapshots); err != nil {
		return nil, fmt.Errorf("parsing snapshot JSON: %w", err)
	}

	// Remember the full count before truncating so the debug line is accurate.
	total := len(snapshots)

	// Reverse for newest first
	for i, j := 0, len(snapshots)-1; i < j; i, j = i+1, j-1 {
		snapshots[i], snapshots[j] = snapshots[j], snapshots[i]
	}

	if limit > 0 && len(snapshots) > limit {
		snapshots = snapshots[:limit]
	}

	if r.debug {
		r.logger.Printf("[DEBUG] [restic] ListSnapshots: repo=%s, found %d total snapshots, returning %d",
			repoPath, total, len(snapshots))
	}
	return snapshots, nil
}
// LatestSnapshot returns the most recent snapshot info, or (nil, nil) when the
// repository has no snapshots.
//
// restic applies "--latest 1" per host and path set, so the reply can contain
// more than one snapshot when different path sets were backed up into the same
// repository. The entry with the newest Time is selected instead of assuming
// the first element is the latest.
func (r *ResticManager) LatestSnapshot(repoPath string) (*SnapshotInfo, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
	defer cancel()

	if r.debug {
		r.logger.Printf("[DEBUG] [restic] LatestSnapshot: repo=%s", repoPath)
	}

	cmd := r.command(ctx, repoPath, "snapshots", "--latest", "1", "--json")
	out, err := cmd.Output()
	if err != nil {
		return nil, fmt.Errorf("restic snapshots failed: %w", err)
	}

	var snapshots []SnapshotInfo
	if err := json.Unmarshal(out, &snapshots); err != nil {
		return nil, fmt.Errorf("parsing snapshot JSON: %w", err)
	}

	if len(snapshots) == 0 {
		if r.debug {
			r.logger.Printf("[DEBUG] [restic] LatestSnapshot: repo=%s, no snapshots found", repoPath)
		}
		return nil, nil
	}

	// Pick the newest entry across all host/path groups.
	newest := 0
	for i := range snapshots {
		if snapshots[i].Time.After(snapshots[newest].Time) {
			newest = i
		}
	}
	latest := &snapshots[newest]

	if r.debug {
		r.logger.Printf("[DEBUG] [restic] LatestSnapshot: repo=%s, id=%s, time=%s, paths=%v",
			repoPath, latest.ID, latest.Time.Format(time.RFC3339), latest.Paths)
	}
	return latest, nil
}
// Stats collects repository statistics for repoPath: the total size from
// "restic stats --json" and the snapshot count plus the last listed snapshot
// from "restic snapshots --json". Both commands share one two-minute timeout.
//
// Collection is best-effort: a failing command or unparsable output leaves the
// corresponding fields at their zero value, and the returned error is always nil.
func (r *ResticManager) Stats(repoPath string) (*RepoStats, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	if r.debug {
		r.logger.Printf("[DEBUG] [restic] Stats: repo=%s", repoPath)
	}

	began := time.Now()
	result := new(RepoStats)

	// Repository size.
	if sizeJSON, err := r.command(ctx, repoPath, "stats", "--json").Output(); err == nil {
		var sizeInfo struct {
			TotalSize uint64 `json:"total_size"`
		}
		if json.Unmarshal(sizeJSON, &sizeInfo) == nil {
			result.TotalSizeBytes = int64(sizeInfo.TotalSize)
			result.TotalSize = humanizeBytes(result.TotalSizeBytes)
		}
	}

	// Snapshot count and the last entry of the listing.
	if listJSON, err := r.command(ctx, repoPath, "snapshots", "--json").Output(); err == nil {
		var all []SnapshotInfo
		if json.Unmarshal(listJSON, &all) == nil {
			result.SnapshotCount = len(all)
			if n := len(all); n > 0 {
				last := all[n-1]
				result.LatestSnapshot = &last
			}
		}
	}

	if r.debug {
		latestID := "none"
		if result.LatestSnapshot != nil {
			latestID = result.LatestSnapshot.ID
		}
		r.logger.Printf("[DEBUG] [restic] Stats: repo=%s, totalSize=%s, snapshots=%d, latest=%s, took %s",
			repoPath, result.TotalSize, result.SnapshotCount, latestID, time.Since(began))
	}
	return result, nil
}
// GetPassword returns the restic repository password read from the password
// file, with surrounding whitespace trimmed.
func (r *ResticManager) GetPassword() (string, error) {
	raw, err := os.ReadFile(r.passwordFile)
	if err == nil {
		return strings.TrimSpace(string(raw)), nil
	}
	return "", fmt.Errorf("reading restic password: %w", err)
}
// RestoreAppData runs "restic restore <snapshotID> --target /" against
// repoPath, adding one --include per entry of paths, so the selected paths are
// written back to their original locations. The command is limited to 30
// minutes. On failure restic's (truncated) output is logged and the command
// error is returned wrapped.
func (r *ResticManager) RestoreAppData(repoPath string, snapshotID string, paths []string) error {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
	defer cancel()

	restoreArgs := make([]string, 0, 4+2*len(paths))
	restoreArgs = append(restoreArgs, "restore", snapshotID, "--target", "/")
	for _, include := range paths {
		restoreArgs = append(restoreArgs, "--include", include)
	}

	if r.debug {
		r.logger.Printf("[DEBUG] [restic] RestoreAppData: repo=%s, snapshot=%s, %d include paths=%v",
			repoPath, snapshotID, len(paths), paths)
	}

	began := time.Now()
	r.logger.Printf("[INFO] [backup] Restore started: repo=%s, snapshot=%s, paths=%v", repoPath, snapshotID, paths)

	combined, err := r.command(ctx, repoPath, restoreArgs...).CombinedOutput()
	if err != nil {
		r.logger.Printf("[ERROR] [backup] Restore failed: %v, output: %s", err, truncate(string(combined), 500))
		return fmt.Errorf("restic restore failed: %w", err)
	}

	if r.debug {
		r.logger.Printf("[DEBUG] [restic] RestoreAppData: completed in %s, output=%d bytes", time.Since(began), len(combined))
	}
	r.logger.Printf("[INFO] [backup] Restore completed: snapshot=%s, paths=%v", snapshotID, paths)
	return nil
}
// RepoExists reports whether repoPath contains a "config" entry, which is how
// this manager recognizes an initialized restic repository. Any stat error
// counts as "does not exist".
func (r *ResticManager) RepoExists(repoPath string) bool {
	_, statErr := os.Stat(filepath.Join(repoPath, "config"))
	found := statErr == nil
	if r.debug {
		r.logger.Printf("[DEBUG] [restic] RepoExists: repo=%s, exists=%v", repoPath, found)
	}
	return found
}
// UnlockCommand returns an exec.Cmd that runs restic unlock on the given repo.
// The command is only built here, with the same environment as every other
// restic invocation of this manager; the caller is responsible for running it.
func (r *ResticManager) UnlockCommand(ctx context.Context, repoPath string) *exec.Cmd {
	return r.command(ctx, repoPath, "unlock")
}
// command builds (but does not start) a restic invocation with the given
// arguments, bound to ctx. The repository, password file and cache directory
// are passed through the RESTIC_REPOSITORY, RESTIC_PASSWORD_FILE and
// RESTIC_CACHE_DIR environment variables, appended to the current process
// environment.
func (r *ResticManager) command(ctx context.Context, repoPath string, args ...string) *exec.Cmd {
	if r.debug {
		r.logger.Printf("[DEBUG] [restic] command: restic %s (repo=%s)", strings.Join(args, " "), repoPath)
	}

	env := os.Environ()
	env = append(env, "RESTIC_REPOSITORY="+repoPath)
	env = append(env, "RESTIC_PASSWORD_FILE="+r.passwordFile)
	env = append(env, "RESTIC_CACHE_DIR="+r.cacheDir)

	restic := exec.CommandContext(ctx, "restic", args...)
	restic.Env = env
	return restic
}
// generatePassword creates the restic password file with a fresh random
// password: 32 bytes from crypto/rand, base64url-encoded, written with mode 0600
// into a directory created with mode 0700.
//
// The file is created exclusively (O_EXCL). If a password file already exists
// at that path it is kept untouched and nil is returned, because replacing a
// password that a repository was initialized with would make that repository
// unreadable. A partially written file is removed again on write failure.
func (r *ResticManager) generatePassword() error {
	// Ensure directory exists
	dir := filepath.Dir(r.passwordFile)
	if err := os.MkdirAll(dir, 0700); err != nil {
		return fmt.Errorf("creating password dir: %w", err)
	}

	// Generate 32 random bytes, base64url-encode
	b := make([]byte, 32)
	if _, err := rand.Read(b); err != nil {
		return fmt.Errorf("generating random bytes: %w", err)
	}
	password := base64.URLEncoding.EncodeToString(b)

	f, err := os.OpenFile(r.passwordFile, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0600)
	if err != nil {
		if os.IsExist(err) {
			// Someone created the file since the caller checked; never overwrite it.
			r.logger.Printf("[INFO] [backup] Restic password file %s already exists, keeping it", r.passwordFile)
			return nil
		}
		return fmt.Errorf("writing password file: %w", err)
	}

	_, err = f.WriteString(password)
	if closeErr := f.Close(); err == nil {
		err = closeErr
	}
	if err != nil {
		os.Remove(r.passwordFile) // do not leave a truncated password behind
		return fmt.Errorf("writing password file: %w", err)
	}

	r.logger.Printf("[INFO] [backup] Generated new restic repository password at %s", r.passwordFile)
	r.logger.Printf("[WARN] [backup] Save this password externally — losing it means losing access to ALL backups")
	return nil
}
// truncate shortens s to at most maxLen bytes and appends "..." when anything
// was cut off; strings that already fit are returned unchanged.
//
// The cut never lands inside a multi-byte UTF-8 sequence: it is moved back to
// the start of the rune that would otherwise be split, so the result stays
// valid UTF-8 for valid input. A negative maxLen is treated as 0.
func truncate(s string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	if len(s) <= maxLen {
		return s
	}
	cut := maxLen
	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; step back until
	// the byte at the cut position starts a rune.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + "..."
}
+16 -295
View File
@@ -5,27 +5,24 @@ import (
"fmt"
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
"time"
)
// snapshotIDRe validates restic snapshot IDs: 8-64 lowercase hex characters.
var snapshotIDRe = regexp.MustCompile(`^[0-9a-f]{8,64}$`)
// RestoreApp restores an app from a restic snapshot.
// All apps get config + DB dump restored. Apps with HDD data also get user data restored.
// RestoreApp restores an app's data from its on-disk app-data backup.
//
// Disk-tier (restic snapshot) restore has moved to the host agent. This keep-side
// restore re-imports the Docker-volume tar dumps that the app-data backup produced
// (AppVolumeDumpPath) and relies on the DB dumps already present on the app's drive.
// The stack is stopped before the volume import and restarted after.
//
// snapshotID is retained for API/UI signature compatibility; with restic removed it
// is only used for logging (the source of truth is now the on-disk volume tars).
func (m *Manager) RestoreApp(stackName, snapshotID string) error {
if m.stackProvider == nil {
return fmt.Errorf("stack provider not configured")
}
// Validate snapshot ID format
if !snapshotIDRe.MatchString(snapshotID) {
return fmt.Errorf("invalid snapshot ID: must be 8-64 lowercase hex characters")
}
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: stack=%s, snapshotID=%s", stackName, snapshotID)
}
@@ -44,87 +41,24 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error {
m.mu.Unlock()
}()
// Determine what to restore
hddMounts := m.stackProvider.GetStackHDDMounts(stackName)
hasHDD := len(hddMounts) > 0
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: %s has %d HDD mount(s), hasHDD=%v", stackName, len(hddMounts), hasHDD)
}
// Build list of paths to restore from the snapshot
var restorePaths []string
// Always restore the stack's config dir (compose + app.yaml + .felhom.yml)
composePath, ok := m.stackProvider.GetStackComposePath(stackName)
if ok {
stackDir := filepath.Dir(composePath)
restorePaths = append(restorePaths, stackDir)
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: will restore config dir: %s", stackDir)
}
}
// Restore DB dump files for this stack (per-drive path)
drivePath := m.GetAppDrivePath(stackName)
dumpDir := AppDBDumpPath(drivePath, stackName)
restorePaths = append(restorePaths, dumpDir)
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: will restore DB dump dir: %s", dumpDir)
if drivePath == "" {
return fmt.Errorf("cannot determine drive path for %s", stackName)
}
// Restore HDD data (always included for apps that have it — backup is mandatory)
if hasHDD {
restorePaths = append(restorePaths, hddMounts...)
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: will restore HDD data: %v", hddMounts)
}
}
// Restore Docker volume dumps (if present in snapshot)
volDumpDir := AppVolumeDumpPath(drivePath, stackName)
restorePaths = append(restorePaths, volDumpDir)
if len(restorePaths) == 0 {
return fmt.Errorf("no restorable paths found for %s", stackName)
}
// Use the app's primary restic repo
repoPath := PrimaryResticRepoPath(drivePath)
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: using repo=%s, %d restore path(s)", repoPath, len(restorePaths))
}
m.logger.Printf("[INFO] [backup] Starting restore for %s (snapshot=%s, repo=%s, paths=%v, hasHDD=%v)",
stackName, snapshotID, repoPath, restorePaths, hasHDD)
m.logger.Printf("[INFO] [backup] Starting app-data restore for %s (drive=%s)", stackName, drivePath)
// Stop the app before restore
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: step 1/4 — stopping app %s", stackName)
m.logger.Printf("[DEBUG] RestoreApp: step 1/3 — stopping app %s", stackName)
}
if err := m.stackProvider.StopStack(stackName); err != nil {
m.logger.Printf("[WARN] RESTORE could not stop %s: %v (proceeding anyway)", stackName, err)
}
// Execute restore via restic
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: step 2/4 — restoring data from snapshot %s", snapshotID)
}
if err := m.restic.RestoreAppData(repoPath, snapshotID, restorePaths); err != nil {
m.logger.Printf("[ERROR] RESTORE failed for %s: %v", stackName, err)
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: step 3/4 — restarting app %s after failure", stackName)
}
if startErr := m.stackProvider.StartStack(stackName); startErr != nil {
m.logger.Printf("[WARN] RESTORE could not restart %s after failure: %v", stackName, startErr)
}
return err
}
// Populate Docker volumes from restored tars
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: step 3/5 — restoring Docker volumes for %s", stackName)
m.logger.Printf("[DEBUG] RestoreApp: step 2/3 — restoring Docker volumes for %s", stackName)
}
if err := m.restoreDockerVolumes(stackName, drivePath); err != nil {
m.logger.Printf("[WARN] RESTORE volume restore failed for %s: %v (continuing)", stackName, err)
@@ -132,7 +66,7 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error {
// Restart the app
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: step 4/5 — restarting app %s after successful restore", stackName)
m.logger.Printf("[DEBUG] RestoreApp: step 3/3 — restarting app %s after restore", stackName)
}
if err := m.stackProvider.StartStack(stackName); err != nil {
m.logger.Printf("[WARN] RESTORE could not restart %s after restore: %v", stackName, err)
@@ -143,219 +77,7 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error {
m.logger.Printf("[WARN] [backup] Restore completed but app health check failed: %v", err)
}
hasVolumes := len(m.stackProvider.GetDockerVolumes(stackName)) > 0
restoreType := "config+DB"
if hasHDD || hasVolumes {
restoreType = "full (config+DB+userdata)"
}
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: step 5/5 — restore completed, type=%s", restoreType)
}
m.logger.Printf("[INFO] RESTORE completed: stack=%s, snapshot=%s, type=%s", stackName, snapshotID, restoreType)
return nil
}
// RestoreAppFromTier2 restores an app from its cross-drive rsync backup mirror.
//
// Sequence: stop the stack, rsync _config/ over the compose directory, rsync
// HDD data back onto the stack's HDD mounts, copy DB dump files from _db/ into
// the app's dump directory, repopulate Docker volumes from _volumes/, then
// start the stack and wait up to 90s for it to become healthy.
//
// A failed config or HDD rsync aborts the restore (after one attempt to start
// the stack again) and is returned to the caller. DB dump copy, volume
// restore, restart and health-check failures are only logged. An error is
// also returned up front when the stack provider or settings are missing,
// cross-drive backup is not enabled for the stack, the mirror directory does
// not exist, or another backup/restore is already running.
func (m *Manager) RestoreAppFromTier2(stackName string) error {
if m.stackProvider == nil {
return fmt.Errorf("stack provider not configured")
}
if m.settings == nil {
return fmt.Errorf("settings not available")
}
cdCfg := m.settings.GetCrossDriveConfig(stackName)
if cdCfg == nil || !cdCfg.Enabled {
return fmt.Errorf("cross-drive backup not configured for %s", stackName)
}
rsyncDir := AppSecondaryRsyncPath(cdCfg.DestinationPath, stackName)
// Only a missing directory is rejected here; any other Stat error falls through.
if _, err := os.Stat(rsyncDir); os.IsNotExist(err) {
return fmt.Errorf("Tier 2 backup directory not found: %s", rsyncDir)
}
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreAppFromTier2: stack=%s, rsyncDir=%s", stackName, rsyncDir)
}
// Prevent concurrent operations: m.running is claimed under m.mu and
// released by the deferred func when this restore returns.
m.mu.Lock()
if m.running {
m.mu.Unlock()
return fmt.Errorf("backup or restore already in progress")
}
m.running = true
m.mu.Unlock()
defer func() {
m.mu.Lock()
m.running = false
m.mu.Unlock()
}()
hddMounts := m.stackProvider.GetStackHDDMounts(stackName)
hasHDD := len(hddMounts) > 0
drivePath := m.GetAppDrivePath(stackName)
m.logger.Printf("[INFO] [backup] Starting Tier 2 restore for %s from %s", stackName, rsyncDir)
// Step 1: Stop the app (a failure to stop is logged and the restore proceeds)
if err := m.stackProvider.StopStack(stackName); err != nil {
m.logger.Printf("[WARN] RESTORE could not stop %s: %v (proceeding anyway)", stackName, err)
}
// Step 2: Restore config from _config/
configSrc := filepath.Join(rsyncDir, "_config") + "/"
if _, err := os.Stat(filepath.Join(rsyncDir, "_config")); err == nil {
if composePath, ok := m.stackProvider.GetStackComposePath(stackName); ok {
configDst := filepath.Dir(composePath) + "/"
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreAppFromTier2: rsync config %s → %s", configSrc, configDst)
}
// --delete removes files from the compose directory that are not in the backup.
cmd := exec.Command("rsync", "-a", "--delete", configSrc, configDst)
if out, err := cmd.CombinedOutput(); err != nil {
m.logger.Printf("[ERROR] [backup] Tier 2 config restore failed for %s: %v (%s)", stackName, err, strings.TrimSpace(string(out)))
// Try to restart and return error (the StartStack result is not checked)
m.stackProvider.StartStack(stackName)
return fmt.Errorf("config restore failed: %w", err)
}
}
}
// Step 3: Restore HDD data
if hasHDD {
// Check for data directory structure — single mount vs multi-mount
if len(hddMounts) == 1 {
// Single mount: data is directly in rsyncDir (excluding _* dirs)
src := strings.TrimRight(rsyncDir, "/") + "/"
dst := strings.TrimRight(hddMounts[0], "/") + "/"
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreAppFromTier2: rsync HDD data %s → %s", src, dst)
}
cmd := exec.Command("rsync", "-a", "--delete",
"--exclude", "_*",
src, dst)
if out, err := cmd.CombinedOutput(); err != nil {
m.logger.Printf("[ERROR] [backup] Tier 2 HDD data restore failed for %s: %v (%s)", stackName, err, strings.TrimSpace(string(out)))
m.stackProvider.StartStack(stackName)
return fmt.Errorf("HDD data restore failed: %w", err)
}
} else {
// Multiple mounts: each has a subdirectory named by leaf
// NOTE(review): two mounts with the same base name would map to the same
// backup subdirectory — confirm the backup side cannot produce that.
for _, mount := range hddMounts {
leaf := filepath.Base(mount)
src := filepath.Join(rsyncDir, leaf) + "/"
dst := strings.TrimRight(mount, "/") + "/"
if _, err := os.Stat(filepath.Join(rsyncDir, leaf)); os.IsNotExist(err) {
m.logger.Printf("[WARN] [backup] Tier 2 restore: no backup data for mount %s", mount)
continue
}
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreAppFromTier2: rsync HDD mount %s → %s", src, dst)
}
cmd := exec.Command("rsync", "-a", "--delete", src, dst)
if out, err := cmd.CombinedOutput(); err != nil {
m.logger.Printf("[ERROR] [backup] Tier 2 HDD restore failed for mount %s: %v (%s)", mount, err, strings.TrimSpace(string(out)))
m.stackProvider.StartStack(stackName)
return fmt.Errorf("HDD restore failed for %s: %w", mount, err)
}
}
}
}
// Step 4: Restore DB dumps from _db/
dbSrc := filepath.Join(rsyncDir, "_db")
if _, err := os.Stat(dbSrc); err == nil {
dbDst := AppDBDumpPath(drivePath, stackName)
// If the destination cannot be created the whole DB dump step is skipped
// without a log entry.
if err := os.MkdirAll(dbDst, 0755); err == nil {
entries, _ := os.ReadDir(dbSrc)
for _, e := range entries {
if !e.IsDir() {
src := filepath.Join(dbSrc, e.Name())
dst := filepath.Join(dbDst, e.Name())
if err := copyFile(src, dst); err != nil {
m.logger.Printf("[WARN] [backup] Failed to copy DB dump %s: %v", e.Name(), err)
}
}
}
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreAppFromTier2: restored DB dumps from %s", dbSrc)
}
}
}
// Step 5: Restore Docker volumes from _volumes/
volSrc := filepath.Join(rsyncDir, "_volumes")
if _, err := os.Stat(volSrc); err == nil {
if err := m.restoreDockerVolumesFromDir(stackName, volSrc); err != nil {
m.logger.Printf("[WARN] [backup] Tier 2 volume restore failed for %s: %v (continuing)", stackName, err)
}
}
// Step 6: Restart the app
if err := m.stackProvider.StartStack(stackName); err != nil {
m.logger.Printf("[WARN] RESTORE could not restart %s after Tier 2 restore: %v", stackName, err)
}
// Verify app started successfully
if err := m.waitForHealthy(stackName, 90*time.Second); err != nil {
m.logger.Printf("[WARN] [backup] Tier 2 restore completed but app health check failed: %v", err)
}
// restoreType only labels the final log line; it does not affect what was restored.
hasVolumes := len(m.stackProvider.GetDockerVolumes(stackName)) > 0
restoreType := "config+DB"
if hasHDD || hasVolumes {
restoreType = "full (config+DB+userdata)"
}
m.logger.Printf("[INFO] RESTORE (Tier 2) completed: stack=%s, type=%s", stackName, restoreType)
return nil
}
// restoreDockerVolumesFromDir rebuilds Docker volumes from the *.tar archives
// found directly inside dumpDir (the Tier 2 mirror's _volumes/ directory).
//
// For each archive the volume named after the file (minus ".tar") is force
// removed, created again, and filled by a throw-away alpine container that
// untars the archive into it, bounded by a 10 minute timeout. A failure on one
// volume is logged and the next archive is tried. A missing dumpDir is not an
// error; any other failure to list it is returned.
func (m *Manager) restoreDockerVolumesFromDir(stackName, dumpDir string) error {
	archives, listErr := os.ReadDir(dumpDir)
	switch {
	case listErr == nil:
		// directory listed, carry on
	case os.IsNotExist(listErr):
		return nil
	default:
		return fmt.Errorf("reading volume dump dir: %w", listErr)
	}

	okCount := 0
	for _, archive := range archives {
		if archive.IsDir() {
			continue
		}
		tarName := archive.Name()
		if !strings.HasSuffix(tarName, ".tar") {
			continue
		}
		volume := strings.TrimSuffix(tarName, ".tar")
		m.logger.Printf("[INFO] [backup] Restoring Docker volume %s for %s (Tier 2)", volume, stackName)

		// Drop any existing volume first; the result of the removal is not checked.
		exec.Command("docker", "volume", "rm", "-f", volume).Run()

		createOut, createErr := exec.Command("docker", "volume", "create", volume).CombinedOutput()
		if createErr != nil {
			m.logger.Printf("[WARN] [backup] Failed to create volume %s: %s — %v", volume, strings.TrimSpace(string(createOut)), createErr)
			continue
		}

		// Untar inside a helper container: the volume is mounted at /vol and
		// the dump directory read-only at /in.
		runCtx, stop := context.WithTimeout(context.Background(), 10*time.Minute)
		fillOut, fillErr := exec.CommandContext(runCtx, "docker", "run", "--rm",
			"-v", volume+":/vol",
			"-v", dumpDir+":/in:ro",
			"alpine", "tar", "xf", "/in/"+tarName, "-C", "/vol").CombinedOutput()
		stop()
		if fillErr != nil {
			m.logger.Printf("[WARN] [backup] Failed to populate volume %s: %s — %v", volume, strings.TrimSpace(string(fillOut)), fillErr)
			continue
		}
		okCount++
	}

	if okCount > 0 {
		m.logger.Printf("[INFO] [backup] Restored %d Docker volume(s) for %s (Tier 2)", okCount, stackName)
	}
	m.logger.Printf("[INFO] RESTORE completed: stack=%s", stackName)
	return nil
}
@@ -416,7 +138,6 @@ func (m *Manager) restoreDockerVolumes(stackName, drivePath string) error {
// waitForHealthy waits for a stack to reach running state after restore.
// Forces a docker ps refresh on each poll to avoid stale state.
// Acceptable overhead for a rare operation (restore).
func (m *Manager) waitForHealthy(stackName string, timeout time.Duration) error {
deadline := time.Now().Add(timeout)
interval := 5 * time.Second
@@ -1,246 +0,0 @@
//go:build linux
package backup
import (
"context"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
)
// RestoreAppFromBackup restores a single app from its cross-drive backup.
// Steps: restore config → verify/restore data → copy DB dumps → docker compose up.
//
// The stack directory is stacksDir/app.Name. The restore aborts with an error
// only when the config rsync fails, when there is neither a config backup nor
// an existing stack directory, when no docker-compose.yml / compose.yml is
// found, or when `docker compose up -d` fails. User-data restore, DB dump copy
// and `docker compose pull` failures are logged as warnings and the restore
// continues.
func RestoreAppFromBackup(ctx context.Context, app *RestorableApp, stacksDir string, logger *log.Logger) error {
stackDir := filepath.Join(stacksDir, app.Name)
start := time.Now()
logger.Printf("[DEBUG] [backup] RestoreAppFromBackup: app=%s, stackDir=%s, hasConfig=%v, hasData=%v, hasDBDump=%v, hasRsyncData=%v",
app.Name, stackDir, app.HasConfig, app.HasData, app.HasDBDump, app.HasRsyncData)
// Step 1: Restore stack config from _config/ backup
if app.HasConfig {
logger.Printf("[INFO] Restoring config for %s from %s", app.Name, app.ConfigPath)
stepStart := time.Now()
if err := restoreConfigDir(ctx, app.ConfigPath, stackDir); err != nil {
return fmt.Errorf("restoring config: %w", err)
}
logger.Printf("[DEBUG] [backup] RestoreAppFromBackup: config restore for %s completed in %s", app.Name, time.Since(stepStart))
} else {
// No config backup — check if stack dir already exists (from catalog sync)
if !dirExists(stackDir) {
return fmt.Errorf("no config backup and no stack directory for %s", app.Name)
}
logger.Printf("[INFO] No config backup for %s — using existing stack dir", app.Name)
}
// Step 2: Verify app data on HDD (common case: HDD survived, data is intact)
// Data is copied back only when the app needs an HDD, its live data dir is
// missing, and the backup mirror holds user data.
if app.NeedsHDD && !app.HasData && app.HasRsyncData {
// App data is missing but rsync backup exists — restore it
logger.Printf("[INFO] Restoring user data for %s from rsync backup", app.Name)
stepStart := time.Now()
if err := restoreUserData(ctx, app, logger); err != nil {
logger.Printf("[WARN] User data restore failed for %s: %v", app.Name, err)
// Non-fatal: app might still start without all data
} else {
logger.Printf("[DEBUG] [backup] RestoreAppFromBackup: user data restore for %s completed in %s", app.Name, time.Since(stepStart))
}
} else if app.HasData {
logger.Printf("[INFO] App data for %s found at %s — no restore needed", app.Name, app.DataPath)
} else {
logger.Printf("[DEBUG] [backup] RestoreAppFromBackup: %s — no user data to restore (needsHDD=%v, hasData=%v, hasRsyncData=%v)",
app.Name, app.NeedsHDD, app.HasData, app.HasRsyncData)
}
// Step 3: Copy DB dumps to primary backup location
if app.HasDBDump {
logger.Printf("[INFO] Restoring DB dumps for %s", app.Name)
stepStart := time.Now()
if err := restoreDBDumps(app, logger); err != nil {
logger.Printf("[WARN] DB dump restore failed for %s: %v", app.Name, err)
// Non-fatal
} else {
logger.Printf("[DEBUG] [backup] RestoreAppFromBackup: DB dump restore for %s completed in %s", app.Name, time.Since(stepStart))
}
}
// Step 4: Docker compose pull + up
// docker-compose.yml is preferred; compose.yml is the fallback name.
composePath := filepath.Join(stackDir, "docker-compose.yml")
if !fileExistsCheck(composePath) {
composePath = filepath.Join(stackDir, "compose.yml")
if !fileExistsCheck(composePath) {
return fmt.Errorf("no compose file found in %s", stackDir)
}
}
composeDir := filepath.Dir(composePath)
logger.Printf("[DEBUG] [backup] RestoreAppFromBackup: %s using compose file %s", app.Name, composePath)
logger.Printf("[INFO] Pulling images for %s", app.Name)
pullStart := time.Now()
pullCmd := exec.CommandContext(ctx, "docker", "compose", "-f", composePath, "pull")
pullCmd.Dir = composeDir
if out, err := pullCmd.CombinedOutput(); err != nil {
logger.Printf("[WARN] docker compose pull failed for %s: %v (%s)", app.Name, err, strings.TrimSpace(string(out)))
// Non-fatal: might work with cached images
} else {
logger.Printf("[DEBUG] [backup] RestoreAppFromBackup: docker compose pull for %s completed in %s", app.Name, time.Since(pullStart))
}
logger.Printf("[INFO] Starting %s", app.Name)
upStart := time.Now()
upCmd := exec.CommandContext(ctx, "docker", "compose", "-f", composePath, "up", "-d")
upCmd.Dir = composeDir
if out, err := upCmd.CombinedOutput(); err != nil {
return fmt.Errorf("docker compose up: %v (%s)", err, strings.TrimSpace(string(out)))
}
// The first line reports the whole restore duration, the second only the `up` step.
logger.Printf("[DEBUG] [backup] RestoreAppFromBackup: %s fully restored and started in %s", app.Name, time.Since(start))
logger.Printf("[DEBUG] [backup] RestoreAppFromBackup: docker compose up for %s completed in %s", app.Name, time.Since(upStart))
return nil
}
// restoreConfigDir copies the contents of a backed-up _config/ directory into
// stackDir with `rsync -a`, creating stackDir first when it does not exist.
// It returns an error if the directory cannot be created or rsync fails; the
// rsync error carries the command's combined output.
func restoreConfigDir(ctx context.Context, configBackupDir, stackDir string) error {
	if mkErr := os.MkdirAll(stackDir, 0755); mkErr != nil {
		return fmt.Errorf("creating stack dir: %w", mkErr)
	}

	// Both paths get exactly one trailing slash so rsync syncs directory
	// contents rather than nesting the source directory inside the target.
	from := strings.TrimRight(configBackupDir, "/") + "/"
	to := strings.TrimRight(stackDir, "/") + "/"

	output, runErr := exec.CommandContext(ctx, "rsync", "-a", from, to).CombinedOutput()
	if runErr == nil {
		return nil
	}
	return fmt.Errorf("rsync config: %v (%s)", runErr, strings.TrimSpace(string(output)))
}
// restoreUserData rsyncs user data from the cross-drive backup back to the
// app's data directory on its HDD.
//
// Every top-level entry of app.RsyncDataPath is restored into
// AppDataDir(app.HDDPath, app.Name), except dot-entries and the mirror's own
// metadata directories: _config, _db and _volumes. Directories are copied with
// `rsync -a`; plain files are copied by reading and rewriting them with mode
// 0644.
//
// A failure on one entry is logged and the remaining entries are still
// processed, but the function then returns an error stating how many entries
// failed, so the caller does not report a partial restore as a success. It
// also returns an error when either path is empty, the backup directory cannot
// be read, or the target data directory cannot be created.
func restoreUserData(ctx context.Context, app *RestorableApp, logger *log.Logger) error {
	if app.RsyncDataPath == "" || app.HDDPath == "" {
		return fmt.Errorf("no rsync data path or HDD path")
	}
	logger.Printf("[DEBUG] [backup] restoreUserData: app=%s, rsyncPath=%s, hddPath=%s", app.Name, app.RsyncDataPath, app.HDDPath)

	entries, err := os.ReadDir(app.RsyncDataPath)
	if err != nil {
		return err
	}
	dataDir := AppDataDir(app.HDDPath, app.Name)
	logger.Printf("[DEBUG] [backup] restoreUserData: %s — target dataDir=%s, %d entries in backup", app.Name, dataDir, len(entries))
	if err := os.MkdirAll(dataDir, 0755); err != nil {
		return fmt.Errorf("creating data dir: %w", err)
	}

	restored, failed := 0, 0
	for _, e := range entries {
		name := e.Name()
		// _config, _db and _volumes are backup metadata restored by other
		// steps; copying them into the app's data directory would pollute it.
		if name == "_config" || name == "_db" || name == "_volumes" || strings.HasPrefix(name, ".") {
			continue
		}
		src := filepath.Join(app.RsyncDataPath, name)
		dst := filepath.Join(dataDir, name)

		if e.IsDir() {
			// Trailing slashes make rsync sync the directory contents.
			src = strings.TrimRight(src, "/") + "/"
			if err := os.MkdirAll(dst, 0755); err != nil {
				logger.Printf("[ERROR] [backup] Cannot create %s: %v", dst, err)
				failed++
				continue
			}
			dst = strings.TrimRight(dst, "/") + "/"
			logger.Printf("[DEBUG] [backup] restoreUserData: %s — rsync dir %s → %s", app.Name, src, dst)
			cmd := exec.CommandContext(ctx, "rsync", "-a", src, dst)
			if out, err := cmd.CombinedOutput(); err != nil {
				logger.Printf("[ERROR] [backup] rsync data %s: %v (%s)", name, err, strings.TrimSpace(string(out)))
				failed++
				continue
			}
			restored++
			continue
		}

		// Single file — copy directly.
		data, err := os.ReadFile(src)
		if err != nil {
			logger.Printf("[ERROR] [backup] Cannot read %s: %v", src, err)
			failed++
			continue
		}
		logger.Printf("[DEBUG] [backup] restoreUserData: %s — copying file %s (%d bytes)", app.Name, name, len(data))
		if err := os.WriteFile(dst, data, 0644); err != nil {
			logger.Printf("[ERROR] [backup] Cannot write %s: %v", dst, err)
			failed++
			continue
		}
		restored++
	}

	logger.Printf("[DEBUG] [backup] restoreUserData: %s — restored %d items", app.Name, restored)
	if failed > 0 {
		return fmt.Errorf("%d of %d backup entries could not be restored", failed, failed+restored)
	}
	return nil
}
// restoreDBDumps copies every regular file from the app's cross-drive _db/
// backup (app.DBDumpPath) into AppDBDumpPath(<drive>, app.Name).
//
// The drive is app.HDDPath when set, otherwise app.DrivePath (used for
// SSD-only apps whose dumps live under the system drive). Nothing is done and
// nil is returned when there is no dump path or no drive path. Per-file read
// or write failures are logged and skipped; only a failure to create the
// destination directory or to list the source directory is returned.
func restoreDBDumps(app *RestorableApp, logger *log.Logger) error {
	if app.DBDumpPath == "" {
		return nil
	}

	var base string
	switch {
	case app.HDDPath != "":
		base = app.HDDPath
	case app.DrivePath != "":
		base = app.DrivePath
	default:
		logger.Printf("[WARN] Cannot restore DB dumps for %s: no drive path", app.Name)
		return nil
	}

	destDir := AppDBDumpPath(base, app.Name)
	logger.Printf("[DEBUG] [backup] restoreDBDumps: app=%s, src=%s, destDir=%s", app.Name, app.DBDumpPath, destDir)
	if mkErr := os.MkdirAll(destDir, 0755); mkErr != nil {
		return fmt.Errorf("creating dump dir: %w", mkErr)
	}

	dumps, listErr := os.ReadDir(app.DBDumpPath)
	if listErr != nil {
		return listErr
	}

	copied := 0
	for _, dump := range dumps {
		if dump.IsDir() {
			continue
		}
		fileName := dump.Name()
		payload, readErr := os.ReadFile(filepath.Join(app.DBDumpPath, fileName))
		if readErr != nil {
			logger.Printf("[ERROR] [backup] Cannot read dump %s: %v", fileName, readErr)
			continue
		}
		logger.Printf("[DEBUG] [backup] restoreDBDumps: %s — copying %s (%d bytes)", app.Name, fileName, len(payload))
		if writeErr := os.WriteFile(filepath.Join(destDir, fileName), payload, 0644); writeErr != nil {
			logger.Printf("[ERROR] [backup] Cannot write dump %s: %v", fileName, writeErr)
			continue
		}
		copied++
	}
	logger.Printf("[DEBUG] [backup] restoreDBDumps: %s — copied %d dump files", app.Name, copied)
	return nil
}
// fileExistsCheck reports whether path can be stat'ed and is not a directory.
func fileExistsCheck(path string) bool {
	fi, statErr := os.Stat(path)
	if statErr != nil {
		return false
	}
	return !fi.IsDir()
}
@@ -1,13 +0,0 @@
//go:build !linux
package backup
import (
"context"
"log"
)
// RestoreAppFromBackup is a no-op on non-Linux platforms.
// It ignores all arguments and always returns nil, so callers built for other
// platforms compile but no restore is performed.
func RestoreAppFromBackup(ctx context.Context, app *RestorableApp, stacksDir string, logger *log.Logger) error {
return nil
}
@@ -1,325 +0,0 @@
//go:build linux
package backup
import (
"context"
"encoding/json"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
"strings"
)
// MountDrivesFromLayout scans block devices for disks matching the stored
// disk layout and mounts them using the felhom two-layer mount pattern
// (raw mount → bind mount).
//
// The controller container runs privileged with:
// - /host-dev mounted from host /dev
// - /host-fstab mounted from host /etc/fstab
// - /mnt with rshared propagation
//
// Layout entries with an empty UUID, a UUID that matches no block device, or a
// failed mount are logged and skipped; they never produce an error. The only
// error return is a failed block-device scan. After a successful mount the
// host fstab is updated; a failure there is logged as a warning and the mount
// still counts as successful.
//
// Returns the list of successfully mounted final mount paths.
func MountDrivesFromLayout(ctx context.Context, layout DiskLayout, logger *log.Logger) ([]string, error) {
if len(layout.Mounts) == 0 {
logger.Printf("[DEBUG] [backup] MountDrivesFromLayout: no mounts in layout, nothing to do")
return nil, nil
}
logger.Printf("[DEBUG] [backup] MountDrivesFromLayout: processing %d mount entries from disk layout", len(layout.Mounts))
// Get current block devices with UUIDs
uuidToDevice, err := scanBlockDeviceUUIDs(ctx)
if err != nil {
return nil, fmt.Errorf("scanning block devices: %w", err)
}
logger.Printf("[DEBUG] [backup] MountDrivesFromLayout: discovered %d block devices with UUIDs", len(uuidToDevice))
for uuid, dev := range uuidToDevice {
// Only the first 12 characters of the UUID are logged.
uuidShort := uuid
if len(uuidShort) > 12 {
uuidShort = uuidShort[:12]
}
logger.Printf("[DEBUG] [backup] MountDrivesFromLayout: device %s → UUID=%s...", dev, uuidShort)
}
var mounted []string
for _, dm := range layout.Mounts {
if dm.UUID == "" {
logger.Printf("[DEBUG] [backup] MountDrivesFromLayout: skipping mount entry with empty UUID (label=%s)", dm.Label)
continue
}
logger.Printf("[DEBUG] [backup] MountDrivesFromLayout: processing %s (UUID=%s, mountPoint=%s, rawMount=%s, fsType=%s)",
dm.Label, dm.UUID, dm.MountPoint, dm.RawMount, dm.FSType)
// Find matching device by UUID
device := uuidToDevice[dm.UUID]
if device == "" {
logger.Printf("[WARN] Disk UUID %s (%s) not found — drive may be missing or disconnected",
dm.UUID, dm.Label)
continue
}
// Check if already mounted
finalMount := dm.MountPoint
if isMountedPath(finalMount) {
logger.Printf("[INFO] %s already mounted at %s", dm.Label, finalMount)
mounted = append(mounted, finalMount)
continue
}
// NOTE(review): when only the raw layer is mounted, the final mount point is
// reported as mounted without the bind layer being checked or created here —
// confirm this is intended.
if dm.RawMount != "" && isMountedPath(dm.RawMount) {
logger.Printf("[INFO] %s raw mount already at %s", dm.Label, dm.RawMount)
mounted = append(mounted, finalMount)
continue
}
uuidShort := dm.UUID
if len(uuidShort) > 12 {
uuidShort = uuidShort[:12]
}
logger.Printf("[INFO] Found disk %s (UUID=%s, label=%s) — mounting to %s",
device, uuidShort, dm.Label, finalMount)
// Mount using the appropriate pattern
if dm.RawMount != "" && dm.BindSubdir != "" {
// Two-layer HDD mount: raw → bind
logger.Printf("[DEBUG] [backup] MountDrivesFromLayout: %s — two-layer mount (raw=%s, bindSubdir=%s)",
dm.Label, dm.RawMount, dm.BindSubdir)
if err := mountRawAndBind(ctx, device, dm, logger); err != nil {
logger.Printf("[ERROR] Failed to mount %s: %v", dm.Label, err)
continue
}
} else {
// Simple direct mount (e.g., sys_drive)
logger.Printf("[DEBUG] [backup] MountDrivesFromLayout: %s — direct mount to %s", dm.Label, dm.MountPoint)
if err := mountDirect(ctx, device, dm, logger); err != nil {
logger.Printf("[ERROR] Failed to mount %s: %v", dm.Label, err)
continue
}
}
// Update host fstab so mount persists across reboots
if err := addDRFstabEntries(dm, logger); err != nil {
logger.Printf("[WARN] Failed to update fstab for %s: %v — mount works but won't persist", dm.Label, err)
}
mounted = append(mounted, finalMount)
logger.Printf("[INFO] Successfully mounted %s at %s", dm.Label, finalMount)
}
logger.Printf("[DEBUG] [backup] MountDrivesFromLayout: done — %d/%d drives mounted", len(mounted), len(layout.Mounts))
return mounted, nil
}
// scanBlockDeviceUUIDs builds a filesystem-UUID → device path map.
//
// It runs `lsblk -J` and records every top-level device and direct child that
// reports a non-empty UUID as "/dev/<name>". Only when lsblk yields no UUID at
// all does it fall back to parsing the output of `blkid`. A failing or
// unparsable lsblk is returned as an error; a failing blkid is ignored and the
// (empty) map is returned.
func scanBlockDeviceUUIDs(ctx context.Context) (map[string]string, error) {
// First try lsblk with UUID output
out, err := exec.CommandContext(ctx, "lsblk", "-J", "-o", "NAME,UUID,FSTYPE,MOUNTPOINT").Output()
if err != nil {
return nil, fmt.Errorf("lsblk failed: %w", err)
}
// Pointer fields accept the JSON null lsblk emits for missing values.
// Only one level of children is decoded; deeper nesting is not visited.
var parsed struct {
BlockDevices []struct {
Name string `json:"name"`
UUID *string `json:"uuid"`
FSType *string `json:"fstype"`
Mount *string `json:"mountpoint"`
Children []struct {
Name string `json:"name"`
UUID *string `json:"uuid"`
FSType *string `json:"fstype"`
Mount *string `json:"mountpoint"`
} `json:"children"`
} `json:"blockdevices"`
}
if err := json.Unmarshal(out, &parsed); err != nil {
return nil, fmt.Errorf("lsblk parse failed: %w", err)
}
devices := make(map[string]string) // UUID → /dev/path
for _, dev := range parsed.BlockDevices {
if dev.UUID != nil && *dev.UUID != "" {
devices[*dev.UUID] = "/dev/" + dev.Name
}
for _, child := range dev.Children {
if child.UUID != nil && *child.UUID != "" {
devices[*child.UUID] = "/dev/" + child.Name
}
}
}
// If lsblk didn't return UUIDs (common inside containers), enrich via blkid
if len(devices) == 0 {
// blkid is run without arguments; device paths are stored exactly as it prints them.
blkOut, err := exec.CommandContext(ctx, "blkid").Output()
if err == nil {
for _, line := range strings.Split(string(blkOut), "\n") {
line = strings.TrimSpace(line)
if line == "" {
continue
}
// Parse: /dev/sdb1: UUID="277a2179-..." TYPE="ext4" ...
colonIdx := strings.Index(line, ":")
if colonIdx < 0 {
continue
}
devPath := line[:colonIdx]
// The first `UUID="` occurrence on the line is used; 6 is len(`UUID="`).
if uuidIdx := strings.Index(line, `UUID="`); uuidIdx >= 0 {
rest := line[uuidIdx+6:]
if endIdx := strings.Index(rest, `"`); endIdx >= 0 {
uuid := rest[:endIdx]
devices[uuid] = devPath
}
}
}
}
}
return devices, nil
}
// mountDirect mounts device straight onto dm.MountPoint (no bind layer).
// The mount point is created if needed, the device path is translated with
// hostDevPath, and the filesystem is mounted with dm.FSType and the noatime
// option. Returns an error if the directory cannot be created or mount fails.
func mountDirect(ctx context.Context, device string, dm DiskMount, logger *log.Logger) error {
	target := dm.MountPoint
	if mkErr := os.MkdirAll(target, 0755); mkErr != nil {
		return fmt.Errorf("creating mount point: %w", mkErr)
	}

	// Use host device path if available
	source := hostDevPath(device)
	logger.Printf("[DEBUG] [backup] mountDirect: mount -t %s -o noatime %s %s", dm.FSType, source, target)

	mountArgs := []string{"-t", dm.FSType, "-o", "noatime", source, target}
	output, runErr := exec.CommandContext(ctx, "mount", mountArgs...).CombinedOutput()
	if runErr != nil {
		return fmt.Errorf("mount %s: %s: %w", source, strings.TrimSpace(string(output)), runErr)
	}

	logger.Printf("[DEBUG] [backup] mountDirect: %s mounted successfully at %s", source, target)
	return nil
}
// mountRawAndBind implements the two-layer felhom mount pattern.
//
// Layer 1 mounts the device (translated with hostDevPath) on dm.RawMount with
// dm.FSType and noatime. Layer 2 bind-mounts dm.RawMount/dm.BindSubdir onto
// dm.MountPoint. All three directories are created when missing. Any failure
// is returned immediately.
//
// NOTE(review): when layer 2 fails, the raw mount from layer 1 is left in
// place — confirm callers cope with a raw-only mount.
func mountRawAndBind(ctx context.Context, device string, dm DiskMount, logger *log.Logger) error {
// Layer 1: raw mount
if err := os.MkdirAll(dm.RawMount, 0755); err != nil {
return fmt.Errorf("creating raw mount point: %w", err)
}
devPath := hostDevPath(device)
logger.Printf("[DEBUG] [backup] mountRawAndBind: layer 1 — mount -t %s -o noatime %s %s", dm.FSType, devPath, dm.RawMount)
cmd := exec.CommandContext(ctx, "mount", "-t", dm.FSType, "-o", "noatime", devPath, dm.RawMount)
if out, err := cmd.CombinedOutput(); err != nil {
return fmt.Errorf("raw mount %s -> %s: %s: %w", devPath, dm.RawMount, strings.TrimSpace(string(out)), err)
}
logger.Printf("[DEBUG] [backup] mountRawAndBind: layer 1 OK — %s mounted at %s", devPath, dm.RawMount)
// Layer 2: bind mount (subdir -> final mount point)
// The bind source lives on the freshly mounted filesystem, so it can only be
// created after layer 1 succeeded.
bindSrc := filepath.Join(dm.RawMount, dm.BindSubdir)
if err := os.MkdirAll(bindSrc, 0755); err != nil {
return fmt.Errorf("creating bind source dir: %w", err)
}
if err := os.MkdirAll(dm.MountPoint, 0755); err != nil {
return fmt.Errorf("creating final mount point: %w", err)
}
logger.Printf("[DEBUG] [backup] mountRawAndBind: layer 2 — mount --bind %s %s", bindSrc, dm.MountPoint)
cmd = exec.CommandContext(ctx, "mount", "--bind", bindSrc, dm.MountPoint)
if out, err := cmd.CombinedOutput(); err != nil {
return fmt.Errorf("bind mount %s -> %s: %s: %w", bindSrc, dm.MountPoint, strings.TrimSpace(string(out)), err)
}
logger.Printf("[DEBUG] [backup] mountRawAndBind: layer 2 OK — %s bound to %s", bindSrc, dm.MountPoint)
return nil
}
// addDRFstabEntries appends fstab entries for dm to the host fstab (exposed in
// the container as /host-fstab) so the mounts persist across host reboots.
//
// The call is idempotent: if dm.UUID already appears anywhere in the file,
// nothing is written. For a two-layer mount it writes the raw UUID entry plus
// a "bind,nofail" entry for the final mount point; when dm.RawMount is empty
// it writes a single direct UUID entry for dm.MountPoint.
//
// fstab columns are whitespace separated, so an empty options column would
// make the following "0 2" fields be read as options and dump frequency. When
// dm.FstabOptions is blank the entry therefore falls back to
// "defaults,nofail", matching the nofail used for the bind entry.
//
// Returns an error when the fstab cannot be read or written.
func addDRFstabEntries(dm DiskMount, logger *log.Logger) error {
	const fstabPath = "/host-fstab"
	logger.Printf("[INFO] [backup] Adding fstab entries for disaster recovery (%s, UUID=%s)", dm.Label, dm.UUID)
	logger.Printf("[DEBUG] [backup] addDRFstabEntries: checking fstab for %s (UUID=%s)", dm.Label, dm.UUID)

	data, err := os.ReadFile(fstabPath)
	if err != nil {
		return fmt.Errorf("reading fstab: %w", err)
	}
	content := string(data)

	// Skip if UUID already in fstab (idempotent)
	if strings.Contains(content, dm.UUID) {
		logger.Printf("[DEBUG] [backup] addDRFstabEntries: UUID %s already in fstab — skipping", dm.UUID)
		return nil
	}

	// Never emit an empty options column (see function comment).
	options := dm.FstabOptions
	if strings.TrimSpace(options) == "" {
		options = "defaults,nofail"
	}

	var additions strings.Builder
	additions.WriteString("\n# Restored by felhom-controller DR\n")
	entryCount := 0
	if dm.RawMount != "" {
		// Raw mount entry
		fmt.Fprintf(&additions, "UUID=%s\t%s\t%s\t%s\t0 2\n",
			dm.UUID, dm.RawMount, dm.FSType, options)
		entryCount++
	}
	if dm.BindSubdir != "" && dm.RawMount != "" {
		// Bind mount entry
		fmt.Fprintf(&additions, "%s/%s\t%s\tnone\tbind,nofail\t0 0\n",
			dm.RawMount, dm.BindSubdir, dm.MountPoint)
		entryCount++
	} else if dm.RawMount == "" {
		// Direct mount entry (no bind)
		fmt.Fprintf(&additions, "UUID=%s\t%s\t%s\t%s\t0 2\n",
			dm.UUID, dm.MountPoint, dm.FSType, options)
		entryCount++
	}
	newContent := content + additions.String()

	// Write atomically (try rename, fallback to direct write for bind-mounted fstab)
	tmpPath := fstabPath + ".tmp"
	if err := os.WriteFile(tmpPath, []byte(newContent), 0644); err != nil {
		return fmt.Errorf("writing fstab tmp: %w", err)
	}
	if err := os.Rename(tmpPath, fstabPath); err != nil {
		os.Remove(tmpPath)
		// Fallback: direct write (bind-mounted files can't be renamed)
		if err := os.WriteFile(fstabPath, []byte(newContent), 0644); err != nil {
			return fmt.Errorf("writing fstab: %w", err)
		}
	}
	logger.Printf("[INFO] [backup] Added %d fstab entries for %s", entryCount, dm.Label)
	return nil
}
// isMountedPath reports whether path is currently a mount point according to
// /proc/mounts.
//
// The kernel writes space, tab, newline and backslash inside a mount point as
// the octal escapes \040, \011, \012 and \134, so the mount-point field is
// decoded before it is compared; without that, a mount point containing any of
// those characters would never match. Both sides are normalised with
// filepath.Clean. An empty path or an unreadable /proc/mounts yields false.
func isMountedPath(path string) bool {
	if path == "" {
		return false
	}
	data, err := os.ReadFile("/proc/mounts")
	if err != nil {
		return false
	}

	unescape := strings.NewReplacer(
		`\040`, " ",
		`\011`, "\t",
		`\012`, "\n",
		`\134`, `\`,
	)
	cleanPath := filepath.Clean(path)
	for _, line := range strings.Split(string(data), "\n") {
		// Field 0 is the device, field 1 the mount point.
		fields := strings.Fields(line)
		if len(fields) < 2 {
			continue
		}
		if filepath.Clean(unescape.Replace(fields[1])) == cleanPath {
			return true
		}
	}
	return false
}
// hostDevPath rewrites a "/dev/..." device path to the same path under
// "/host-dev/", where the container has the host's /dev mounted. Paths that do
// not start with "/dev/" are returned unchanged.
func hostDevPath(devPath string) string {
	const devPrefix = "/dev/"
	if !strings.HasPrefix(devPath, devPrefix) {
		return devPath
	}
	return "/host-dev/" + devPath[len(devPrefix):]
}
@@ -1,13 +0,0 @@
//go:build !linux
package backup
import (
"context"
"log"
)
// MountDrivesFromLayout is a no-op on non-Linux platforms.
// It ignores all arguments and always returns (nil, nil): no drive is mounted
// and no error is reported.
func MountDrivesFromLayout(ctx context.Context, layout DiskLayout, logger *log.Logger) ([]string, error) {
return nil, nil
}
-310
View File
@@ -1,310 +0,0 @@
package backup
import (
"log"
"os"
"path/filepath"
"strings"
"sync"
"time"
)
// RestorableApp is one entry of a disaster-recovery restore plan: what the
// scan found on disk for an app, and how far its restore has progressed.
type RestorableApp struct {
	Name        string `json:"name"`
	DisplayName string `json:"display_name"`
	NeedsHDD    bool   `json:"needs_hdd"`
	HDDPath     string `json:"hdd_path,omitempty"`

	// Scan results.
	HasConfig     bool   `json:"has_config"`      // a _config/ dir with compose files exists
	ConfigPath    string `json:"config_path"`     // full path to the _config/ backup
	HasData       bool   `json:"has_data"`        // the app data dir exists on the HDD
	DataPath      string `json:"data_path"`       // e.g., /mnt/hdd_1/appdata/immich
	HasDBDump     bool   `json:"has_db_dump"`     // a _db/ dir with dump files exists
	DBDumpPath    string `json:"db_dump_path"`    // full path to the _db/ backup
	HasRsyncData  bool   `json:"has_rsync_data"`  // rsync user data present (excl _config/_db)
	RsyncDataPath string `json:"rsync_data_path"` // full path to the rsync backup
	DrivePath     string `json:"drive_path"`      // drive holding the backup
	DriveLabel    string `json:"drive_label"`     // label shown to the user

	// Progress, updated while the restore runs.
	Status      string `json:"status"` // "pending", "restoring", "done", "failed", "skipped"
	Error       string `json:"error,omitempty"`
	StartedAt   string `json:"started_at,omitempty"`
	CompletedAt string `json:"completed_at,omitempty"`
}

// RestorePlan is the complete DR restore plan. The unexported mutex guards the
// fields for the accessor methods below; use those methods for shared access.
type RestorePlan struct {
	mu sync.RWMutex

	CustomerID string          `json:"customer_id"`
	Domain     string          `json:"domain"`
	Timestamp  string          `json:"timestamp"` // when the infra backup was taken
	Apps       []RestorableApp `json:"apps"`

	// Summary of the drives that were scanned.
	Drives []DriveInfo `json:"drives"`

	// Overall plan state.
	Status string `json:"status"` // "pending", "restoring", "done"
}

// DriveInfo is the display summary of one mounted drive.
type DriveInfo struct {
	Path      string `json:"path"`
	Label     string `json:"label"`
	Available bool   `json:"available"`  // the mount is accessible
	HasBackup bool   `json:"has_backup"` // a backups/secondary/ dir exists
}

// GetApps returns a copy of the apps list taken under the read lock, so the
// caller can use it without holding the plan's lock.
func (rp *RestorePlan) GetApps() []RestorableApp {
	rp.mu.RLock()
	defer rp.mu.RUnlock()

	out := make([]RestorableApp, 0, len(rp.Apps))
	out = append(out, rp.Apps...)
	return out
}

// Snapshot returns the plan's status plus copies of its apps and drives as a
// map ready for JSON serialization. The copies are taken under the read lock.
func (rp *RestorePlan) Snapshot() map[string]interface{} {
	rp.mu.RLock()
	defer rp.mu.RUnlock()

	appsCopy := append(make([]RestorableApp, 0, len(rp.Apps)), rp.Apps...)
	drivesCopy := append(make([]DriveInfo, 0, len(rp.Drives)), rp.Drives...)

	snap := make(map[string]interface{}, 4)
	snap["ok"] = true
	snap["status"] = rp.Status
	snap["apps"] = appsCopy
	snap["drives"] = drivesCopy
	return snap
}

// TryStartRestore switches the plan to "restoring" and returns true, unless it
// is already "restoring", in which case it changes nothing and returns false.
// Check and update happen under one lock, so two callers cannot both start.
func (rp *RestorePlan) TryStartRestore() bool {
	rp.mu.Lock()
	defer rp.mu.Unlock()

	alreadyRunning := rp.Status == "restoring"
	if !alreadyRunning {
		rp.Status = "restoring"
	}
	return !alreadyRunning
}

// SetStatus replaces the overall plan status while holding the write lock.
func (rp *RestorePlan) SetStatus(status string) {
	rp.mu.Lock()
	rp.Status = status
	rp.mu.Unlock()
}

// GetStatus reads the overall plan status while holding the read lock.
func (rp *RestorePlan) GetStatus() string {
	rp.mu.RLock()
	current := rp.Status
	rp.mu.RUnlock()
	return current
}

// UpdateApp sets the status and error message of the first app called name.
// Entering "restoring" stamps StartedAt, and entering "done" or "failed"
// stamps CompletedAt, both as UTC RFC 3339. Unknown names are ignored.
func (rp *RestorePlan) UpdateApp(name, status, errMsg string) {
	rp.mu.Lock()
	defer rp.mu.Unlock()

	for idx := range rp.Apps {
		entry := &rp.Apps[idx]
		if entry.Name != name {
			continue
		}
		entry.Status = status
		entry.Error = errMsg
		switch status {
		case "restoring":
			entry.StartedAt = time.Now().UTC().Format(time.RFC3339)
		case "done", "failed":
			entry.CompletedAt = time.Now().UTC().Format(time.RFC3339)
		}
		return
	}
}

// AllDone reports whether every app has reached "done", "failed" or "skipped".
// A plan without apps is never considered done.
func (rp *RestorePlan) AllDone() bool {
	rp.mu.RLock()
	defer rp.mu.RUnlock()

	if len(rp.Apps) == 0 {
		return false
	}
	for idx := range rp.Apps {
		switch rp.Apps[idx].Status {
		case "done", "failed", "skipped":
			// finished, look at the next app
		default:
			return false
		}
	}
	return true
}
// InfraStackInfo is a minimal stack descriptor from the Hub infra backup.
// Used to pass deployed_stacks info into the scan without importing report.
// ScanDrivesForBackups copies all four fields into the matching RestorableApp.
type InfraStackInfo struct {
Name string // stack name; used to locate the app's data and backup directories
DisplayName string // human-readable name, copied to the plan as-is
HDDPath string // drive path holding the app's data; empty means no live-data check is made
NeedsHDD bool // copied to RestorableApp.NeedsHDD
}
// ScanDrivesForBackups scans mounted drives for cross-drive backup data
// and correlates with the deployed stacks manifest from the Hub.
//
// Every path in mountedPaths becomes a DriveInfo; a drive counts as a backup
// drive when its SecondaryBackupPath directory exists. Every stack in stacks
// becomes a RestorableApp with status "pending", whether or not a backup was
// found for it. For each app the first backup drive (in mountedPaths order)
// that contains the app's rsync directory is used and later drives are not
// consulted. The returned plan has status "pending" and a non-nil Apps slice.
func ScanDrivesForBackups(mountedPaths []string, stacks []InfraStackInfo, logger *log.Logger) *RestorePlan {
plan := &RestorePlan{
Status: "pending",
}
logger.Printf("[DEBUG] [backup] ScanDrivesForBackups: scanning %d mount paths, %d stacks from manifest",
len(mountedPaths), len(stacks))
// Build drive info and find backup directories
type driveBackup struct {
drivePath string
label string
secPath string // backups/secondary/ path
}
var backupDrives []driveBackup
for _, mp := range mountedPaths {
// The drive label is simply the last path element of the mount path.
label := filepath.Base(mp)
avail := dirExists(mp)
di := DriveInfo{
Path: mp,
Label: label,
Available: avail,
}
logger.Printf("[DEBUG] [backup] ScanDrivesForBackups: checking drive %s (label=%s, available=%v)", mp, label, avail)
secPath := SecondaryBackupPath(mp)
if dirExists(secPath) {
di.HasBackup = true
backupDrives = append(backupDrives, driveBackup{
drivePath: mp,
label: label,
secPath: secPath,
})
logger.Printf("[INFO] Found backup data on %s (%s)", mp, secPath)
}
plan.Drives = append(plan.Drives, di)
}
logger.Printf("[DEBUG] [backup] ScanDrivesForBackups: found %d drives with backup data", len(backupDrives))
// For each stack from the manifest, look for backup data on drives
for _, stack := range stacks {
app := RestorableApp{
Name: stack.Name,
DisplayName: stack.DisplayName,
NeedsHDD: stack.NeedsHDD,
HDDPath: stack.HDDPath,
Status: "pending",
}
logger.Printf("[DEBUG] [backup] ScanDrivesForBackups: scanning for app %s (needsHDD=%v, hddPath=%s)",
stack.Name, stack.NeedsHDD, stack.HDDPath)
// Check if app data exists directly on HDD (common case: HDD survived)
if stack.HDDPath != "" {
dataDir := AppDataDir(stack.HDDPath, stack.Name)
if dirExists(dataDir) {
app.HasData = true
app.DataPath = dataDir
logger.Printf("[DEBUG] [backup] ScanDrivesForBackups: %s — live data found at %s", stack.Name, dataDir)
}
}
// Scan each drive for cross-drive backup of this app
for _, db := range backupDrives {
rsyncBase := AppSecondaryRsyncPath(db.drivePath, stack.Name)
if !dirExists(rsyncBase) {
continue
}
// Found a backup for this app
app.DrivePath = db.drivePath
app.DriveLabel = db.label
logger.Printf("[DEBUG] [backup] ScanDrivesForBackups: %s — backup found on drive %s at %s",
stack.Name, db.label, rsyncBase)
// Check for _config/ (stack compose directory backup)
configDir := filepath.Join(rsyncBase, "_config")
if dirExists(configDir) {
app.HasConfig = true
app.ConfigPath = configDir
}
// Check for _db/ (database dump backup); an empty _db/ does not count
dbDir := filepath.Join(rsyncBase, "_db")
if dirExists(dbDir) && !dirIsEmpty(dbDir) {
app.HasDBDump = true
app.DBDumpPath = dbDir
}
// Check for user data in rsync (anything besides _config and _db)
if hasUserData(rsyncBase) {
app.HasRsyncData = true
app.RsyncDataPath = rsyncBase
}
logger.Printf("[DEBUG] [backup] ScanDrivesForBackups: %s — config=%v, dbDump=%v, rsyncData=%v",
stack.Name, app.HasConfig, app.HasDBDump, app.HasRsyncData)
break // use first drive with backup for this app
}
plan.Apps = append(plan.Apps, app)
}
// Replace a nil slice with an empty one; encoding/json would encode nil as null.
if len(plan.Apps) == 0 {
plan.Apps = []RestorableApp{}
}
logger.Printf("[INFO] Restore plan: %d apps, %d drives (%d with backups)",
len(plan.Apps), len(plan.Drives), len(backupDrives))
return plan
}
// dirExists reports whether path can be stat'ed and refers to a directory.
// Any stat failure (missing path, permission problem, ...) yields false.
func dirExists(path string) bool {
	fi, statErr := os.Stat(path)
	if statErr != nil {
		return false
	}
	return fi.IsDir()
}
// dirIsEmpty reports whether the directory at path contains no entries.
// When the directory cannot be read it answers false: treating an unreadable
// directory as non-empty is the safer choice for backup detection.
func dirIsEmpty(path string) bool {
	if listing, readErr := os.ReadDir(path); readErr == nil {
		return len(listing) == 0
	}
	return false
}
// hasUserData reports whether the rsync backup directory holds anything other
// than the _config and _db bookkeeping entries. Entries whose name starts with
// a dot are ignored as well. An unreadable directory yields false.
func hasUserData(rsyncBase string) bool {
	listing, readErr := os.ReadDir(rsyncBase)
	if readErr != nil {
		return false
	}
	for _, entry := range listing {
		switch entryName := entry.Name(); {
		case entryName == "_config", entryName == "_db":
			// bookkeeping directories, not user data
		case strings.HasPrefix(entryName, "."):
			// hidden entry, ignored
		default:
			return true
		}
	}
	return false
}