feat: drive migration & Tier 2 restic deprecation (v0.18.0)

Phase 1: Deprecate restic as Tier 2 method (rsync only), auto-migrate on startup
Phase 2: Enhanced per-app migration with backup awareness, DB dump copy, auto-cleanup
Phase 3: Full drive migration with decommissioned state, rollback support, wizard UI
Phase 4: Hub report includes decommissioned drive state

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-19 21:49:14 +01:00
parent bdbe170a54
commit 99bf3ca7a8
22 changed files with 1725 additions and 402 deletions
+136 -88
View File
@@ -43,6 +43,10 @@ type Manager struct {
// AfterBackup is called after a backup completes to refresh the cache.
// Set by main.go to avoid circular import with scheduler.
AfterBackup func()
// MigrationActiveCheck returns true if a full drive migration is in progress.
// Set by main.go to coordinate with DriveMigrator.
MigrationActiveCheck func() bool
}
// SnapshotRecord combines restic snapshot metadata with our run stats.
@@ -243,12 +247,17 @@ func (m *Manager) runDBDumpsInternal(ctx context.Context) error {
for _, db := range dbs {
drivePath := m.GetAppDrivePath(db.StackName)
// Skip if drive is disconnected
// Skip if drive is disconnected or decommissioned
if m.settings != nil && m.settings.IsDisconnected(drivePath) {
m.logger.Printf("[WARN] Skipping DB dump for %s — drive disconnected: %s", db.StackName, drivePath)
summary = append(summary, fmt.Sprintf("SKIP %s (drive disconnected)", db.ContainerName))
continue
}
if m.settings != nil && m.settings.IsDecommissioned(drivePath) {
m.logger.Printf("[WARN] Skipping DB dump for %s — drive decommissioned: %s", db.StackName, drivePath)
summary = append(summary, fmt.Sprintf("SKIP %s (drive decommissioned)", db.ContainerName))
continue
}
dumpDir := AppDBDumpPath(drivePath, db.StackName)
@@ -319,6 +328,12 @@ func (m *Manager) RunBackup(ctx context.Context) error {
// runBackupInternal is the implementation of RunBackup. Caller must hold the running flag.
func (m *Manager) runBackupInternal(ctx context.Context) error {
// Skip if a full drive migration is in progress
if m.MigrationActiveCheck != nil && m.MigrationActiveCheck() {
m.logger.Printf("[WARN] Skipping nightly backup — drive migration in progress")
return nil
}
start := time.Now()
m.logger.Printf("[INFO] Starting restic backup (per-drive)")
@@ -339,68 +354,14 @@ func (m *Manager) runBackupInternal(ctx context.Context) error {
driveCount := 0
for drivePath, stacks := range driveStacks {
// Skip disconnected drives
if m.settings != nil && m.settings.IsDisconnected(drivePath) {
m.logger.Printf("[WARN] Skipping backup for drive %s — disconnected", drivePath)
continue
}
repoPath := PrimaryResticRepoPath(drivePath)
// Ensure repo is initialized
if err := m.restic.EnsureInitialized(repoPath); err != nil {
m.logger.Printf("[ERROR] Restic init failed for %s: %v", repoPath, err)
anyErr = err
continue
}
// Build paths for this drive
var paths []string
paths = append(paths, infraPaths...)
for _, stack := range stacks {
// App data (appdata/<stack>/)
appData := AppDataDir(drivePath, stack.Name)
if _, err := os.Stat(appData); err == nil {
paths = append(paths, appData)
}
// HDD mounts (for apps with custom mount points)
if m.stackProvider != nil {
for _, mount := range m.stackProvider.GetStackHDDMounts(stack.Name) {
if _, err := os.Stat(mount); err == nil {
paths = append(paths, mount)
}
}
}
// DB dumps for this stack
dumpDir := AppDBDumpPath(drivePath, stack.Name)
if _, err := os.Stat(dumpDir); err == nil {
paths = append(paths, dumpDir)
}
}
// Deduplicate paths
paths = dedup(paths)
tags := []string{"felhom", m.cfg.Customer.ID, filepath.Base(drivePath)}
m.logger.Printf("[INFO] Backing up drive %s (%d apps, %d paths)", drivePath, len(stacks), len(paths))
result, err := m.restic.Snapshot(repoPath, paths, tags)
result, err := m.backupDrive(ctx, drivePath, stacks, infraPaths)
if err != nil {
m.logger.Printf("[ERROR] Restic backup failed for drive %s: %v", drivePath, err)
anyErr = err
continue
}
lastResult = result
driveCount++
// Prune check (weekly — Sunday)
if shouldPrune(m.cfg.Backup.PruneSchedule) {
m.logger.Printf("[INFO] Running weekly prune for %s", repoPath)
if err := m.restic.Prune(repoPath, m.cfg.Backup.Retention); err != nil {
m.logger.Printf("[WARN] Restic prune failed for %s: %v", repoPath, err)
}
if result != nil {
lastResult = result
driveCount++
}
}
@@ -463,6 +424,120 @@ func (m *Manager) runBackupInternal(ctx context.Context) error {
return anyErr
}
// backupDrive performs the restic backup of one drive and returns the
// resulting snapshot. A nil result together with a nil error means the drive
// was skipped because settings report it as disconnected or decommissioned.
//
// The backed-up path set is infraPaths plus, for every stack in stacks, the
// app data dir, any HDD mounts reported by the stack provider, and the DB dump
// dir — each included only if it currently exists on disk.
//
// Caller must hold the running flag.
func (m *Manager) backupDrive(ctx context.Context, drivePath string, stacks []StackSummary, infraPaths []string) (*SnapshotResult, error) {
	// Drives that are disconnected or decommissioned are never backed up.
	if m.settings != nil {
		if m.settings.IsDisconnected(drivePath) {
			m.logger.Printf("[WARN] Skipping backup for drive %s — disconnected", drivePath)
			return nil, nil
		}
		if m.settings.IsDecommissioned(drivePath) {
			m.logger.Printf("[WARN] Skipping backup for drive %s — decommissioned", drivePath)
			return nil, nil
		}
	}

	// The repo must exist before a snapshot can be taken.
	repoPath := PrimaryResticRepoPath(drivePath)
	if initErr := m.restic.EnsureInitialized(repoPath); initErr != nil {
		m.logger.Printf("[ERROR] Restic init failed for %s: %v", repoPath, initErr)
		return nil, initErr
	}

	// Collect the paths for this drive, starting with the infrastructure paths.
	paths := append([]string(nil), infraPaths...)
	addIfPresent := func(candidate string) {
		if _, statErr := os.Stat(candidate); statErr == nil {
			paths = append(paths, candidate)
		}
	}
	for _, stack := range stacks {
		// App data (appdata/<stack>/)
		addIfPresent(AppDataDir(drivePath, stack.Name))

		// HDD mounts (for apps with custom mount points)
		if m.stackProvider != nil {
			for _, mount := range m.stackProvider.GetStackHDDMounts(stack.Name) {
				addIfPresent(mount)
			}
		}

		// DB dumps for this stack
		addIfPresent(AppDBDumpPath(drivePath, stack.Name))
	}
	paths = dedup(paths)

	tags := []string{"felhom", m.cfg.Customer.ID, filepath.Base(drivePath)}
	m.logger.Printf("[INFO] Backing up drive %s (%d apps, %d paths)", drivePath, len(stacks), len(paths))

	snapshot, snapErr := m.restic.Snapshot(repoPath, paths, tags)
	if snapErr != nil {
		m.logger.Printf("[ERROR] Restic backup failed for drive %s: %v", drivePath, snapErr)
		return nil, snapErr
	}

	// Prune when the configured schedule says it is due. A prune failure is
	// only logged; it does not fail the backup that just succeeded.
	if shouldPrune(m.cfg.Backup.PruneSchedule) {
		m.logger.Printf("[INFO] Running weekly prune for %s", repoPath)
		if pruneErr := m.restic.Prune(repoPath, m.cfg.Backup.Retention); pruneErr != nil {
			m.logger.Printf("[WARN] Restic prune failed for %s: %v", repoPath, pruneErr)
		}
	}

	return snapshot, nil
}
// tryAcquireRunning claims the running flag without blocking.
// It reports true when this call set the flag, and false when the flag was
// already set.
func (m *Manager) tryAcquireRunning() bool {
	m.mu.Lock()
	defer m.mu.Unlock()

	// After this call the flag is set either way; only the previous value
	// decides whether the caller is the one who acquired it.
	alreadyRunning := m.running
	m.running = true
	return !alreadyRunning
}
// TryRunDriveBackup backs up a single drive, provided no other backup
// currently holds the running flag.
//
// It returns an error when the running flag cannot be acquired or when the
// drive backup fails. It returns nil when the drive has no deployed stacks or
// when the backup was skipped by backupDrive.
func (m *Manager) TryRunDriveBackup(ctx context.Context, drivePath string) error {
	if acquired := m.tryAcquireRunning(); !acquired {
		return fmt.Errorf("backup already in progress")
	}
	defer m.releaseRunning()

	// A missing map entry yields a nil slice, so one length check covers both
	// "drive unknown" and "drive has no stacks".
	stacks := m.groupStacksByDrive()[drivePath]
	if len(stacks) == 0 {
		m.logger.Printf("[INFO] No deployed stacks on drive %s — skipping backup", drivePath)
		return nil
	}

	result, err := m.backupDrive(ctx, drivePath, stacks, []string{
		m.cfg.Paths.StacksDir,
		"/opt/docker/felhom-controller/controller.yaml",
	})
	switch {
	case err != nil:
		return err
	case result == nil:
		// Skipped (backupDrive already logged why).
		return nil
	}

	m.logger.Printf("[INFO] Single-drive backup for %s: snapshot %s, %d new, %d changed, %s added",
		drivePath, result.SnapshotID, result.FilesNew, result.FilesChanged, result.DataAdded)
	return nil
}
// RunIntegrityCheck runs restic check on all primary repos and pings healthchecks.
func (m *Manager) RunIntegrityCheck(ctx context.Context) error {
m.logger.Printf("[INFO] Starting restic integrity check")
@@ -596,13 +671,12 @@ func (m *Manager) ListSnapshots(limit int) ([]SnapshotInfo, error) {
return allSnapshots, nil
}
// ListAllSnapshots returns snapshots from both primary and secondary restic repos.
// Primary snapshots get Tier=1, secondary snapshots get Tier=2.
// ListAllSnapshots returns snapshots from primary restic repos across all active drives.
// All snapshots get Tier=1.
func (m *Manager) ListAllSnapshots(limit int) ([]SnapshotInfo, error) {
drives := m.activeDrives()
var allSnapshots []SnapshotInfo
// Tier 1: primary repos (same as ListSnapshots)
for _, drive := range drives {
repoPath := PrimaryResticRepoPath(drive)
if !m.restic.RepoExists(repoPath) {
@@ -620,32 +694,6 @@ func (m *Manager) ListAllSnapshots(limit int) ([]SnapshotInfo, error) {
allSnapshots = append(allSnapshots, snapshots...)
}
// Tier 2: secondary restic repos on cross-drive destinations
if m.settings != nil {
destPaths := make(map[string]bool)
for _, cfg := range m.settings.GetAllCrossDriveConfigs() {
if cfg != nil && cfg.Method == "restic" && cfg.DestinationPath != "" {
destPaths[cfg.DestinationPath] = true
}
}
for destPath := range destPaths {
repoPath := SecondaryResticRepoPath(destPath)
if !m.restic.RepoExists(repoPath) {
continue
}
snapshots, err := m.restic.ListSnapshots(repoPath, 0)
if err != nil {
m.logger.Printf("[WARN] Could not list secondary snapshots from %s: %v", repoPath, err)
continue
}
for i := range snapshots {
snapshots[i].RepoPath = repoPath
snapshots[i].Tier = 2
}
allSnapshots = append(allSnapshots, snapshots...)
}
}
// Sort newest first
sort.Slice(allSnapshots, func(i, j int) bool {
return allSnapshots[i].Time.After(allSnapshots[j].Time)
+23 -120
View File
@@ -102,8 +102,8 @@ func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) e
})
start := time.Now()
r.logger.Printf("[INFO] Cross-drive backup starting: %s → %s (method: %s)",
stackName, cfg.DestinationPath, cfg.Method)
r.logger.Printf("[INFO] Cross-drive backup starting: %s → %s (rsync)",
stackName, cfg.DestinationPath)
// Trigger fresh DB dump for this app before cross-drive backup
if r.dbDumper != nil {
@@ -130,15 +130,7 @@ func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) e
}
}
var runErr error
switch cfg.Method {
case "rsync":
runErr = r.runRsyncBackup(ctx, stackName, cfg.DestinationPath, mounts)
case "restic":
runErr = r.runResticBackup(ctx, stackName, cfg.DestinationPath, mounts)
default:
runErr = fmt.Errorf("unknown backup method: %s", cfg.Method)
}
runErr := r.runRsyncBackup(ctx, stackName, cfg.DestinationPath, mounts)
duration := time.Since(start)
@@ -150,11 +142,9 @@ func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) e
// Calculate backup size
var sizeHuman string
if cfg.Method == "rsync" {
destDir := AppSecondaryRsyncPath(cfg.DestinationPath, stackName)
if sz, err := dirSizeBytes(destDir); err == nil {
sizeHuman = humanizeBytes(sz)
}
destDir := AppSecondaryRsyncPath(cfg.DestinationPath, stackName)
if sz, err := dirSizeBytes(destDir); err == nil {
sizeHuman = humanizeBytes(sz)
}
r.logger.Printf("[INFO] Cross-drive backup completed: %s (%s)", stackName, duration.Round(time.Second))
@@ -209,6 +199,18 @@ func (r *CrossDriveRunner) IsRunning(stackName string) bool {
return r.running[stackName]
}
// AnyRunning reports whether at least one cross-drive backup is marked as
// running. The running set is read while holding r.mu.
func (r *CrossDriveRunner) AnyRunning() bool {
	r.mu.Lock()
	defer r.mu.Unlock()

	for stackName := range r.running {
		if r.running[stackName] {
			return true
		}
	}
	return false
}
// ValidateDestination checks that the destination path exists, is writable,
// and has sufficient free space. System-drive destinations get stricter limits
// (≥10 GB free, <90% used) to protect OS stability; external drives just need
@@ -217,6 +219,9 @@ func (r *CrossDriveRunner) ValidateDestination(path string) error {
if path == "" {
return fmt.Errorf("destination path is empty")
}
if r.sett.IsDecommissioned(path) {
return fmt.Errorf("destination %s is decommissioned — choose an active drive", path)
}
if _, err := os.Stat(path); os.IsNotExist(err) {
return fmt.Errorf("destination %s does not exist", path)
}
@@ -326,108 +331,6 @@ func (r *CrossDriveRunner) runRsyncBackup(ctx context.Context, stackName, destBa
return nil
}
// --- restic ---
// runResticBackup snapshots one stack into the secondary restic repo located
// under destBase, then prunes old snapshots from that repo.
//
// The snapshot contains the given mounts, the directory holding the stack's
// compose file (when the stack provider knows it), the stack's DB dump dir on
// its home drive (when it exists), the stacks dir, and the controller YAML
// (when it exists). Snapshots are tagged with stackName and "cross-drive".
//
// The repo password is passed to restic through a temporary file that is
// removed before this function returns.
//
// Returned errors wrap the underlying cause so callers can inspect it with
// errors.Is / errors.As.
func (r *CrossDriveRunner) runResticBackup(ctx context.Context, stackName, destBase string, mounts []string) error {
	repoPath := SecondaryResticRepoPath(destBase)

	// Get or create the cross-drive restic password
	password, err := r.sett.GetOrCreateCrossDrivePassword()
	if err != nil {
		return fmt.Errorf("getting restic password: %w", err)
	}

	// H6: Write password to temp file with safe cleanup order. The remove is
	// deferred right after creation so every return path cleans up, and the
	// file is always closed explicitly before any return, i.e. before the
	// deferred remove runs.
	pwFile, err := os.CreateTemp("", "felhom-crossdrive-pw-*")
	if err != nil {
		return fmt.Errorf("creating password file: %w", err)
	}
	pwPath := pwFile.Name()
	defer os.Remove(pwPath)

	if _, err := pwFile.WriteString(password); err != nil {
		pwFile.Close()
		return fmt.Errorf("writing password file: %w", err)
	}
	// A failed close can mean the password never reached disk; fail here
	// rather than let restic report a confusing authentication error.
	if err := pwFile.Close(); err != nil {
		return fmt.Errorf("closing password file: %w", err)
	}

	// Ensure repo is initialized
	if err := r.ensureResticRepo(ctx, repoPath, pwPath); err != nil {
		return err
	}

	// Run restic backup
	args := []string{
		"backup", "--repo", repoPath,
		"--password-file", pwPath,
		"--tag", stackName,
		"--tag", "cross-drive",
	}

	// Include user data (HDD mounts)
	args = append(args, mounts...)

	// Include app config dir (compose + app.yaml + .felhom.yml)
	if composePath, ok := r.stackProvider.GetStackComposePath(stackName); ok {
		args = append(args, filepath.Dir(composePath))
	}

	// Include DB dump dir for this app (from its home drive)
	appDrive := r.getAppDrivePath(stackName)
	dumpDir := AppDBDumpPath(appDrive, stackName)
	if _, err := os.Stat(dumpDir); err == nil {
		args = append(args, dumpDir)
	}

	// Include infrastructure paths (same as primary restic)
	args = append(args, r.stacksDir)
	if _, err := os.Stat(r.controllerYAMLPath); err == nil {
		args = append(args, r.controllerYAMLPath)
	}

	cmd := exec.CommandContext(ctx, "restic", args...)
	r.logger.Printf("[DEBUG] restic backup: %v", args)
	if out, err := cmd.CombinedOutput(); err != nil {
		return fmt.Errorf("restic backup failed: %w (%s)", err, strings.TrimSpace(string(out)))
	}

	// H5: Prune old snapshots to prevent unbounded accumulation.
	return r.pruneResticRepo(ctx, repoPath, pwPath)
}
// pruneResticRepo runs "restic forget --prune" on a cross-drive repo, keeping
// 7 daily and 4 weekly snapshots. It is best-effort: a failure is logged as a
// warning and nil is always returned, so pruning never fails the backup.
func (r *CrossDriveRunner) pruneResticRepo(ctx context.Context, repoPath, pwPath string) error {
	cmd := exec.CommandContext(ctx, "restic",
		"forget", "--repo", repoPath,
		"--password-file", pwPath,
		"--keep-daily", "7",
		"--keep-weekly", "4",
		"--prune",
	)
	r.logger.Printf("[DEBUG] restic forget (prune): %s", repoPath)

	output, runErr := cmd.CombinedOutput()
	if runErr == nil {
		return nil
	}

	// Non-fatal: log warning but don't fail the backup
	r.logger.Printf("[WARN] restic forget failed for %s: %v (%s)", repoPath, runErr, strings.TrimSpace(string(output)))
	return nil
}
// ensureResticRepo makes sure a restic repository exists at repoPath.
//
// The repo counts as initialized when a "config" file can be stat'ed inside
// repoPath. Otherwise the directory is created and "restic init" is run with
// pwFile as the password file. Returned errors wrap the underlying cause.
func (r *CrossDriveRunner) ensureResticRepo(ctx context.Context, repoPath, pwFile string) error {
	// Check if repo config exists
	if _, err := os.Stat(filepath.Join(repoPath, "config")); err == nil {
		return nil // already initialized
	}

	if err := os.MkdirAll(repoPath, 0755); err != nil {
		return fmt.Errorf("creating restic repo dir: %w", err)
	}

	cmd := exec.CommandContext(ctx, "restic", "init", "--repo", repoPath, "--password-file", pwFile)
	r.logger.Printf("[INFO] Initializing cross-drive restic repo at %s", repoPath)
	if out, err := cmd.CombinedOutput(); err != nil {
		// %w keeps the command error (e.g. *exec.ExitError) reachable via errors.As.
		return fmt.Errorf("restic init failed: %w (%s)", err, strings.TrimSpace(string(out)))
	}
	return nil
}
// copyStackDBDumps copies DB dump files for the given stack from its home drive.
// DB dumps are at <drive>/backups/primary/<stack>/db-dumps/<stack>_<dbtype>.sql.
func (r *CrossDriveRunner) copyStackDBDumps(stackName, destDir string) error {
@@ -537,11 +440,11 @@ func (r *CrossDriveRunner) AutoEnableSmallApps() {
continue
}
// Find destination: first storage path that differs from the app's home drive
// Find destination: first active storage path that differs from the app's home drive
appDrive := r.getAppDrivePath(stack.Name)
var destPath string
for _, sp := range storagePaths {
if sp.Path != appDrive {
if sp.Path != appDrive && !sp.Disconnected && !sp.Decommissioned {
destPath = sp.Path
break
}