feat: drive migration & Tier 2 restic deprecation (v0.18.0)

Phase 1: Deprecate restic as Tier 2 method (rsync only), auto-migrate on startup
Phase 2: Enhanced per-app migration with backup awareness, DB dump copy, auto-cleanup
Phase 3: Full drive migration with decommissioned state, rollback support, wizard UI
Phase 4: Hub report includes decommissioned drive state

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-19 21:49:14 +01:00
parent bdbe170a54
commit 99bf3ca7a8
22 changed files with 1725 additions and 402 deletions
+136 -88
View File
@@ -43,6 +43,10 @@ type Manager struct {
// AfterBackup is called after a backup completes to refresh the cache.
// Set by main.go to avoid circular import with scheduler.
AfterBackup func()
// MigrationActiveCheck returns true if a full drive migration is in progress.
// Set by main.go to coordinate with DriveMigrator.
MigrationActiveCheck func() bool
}
// SnapshotRecord combines restic snapshot metadata with our run stats.
@@ -243,12 +247,17 @@ func (m *Manager) runDBDumpsInternal(ctx context.Context) error {
for _, db := range dbs {
drivePath := m.GetAppDrivePath(db.StackName)
// Skip if drive is disconnected
// Skip if drive is disconnected or decommissioned
if m.settings != nil && m.settings.IsDisconnected(drivePath) {
m.logger.Printf("[WARN] Skipping DB dump for %s — drive disconnected: %s", db.StackName, drivePath)
summary = append(summary, fmt.Sprintf("SKIP %s (drive disconnected)", db.ContainerName))
continue
}
if m.settings != nil && m.settings.IsDecommissioned(drivePath) {
m.logger.Printf("[WARN] Skipping DB dump for %s — drive decommissioned: %s", db.StackName, drivePath)
summary = append(summary, fmt.Sprintf("SKIP %s (drive decommissioned)", db.ContainerName))
continue
}
dumpDir := AppDBDumpPath(drivePath, db.StackName)
@@ -319,6 +328,12 @@ func (m *Manager) RunBackup(ctx context.Context) error {
// runBackupInternal is the implementation of RunBackup. Caller must hold the running flag.
func (m *Manager) runBackupInternal(ctx context.Context) error {
// Skip if a full drive migration is in progress
if m.MigrationActiveCheck != nil && m.MigrationActiveCheck() {
m.logger.Printf("[WARN] Skipping nightly backup — drive migration in progress")
return nil
}
start := time.Now()
m.logger.Printf("[INFO] Starting restic backup (per-drive)")
@@ -339,68 +354,14 @@ func (m *Manager) runBackupInternal(ctx context.Context) error {
driveCount := 0
for drivePath, stacks := range driveStacks {
// Skip disconnected drives
if m.settings != nil && m.settings.IsDisconnected(drivePath) {
m.logger.Printf("[WARN] Skipping backup for drive %s — disconnected", drivePath)
continue
}
repoPath := PrimaryResticRepoPath(drivePath)
// Ensure repo is initialized
if err := m.restic.EnsureInitialized(repoPath); err != nil {
m.logger.Printf("[ERROR] Restic init failed for %s: %v", repoPath, err)
anyErr = err
continue
}
// Build paths for this drive
var paths []string
paths = append(paths, infraPaths...)
for _, stack := range stacks {
// App data (appdata/<stack>/)
appData := AppDataDir(drivePath, stack.Name)
if _, err := os.Stat(appData); err == nil {
paths = append(paths, appData)
}
// HDD mounts (for apps with custom mount points)
if m.stackProvider != nil {
for _, mount := range m.stackProvider.GetStackHDDMounts(stack.Name) {
if _, err := os.Stat(mount); err == nil {
paths = append(paths, mount)
}
}
}
// DB dumps for this stack
dumpDir := AppDBDumpPath(drivePath, stack.Name)
if _, err := os.Stat(dumpDir); err == nil {
paths = append(paths, dumpDir)
}
}
// Deduplicate paths
paths = dedup(paths)
tags := []string{"felhom", m.cfg.Customer.ID, filepath.Base(drivePath)}
m.logger.Printf("[INFO] Backing up drive %s (%d apps, %d paths)", drivePath, len(stacks), len(paths))
result, err := m.restic.Snapshot(repoPath, paths, tags)
result, err := m.backupDrive(ctx, drivePath, stacks, infraPaths)
if err != nil {
m.logger.Printf("[ERROR] Restic backup failed for drive %s: %v", drivePath, err)
anyErr = err
continue
}
lastResult = result
driveCount++
// Prune check (weekly — Sunday)
if shouldPrune(m.cfg.Backup.PruneSchedule) {
m.logger.Printf("[INFO] Running weekly prune for %s", repoPath)
if err := m.restic.Prune(repoPath, m.cfg.Backup.Retention); err != nil {
m.logger.Printf("[WARN] Restic prune failed for %s: %v", repoPath, err)
}
if result != nil {
lastResult = result
driveCount++
}
}
@@ -463,6 +424,120 @@ func (m *Manager) runBackupInternal(ctx context.Context) error {
return anyErr
}
// backupDrive performs the restic backup of one drive. It makes sure the
// drive's primary repository is initialized, gathers the paths to snapshot
// (the supplied infrastructure paths plus, for every stack, the app data
// directory, any HDD mounts and the DB dump directory that exist on disk),
// takes the snapshot, and finally prunes the repository when shouldPrune
// says the configured prune schedule is due.
//
// A (nil, nil) return means the drive was skipped because settings mark it
// as disconnected or decommissioned. A prune failure is only logged; the
// snapshot result is still returned. The caller must hold the running flag.
func (m *Manager) backupDrive(ctx context.Context, drivePath string, stacks []StackSummary, infraPaths []string) (*SnapshotResult, error) {
	// Flagged drives are skipped rather than reported as failures.
	if m.settings != nil {
		switch {
		case m.settings.IsDisconnected(drivePath):
			m.logger.Printf("[WARN] Skipping backup for drive %s — disconnected", drivePath)
			return nil, nil
		case m.settings.IsDecommissioned(drivePath):
			m.logger.Printf("[WARN] Skipping backup for drive %s — decommissioned", drivePath)
			return nil, nil
		}
	}

	repo := PrimaryResticRepoPath(drivePath)
	if initErr := m.restic.EnsureInitialized(repo); initErr != nil {
		m.logger.Printf("[ERROR] Restic init failed for %s: %v", repo, initErr)
		return nil, initErr
	}

	// Infrastructure paths are always part of the snapshot; per-stack paths
	// are added only when they can be stat'ed.
	targets := append([]string(nil), infraPaths...)
	addIfPresent := func(candidate string) {
		if _, statErr := os.Stat(candidate); statErr == nil {
			targets = append(targets, candidate)
		}
	}
	for i := range stacks {
		name := stacks[i].Name

		// App data (appdata/<stack>/).
		addIfPresent(AppDataDir(drivePath, name))

		// HDD mounts, for apps with custom mount points.
		if m.stackProvider != nil {
			for _, hddMount := range m.stackProvider.GetStackHDDMounts(name) {
				addIfPresent(hddMount)
			}
		}

		// DB dumps belonging to this stack.
		addIfPresent(AppDBDumpPath(drivePath, name))
	}
	// The same path can be contributed more than once; collapse duplicates.
	targets = dedup(targets)

	snapshotTags := []string{"felhom", m.cfg.Customer.ID, filepath.Base(drivePath)}
	m.logger.Printf("[INFO] Backing up drive %s (%d apps, %d paths)", drivePath, len(stacks), len(targets))

	snapshot, snapErr := m.restic.Snapshot(repo, targets, snapshotTags)
	if snapErr != nil {
		m.logger.Printf("[ERROR] Restic backup failed for drive %s: %v", drivePath, snapErr)
		return nil, snapErr
	}

	// Prune only when the schedule says so; a failed prune does not fail the backup.
	if shouldPrune(m.cfg.Backup.PruneSchedule) {
		m.logger.Printf("[INFO] Running weekly prune for %s", repo)
		if pruneErr := m.restic.Prune(repo, m.cfg.Backup.Retention); pruneErr != nil {
			m.logger.Printf("[WARN] Restic prune failed for %s: %v", repo, pruneErr)
		}
	}

	return snapshot, nil
}
// tryAcquireRunning makes a non-blocking attempt to claim the running flag.
// It reports true when the flag was free and is now held by the caller, and
// false when a run was already in progress (the flag is left set).
func (m *Manager) tryAcquireRunning() bool {
	m.mu.Lock()
	defer m.mu.Unlock()

	// The flag ends up set either way; what matters is whether we set it.
	acquired := !m.running
	m.running = true
	return acquired
}
// TryRunDriveBackup backs up a single drive, provided no other backup is
// currently running. It never waits for the running flag: if the flag is
// already held it returns an error immediately. A drive with no deployed
// stacks is logged and treated as success. Any error from the underlying
// drive backup is returned unchanged.
func (m *Manager) TryRunDriveBackup(ctx context.Context, drivePath string) error {
	if acquired := m.tryAcquireRunning(); !acquired {
		return fmt.Errorf("backup already in progress")
	}
	defer m.releaseRunning()

	// A missing map entry yields a nil slice, so one length check covers
	// both "drive unknown" and "drive has no stacks".
	stacksOnDrive := m.groupStacksByDrive()[drivePath]
	if len(stacksOnDrive) == 0 {
		m.logger.Printf("[INFO] No deployed stacks on drive %s — skipping backup", drivePath)
		return nil
	}

	// Infrastructure files that are included in the drive's snapshot.
	infra := []string{
		m.cfg.Paths.StacksDir,
		"/opt/docker/felhom-controller/controller.yaml",
	}

	snapshot, err := m.backupDrive(ctx, drivePath, stacksOnDrive, infra)
	switch {
	case err != nil:
		return err
	case snapshot != nil:
		// A nil snapshot means backupDrive skipped the drive; nothing to report.
		m.logger.Printf("[INFO] Single-drive backup for %s: snapshot %s, %d new, %d changed, %s added",
			drivePath, snapshot.SnapshotID, snapshot.FilesNew, snapshot.FilesChanged, snapshot.DataAdded)
	}
	return nil
}
// RunIntegrityCheck runs restic check on all primary repos and pings healthchecks.
func (m *Manager) RunIntegrityCheck(ctx context.Context) error {
m.logger.Printf("[INFO] Starting restic integrity check")
@@ -596,13 +671,12 @@ func (m *Manager) ListSnapshots(limit int) ([]SnapshotInfo, error) {
return allSnapshots, nil
}
// ListAllSnapshots returns snapshots from both primary and secondary restic repos.
// Primary snapshots get Tier=1, secondary snapshots get Tier=2.
// ListAllSnapshots returns snapshots from primary restic repos across all active drives.
// All snapshots get Tier=1.
func (m *Manager) ListAllSnapshots(limit int) ([]SnapshotInfo, error) {
drives := m.activeDrives()
var allSnapshots []SnapshotInfo
// Tier 1: primary repos (same as ListSnapshots)
for _, drive := range drives {
repoPath := PrimaryResticRepoPath(drive)
if !m.restic.RepoExists(repoPath) {
@@ -620,32 +694,6 @@ func (m *Manager) ListAllSnapshots(limit int) ([]SnapshotInfo, error) {
allSnapshots = append(allSnapshots, snapshots...)
}
// Tier 2: secondary restic repos on cross-drive destinations
if m.settings != nil {
destPaths := make(map[string]bool)
for _, cfg := range m.settings.GetAllCrossDriveConfigs() {
if cfg != nil && cfg.Method == "restic" && cfg.DestinationPath != "" {
destPaths[cfg.DestinationPath] = true
}
}
for destPath := range destPaths {
repoPath := SecondaryResticRepoPath(destPath)
if !m.restic.RepoExists(repoPath) {
continue
}
snapshots, err := m.restic.ListSnapshots(repoPath, 0)
if err != nil {
m.logger.Printf("[WARN] Could not list secondary snapshots from %s: %v", repoPath, err)
continue
}
for i := range snapshots {
snapshots[i].RepoPath = repoPath
snapshots[i].Tier = 2
}
allSnapshots = append(allSnapshots, snapshots...)
}
}
// Sort newest first
sort.Slice(allSnapshots, func(i, j int) bool {
return allSnapshots[i].Time.After(allSnapshots[j].Time)