feat: drive migration & Tier 2 restic deprecation (v0.18.0)
Phase 1: Deprecate restic as a Tier 2 method (rsync only) and auto-migrate existing configs on startup.
Phase 2: Enhanced per-app migration with backup awareness, DB dump copy, and auto-cleanup.
Phase 3: Full drive migration with a decommissioned state, rollback support, and a wizard UI.
Phase 4: Hub report includes the decommissioned drive state.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -43,6 +43,10 @@ type Manager struct {
|
||||
// AfterBackup is called after a backup completes to refresh the cache.
|
||||
// Set by main.go to avoid circular import with scheduler.
|
||||
AfterBackup func()
|
||||
|
||||
// MigrationActiveCheck returns true if a full drive migration is in progress.
|
||||
// Set by main.go to coordinate with DriveMigrator.
|
||||
MigrationActiveCheck func() bool
|
||||
}
|
||||
|
||||
// SnapshotRecord combines restic snapshot metadata with our run stats.
|
||||
@@ -243,12 +247,17 @@ func (m *Manager) runDBDumpsInternal(ctx context.Context) error {
|
||||
for _, db := range dbs {
|
||||
drivePath := m.GetAppDrivePath(db.StackName)
|
||||
|
||||
// Skip if drive is disconnected
|
||||
// Skip if drive is disconnected or decommissioned
|
||||
if m.settings != nil && m.settings.IsDisconnected(drivePath) {
|
||||
m.logger.Printf("[WARN] Skipping DB dump for %s — drive disconnected: %s", db.StackName, drivePath)
|
||||
summary = append(summary, fmt.Sprintf("SKIP %s (drive disconnected)", db.ContainerName))
|
||||
continue
|
||||
}
|
||||
if m.settings != nil && m.settings.IsDecommissioned(drivePath) {
|
||||
m.logger.Printf("[WARN] Skipping DB dump for %s — drive decommissioned: %s", db.StackName, drivePath)
|
||||
summary = append(summary, fmt.Sprintf("SKIP %s (drive decommissioned)", db.ContainerName))
|
||||
continue
|
||||
}
|
||||
|
||||
dumpDir := AppDBDumpPath(drivePath, db.StackName)
|
||||
|
||||
@@ -319,6 +328,12 @@ func (m *Manager) RunBackup(ctx context.Context) error {
|
||||
|
||||
// runBackupInternal is the implementation of RunBackup. Caller must hold the running flag.
|
||||
func (m *Manager) runBackupInternal(ctx context.Context) error {
|
||||
// Skip if a full drive migration is in progress
|
||||
if m.MigrationActiveCheck != nil && m.MigrationActiveCheck() {
|
||||
m.logger.Printf("[WARN] Skipping nightly backup — drive migration in progress")
|
||||
return nil
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
m.logger.Printf("[INFO] Starting restic backup (per-drive)")
|
||||
|
||||
@@ -339,68 +354,14 @@ func (m *Manager) runBackupInternal(ctx context.Context) error {
|
||||
driveCount := 0
|
||||
|
||||
for drivePath, stacks := range driveStacks {
|
||||
// Skip disconnected drives
|
||||
if m.settings != nil && m.settings.IsDisconnected(drivePath) {
|
||||
m.logger.Printf("[WARN] Skipping backup for drive %s — disconnected", drivePath)
|
||||
continue
|
||||
}
|
||||
|
||||
repoPath := PrimaryResticRepoPath(drivePath)
|
||||
|
||||
// Ensure repo is initialized
|
||||
if err := m.restic.EnsureInitialized(repoPath); err != nil {
|
||||
m.logger.Printf("[ERROR] Restic init failed for %s: %v", repoPath, err)
|
||||
anyErr = err
|
||||
continue
|
||||
}
|
||||
|
||||
// Build paths for this drive
|
||||
var paths []string
|
||||
paths = append(paths, infraPaths...)
|
||||
|
||||
for _, stack := range stacks {
|
||||
// App data (appdata/<stack>/)
|
||||
appData := AppDataDir(drivePath, stack.Name)
|
||||
if _, err := os.Stat(appData); err == nil {
|
||||
paths = append(paths, appData)
|
||||
}
|
||||
// HDD mounts (for apps with custom mount points)
|
||||
if m.stackProvider != nil {
|
||||
for _, mount := range m.stackProvider.GetStackHDDMounts(stack.Name) {
|
||||
if _, err := os.Stat(mount); err == nil {
|
||||
paths = append(paths, mount)
|
||||
}
|
||||
}
|
||||
}
|
||||
// DB dumps for this stack
|
||||
dumpDir := AppDBDumpPath(drivePath, stack.Name)
|
||||
if _, err := os.Stat(dumpDir); err == nil {
|
||||
paths = append(paths, dumpDir)
|
||||
}
|
||||
}
|
||||
|
||||
// Deduplicate paths
|
||||
paths = dedup(paths)
|
||||
|
||||
tags := []string{"felhom", m.cfg.Customer.ID, filepath.Base(drivePath)}
|
||||
m.logger.Printf("[INFO] Backing up drive %s (%d apps, %d paths)", drivePath, len(stacks), len(paths))
|
||||
|
||||
result, err := m.restic.Snapshot(repoPath, paths, tags)
|
||||
result, err := m.backupDrive(ctx, drivePath, stacks, infraPaths)
|
||||
if err != nil {
|
||||
m.logger.Printf("[ERROR] Restic backup failed for drive %s: %v", drivePath, err)
|
||||
anyErr = err
|
||||
continue
|
||||
}
|
||||
|
||||
lastResult = result
|
||||
driveCount++
|
||||
|
||||
// Prune check (weekly — Sunday)
|
||||
if shouldPrune(m.cfg.Backup.PruneSchedule) {
|
||||
m.logger.Printf("[INFO] Running weekly prune for %s", repoPath)
|
||||
if err := m.restic.Prune(repoPath, m.cfg.Backup.Retention); err != nil {
|
||||
m.logger.Printf("[WARN] Restic prune failed for %s: %v", repoPath, err)
|
||||
}
|
||||
if result != nil {
|
||||
lastResult = result
|
||||
driveCount++
|
||||
}
|
||||
}
|
||||
|
||||
@@ -463,6 +424,120 @@ func (m *Manager) runBackupInternal(ctx context.Context) error {
|
||||
return anyErr
|
||||
}
|
||||
|
||||
// backupDrive runs restic backup for a single drive. Returns nil result if skipped.
|
||||
// Caller must hold the running flag.
|
||||
func (m *Manager) backupDrive(ctx context.Context, drivePath string, stacks []StackSummary, infraPaths []string) (*SnapshotResult, error) {
|
||||
// Skip disconnected or decommissioned drives
|
||||
if m.settings != nil && m.settings.IsDisconnected(drivePath) {
|
||||
m.logger.Printf("[WARN] Skipping backup for drive %s — disconnected", drivePath)
|
||||
return nil, nil
|
||||
}
|
||||
if m.settings != nil && m.settings.IsDecommissioned(drivePath) {
|
||||
m.logger.Printf("[WARN] Skipping backup for drive %s — decommissioned", drivePath)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
repoPath := PrimaryResticRepoPath(drivePath)
|
||||
|
||||
// Ensure repo is initialized
|
||||
if err := m.restic.EnsureInitialized(repoPath); err != nil {
|
||||
m.logger.Printf("[ERROR] Restic init failed for %s: %v", repoPath, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Build paths for this drive
|
||||
var paths []string
|
||||
paths = append(paths, infraPaths...)
|
||||
|
||||
for _, stack := range stacks {
|
||||
// App data (appdata/<stack>/)
|
||||
appData := AppDataDir(drivePath, stack.Name)
|
||||
if _, err := os.Stat(appData); err == nil {
|
||||
paths = append(paths, appData)
|
||||
}
|
||||
// HDD mounts (for apps with custom mount points)
|
||||
if m.stackProvider != nil {
|
||||
for _, mount := range m.stackProvider.GetStackHDDMounts(stack.Name) {
|
||||
if _, err := os.Stat(mount); err == nil {
|
||||
paths = append(paths, mount)
|
||||
}
|
||||
}
|
||||
}
|
||||
// DB dumps for this stack
|
||||
dumpDir := AppDBDumpPath(drivePath, stack.Name)
|
||||
if _, err := os.Stat(dumpDir); err == nil {
|
||||
paths = append(paths, dumpDir)
|
||||
}
|
||||
}
|
||||
|
||||
// Deduplicate paths
|
||||
paths = dedup(paths)
|
||||
|
||||
tags := []string{"felhom", m.cfg.Customer.ID, filepath.Base(drivePath)}
|
||||
m.logger.Printf("[INFO] Backing up drive %s (%d apps, %d paths)", drivePath, len(stacks), len(paths))
|
||||
|
||||
result, err := m.restic.Snapshot(repoPath, paths, tags)
|
||||
if err != nil {
|
||||
m.logger.Printf("[ERROR] Restic backup failed for drive %s: %v", drivePath, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Prune check (weekly — Sunday)
|
||||
if shouldPrune(m.cfg.Backup.PruneSchedule) {
|
||||
m.logger.Printf("[INFO] Running weekly prune for %s", repoPath)
|
||||
if err := m.restic.Prune(repoPath, m.cfg.Backup.Retention); err != nil {
|
||||
m.logger.Printf("[WARN] Restic prune failed for %s: %v", repoPath, err)
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// tryAcquireRunning attempts to set the running flag without blocking.
|
||||
// Returns true if acquired, false if already running.
|
||||
func (m *Manager) tryAcquireRunning() bool {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
if m.running {
|
||||
return false
|
||||
}
|
||||
m.running = true
|
||||
return true
|
||||
}
|
||||
|
||||
// TryRunDriveBackup runs a backup for a single drive if no other backup is in progress.
|
||||
// Returns error if the backup lock cannot be acquired or if backup fails.
|
||||
func (m *Manager) TryRunDriveBackup(ctx context.Context, drivePath string) error {
|
||||
if !m.tryAcquireRunning() {
|
||||
return fmt.Errorf("backup already in progress")
|
||||
}
|
||||
defer m.releaseRunning()
|
||||
|
||||
driveStacks := m.groupStacksByDrive()
|
||||
stacks, ok := driveStacks[drivePath]
|
||||
if !ok || len(stacks) == 0 {
|
||||
m.logger.Printf("[INFO] No deployed stacks on drive %s — skipping backup", drivePath)
|
||||
return nil
|
||||
}
|
||||
|
||||
infraPaths := []string{
|
||||
m.cfg.Paths.StacksDir,
|
||||
"/opt/docker/felhom-controller/controller.yaml",
|
||||
}
|
||||
|
||||
result, err := m.backupDrive(ctx, drivePath, stacks, infraPaths)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if result != nil {
|
||||
m.logger.Printf("[INFO] Single-drive backup for %s: snapshot %s, %d new, %d changed, %s added",
|
||||
drivePath, result.SnapshotID, result.FilesNew, result.FilesChanged, result.DataAdded)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// RunIntegrityCheck runs restic check on all primary repos and pings healthchecks.
|
||||
func (m *Manager) RunIntegrityCheck(ctx context.Context) error {
|
||||
m.logger.Printf("[INFO] Starting restic integrity check")
|
||||
@@ -596,13 +671,12 @@ func (m *Manager) ListSnapshots(limit int) ([]SnapshotInfo, error) {
|
||||
return allSnapshots, nil
|
||||
}
|
||||
|
||||
// ListAllSnapshots returns snapshots from both primary and secondary restic repos.
|
||||
// Primary snapshots get Tier=1, secondary snapshots get Tier=2.
|
||||
// ListAllSnapshots returns snapshots from primary restic repos across all active drives.
|
||||
// All snapshots get Tier=1.
|
||||
func (m *Manager) ListAllSnapshots(limit int) ([]SnapshotInfo, error) {
|
||||
drives := m.activeDrives()
|
||||
var allSnapshots []SnapshotInfo
|
||||
|
||||
// Tier 1: primary repos (same as ListSnapshots)
|
||||
for _, drive := range drives {
|
||||
repoPath := PrimaryResticRepoPath(drive)
|
||||
if !m.restic.RepoExists(repoPath) {
|
||||
@@ -620,32 +694,6 @@ func (m *Manager) ListAllSnapshots(limit int) ([]SnapshotInfo, error) {
|
||||
allSnapshots = append(allSnapshots, snapshots...)
|
||||
}
|
||||
|
||||
// Tier 2: secondary restic repos on cross-drive destinations
|
||||
if m.settings != nil {
|
||||
destPaths := make(map[string]bool)
|
||||
for _, cfg := range m.settings.GetAllCrossDriveConfigs() {
|
||||
if cfg != nil && cfg.Method == "restic" && cfg.DestinationPath != "" {
|
||||
destPaths[cfg.DestinationPath] = true
|
||||
}
|
||||
}
|
||||
for destPath := range destPaths {
|
||||
repoPath := SecondaryResticRepoPath(destPath)
|
||||
if !m.restic.RepoExists(repoPath) {
|
||||
continue
|
||||
}
|
||||
snapshots, err := m.restic.ListSnapshots(repoPath, 0)
|
||||
if err != nil {
|
||||
m.logger.Printf("[WARN] Could not list secondary snapshots from %s: %v", repoPath, err)
|
||||
continue
|
||||
}
|
||||
for i := range snapshots {
|
||||
snapshots[i].RepoPath = repoPath
|
||||
snapshots[i].Tier = 2
|
||||
}
|
||||
allSnapshots = append(allSnapshots, snapshots...)
|
||||
}
|
||||
}
|
||||
|
||||
// Sort newest first
|
||||
sort.Slice(allSnapshots, func(i, j int) bool {
|
||||
return allSnapshots[i].Time.After(allSnapshots[j].Time)
|
||||
|
||||
Reference in New Issue
Block a user