fb11c3b75a
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
443 lines
15 KiB
Go
443 lines
15 KiB
Go
package backup
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// snapshotIDPattern describes an acceptable restic snapshot ID: between 8 and
// 64 characters, each a digit or a lowercase hex letter (a-f), nothing else.
const snapshotIDPattern = `^[0-9a-f]{8,64}$`

// snapshotIDRe is the compiled form of snapshotIDPattern. It is built once at
// package initialisation and used to vet snapshot IDs supplied by callers.
var snapshotIDRe = regexp.MustCompile(snapshotIDPattern)
|
|
|
|
// RestoreApp restores an app from a restic snapshot.
|
|
// All apps get config + DB dump restored. Apps with HDD data also get user data restored.
|
|
func (m *Manager) RestoreApp(stackName, snapshotID string) error {
|
|
if m.stackProvider == nil {
|
|
return fmt.Errorf("stack provider not configured")
|
|
}
|
|
|
|
// Validate snapshot ID format
|
|
if !snapshotIDRe.MatchString(snapshotID) {
|
|
return fmt.Errorf("invalid snapshot ID: must be 8-64 lowercase hex characters")
|
|
}
|
|
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RestoreApp: stack=%s, snapshotID=%s", stackName, snapshotID)
|
|
}
|
|
|
|
// Prevent concurrent operations
|
|
m.mu.Lock()
|
|
if m.running {
|
|
m.mu.Unlock()
|
|
return fmt.Errorf("backup or restore already in progress")
|
|
}
|
|
m.running = true
|
|
m.mu.Unlock()
|
|
defer func() {
|
|
m.mu.Lock()
|
|
m.running = false
|
|
m.mu.Unlock()
|
|
}()
|
|
|
|
// Determine what to restore
|
|
hddMounts := m.stackProvider.GetStackHDDMounts(stackName)
|
|
hasHDD := len(hddMounts) > 0
|
|
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RestoreApp: %s has %d HDD mount(s), hasHDD=%v", stackName, len(hddMounts), hasHDD)
|
|
}
|
|
|
|
// Build list of paths to restore from the snapshot
|
|
var restorePaths []string
|
|
|
|
// Always restore the stack's config dir (compose + app.yaml + .felhom.yml)
|
|
composePath, ok := m.stackProvider.GetStackComposePath(stackName)
|
|
if ok {
|
|
stackDir := filepath.Dir(composePath)
|
|
restorePaths = append(restorePaths, stackDir)
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RestoreApp: will restore config dir: %s", stackDir)
|
|
}
|
|
}
|
|
|
|
// Restore DB dump files for this stack (per-drive path)
|
|
drivePath := m.GetAppDrivePath(stackName)
|
|
dumpDir := AppDBDumpPath(drivePath, stackName)
|
|
restorePaths = append(restorePaths, dumpDir)
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RestoreApp: will restore DB dump dir: %s", dumpDir)
|
|
}
|
|
|
|
// Restore HDD data (always included for apps that have it — backup is mandatory)
|
|
if hasHDD {
|
|
restorePaths = append(restorePaths, hddMounts...)
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RestoreApp: will restore HDD data: %v", hddMounts)
|
|
}
|
|
}
|
|
|
|
// Restore Docker volume dumps (if present in snapshot)
|
|
volDumpDir := AppVolumeDumpPath(drivePath, stackName)
|
|
restorePaths = append(restorePaths, volDumpDir)
|
|
|
|
if len(restorePaths) == 0 {
|
|
return fmt.Errorf("no restorable paths found for %s", stackName)
|
|
}
|
|
|
|
// Use the app's primary restic repo
|
|
repoPath := PrimaryResticRepoPath(drivePath)
|
|
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RestoreApp: using repo=%s, %d restore path(s)", repoPath, len(restorePaths))
|
|
}
|
|
|
|
m.logger.Printf("[INFO] [backup] Starting restore for %s (snapshot=%s, repo=%s, paths=%v, hasHDD=%v)",
|
|
stackName, snapshotID, repoPath, restorePaths, hasHDD)
|
|
|
|
// Stop the app before restore
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RestoreApp: step 1/4 — stopping app %s", stackName)
|
|
}
|
|
if err := m.stackProvider.StopStack(stackName); err != nil {
|
|
m.logger.Printf("[WARN] RESTORE could not stop %s: %v (proceeding anyway)", stackName, err)
|
|
}
|
|
|
|
// Execute restore via restic
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RestoreApp: step 2/4 — restoring data from snapshot %s", snapshotID)
|
|
}
|
|
if err := m.restic.RestoreAppData(repoPath, snapshotID, restorePaths); err != nil {
|
|
m.logger.Printf("[ERROR] RESTORE failed for %s: %v", stackName, err)
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RestoreApp: step 3/4 — restarting app %s after failure", stackName)
|
|
}
|
|
if startErr := m.stackProvider.StartStack(stackName); startErr != nil {
|
|
m.logger.Printf("[WARN] RESTORE could not restart %s after failure: %v", stackName, startErr)
|
|
}
|
|
return err
|
|
}
|
|
|
|
// Populate Docker volumes from restored tars
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RestoreApp: step 3/5 — restoring Docker volumes for %s", stackName)
|
|
}
|
|
if err := m.restoreDockerVolumes(stackName, drivePath); err != nil {
|
|
m.logger.Printf("[WARN] RESTORE volume restore failed for %s: %v (continuing)", stackName, err)
|
|
}
|
|
|
|
// Restart the app
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RestoreApp: step 4/5 — restarting app %s after successful restore", stackName)
|
|
}
|
|
if err := m.stackProvider.StartStack(stackName); err != nil {
|
|
m.logger.Printf("[WARN] RESTORE could not restart %s after restore: %v", stackName, err)
|
|
}
|
|
|
|
// Verify app started successfully
|
|
if err := m.waitForHealthy(stackName, 90*time.Second); err != nil {
|
|
m.logger.Printf("[WARN] [backup] Restore completed but app health check failed: %v", err)
|
|
}
|
|
|
|
hasVolumes := len(m.stackProvider.GetDockerVolumes(stackName)) > 0
|
|
restoreType := "config+DB"
|
|
if hasHDD || hasVolumes {
|
|
restoreType = "full (config+DB+userdata)"
|
|
}
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RestoreApp: step 5/5 — restore completed, type=%s", restoreType)
|
|
}
|
|
m.logger.Printf("[INFO] RESTORE completed: stack=%s, snapshot=%s, type=%s", stackName, snapshotID, restoreType)
|
|
return nil
|
|
}
|
|
|
|
// RestoreAppFromTier2 restores an app from its cross-drive rsync backup mirror.
|
|
func (m *Manager) RestoreAppFromTier2(stackName string) error {
|
|
if m.stackProvider == nil {
|
|
return fmt.Errorf("stack provider not configured")
|
|
}
|
|
if m.settings == nil {
|
|
return fmt.Errorf("settings not available")
|
|
}
|
|
|
|
cdCfg := m.settings.GetCrossDriveConfig(stackName)
|
|
if cdCfg == nil || !cdCfg.Enabled {
|
|
return fmt.Errorf("cross-drive backup not configured for %s", stackName)
|
|
}
|
|
|
|
rsyncDir := AppSecondaryRsyncPath(cdCfg.DestinationPath, stackName)
|
|
if _, err := os.Stat(rsyncDir); os.IsNotExist(err) {
|
|
return fmt.Errorf("Tier 2 backup directory not found: %s", rsyncDir)
|
|
}
|
|
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RestoreAppFromTier2: stack=%s, rsyncDir=%s", stackName, rsyncDir)
|
|
}
|
|
|
|
// Prevent concurrent operations
|
|
m.mu.Lock()
|
|
if m.running {
|
|
m.mu.Unlock()
|
|
return fmt.Errorf("backup or restore already in progress")
|
|
}
|
|
m.running = true
|
|
m.mu.Unlock()
|
|
defer func() {
|
|
m.mu.Lock()
|
|
m.running = false
|
|
m.mu.Unlock()
|
|
}()
|
|
|
|
hddMounts := m.stackProvider.GetStackHDDMounts(stackName)
|
|
hasHDD := len(hddMounts) > 0
|
|
drivePath := m.GetAppDrivePath(stackName)
|
|
|
|
m.logger.Printf("[INFO] [backup] Starting Tier 2 restore for %s from %s", stackName, rsyncDir)
|
|
|
|
// Step 1: Stop the app
|
|
if err := m.stackProvider.StopStack(stackName); err != nil {
|
|
m.logger.Printf("[WARN] RESTORE could not stop %s: %v (proceeding anyway)", stackName, err)
|
|
}
|
|
|
|
// Step 2: Restore config from _config/
|
|
configSrc := filepath.Join(rsyncDir, "_config") + "/"
|
|
if _, err := os.Stat(filepath.Join(rsyncDir, "_config")); err == nil {
|
|
if composePath, ok := m.stackProvider.GetStackComposePath(stackName); ok {
|
|
configDst := filepath.Dir(composePath) + "/"
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RestoreAppFromTier2: rsync config %s → %s", configSrc, configDst)
|
|
}
|
|
cmd := exec.Command("rsync", "-a", "--delete", configSrc, configDst)
|
|
if out, err := cmd.CombinedOutput(); err != nil {
|
|
m.logger.Printf("[ERROR] [backup] Tier 2 config restore failed for %s: %v (%s)", stackName, err, strings.TrimSpace(string(out)))
|
|
// Try to restart and return error
|
|
m.stackProvider.StartStack(stackName)
|
|
return fmt.Errorf("config restore failed: %w", err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Step 3: Restore HDD data
|
|
if hasHDD {
|
|
// Check for data directory structure — single mount vs multi-mount
|
|
if len(hddMounts) == 1 {
|
|
// Single mount: data is directly in rsyncDir (excluding _* dirs)
|
|
src := strings.TrimRight(rsyncDir, "/") + "/"
|
|
dst := strings.TrimRight(hddMounts[0], "/") + "/"
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RestoreAppFromTier2: rsync HDD data %s → %s", src, dst)
|
|
}
|
|
cmd := exec.Command("rsync", "-a", "--delete",
|
|
"--exclude", "_*",
|
|
src, dst)
|
|
if out, err := cmd.CombinedOutput(); err != nil {
|
|
m.logger.Printf("[ERROR] [backup] Tier 2 HDD data restore failed for %s: %v (%s)", stackName, err, strings.TrimSpace(string(out)))
|
|
m.stackProvider.StartStack(stackName)
|
|
return fmt.Errorf("HDD data restore failed: %w", err)
|
|
}
|
|
} else {
|
|
// Multiple mounts: each has a subdirectory named by leaf
|
|
for _, mount := range hddMounts {
|
|
leaf := filepath.Base(mount)
|
|
src := filepath.Join(rsyncDir, leaf) + "/"
|
|
dst := strings.TrimRight(mount, "/") + "/"
|
|
if _, err := os.Stat(filepath.Join(rsyncDir, leaf)); os.IsNotExist(err) {
|
|
m.logger.Printf("[WARN] [backup] Tier 2 restore: no backup data for mount %s", mount)
|
|
continue
|
|
}
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RestoreAppFromTier2: rsync HDD mount %s → %s", src, dst)
|
|
}
|
|
cmd := exec.Command("rsync", "-a", "--delete", src, dst)
|
|
if out, err := cmd.CombinedOutput(); err != nil {
|
|
m.logger.Printf("[ERROR] [backup] Tier 2 HDD restore failed for mount %s: %v (%s)", mount, err, strings.TrimSpace(string(out)))
|
|
m.stackProvider.StartStack(stackName)
|
|
return fmt.Errorf("HDD restore failed for %s: %w", mount, err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Step 4: Restore DB dumps from _db/
|
|
dbSrc := filepath.Join(rsyncDir, "_db")
|
|
if _, err := os.Stat(dbSrc); err == nil {
|
|
dbDst := AppDBDumpPath(drivePath, stackName)
|
|
if err := os.MkdirAll(dbDst, 0755); err == nil {
|
|
entries, _ := os.ReadDir(dbSrc)
|
|
for _, e := range entries {
|
|
if !e.IsDir() {
|
|
src := filepath.Join(dbSrc, e.Name())
|
|
dst := filepath.Join(dbDst, e.Name())
|
|
if err := copyFile(src, dst); err != nil {
|
|
m.logger.Printf("[WARN] [backup] Failed to copy DB dump %s: %v", e.Name(), err)
|
|
}
|
|
}
|
|
}
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RestoreAppFromTier2: restored DB dumps from %s", dbSrc)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Step 5: Restore Docker volumes from _volumes/
|
|
volSrc := filepath.Join(rsyncDir, "_volumes")
|
|
if _, err := os.Stat(volSrc); err == nil {
|
|
if err := m.restoreDockerVolumesFromDir(stackName, volSrc); err != nil {
|
|
m.logger.Printf("[WARN] [backup] Tier 2 volume restore failed for %s: %v (continuing)", stackName, err)
|
|
}
|
|
}
|
|
|
|
// Step 6: Restart the app
|
|
if err := m.stackProvider.StartStack(stackName); err != nil {
|
|
m.logger.Printf("[WARN] RESTORE could not restart %s after Tier 2 restore: %v", stackName, err)
|
|
}
|
|
|
|
// Verify app started successfully
|
|
if err := m.waitForHealthy(stackName, 90*time.Second); err != nil {
|
|
m.logger.Printf("[WARN] [backup] Tier 2 restore completed but app health check failed: %v", err)
|
|
}
|
|
|
|
hasVolumes := len(m.stackProvider.GetDockerVolumes(stackName)) > 0
|
|
restoreType := "config+DB"
|
|
if hasHDD || hasVolumes {
|
|
restoreType = "full (config+DB+userdata)"
|
|
}
|
|
m.logger.Printf("[INFO] RESTORE (Tier 2) completed: stack=%s, type=%s", stackName, restoreType)
|
|
return nil
|
|
}
|
|
|
|
// restoreDockerVolumesFromDir populates Docker volumes from tar files in an arbitrary directory.
|
|
// Used by Tier 2 restore where volume tars are in the rsync mirror's _volumes/ dir.
|
|
func (m *Manager) restoreDockerVolumesFromDir(stackName, dumpDir string) error {
|
|
entries, err := os.ReadDir(dumpDir)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
return nil
|
|
}
|
|
return fmt.Errorf("reading volume dump dir: %w", err)
|
|
}
|
|
|
|
var restored int
|
|
for _, entry := range entries {
|
|
if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".tar") {
|
|
continue
|
|
}
|
|
volName := strings.TrimSuffix(entry.Name(), ".tar")
|
|
|
|
m.logger.Printf("[INFO] [backup] Restoring Docker volume %s for %s (Tier 2)", volName, stackName)
|
|
|
|
exec.Command("docker", "volume", "rm", "-f", volName).Run()
|
|
|
|
if out, err := exec.Command("docker", "volume", "create", volName).CombinedOutput(); err != nil {
|
|
m.logger.Printf("[WARN] [backup] Failed to create volume %s: %s — %v", volName, strings.TrimSpace(string(out)), err)
|
|
continue
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
|
|
cmd := exec.CommandContext(ctx, "docker", "run", "--rm",
|
|
"-v", volName+":/vol",
|
|
"-v", dumpDir+":/in:ro",
|
|
"alpine", "tar", "xf", "/in/"+entry.Name(), "-C", "/vol")
|
|
out, err := cmd.CombinedOutput()
|
|
cancel()
|
|
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] [backup] Failed to populate volume %s: %s — %v", volName, strings.TrimSpace(string(out)), err)
|
|
continue
|
|
}
|
|
|
|
restored++
|
|
}
|
|
|
|
if restored > 0 {
|
|
m.logger.Printf("[INFO] [backup] Restored %d Docker volume(s) for %s (Tier 2)", restored, stackName)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// restoreDockerVolumes populates Docker volumes from tar files in the volume dump directory.
|
|
func (m *Manager) restoreDockerVolumes(stackName, drivePath string) error {
|
|
dumpDir := AppVolumeDumpPath(drivePath, stackName)
|
|
entries, err := os.ReadDir(dumpDir)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
return nil // No volume dumps to restore
|
|
}
|
|
return fmt.Errorf("reading volume dump dir: %w", err)
|
|
}
|
|
|
|
var restored int
|
|
for _, entry := range entries {
|
|
if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".tar") {
|
|
continue
|
|
}
|
|
volName := strings.TrimSuffix(entry.Name(), ".tar")
|
|
|
|
m.logger.Printf("[INFO] [backup] Restoring Docker volume %s for %s", volName, stackName)
|
|
|
|
// Remove existing volume (ignore errors — may not exist)
|
|
exec.Command("docker", "volume", "rm", "-f", volName).Run()
|
|
|
|
// Create fresh volume
|
|
if out, err := exec.Command("docker", "volume", "create", volName).CombinedOutput(); err != nil {
|
|
m.logger.Printf("[WARN] [backup] Failed to create volume %s: %s — %v", volName, strings.TrimSpace(string(out)), err)
|
|
continue
|
|
}
|
|
|
|
// Populate from tar
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
|
|
cmd := exec.CommandContext(ctx, "docker", "run", "--rm",
|
|
"-v", volName+":/vol",
|
|
"-v", dumpDir+":/in:ro",
|
|
"alpine", "tar", "xf", "/in/"+entry.Name(), "-C", "/vol")
|
|
out, err := cmd.CombinedOutput()
|
|
cancel()
|
|
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] [backup] Failed to populate volume %s: %s — %v", volName, strings.TrimSpace(string(out)), err)
|
|
continue
|
|
}
|
|
|
|
restored++
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] [backup] Volume %s restored successfully", volName)
|
|
}
|
|
}
|
|
|
|
if restored > 0 {
|
|
m.logger.Printf("[INFO] [backup] Restored %d Docker volume(s) for %s", restored, stackName)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// waitForHealthy waits for a stack to reach running state after restore.
|
|
// Forces a docker ps refresh on each poll to avoid stale state.
|
|
// Acceptable overhead for a rare operation (restore).
|
|
func (m *Manager) waitForHealthy(stackName string, timeout time.Duration) error {
|
|
deadline := time.Now().Add(timeout)
|
|
interval := 5 * time.Second
|
|
|
|
time.Sleep(3 * time.Second) // initial settling time
|
|
|
|
for time.Now().Before(deadline) {
|
|
if m.stackProvider == nil {
|
|
return fmt.Errorf("no stack provider")
|
|
}
|
|
if m.stackProvider.RefreshAndIsRunning(stackName) {
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] [backup] Post-restore health check: %s is running", stackName)
|
|
}
|
|
return nil
|
|
}
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] [backup] Post-restore health check: %s not yet running, waiting...", stackName)
|
|
}
|
|
time.Sleep(interval)
|
|
}
|
|
return fmt.Errorf("stack %s did not reach running state within %s after restore", stackName, timeout)
|
|
}
|