package backup

import (
	"context"
	"fmt"
	"os"
	"os/exec"
	"strings"
	"time"
)

// RestoreApp restores an app's data from its on-disk app-data backup.
//
// Disk-tier (restic snapshot) restore has moved to the host agent. This keep-side
// restore re-imports the Docker-volume tar dumps that the app-data backup produced
// (AppVolumeDumpPath) and relies on the DB dumps already present on the app's drive.
// The stack is stopped before the volume import and restarted after.
//
// snapshotID is retained for API/UI signature compatibility; with restic removed it
// is only used for logging (the source of truth is now the on-disk volume tars).
//
// An error is returned only when the restore cannot be attempted at all: no stack
// provider is configured, another backup/restore is already running, or the app's
// drive path cannot be determined. Failures while stopping the stack, importing
// volumes, restarting the stack or waiting for it to come up are logged as
// warnings and do not change the nil return value.
func (m *Manager) RestoreApp(stackName, snapshotID string) error {
	if m.stackProvider == nil {
		return fmt.Errorf("stack provider not configured")
	}

	if m.isDebug() {
		m.logger.Printf("[DEBUG] RestoreApp: stack=%s, snapshotID=%s", stackName, snapshotID)
	}

	// Prevent concurrent operations: m.running is the single busy flag shared by
	// backup and restore, guarded by m.mu.
	m.mu.Lock()
	if m.running {
		m.mu.Unlock()
		return fmt.Errorf("backup or restore already in progress")
	}
	m.running = true
	m.mu.Unlock()
	defer func() {
		m.mu.Lock()
		m.running = false
		m.mu.Unlock()
	}()

	drivePath := m.GetAppDrivePath(stackName)
	if drivePath == "" {
		return fmt.Errorf("cannot determine drive path for %s", stackName)
	}

	m.logger.Printf("[INFO] [backup] Starting app-data restore for %s (drive=%s)", stackName, drivePath)

	// Step 1: stop the app before touching its volumes. Best effort: a failure is
	// logged and the restore proceeds.
	if m.isDebug() {
		m.logger.Printf("[DEBUG] RestoreApp: step 1/3 — stopping app %s", stackName)
	}
	if err := m.stackProvider.StopStack(stackName); err != nil {
		m.logger.Printf("[WARN] RESTORE could not stop %s: %v (proceeding anyway)", stackName, err)
	}

	// Step 2: populate Docker volumes from the dumped tars.
	if m.isDebug() {
		m.logger.Printf("[DEBUG] RestoreApp: step 2/3 — restoring Docker volumes for %s", stackName)
	}
	if err := m.restoreDockerVolumes(stackName, drivePath); err != nil {
		m.logger.Printf("[WARN] RESTORE volume restore failed for %s: %v (continuing)", stackName, err)
	}

	// Step 3: restart the app.
	if m.isDebug() {
		m.logger.Printf("[DEBUG] RestoreApp: step 3/3 — restarting app %s after restore", stackName)
	}
	if err := m.stackProvider.StartStack(stackName); err != nil {
		m.logger.Printf("[WARN] RESTORE could not restart %s after restore: %v", stackName, err)
	}

	// Verify the app started successfully.
	if err := m.waitForHealthy(stackName, 90*time.Second); err != nil {
		m.logger.Printf("[WARN] [backup] Restore completed but app health check failed: %v", err)
	}

	m.logger.Printf("[INFO] RESTORE completed: stack=%s", stackName)
	return nil
}

// restoreDockerVolumes populates Docker volumes from tar files in the volume dump directory.
//
// Every regular "<volume>.tar" entry in the dump directory is restored by removing
// the volume of that name, recreating it, and extracting the tar into it from a
// throwaway alpine container. Each docker invocation is bounded by a timeout so
// that a hung daemon cannot block the restore (and the busy flag held by
// RestoreApp) forever.
//
// Per-volume failures are logged and skipped. The returned error is non-nil only
// when the dump directory exists but cannot be read; a missing directory means
// there is nothing to restore and yields nil.
func (m *Manager) restoreDockerVolumes(stackName, drivePath string) error {
	const (
		// Bound for the quick daemon calls (volume rm / volume create).
		adminTimeout = 2 * time.Minute
		// Bound for extracting one tar into its volume.
		populateTimeout = 10 * time.Minute
	)

	// docker runs one docker CLI command under its own timeout and returns the
	// combined stdout/stderr.
	docker := func(timeout time.Duration, args ...string) ([]byte, error) {
		ctx, cancel := context.WithTimeout(context.Background(), timeout)
		defer cancel()
		return exec.CommandContext(ctx, "docker", args...).CombinedOutput()
	}

	dumpDir := AppVolumeDumpPath(m.namespaceRoot(drivePath), stackName)
	entries, err := os.ReadDir(dumpDir)
	if err != nil {
		if os.IsNotExist(err) {
			return nil // No volume dumps to restore
		}
		return fmt.Errorf("reading volume dump dir: %w", err)
	}

	var restored int
	for _, entry := range entries {
		if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".tar") {
			continue
		}
		volName := strings.TrimSuffix(entry.Name(), ".tar")

		// A file named exactly ".tar" gives an empty name, which would make
		// `docker volume create` produce a stray anonymous volume; a leading "-"
		// would be parsed by the docker CLI as a flag. Neither can be a real
		// volume name, so skip such dumps.
		if volName == "" || strings.HasPrefix(volName, "-") {
			m.logger.Printf("[WARN] [backup] Skipping volume dump %q for %s: not a valid Docker volume name", entry.Name(), stackName)
			continue
		}

		m.logger.Printf("[INFO] [backup] Restoring Docker volume %s for %s", volName, stackName)

		// Remove the existing volume. Best effort: if this fails (e.g. the volume
		// is still in use) the create below is a no-op and the tar is extracted
		// over the current contents, so make that visible in the log.
		if out, err := docker(adminTimeout, "volume", "rm", "-f", volName); err != nil {
			m.logger.Printf("[WARN] [backup] Could not remove existing volume %s: %s — %v (restoring over current contents)", volName, strings.TrimSpace(string(out)), err)
		}

		// Create a fresh volume.
		if out, err := docker(adminTimeout, "volume", "create", volName); err != nil {
			m.logger.Printf("[WARN] [backup] Failed to create volume %s: %s — %v", volName, strings.TrimSpace(string(out)), err)
			continue
		}

		// Populate from tar: mount the volume at /vol and the dump directory
		// read-only at /in, then extract inside the container.
		out, err := docker(populateTimeout,
			"run", "--rm",
			"-v", volName+":/vol",
			"-v", dumpDir+":/in:ro",
			"alpine", "tar", "xf", "/in/"+entry.Name(), "-C", "/vol")
		if err != nil {
			m.logger.Printf("[WARN] [backup] Failed to populate volume %s: %s — %v", volName, strings.TrimSpace(string(out)), err)
			continue
		}

		restored++
		if m.isDebug() {
			m.logger.Printf("[DEBUG] [backup] Volume %s restored successfully", volName)
		}
	}

	if restored > 0 {
		m.logger.Printf("[INFO] [backup] Restored %d Docker volume(s) for %s", restored, stackName)
	}
	return nil
}

// waitForHealthy waits for a stack to reach running state after restore.
//
// It polls the stack provider's RefreshAndIsRunning (used so that each poll works
// from refreshed rather than stale state) after a short settling delay, then every
// five seconds. Sleeps are capped at the time remaining so the call never runs
// past timeout, and the stack is always polled at least once, including one final
// poll at the deadline.
//
// It returns nil as soon as the stack is reported running, and an error when no
// stack provider is configured or the stack is still not running at the deadline.
func (m *Manager) waitForHealthy(stackName string, timeout time.Duration) error {
	provider := m.stackProvider
	if provider == nil {
		return fmt.Errorf("no stack provider")
	}

	const (
		settle   = 3 * time.Second // initial settling time
		interval = 5 * time.Second // delay between polls
	)

	deadline := time.Now().Add(timeout)

	// sleepCapped sleeps for d, shortened so it never runs past the deadline.
	// time.Sleep returns immediately for zero or negative durations.
	sleepCapped := func(d time.Duration) {
		if remaining := time.Until(deadline); remaining < d {
			d = remaining
		}
		time.Sleep(d)
	}

	sleepCapped(settle)

	for {
		if provider.RefreshAndIsRunning(stackName) {
			if m.isDebug() {
				m.logger.Printf("[DEBUG] [backup] Post-restore health check: %s is running", stackName)
			}
			return nil
		}
		if m.isDebug() {
			m.logger.Printf("[DEBUG] [backup] Post-restore health check: %s not yet running, waiting...", stackName)
		}
		if !time.Now().Before(deadline) {
			break
		}
		sleepCapped(interval)
	}

	return fmt.Errorf("stack %s did not reach running state within %s after restore", stackName, timeout)
}