abe4e8e619
Retired (~12.3k LOC): internal/storage/* (scan/format/attach/migrate/safety), backup restic/crossdrive/restore_drives/disk_layout/local_infra/restore_scan/ paths + restore_app, report/infra_backup*/infra_pull, setup/scanner, monitor/watchdog+pinger, web/storage_handlers+handler_restore. Surgically split backup.Manager to app-data only (DB dumps + volume tars + app restore; dropped restic + cross-drive + snapshot history). Fixed router/main/web wiring. Added agent-backed disk API (web/agent_disk_handlers.go): /api/disks list/ assign/eject/format proxying agentapi; data-bearing format refusal -> HTTP 409 'operator authorization required'. report/config_pull.go keeps the setup fresh-install config download. go build + go test green. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
164 lines
5.2 KiB
Go
164 lines
5.2 KiB
Go
package backup
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"os/exec"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// RestoreApp restores an app's data from its on-disk app-data backup.
|
|
//
|
|
// Disk-tier (restic snapshot) restore has moved to the host agent. This keep-side
|
|
// restore re-imports the Docker-volume tar dumps that the app-data backup produced
|
|
// (AppVolumeDumpPath) and relies on the DB dumps already present on the app's drive.
|
|
// The stack is stopped before the volume import and restarted after.
|
|
//
|
|
// snapshotID is retained for API/UI signature compatibility; with restic removed it
|
|
// is only used for logging (the source of truth is now the on-disk volume tars).
|
|
func (m *Manager) RestoreApp(stackName, snapshotID string) error {
|
|
if m.stackProvider == nil {
|
|
return fmt.Errorf("stack provider not configured")
|
|
}
|
|
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RestoreApp: stack=%s, snapshotID=%s", stackName, snapshotID)
|
|
}
|
|
|
|
// Prevent concurrent operations
|
|
m.mu.Lock()
|
|
if m.running {
|
|
m.mu.Unlock()
|
|
return fmt.Errorf("backup or restore already in progress")
|
|
}
|
|
m.running = true
|
|
m.mu.Unlock()
|
|
defer func() {
|
|
m.mu.Lock()
|
|
m.running = false
|
|
m.mu.Unlock()
|
|
}()
|
|
|
|
drivePath := m.GetAppDrivePath(stackName)
|
|
if drivePath == "" {
|
|
return fmt.Errorf("cannot determine drive path for %s", stackName)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] [backup] Starting app-data restore for %s (drive=%s)", stackName, drivePath)
|
|
|
|
// Stop the app before restore
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RestoreApp: step 1/3 — stopping app %s", stackName)
|
|
}
|
|
if err := m.stackProvider.StopStack(stackName); err != nil {
|
|
m.logger.Printf("[WARN] RESTORE could not stop %s: %v (proceeding anyway)", stackName, err)
|
|
}
|
|
|
|
// Populate Docker volumes from restored tars
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RestoreApp: step 2/3 — restoring Docker volumes for %s", stackName)
|
|
}
|
|
if err := m.restoreDockerVolumes(stackName, drivePath); err != nil {
|
|
m.logger.Printf("[WARN] RESTORE volume restore failed for %s: %v (continuing)", stackName, err)
|
|
}
|
|
|
|
// Restart the app
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] RestoreApp: step 3/3 — restarting app %s after restore", stackName)
|
|
}
|
|
if err := m.stackProvider.StartStack(stackName); err != nil {
|
|
m.logger.Printf("[WARN] RESTORE could not restart %s after restore: %v", stackName, err)
|
|
}
|
|
|
|
// Verify app started successfully
|
|
if err := m.waitForHealthy(stackName, 90*time.Second); err != nil {
|
|
m.logger.Printf("[WARN] [backup] Restore completed but app health check failed: %v", err)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] RESTORE completed: stack=%s", stackName)
|
|
return nil
|
|
}
|
|
|
|
// restoreDockerVolumes populates Docker volumes from tar files in the volume dump directory.
|
|
func (m *Manager) restoreDockerVolumes(stackName, drivePath string) error {
|
|
dumpDir := AppVolumeDumpPath(drivePath, stackName)
|
|
entries, err := os.ReadDir(dumpDir)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
return nil // No volume dumps to restore
|
|
}
|
|
return fmt.Errorf("reading volume dump dir: %w", err)
|
|
}
|
|
|
|
var restored int
|
|
for _, entry := range entries {
|
|
if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".tar") {
|
|
continue
|
|
}
|
|
volName := strings.TrimSuffix(entry.Name(), ".tar")
|
|
|
|
m.logger.Printf("[INFO] [backup] Restoring Docker volume %s for %s", volName, stackName)
|
|
|
|
// Remove existing volume (ignore errors — may not exist)
|
|
exec.Command("docker", "volume", "rm", "-f", volName).Run()
|
|
|
|
// Create fresh volume
|
|
if out, err := exec.Command("docker", "volume", "create", volName).CombinedOutput(); err != nil {
|
|
m.logger.Printf("[WARN] [backup] Failed to create volume %s: %s — %v", volName, strings.TrimSpace(string(out)), err)
|
|
continue
|
|
}
|
|
|
|
// Populate from tar
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
|
|
cmd := exec.CommandContext(ctx, "docker", "run", "--rm",
|
|
"-v", volName+":/vol",
|
|
"-v", dumpDir+":/in:ro",
|
|
"alpine", "tar", "xf", "/in/"+entry.Name(), "-C", "/vol")
|
|
out, err := cmd.CombinedOutput()
|
|
cancel()
|
|
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] [backup] Failed to populate volume %s: %s — %v", volName, strings.TrimSpace(string(out)), err)
|
|
continue
|
|
}
|
|
|
|
restored++
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] [backup] Volume %s restored successfully", volName)
|
|
}
|
|
}
|
|
|
|
if restored > 0 {
|
|
m.logger.Printf("[INFO] [backup] Restored %d Docker volume(s) for %s", restored, stackName)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// waitForHealthy waits for a stack to reach running state after restore.
|
|
// Forces a docker ps refresh on each poll to avoid stale state.
|
|
func (m *Manager) waitForHealthy(stackName string, timeout time.Duration) error {
|
|
deadline := time.Now().Add(timeout)
|
|
interval := 5 * time.Second
|
|
|
|
time.Sleep(3 * time.Second) // initial settling time
|
|
|
|
for time.Now().Before(deadline) {
|
|
if m.stackProvider == nil {
|
|
return fmt.Errorf("no stack provider")
|
|
}
|
|
if m.stackProvider.RefreshAndIsRunning(stackName) {
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] [backup] Post-restore health check: %s is running", stackName)
|
|
}
|
|
return nil
|
|
}
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] [backup] Post-restore health check: %s not yet running, waiting...", stackName)
|
|
}
|
|
time.Sleep(interval)
|
|
}
|
|
return fmt.Errorf("stack %s did not reach running state within %s after restore", stackName, timeout)
|
|
}
|