Files
felhom-controller/controller/internal/backup/recovery_unit_test.go
T
admin 7863e62f29 v0.54.0: Phase 2b — restore-from-recovery-unit + fail-closed data-key gate
Restore recreates an app from its on-drive unit + the guest's own secrets,
regenerating nothing. reconcileRestoreSecrets (pure, unit-tested) merges the unit's
non-secret env with secrets recovered from the live app.yaml and FAILS CLOSED if a
data-encrypting key is unrecoverable (refuse — a PBS whole-guest restore is needed —
rather than regenerate and corrupt). Resettable secrets missing → warn + proceed.

- backup: RestoreFromRecoveryUnit (manifest -> recover secrets -> gate -> restore
  volumes -> recreate definition + redeploy w/ re-pull); falls back to volume-only.
- seams: RecoverStackSecrets/RecreateStackFromUnit (adapter +encKey),
  stacks.RedeployFromEnv. Wired into /backup/restore.
- tests: gate (refuse/proceed/verbatim) + data_key parsing.

Gate + reconcile + data_key parsing unit-tested; capture live-validated (v0.53.1).
Full readable-data e2e vs AdventureLog needs the auth-gated dashboard restore — pending.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-13 11:12:43 +02:00

197 lines
7.6 KiB
Go

package backup
import (
"encoding/json"
"io"
"io/fs"
"log"
"os"
"path/filepath"
"strings"
"testing"
)
// fakeRecoveryProvider is a minimal StackDataProvider stub for the capture tests. It serves a
// canned RecoveryInfo and HDD path; every other method is an inert no-op.
type fakeRecoveryProvider struct {
	info RecoveryInfo // returned verbatim by GetStackRecoveryInfo; its StackDir locates the compose file
	hdd  string       // returned by GetStackHDDPath regardless of the stack name
}

// GetStackComposePath always reports the docker-compose.yml inside info.StackDir as found.
func (p *fakeRecoveryProvider) GetStackComposePath(_ string) (string, bool) {
	composePath := filepath.Join(p.info.StackDir, "docker-compose.yml")
	return composePath, true
}

// ListDeployedStacks reports no deployed stacks.
func (p *fakeRecoveryProvider) ListDeployedStacks() []StackSummary {
	return nil
}

// GetStackHDDMounts reports no HDD mounts.
func (p *fakeRecoveryProvider) GetStackHDDMounts(_ string) []string {
	return nil
}

// GetStackHDDPath returns the configured hdd path for any stack name.
func (p *fakeRecoveryProvider) GetStackHDDPath(_ string) string {
	return p.hdd
}

// GetDockerVolumes reports no docker volumes.
func (p *fakeRecoveryProvider) GetDockerVolumes(_ string) []string {
	return nil
}

// StopStack does nothing and always succeeds.
func (p *fakeRecoveryProvider) StopStack(_ string) error {
	return nil
}

// StartStack does nothing and always succeeds.
func (p *fakeRecoveryProvider) StartStack(_ string) error {
	return nil
}

// RefreshAndIsRunning always reports the stack as not running.
func (p *fakeRecoveryProvider) RefreshAndIsRunning(_ string) bool {
	return false
}

// GetStackRecoveryInfo returns the canned info and reports it as found.
func (p *fakeRecoveryProvider) GetStackRecoveryInfo(_ string) (RecoveryInfo, bool) {
	return p.info, true
}

// RecoverStackSecrets recovers nothing: it returns a nil map.
func (p *fakeRecoveryProvider) RecoverStackSecrets(_ string, _ []string) map[string]string {
	return nil
}

// RecreateStackFromUnit does nothing and always succeeds.
func (p *fakeRecoveryProvider) RecreateStackFromUnit(_, _ string, _ map[string]string) error {
	return nil
}
// TestCaptureRecoveryUnitIsSecretFree proves the captured unit (a) contains compose+config+manifest,
// (b) enumerates the existing dumps, and (c) is SECRET-FREE: a secret value present in the SOURCE
// app.yaml does NOT appear anywhere in the unit, because the capture writes the stripped NonSecretEnv
// (not the raw app.yaml). The manifest records the secret NAMES + data_key flag for recovery-from-guest.
//
// The leak scan fails the test if the unit tree cannot be walked or contains no files, so the
// invariant can never pass vacuously.
func TestCaptureRecoveryUnitIsSecretFree(t *testing.T) {
	const secretVal = "SUPERSECRETVALUE-do-not-leak"
	tmp := t.TempDir()
	stackDir := filepath.Join(tmp, "stack")
	drive := filepath.Join(tmp, "drive") // in-guest namespace root (basename need not be felhom-data)
	if err := os.MkdirAll(stackDir, 0755); err != nil {
		t.Fatal(err)
	}

	// Source stack files — the raw app.yaml DELIBERATELY holds a secret to prove it's not copied.
	mustWrite(t, filepath.Join(stackDir, "docker-compose.yml"),
		"services:\n app:\n image: example/app:1.2.3\n")
	mustWrite(t, filepath.Join(stackDir, ".felhom.yml"), "display_name: Example\n")
	mustWrite(t, filepath.Join(stackDir, "app.yaml"),
		"deployed: true\nenv:\n DB_PASSWORD: "+secretVal+"\n SUBDOMAIN: example\n")

	// Pre-existing dumps (written by the dump flow before capture).
	mustWrite(t, filepath.Join(AppDBDumpPath(drive, "example"), "example-postgres.sql"), "dump")
	mustWrite(t, filepath.Join(AppVolumeDumpPath(drive, "example"), "example_data.tar"), "tar")

	// RecoveryInfo as the adapter would build it: secret values already stripped from NonSecretEnv.
	info := RecoveryInfo{
		StackDir:       stackDir,
		DisplayName:    "Example",
		ImagePins:      []string{"example/app:1.2.3"},
		NonSecretEnv:   map[string]string{"SUBDOMAIN": "example", "HDD_PATH": drive},
		SecretEnvVars:  []string{"DB_PASSWORD", "SECRET_KEY"},
		DataKeyEnvVars: []string{"SECRET_KEY"},
	}
	m := &Manager{
		logger:         log.New(io.Discard, "", 0),
		systemDataPath: filepath.Join(tmp, "system"), // != drive ⇒ drive treated as in-guest, nsRoot = drive
		stackProvider:  &fakeRecoveryProvider{info: info, hdd: drive},
		version:        "vtest",
	}
	if err := m.CaptureRecoveryUnit("example"); err != nil {
		t.Fatalf("capture: %v", err)
	}

	composeDir := RecoveryUnitComposePath(drive, "example")
	for _, f := range []string{"docker-compose.yml", ".felhom.yml", "app.yaml"} {
		if _, err := os.Stat(filepath.Join(composeDir, f)); err != nil {
			t.Errorf("missing captured config %s: %v", f, err)
		}
	}

	// Manifest structure.
	mfData, err := os.ReadFile(RecoveryUnitManifestPath(drive, "example"))
	if err != nil {
		t.Fatalf("manifest: %v", err)
	}
	var man RecoveryManifest
	if err := json.Unmarshal(mfData, &man); err != nil {
		t.Fatalf("manifest parse: %v", err)
	}
	if man.AppName != "example" || man.ControllerVer != "vtest" {
		t.Errorf("manifest meta: app=%q ver=%q", man.AppName, man.ControllerVer)
	}
	if len(man.ImagePins) != 1 || man.ImagePins[0] != "example/app:1.2.3" {
		t.Errorf("image pins: %v", man.ImagePins)
	}
	if len(man.SecretEnvVars) != 2 {
		t.Errorf("secret env-var names: %v (want 2)", man.SecretEnvVars)
	}
	if len(man.DataKeyEnvVars) != 1 || man.DataKeyEnvVars[0] != "SECRET_KEY" {
		t.Errorf("data-key env-vars: %v", man.DataKeyEnvVars)
	}
	if len(man.DBDumps) != 1 || len(man.VolumeDumps) != 1 {
		t.Errorf("dumps enumerated: db=%v vol=%v", man.DBDumps, man.VolumeDumps)
	}

	// app.yaml in the unit must carry the non-secret env but NOT the secret value.
	appy := mustRead(t, filepath.Join(composeDir, "app.yaml"))
	if !strings.Contains(appy, "SUBDOMAIN") {
		t.Errorf("stripped app.yaml missing non-secret env: %s", appy)
	}

	// SECRET-FREE invariant: the secret value must not appear ANYWHERE in the unit.
	unitRoot := RecoveryUnitPath(drive, "example")
	scanned := 0
	walkErr := filepath.WalkDir(unitRoot, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			// d may be nil when err is set; surface the failure rather than silently
			// skipping part of the tree, which would let a leak go unnoticed.
			return err
		}
		if d.IsDir() {
			return nil
		}
		scanned++
		if strings.Contains(mustRead(t, path), secretVal) {
			t.Errorf("SECRET LEAK: %q found in %s", secretVal, path)
		}
		return nil
	})
	if walkErr != nil {
		t.Fatalf("walking recovery unit %s: %v", unitRoot, walkErr)
	}
	if scanned == 0 {
		t.Errorf("secret scan visited no files under %s", unitRoot)
	}
}
// TestCaptureRecoveryUnitIdempotent proves the checksum-skip guard: a second capture with unchanged
// config does NOT rewrite the manifest (CreatedAt stable), but a config change DOES.
//
// Every manifest re-read is nil-checked so a missing or unreadable manifest fails the test with a
// message instead of panicking on a nil dereference.
func TestCaptureRecoveryUnitIdempotent(t *testing.T) {
	tmp := t.TempDir()
	stackDir := filepath.Join(tmp, "stack")
	drive := filepath.Join(tmp, "drive")
	mustWrite(t, filepath.Join(stackDir, "docker-compose.yml"), "services:\n app:\n image: ex/app:1\n")
	mustWrite(t, filepath.Join(AppDBDumpPath(drive, "ex"), "ex.sql"), "d")

	info := RecoveryInfo{StackDir: stackDir, DisplayName: "Ex", ImagePins: []string{"ex/app:1"},
		NonSecretEnv: map[string]string{"SUBDOMAIN": "ex"}}
	m := &Manager{logger: log.New(io.Discard, "", 0), systemDataPath: filepath.Join(tmp, "sys"),
		stackProvider: &fakeRecoveryProvider{info: info, hdd: drive}, version: "v1"}
	manifestPath := RecoveryUnitManifestPath(drive, "ex")

	if err := m.CaptureRecoveryUnit("ex"); err != nil {
		t.Fatal(err)
	}
	first := readManifest(manifestPath)
	if first == nil {
		t.Fatal("manifest not written")
	}

	// Second capture, unchanged → skipped; CreatedAt is the observable proof of no rewrite.
	if err := m.CaptureRecoveryUnit("ex"); err != nil {
		t.Fatal(err)
	}
	again := readManifest(manifestPath)
	if again == nil {
		t.Fatal("manifest unreadable after second capture")
	}
	if again.CreatedAt != first.CreatedAt {
		t.Errorf("idempotent capture rewrote manifest: %q -> %q", first.CreatedAt, again.CreatedAt)
	}

	// Change the compose → must rewrite (config checksum differs).
	mustWrite(t, filepath.Join(stackDir, "docker-compose.yml"), "services:\n app:\n image: ex/app:2\n")
	m.stackProvider.(*fakeRecoveryProvider).info.ImagePins = []string{"ex/app:2"}
	if err := m.CaptureRecoveryUnit("ex"); err != nil {
		t.Fatal(err)
	}
	changed := readManifest(manifestPath)
	if changed == nil {
		t.Fatal("manifest unreadable after config change")
	}
	if len(changed.ImagePins) != 1 || changed.ImagePins[0] != "ex/app:2" {
		t.Errorf("config change not captured: %v", changed.ImagePins)
	}
	if changed.Checksums["docker-compose.yml"] == first.Checksums["docker-compose.yml"] {
		t.Errorf("compose checksum should change after edit")
	}
}
// mustWrite creates path's parent directories (mode 0755) and then writes content to path
// (mode 0644), aborting the test via t.Fatal on the first error from either step.
func mustWrite(t *testing.T, path, content string) {
	t.Helper()
	failure := os.MkdirAll(filepath.Dir(path), 0755)
	if failure == nil {
		// Only attempt the write once the parent directory is known to exist.
		failure = os.WriteFile(path, []byte(content), 0644)
	}
	if failure != nil {
		t.Fatal(failure)
	}
}
// mustRead returns the full contents of path as a string, aborting the test via t.Fatal
// when the file cannot be read.
func mustRead(t *testing.T, path string) string {
	t.Helper()
	contents, err := os.ReadFile(path)
	if err == nil {
		return string(contents)
	}
	t.Fatal(err)
	return "" // not reached: t.Fatal stops the test goroutine
}