v0.53.0: Phase 2 capture side — per-app secret-free recovery unit

Each app's on-drive backup becomes a self-contained, recreatable recovery unit:
compose/ (docker-compose.yml + .felhom.yml + secret-stripped app.yaml) alongside
the existing db-dumps/ + volume-dumps/, plus a secret-free manifest.json (image
pins, secret env-var NAMES, data_key names, checksums). The unit stores no secret
value, no data-key, and not the image — secrets are recovered at restore from the
guest's own app.yaml (live/PBS), never regenerated.

- appbackup: RecoveryUnit* path helpers, RecoveryInfo + GetStackRecoveryInfo,
  ParseComposeImages; AppDBDump/Volume refactored onto RecoveryUnitPath.
- backup: recovery_unit.go (manifest + CaptureRecoveryUnit), wired into RunDBDumps;
  capture test proves secret-free.
- stacks: DeployField.DataKey + Metadata.DataKeyEnvVars(); main.go stackAdapter
  implements GetStackRecoveryInfo (excludes secret-named + encrypted values).
- Restore-from-unit recreate + fail-closed gate + live AdventureLog validation: next.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-13 10:20:37 +02:00
parent 5eb25c3861
commit 70eb521cd0
9 changed files with 586 additions and 3 deletions
@@ -0,0 +1,146 @@
package backup
import (
"encoding/json"
"io"
"io/fs"
"log"
"os"
"path/filepath"
"strings"
"testing"
)
// fakeRecoveryProvider is a minimal StackDataProvider stand-in for the capture test.
// It hands back canned values and ignores the stack name passed to every method.
type fakeRecoveryProvider struct {
	// hdd is the value returned by GetStackHDDPath.
	hdd string
	// info is the value returned by GetStackRecoveryInfo; its StackDir also
	// anchors the compose path returned by GetStackComposePath.
	info RecoveryInfo
}

// GetStackComposePath points at docker-compose.yml inside info.StackDir and
// always reports the stack as found.
func (p *fakeRecoveryProvider) GetStackComposePath(string) (string, bool) {
	composeFile := filepath.Join(p.info.StackDir, "docker-compose.yml")
	return composeFile, true
}

// ListDeployedStacks returns no stacks.
func (p *fakeRecoveryProvider) ListDeployedStacks() []StackSummary {
	return nil
}

// GetStackHDDMounts returns no mounts.
func (p *fakeRecoveryProvider) GetStackHDDMounts(string) []string {
	return nil
}

// GetStackHDDPath returns the configured hdd path.
func (p *fakeRecoveryProvider) GetStackHDDPath(string) string {
	return p.hdd
}

// GetDockerVolumes returns no volumes.
func (p *fakeRecoveryProvider) GetDockerVolumes(string) []string {
	return nil
}

// StopStack is a no-op that always succeeds.
func (p *fakeRecoveryProvider) StopStack(string) error {
	return nil
}

// StartStack is a no-op that always succeeds.
func (p *fakeRecoveryProvider) StartStack(string) error {
	return nil
}

// RefreshAndIsRunning always reports the stack as not running.
func (p *fakeRecoveryProvider) RefreshAndIsRunning(string) bool {
	return false
}

// GetStackRecoveryInfo returns the canned RecoveryInfo and always reports it as found.
func (p *fakeRecoveryProvider) GetStackRecoveryInfo(string) (RecoveryInfo, bool) {
	return p.info, true
}
// TestCaptureRecoveryUnitIsSecretFree proves the captured unit (a) contains compose+config+manifest,
// (b) enumerates the existing dumps, and (c) is SECRET-FREE: a secret value present in the SOURCE
// app.yaml does NOT appear anywhere in the unit, because the capture writes the stripped NonSecretEnv
// (not the raw app.yaml). The manifest records the secret NAMES + data_key flag for recovery-from-guest.
//
// The closing walk over the unit reports traversal errors instead of skipping them, so the
// secret-free check cannot pass merely because part of the unit could not be read.
func TestCaptureRecoveryUnitIsSecretFree(t *testing.T) {
	const secretVal = "SUPERSECRETVALUE-do-not-leak"

	tmp := t.TempDir()
	stackDir := filepath.Join(tmp, "stack")
	drive := filepath.Join(tmp, "drive") // in-guest namespace root (basename need not be felhom-data)
	if err := os.MkdirAll(stackDir, 0755); err != nil {
		t.Fatal(err)
	}

	// Source stack files — the raw app.yaml DELIBERATELY holds a secret to prove it's not copied.
	mustWrite(t, filepath.Join(stackDir, "docker-compose.yml"),
		"services:\n app:\n image: example/app:1.2.3\n")
	mustWrite(t, filepath.Join(stackDir, ".felhom.yml"), "display_name: Example\n")
	mustWrite(t, filepath.Join(stackDir, "app.yaml"),
		"deployed: true\nenv:\n DB_PASSWORD: "+secretVal+"\n SUBDOMAIN: example\n")

	// Pre-existing dumps (written by the dump flow before capture).
	mustWrite(t, filepath.Join(AppDBDumpPath(drive, "example"), "example-postgres.sql"), "dump")
	mustWrite(t, filepath.Join(AppVolumeDumpPath(drive, "example"), "example_data.tar"), "tar")

	// RecoveryInfo as the adapter would build it: secret values already stripped from NonSecretEnv.
	info := RecoveryInfo{
		StackDir:       stackDir,
		DisplayName:    "Example",
		ImagePins:      []string{"example/app:1.2.3"},
		NonSecretEnv:   map[string]string{"SUBDOMAIN": "example", "HDD_PATH": drive},
		SecretEnvVars:  []string{"DB_PASSWORD", "SECRET_KEY"},
		DataKeyEnvVars: []string{"SECRET_KEY"},
	}
	m := &Manager{
		logger:         log.New(io.Discard, "", 0),
		systemDataPath: filepath.Join(tmp, "system"), // != drive ⇒ drive treated as in-guest, nsRoot = drive
		stackProvider:  &fakeRecoveryProvider{info: info, hdd: drive},
		version:        "vtest",
	}
	if err := m.CaptureRecoveryUnit("example"); err != nil {
		t.Fatalf("capture: %v", err)
	}

	// Every config file must have been captured into the unit's compose directory.
	composeDir := RecoveryUnitComposePath(drive, "example")
	for _, f := range []string{"docker-compose.yml", ".felhom.yml", "app.yaml"} {
		if _, err := os.Stat(filepath.Join(composeDir, f)); err != nil {
			t.Errorf("missing captured config %s: %v", f, err)
		}
	}

	// Manifest structure.
	mfData, err := os.ReadFile(RecoveryUnitManifestPath(drive, "example"))
	if err != nil {
		t.Fatalf("manifest: %v", err)
	}
	var man RecoveryManifest
	if err := json.Unmarshal(mfData, &man); err != nil {
		t.Fatalf("manifest parse: %v", err)
	}
	if man.AppName != "example" || man.ControllerVer != "vtest" {
		t.Errorf("manifest meta: app=%q ver=%q", man.AppName, man.ControllerVer)
	}
	if len(man.ImagePins) != 1 || man.ImagePins[0] != "example/app:1.2.3" {
		t.Errorf("image pins: %v", man.ImagePins)
	}
	if len(man.SecretEnvVars) != 2 {
		t.Errorf("secret env-var names: %v (want 2)", man.SecretEnvVars)
	}
	if len(man.DataKeyEnvVars) != 1 || man.DataKeyEnvVars[0] != "SECRET_KEY" {
		t.Errorf("data-key env-vars: %v", man.DataKeyEnvVars)
	}
	if len(man.DBDumps) != 1 || len(man.VolumeDumps) != 1 {
		t.Errorf("dumps enumerated: db=%v vol=%v", man.DBDumps, man.VolumeDumps)
	}

	// app.yaml in the unit must carry the non-secret env but NOT the secret value.
	appy := mustRead(t, filepath.Join(composeDir, "app.yaml"))
	if !strings.Contains(appy, "SUBDOMAIN") {
		t.Errorf("stripped app.yaml missing non-secret env: %s", appy)
	}

	// SECRET-FREE invariant: the secret value must not appear ANYWHERE in the unit.
	unitRoot := RecoveryUnitPath(drive, "example")
	walkErr := filepath.WalkDir(unitRoot, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			// Check err before touching d (WalkDir may pass a nil d when the root lookup
			// fails). Report the failure so an unreadable path cannot hide a leak, then
			// keep walking so the remaining files are still scanned.
			t.Errorf("walk %s: %v", path, err)
			return nil
		}
		if d.IsDir() {
			return nil
		}
		if strings.Contains(mustRead(t, path), secretVal) {
			t.Errorf("SECRET LEAK: %q found in %s", secretVal, path)
		}
		return nil
	})
	if walkErr != nil {
		t.Errorf("walk %s: %v", unitRoot, walkErr)
	}
}
// mustWrite stores content at path, creating any missing parent directories
// first (directories 0755, file 0644). Any failure aborts the test via t.Fatal.
func mustWrite(t *testing.T, path, content string) {
	t.Helper()

	parent := filepath.Dir(path)
	if mkErr := os.MkdirAll(parent, 0755); mkErr != nil {
		t.Fatal(mkErr)
	}

	payload := []byte(content)
	if wrErr := os.WriteFile(path, payload, 0644); wrErr != nil {
		t.Fatal(wrErr)
	}
}
// mustRead returns the entire contents of the file at path as a string.
// A read failure aborts the test via t.Fatal.
func mustRead(t *testing.T, path string) string {
	t.Helper()

	var (
		raw []byte
		err error
	)
	if raw, err = os.ReadFile(path); err != nil {
		t.Fatal(err)
	}
	return string(raw)
}