v0.54.0: Phase 2b — restore-from-recovery-unit + fail-closed data-key gate

Restore recreates an app from its on-drive unit + the guest's own secrets,
regenerating nothing. reconcileRestoreSecrets (pure, unit-tested) merges the unit's
non-secret env with secrets recovered from the live app.yaml and FAILS CLOSED if a
data-encrypting key is unrecoverable (refuse — a PBS whole-guest restore is needed —
rather than regenerate and corrupt). Resettable secrets missing → warn + proceed.

- backup: RestoreFromRecoveryUnit (manifest -> recover secrets -> gate -> restore
  volumes -> recreate definition + redeploy w/ re-pull); falls back to volume-only.
- seams: RecoverStackSecrets/RecreateStackFromUnit (adapter +encKey),
  stacks.RedeployFromEnv. Wired into /backup/restore.
- tests: gate (refuse/proceed/verbatim) + data_key parsing.

The gate, reconcile and data_key parsing are unit-tested; capture was live-validated in v0.53.1.
The full end-to-end check (restored data readable in AdventureLog) needs the auth-gated dashboard restore and is still pending.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-13 11:12:43 +02:00
parent 39d623a1c1
commit 7863e62f29
9 changed files with 377 additions and 1 deletions
+42
View File
@@ -431,6 +431,48 @@ func (m *Manager) UpdateStackConfig(name string, values map[string]string) error
return m.RefreshStatus()
}
// RedeployFromEnv persists env as the stack's complete app.yaml environment (secret fields are
// encrypted on save) and then brings the stack back up with compose `up -d`. It is the redeploy
// step of the restore-from-unit flow (Phase 2b).
//
// In contrast to UpdateStackConfig, env is treated as the FULL environment, locked secrets
// included. Those secrets are expected to have been recovered from the guest's own app.yaml and
// never regenerated; enforcing the fail-closed gate is the caller's job, not this method's.
//
// NOTE(review): compose `up -d` normally pulls only images that are missing locally — confirm
// whether a forced re-pull of the pinned image is intended here.
//
// It returns an error when the stack is unknown, when app.yaml cannot be saved, or when compose
// fails; otherwise it returns the result of RefreshStatus.
func (m *Manager) RedeployFromEnv(name string, env map[string]string) error {
	st, found := m.GetStack(name)
	if !found {
		return fmt.Errorf("stack %q not found", name)
	}

	dir := filepath.Dir(st.ComposePath)
	md := LoadMetadata(dir)

	appCfg := &AppConfig{
		Deployed:   true,
		DeployedAt: time.Now().UTC().Format(time.RFC3339),
		Env:        env,
	}
	// Re-derive the locked-field list from the stack metadata.
	for _, field := range md.DeployFields {
		if !field.LockedAfterDeploy {
			continue
		}
		appCfg.LockedFields = append(appCfg.LockedFields, field.EnvVar)
	}

	sensitive := SensitiveEnvVars(&md)
	if err := SaveAppConfig(dir, appCfg, m.encKey, sensitive); err != nil {
		return fmt.Errorf("saving app config: %w", err)
	}

	// Mirror the saved config into the in-memory stack entry, if one is cached.
	func() {
		m.mu.Lock()
		defer m.mu.Unlock()
		if cached, present := m.stacks[name]; present {
			cached.Deployed = true
			cached.AppConfig = appCfg
		}
	}()

	m.logger.Printf("[INFO] [stacks] Redeploying %s from recovery unit with %d env vars", name, len(env))

	// stackEnv reads the configuration back for compose (decrypting the secrets).
	composeEnv := m.stackEnv(dir)
	if _, err := m.composeExecCustomEnv(dir, composeEnv, "up", "-d"); err != nil {
		return fmt.Errorf("compose up: %w", err)
	}

	m.logPostStartStatus(name, dir, composeEnv)
	return m.RefreshStatus()
}
// composeExecWithEnv runs a compose command with custom env vars injected.
func (m *Manager) composeExecWithEnv(dir string, env map[string]string, args ...string) (string, error) {
cmdEnv := os.Environ()
@@ -0,0 +1,40 @@
package stacks
import (
"os"
"path/filepath"
"testing"
)
// TestDataKeyParsing checks that a `data_key: true` annotation on a deploy field in .felhom.yml
// survives parsing and is reported by Metadata.DataKeyEnvVars() — the capture-side half of the
// Phase 2b fail-closed mechanism. The fail-closed gate itself is unit-tested in internal/backup
// (reconcileRestoreSecrets).
//
// The test writes a two-field .felhom.yml into a temp dir, loads it with LoadMetadata, and asserts
// that only SECRET_KEY is a data key while two env vars are reported as sensitive.
func TestDataKeyParsing(t *testing.T) {
// t.TempDir gives an isolated directory that the testing package removes after the test.
dir := t.TempDir()
// Mirrors adventurelog/.felhom.yml: SECRET_KEY is a data-encrypting key, DB_PASSWORD is resettable.
// YAML is indentation-sensitive, so the whitespace inside this raw string literal is significant.
yml := `display_name: AdventureLog
deploy_fields:
- env_var: SECRET_KEY
label: "Titkosítási kulcs"
type: secret
data_key: true
- env_var: DB_PASSWORD
label: "Adatbázis jelszó"
type: secret
`
if err := os.WriteFile(filepath.Join(dir, ".felhom.yml"), []byte(yml), 0644); err != nil {
t.Fatal(err)
}
meta := LoadMetadata(dir)
// Exactly one data key is expected, and it must be SECRET_KEY; anything else aborts the test.
dk := meta.DataKeyEnvVars()
if len(dk) != 1 || dk[0] != "SECRET_KEY" {
t.Fatalf("DataKeyEnvVars() = %v, want [SECRET_KEY]", dk)
}
// Both secrets are sensitive (stripped from the unit); only SECRET_KEY is a data_key (fail-closed).
// NOTE(review): only the count is asserted here, not the names — consider checking membership.
sens := SensitiveEnvVars(&meta)
if len(sens) != 2 {
t.Errorf("SensitiveEnvVars() = %v, want both SECRET_KEY and DB_PASSWORD", sens)
}
}