v0.54.0: Phase 2b — restore-from-recovery-unit + fail-closed data-key gate

Restore recreates an app from its on-drive unit plus the guest's own secrets,
regenerating nothing. reconcileRestoreSecrets (pure, unit-tested) merges the unit's
non-secret env with secrets recovered from the live app.yaml and FAILS CLOSED if a
data-encrypting key is unrecoverable: it refuses the restore (a PBS whole-guest
restore is needed instead) rather than regenerating the key and corrupting the data.
If only resettable secrets are missing, it warns and proceeds.

- backup: RestoreFromRecoveryUnit (manifest -> recover secrets -> gate -> restore
  volumes -> recreate definition + redeploy w/ re-pull); falls back to volume-only.
- seams: RecoverStackSecrets/RecreateStackFromUnit (adapter +encKey),
  stacks.RedeployFromEnv. Wired into /backup/restore.
- tests: gate (refuse/proceed/verbatim) + data_key parsing.

Gate + reconcile + data_key parsing unit-tested; capture live-validated (v0.53.1).
Full readable-data e2e vs AdventureLog needs the auth-gated dashboard restore — pending.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-13 11:12:43 +02:00
parent 39d623a1c1
commit 7863e62f29
9 changed files with 377 additions and 1 deletions
+45
View File
@@ -218,6 +218,7 @@ func main() {
stackProv := &stackAdapter{
mgr: stackMgr,
getStoragePaths: func() []settings.StoragePath { return sett.GetStoragePaths() },
encKey: encKey,
}
if cfg.Backup.Enabled {
backupMgr = backup.NewManager(cfg, sett, logger)
@@ -768,6 +769,7 @@ func setupLogger(cfg *config.Config) (*log.Logger, *web.LogBuffer) {
// stackAdapter wraps a *stacks.Manager and carries the extra dependencies its methods need
// (storage-path lookup and the encryption key used when reading a stack's app.yaml).
type stackAdapter struct {
// mgr is the underlying stack manager; methods on the adapter look stacks up through it
// (GetStack) and trigger redeploys (RedeployFromEnv).
mgr *stacks.Manager
// getStoragePaths returns the configured storage paths; main wires it to sett.GetStoragePaths().
getStoragePaths func() []settings.StoragePath
// encKey is handed to stacks.LoadAppConfigDecrypted when recovering secrets.
encKey []byte // for decrypting live app.yaml secrets during restore-from-unit
}
func (a *stackAdapter) GetStackComposePath(name string) (string, bool) {
@@ -904,6 +906,49 @@ func (a *stackAdapter) GetStackRecoveryInfo(name string) (backup.RecoveryInfo, b
}, true
}
// RecoverStackSecrets looks up the requested secret env vars in the decrypted app.yaml of the
// named stack and returns the ones that carry a non-empty value. The values are the guest's own
// (live rootfs or PBS-restored) and are never taken from the recovery unit.
//
// Names that are absent or empty in app.yaml are simply left out of the result; deciding whether
// that is fatal is the job of the caller's fail-closed gate. A nil map is returned when the stack
// is unknown or its app config cannot be loaded.
func (a *stackAdapter) RecoverStackSecrets(name string, names []string) map[string]string {
	stack, found := a.mgr.GetStack(name)
	if !found {
		return nil
	}

	stackDir := filepath.Dir(stack.ComposePath)
	appCfg := stacks.LoadAppConfigDecrypted(stackDir, a.encKey)
	if appCfg == nil {
		return nil
	}

	recovered := make(map[string]string)
	for _, key := range names {
		val, present := appCfg.Env[key]
		if !present || val == "" {
			continue // missing or blank: omit and let the caller's gate decide
		}
		recovered[key] = val
	}
	return recovered
}
// RecreateStackFromUnit restores the app definition from the unit's compose dir into the stack dir,
// then redeploys with the reconstructed full env (re-pulling the pinned image). Secrets in fullEnv were
// recovered from the guest, never regenerated.
//
// Parameters:
//   - name: the stack to recreate; it must already be known to the stack manager.
//   - composeSrcDir: directory inside the recovery unit holding the captured definition files.
//   - fullEnv: the complete environment passed through to RedeployFromEnv.
//
// Definition files that the unit never captured are skipped. Any other failure to read a captured
// file, or to write it into the stack dir, aborts the restore before the redeploy is attempted, so
// a stack is never redeployed on top of a partially restored definition.
func (a *stackAdapter) RecreateStackFromUnit(name, composeSrcDir string, fullEnv map[string]string) error {
	s, ok := a.mgr.GetStack(name)
	if !ok {
		return fmt.Errorf("stack %q not found", name)
	}
	stackDir := filepath.Dir(s.ComposePath)

	// Recover the app definition from the unit (compose + .felhom.yml) into the stack dir.
	for _, fname := range []string{"docker-compose.yml", ".felhom.yml"} {
		data, err := os.ReadFile(filepath.Join(composeSrcDir, fname))
		if err != nil {
			if os.IsNotExist(err) {
				continue // capture whichever existed
			}
			// The file is there but unreadable (permissions, I/O error, ...): fail rather than
			// silently redeploy with a stale or incomplete definition.
			return fmt.Errorf("reading %s from unit: %w", fname, err)
		}
		if err := os.WriteFile(filepath.Join(stackDir, fname), data, 0644); err != nil {
			return fmt.Errorf("restoring %s from unit: %w", fname, err)
		}
	}
	return a.mgr.RedeployFromEnv(name, fullEnv)
}
// RefreshAndIsRunning forces a docker ps scan before checking state.
// Called during post-restore health check (~every 5s for up to 90s).
// Full refresh is acceptable here since restores are rare operations.