v0.53.1: refresh recovery units on periodic cache cycle (idempotent)
CaptureRecoveryUnit now builds content in memory and skips writes when the unit is already current (checksum + dump-set + version), so it can run from RefreshCache (startup + every 5m) without thrashing the USB drive. Units now exist shortly after startup and track config changes without waiting for the daily DB dump. +idempotency test. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -56,6 +56,10 @@ func (m *Manager) SetVersion(v string) {
|
||||
// CaptureRecoveryUnit writes/refreshes an app's secret-free recovery unit: it captures the
|
||||
// compose + metadata + a secret-stripped app.yaml into compose/, enumerates the DB/volume dumps
|
||||
// already present, and writes manifest.json. It NEVER writes a secret value or the Docker image.
|
||||
//
|
||||
// Idempotent: it builds the captured content in memory first and SKIPS all writes when the unit is
|
||||
// already current (same config checksums, same dump set, same controller version) — so it can run on
|
||||
// the periodic status refresh without thrashing a spinning USB drive.
|
||||
func (m *Manager) CaptureRecoveryUnit(stackName string) error {
|
||||
if m.stackProvider == nil {
|
||||
return fmt.Errorf("no stack provider")
|
||||
@@ -69,41 +73,60 @@ func (m *Manager) CaptureRecoveryUnit(stackName string) error {
|
||||
return fmt.Errorf("cannot determine absolute drive path for %s", stackName)
|
||||
}
|
||||
nsRoot := m.namespaceRoot(drivePath)
|
||||
|
||||
// Build the captured config CONTENT in memory (no writes yet) so we can checksum-compare.
|
||||
type capFile struct {
|
||||
name string
|
||||
data []byte
|
||||
perm os.FileMode
|
||||
}
|
||||
var files []capFile
|
||||
checksums := make(map[string]string)
|
||||
var configFiles []string
|
||||
for _, fname := range []string{"docker-compose.yml", ".felhom.yml"} {
|
||||
data, err := os.ReadFile(filepath.Join(info.StackDir, fname))
|
||||
if err != nil {
|
||||
continue // optional — capture whichever exist
|
||||
}
|
||||
files = append(files, capFile{fname, data, 0644})
|
||||
checksums[fname] = sha256Hex(data)
|
||||
configFiles = append(configFiles, fname)
|
||||
}
|
||||
appYaml := buildStrippedAppYaml(info)
|
||||
files = append(files, capFile{"app.yaml", appYaml, 0600})
|
||||
checksums["app.yaml"] = sha256Hex(appYaml)
|
||||
configFiles = append(configFiles, "app.yaml")
|
||||
|
||||
dbDumps := listFileNames(AppDBDumpPath(nsRoot, stackName), ".sql")
|
||||
volDumps := listFileNames(AppVolumeDumpPath(nsRoot, stackName), ".tar")
|
||||
version := m.versionLocked()
|
||||
|
||||
manifestPath := RecoveryUnitManifestPath(nsRoot, stackName)
|
||||
|
||||
// Skip if the unit is already current — avoids needless drive writes on the periodic refresh.
|
||||
if cur := readManifest(manifestPath); cur != nil &&
|
||||
cur.ControllerVer == version &&
|
||||
stringMapEqual(cur.Checksums, checksums) &&
|
||||
stringSliceEqual(cur.DBDumps, dbDumps) &&
|
||||
stringSliceEqual(cur.VolumeDumps, volDumps) {
|
||||
return nil
|
||||
}
|
||||
|
||||
composeDir := RecoveryUnitComposePath(nsRoot, stackName)
|
||||
if err := os.MkdirAll(composeDir, 0755); err != nil {
|
||||
return fmt.Errorf("creating recovery-unit compose dir: %w", err)
|
||||
}
|
||||
|
||||
checksums := make(map[string]string)
|
||||
var configFiles []string
|
||||
|
||||
// Capture docker-compose.yml + .felhom.yml verbatim (whichever exist).
|
||||
for _, fname := range []string{"docker-compose.yml", ".felhom.yml"} {
|
||||
src := filepath.Join(info.StackDir, fname)
|
||||
if _, err := os.Stat(src); err != nil {
|
||||
continue
|
||||
for _, f := range files {
|
||||
if err := atomicWrite(filepath.Join(composeDir, f.name), f.data, f.perm); err != nil {
|
||||
return fmt.Errorf("capturing %s: %w", f.name, err)
|
||||
}
|
||||
sum, err := copyFileChecksum(src, filepath.Join(composeDir, fname))
|
||||
if err != nil {
|
||||
return fmt.Errorf("capturing %s: %w", fname, err)
|
||||
}
|
||||
checksums[fname] = sum
|
||||
configFiles = append(configFiles, fname)
|
||||
}
|
||||
|
||||
// Write the SECRET-STRIPPED app.yaml (non-secret env only).
|
||||
sum, err := writeStrippedAppYaml(filepath.Join(composeDir, "app.yaml"), info)
|
||||
if err != nil {
|
||||
return fmt.Errorf("writing stripped app.yaml: %w", err)
|
||||
}
|
||||
checksums["app.yaml"] = sum
|
||||
configFiles = append(configFiles, "app.yaml")
|
||||
|
||||
manifest := &RecoveryManifest{
|
||||
SchemaVersion: 1,
|
||||
AppName: stackName,
|
||||
DisplayName: info.DisplayName,
|
||||
ControllerVer: m.versionLocked(),
|
||||
ControllerVer: version,
|
||||
CreatedAt: time.Now().UTC().Format(time.RFC3339),
|
||||
Drive: drivePath,
|
||||
NamespaceRoot: nsRoot,
|
||||
@@ -112,11 +135,11 @@ func (m *Manager) CaptureRecoveryUnit(stackName string) error {
|
||||
DataKeyEnvVars: info.DataKeyEnvVars,
|
||||
SecretSource: "guest app.yaml (live rootfs) or PBS whole-guest snapshot — never stored in this unit",
|
||||
ConfigFiles: configFiles,
|
||||
DBDumps: listFileNames(AppDBDumpPath(nsRoot, stackName), ".sql"),
|
||||
VolumeDumps: listFileNames(AppVolumeDumpPath(nsRoot, stackName), ".tar"),
|
||||
DBDumps: dbDumps,
|
||||
VolumeDumps: volDumps,
|
||||
Checksums: checksums,
|
||||
}
|
||||
if err := writeManifest(RecoveryUnitManifestPath(nsRoot, stackName), manifest); err != nil {
|
||||
if err := writeManifest(manifestPath, manifest); err != nil {
|
||||
return fmt.Errorf("writing manifest: %w", err)
|
||||
}
|
||||
|
||||
@@ -154,11 +177,13 @@ type strippedAppYaml struct {
|
||||
Env map[string]string `yaml:"env"`
|
||||
}
|
||||
|
||||
// writeStrippedAppYaml writes a secret-free app.yaml (non-secret env only) and returns its sha256.
|
||||
func writeStrippedAppYaml(dst string, info RecoveryInfo) (string, error) {
|
||||
// buildStrippedAppYaml renders a secret-free app.yaml (non-secret env only) as bytes. Deterministic:
|
||||
// yaml.v3 sorts map keys and the secret-name list comes in stable metadata order, so identical input
|
||||
// yields identical bytes (needed for the checksum-skip guard).
|
||||
func buildStrippedAppYaml(info RecoveryInfo) []byte {
|
||||
body, err := yaml.Marshal(strippedAppYaml{Deployed: true, Env: info.NonSecretEnv})
|
||||
if err != nil {
|
||||
return "", err
|
||||
body = []byte("deployed: true\nenv: {}\n")
|
||||
}
|
||||
header := "# Captured by felhom-controller recovery unit — SECRET-FREE.\n" +
|
||||
"# Secret/data-key values are intentionally omitted; recover them at restore from the\n" +
|
||||
@@ -166,12 +191,7 @@ func writeStrippedAppYaml(dst string, info RecoveryInfo) (string, error) {
|
||||
if len(info.SecretEnvVars) > 0 {
|
||||
header += "# " + strings.Join(info.SecretEnvVars, ", ") + "\n"
|
||||
}
|
||||
content := []byte(header + string(body))
|
||||
if err := atomicWrite(dst, content, 0600); err != nil {
|
||||
return "", err
|
||||
}
|
||||
sum := sha256.Sum256(content)
|
||||
return hex.EncodeToString(sum[:]), nil
|
||||
return []byte(header + string(body))
|
||||
}
|
||||
|
||||
// writeManifest writes the manifest JSON atomically.
|
||||
@@ -183,17 +203,46 @@ func writeManifest(dst string, manifest *RecoveryManifest) error {
|
||||
return atomicWrite(dst, append(data, '\n'), 0644)
|
||||
}
|
||||
|
||||
// copyFileChecksum copies src→dst and returns the sha256 of the copied bytes.
|
||||
func copyFileChecksum(src, dst string) (string, error) {
|
||||
data, err := os.ReadFile(src)
|
||||
// readManifest reads an existing recovery-unit manifest (nil if absent or unparseable).
|
||||
func readManifest(path string) *RecoveryManifest {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
return nil
|
||||
}
|
||||
if err := atomicWrite(dst, data, 0644); err != nil {
|
||||
return "", err
|
||||
var m RecoveryManifest
|
||||
if json.Unmarshal(data, &m) != nil {
|
||||
return nil
|
||||
}
|
||||
return &m
|
||||
}
|
||||
|
||||
// sha256Hex returns the lowercase hex-encoded SHA-256 digest of data.
//
// It has a single string result; there is no error path because hashing an
// in-memory byte slice cannot fail.
func sha256Hex(data []byte) string {
	sum := sha256.Sum256(data)
	return hex.EncodeToString(sum[:])
}
|
||||
|
||||
// stringMapEqual reports whether a and b hold exactly the same set of
// key/value pairs. A nil map and an empty map compare equal.
func stringMapEqual(a, b map[string]string) bool {
	if len(a) != len(b) {
		return false
	}
	for k, v := range a {
		// Use the comma-ok form: a plain b[k] yields "" for a missing key, which
		// would wrongly match an empty-string value stored under k in a.
		if bv, ok := b[k]; !ok || bv != v {
			return false
		}
	}
	return true
}
|
||||
|
||||
// stringSliceEqual reports whether a and b contain the same strings in the
// same order. Only length and elements are compared, so a nil slice and an
// empty slice are considered equal.
func stringSliceEqual(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	// Lengths match, so indexing a with b's indices is always in range.
	for idx, want := range b {
		if want != a[idx] {
			return false
		}
	}
	return true
}
|
||||
|
||||
// listFileNames returns the names of files with the given suffix in dir (sorted, none if absent).
|
||||
|
||||
Reference in New Issue
Block a user