slice 8B (controller half): app-consistent backup quiesce loop (v0.36.0)

internal/quiesce: poll /backup/due -> quiesce (stop app stacks) -> POST /backup
-> poll /backup/status -> unquiesce (restart exactly those). Crash-safety:
persisted marker before stopping, guaranteed unquiesce (defer), max-quiesce
guard, startup Recover, single-flight. agentapi BackupDue/StartBackup/
BackupStatus; stacks.RunningAppStacks(); config QuiesceConfig; main wiring.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-10 10:44:52 +02:00
parent 10685b771c
commit 68fc153d9c
7 changed files with 813 additions and 0 deletions
+22
View File
@@ -223,6 +223,28 @@ func (m *Manager) DeployedStackNames() []string {
return names
}
// RunningAppStacks reports which application stacks are currently up, by name.
//
// A stack is included only when all of the following hold:
//   - it is marked Deployed;
//   - it is not protected, either through its own Protected flag or through the
//     configuration's IsProtectedStack check (protected infra such as traefik,
//     cloudflared and felhom-controller must never be stopped by the controller);
//   - its State is one of running, starting, unhealthy or restarting.
//
// This is the set the quiesce loop (slice 8B) stops ahead of an app-consistent
// backup and starts again afterwards. The result is sorted alphabetically so the
// stop/start order is deterministic; it is nil when no stack qualifies.
func (m *Manager) RunningAppStacks() []string {
	// Hold the read lock for the whole walk so the stack map cannot change under us.
	m.mu.RLock()
	defer m.mu.RUnlock()

	var running []string
	for stackName, s := range m.stacks {
		// Only deployed stacks are candidates.
		if !s.Deployed {
			continue
		}
		// Never offer protected infrastructure for quiescing.
		if s.Protected || m.cfg.IsProtectedStack(stackName) {
			continue
		}
		// Any state that implies live containers counts as "up".
		up := s.State == StateRunning ||
			s.State == StateStarting ||
			s.State == StateUnhealthy ||
			s.State == StateRestarting
		if up {
			running = append(running, stackName)
		}
	}

	// Map iteration order is random; sort for a stable result.
	sort.Strings(running)
	return running
}
// ScanStacks discovers all compose stacks in the stacks directory.
func (m *Manager) ScanStacks() error {
m.mu.Lock()