slice 8B (controller half): app-consistent backup quiesce loop (v0.36.0)
internal/quiesce: poll /backup/due -> quiesce (stop app stacks) -> POST /backup -> poll /backup/status -> unquiesce (restart exactly those). Crash-safety: persisted marker before stopping, guaranteed unquiesce (defer), max-quiesce guard, startup Recover, single-flight. agentapi BackupDue/StartBackup/BackupStatus; stacks.RunningAppStacks(); config QuiesceConfig; main wiring. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -31,6 +31,7 @@ import (
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/metrics"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/monitor"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/notify"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/quiesce"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/recovery"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/report"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/scheduler"
|
||||
@@ -154,6 +155,11 @@ func main() {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
// --- Quiesce loop (slice 8B): app-consistent backup around the agent vzdump ---
|
||||
// Runs only when the local API is configured (a provisioned guest) and quiesce is enabled.
|
||||
// Recover FIRST (restart any stacks left stopped by a crash mid-quiesce), then start the loop.
|
||||
startQuiesceLoop(ctx, cfg, stackMgr, logger)
|
||||
|
||||
// --- Start CPU collector ---
|
||||
cpuCollector := system.NewCPUCollector(5 * time.Second)
|
||||
cpuCollector.Start(ctx)
|
||||
@@ -1301,6 +1307,67 @@ func fileExists(path string) bool {
|
||||
return err == nil
|
||||
}
|
||||
|
||||
// quiesceBackend adapts *agentapi.Client to quiesce.Backend (bool/string, decoupled from the
|
||||
// agentapi response structs).
|
||||
type quiesceBackend struct{ c *agentapi.Client }
|
||||
|
||||
func (b quiesceBackend) Due(ctx context.Context) (bool, error) {
|
||||
r, err := b.c.BackupDue(ctx)
|
||||
return r.Due, err
|
||||
}
|
||||
func (b quiesceBackend) StartBackup(ctx context.Context) (string, error) {
|
||||
r, err := b.c.StartBackup(ctx)
|
||||
return r.JobID, err
|
||||
}
|
||||
func (b quiesceBackend) BackupStatus(ctx context.Context) (string, error) {
|
||||
r, err := b.c.BackupStatus(ctx)
|
||||
return r.Phase, err
|
||||
}
|
||||
|
||||
// startQuiesceLoop wires + starts the slice-8B quiesce loop when the local API is configured and
|
||||
// quiesce is enabled. It Recovers (restarts stacks left stopped by a mid-quiesce crash) before
|
||||
// starting the loop goroutine. Non-fatal: any misconfig disables the loop with a log line.
|
||||
func startQuiesceLoop(ctx context.Context, cfg *config.Config, stackMgr *stacks.Manager, logger *log.Logger) {
|
||||
if cfg.LocalAPI.Endpoint == "" || cfg.LocalAPI.Token == "" {
|
||||
return // not a provisioned guest — no agent to back up against
|
||||
}
|
||||
if !cfg.Quiesce.QuiesceEnabled() {
|
||||
logger.Printf("[INFO] [quiesce] disabled by config")
|
||||
return
|
||||
}
|
||||
client, err := agentapi.New(cfg.LocalAPI.Endpoint, cfg.LocalAPI.Token, cfg.LocalAPI.Fingerprint)
|
||||
if err != nil {
|
||||
logger.Printf("[WARN] [quiesce] disabled (agent client init failed): %v", err)
|
||||
return
|
||||
}
|
||||
poll := parseDurationOr(cfg.Quiesce.PollInterval, 5*time.Minute)
|
||||
statusPoll := parseDurationOr(cfg.Quiesce.StatusPoll, 10*time.Second)
|
||||
maxQuiesce := parseDurationOr(cfg.Quiesce.MaxQuiesce, 30*time.Minute)
|
||||
loop := quiesce.New(quiesce.Options{
|
||||
Backend: quiesceBackend{c: client},
|
||||
Stacks: stackMgr,
|
||||
MarkerPath: filepath.Join(cfg.Paths.DataDir, "quiesce-state.json"),
|
||||
Poll: poll,
|
||||
StatusPoll: statusPoll,
|
||||
MaxQuiesce: maxQuiesce,
|
||||
Logger: logger,
|
||||
})
|
||||
loop.Recover() // crash-safety: restart any stacks stranded-down by a mid-quiesce crash
|
||||
go loop.Run(ctx)
|
||||
}
|
||||
|
||||
// parseDurationOr converts s with time.ParseDuration and returns the result only when it
// parses cleanly and is strictly positive. An empty string, a parse error, or a zero/negative
// duration all yield def instead.
func parseDurationOr(s string, def time.Duration) time.Duration {
	if s != "" {
		if parsed, err := time.ParseDuration(s); err == nil && parsed > 0 {
			return parsed
		}
	}
	return def
}
|
||||
|
||||
// probeLocalAPI proves the controller↔agent local-API channel at startup and logs this guest's
|
||||
// mounts (slice 8A). Non-fatal: it only runs when a local-API endpoint is configured, and any
|
||||
// error is logged for diagnosis without affecting the controller's boot. The leaf SHA-256 from
|
||||
|
||||
Reference in New Issue
Block a user