controller v0.47.0: backups page — whole-guest backup visibility + manual trigger
Part 2 of the USB/backup spec. agentapi: StatusResponse.Backup record, DueResponse
age_seconds, RestoreTestStatus(). New "Rendszermentés (teljes mentés)" section
(read-only: last backup/target PBS-vs-local/next-due/restore-test) + "Mentés most"
manual trigger that goes through the quiesce loop (controller owns quiescing):
quiesce.Loop gains mutex + TriggerNow() (single-flight, async). New
/api/guest-backup/{trigger,status} (distinct from apiRouter's /api/backup/*).
App-data rows relabeled under an "Alkalmazás-mentések" divider. Config → slice 10.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -13,13 +13,19 @@ package quiesce
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ErrBackupInProgress is returned by TriggerNow when a scheduled or manual quiesce cycle is already
|
||||
// running (single-flight). The caller (the "Mentés most" handler) surfaces it as a benign 409.
|
||||
// TriggerNow returns it unwrapped, so callers can match it with errors.Is(err, ErrBackupInProgress).
var ErrBackupInProgress = errors.New("quiesce: a backup cycle is already in progress")
|
||||
|
||||
// Backend is the agent local-API surface the loop needs (satisfied by an adapter over
|
||||
// *agentapi.Client). Kept minimal (bool/string) so the loop is testable with plain fakes.
|
||||
type Backend interface {
|
||||
@@ -74,6 +80,10 @@ type Loop struct {
|
||||
maxQuiesce time.Duration
|
||||
logger *log.Logger
|
||||
now func() time.Time
|
||||
// mu single-flights the quiesce cycle across the scheduled loop AND the manual trigger, so the
|
||||
// two can never stop the same stacks concurrently (the persisted marker covers crash-safety across
|
||||
// restarts; this covers concurrency within the process — which a manual trigger introduces).
|
||||
mu sync.Mutex
|
||||
}
|
||||
|
||||
// New builds a Loop with sane defaults for any unset duration.
|
||||
@@ -135,8 +145,15 @@ func (l *Loop) Run(ctx context.Context) {
|
||||
// is guaranteed via the deferred closure: a backup error, a status-poll error, the max-quiesce
|
||||
// bound, or context cancellation all still restart the stacks and clear the marker.
|
||||
func (l *Loop) runOnce(ctx context.Context) error {
|
||||
// Single-flight: skip the scheduled check if a cycle (scheduled or manual) is already running.
|
||||
if !l.mu.TryLock() {
|
||||
l.logger.Printf("[INFO] [quiesce] a backup cycle is already running — skipping this scheduled check")
|
||||
return nil
|
||||
}
|
||||
defer l.mu.Unlock()
|
||||
|
||||
// Defensive single-flight: never quiesce on top of an active marker (Recover clears one left
|
||||
// by a crash; within a process the single loop goroutine already serializes).
|
||||
// by a crash; the mutex above serializes within the process).
|
||||
if m, ok := l.readMarker(); ok && m.Active {
|
||||
l.logger.Printf("[WARN] [quiesce] a marker is already active — skipping this cycle")
|
||||
return nil
|
||||
@@ -150,6 +167,42 @@ func (l *Loop) runOnce(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
return l.quiesceAndPoll(ctx)
|
||||
}
|
||||
|
||||
// TriggerNow forces an app-consistent backup NOW (the manual "Mentés most" action), bypassing the
|
||||
// /backup/due check. It runs the SAME quiesce flow the scheduled loop uses (stop stacks → POST
|
||||
// /backup → poll → resume), so it is app-consistent and crash-safe (marker-protected). Single-flight
|
||||
// via the same mutex: it returns ErrBackupInProgress if a scheduled or manual cycle is already
|
||||
// running. The cycle runs ASYNCHRONOUSLY (it can take minutes) on a background context bounded by
|
||||
// maxQuiesce plus a five-minute grace period; the caller polls /backup/status for progress. The controller — not the agent — owns
|
||||
// quiescing (the agent's vzdump is crash-consistent only), so this MUST go through the loop.
|
||||
func (l *Loop) TriggerNow() error {
|
||||
if !l.mu.TryLock() {
|
||||
return ErrBackupInProgress
|
||||
}
|
||||
if m, ok := l.readMarker(); ok && m.Active {
|
||||
l.mu.Unlock()
|
||||
return ErrBackupInProgress
|
||||
}
|
||||
go func() {
|
||||
defer l.mu.Unlock()
|
||||
// Detached from any request context; bounded so a hung backup still unquiesces.
|
||||
ctx, cancel := context.WithTimeout(context.Background(), l.maxQuiesce+5*time.Minute)
|
||||
defer cancel()
|
||||
l.logger.Printf("[INFO] [quiesce] manual backup requested — quiescing now")
|
||||
if err := l.quiesceAndPoll(ctx); err != nil {
|
||||
l.logger.Printf("[ERROR] [quiesce] manual backup cycle error: %v", err)
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
|
||||
// quiesceAndPoll performs the marked, guaranteed-unquiesce cycle: write marker → stop running app
|
||||
// stacks → POST /backup → poll /backup/status → restart exactly the stacks it stopped. The caller
|
||||
// MUST hold l.mu. Unquiesce is guaranteed via the deferred closure (backup error, status-poll error,
|
||||
// the max-quiesce bound, or context cancellation all still restart the stacks and clear the marker).
|
||||
func (l *Loop) quiesceAndPoll(ctx context.Context) error {
|
||||
running := l.stacks.RunningAppStacks()
|
||||
marker := Marker{Active: true, StartedAt: l.now(), StoppedStacks: running}
|
||||
if err := l.writeMarker(marker); err != nil {
|
||||
|
||||
Reference in New Issue
Block a user