slice 8B (controller half): app-consistent backup quiesce loop (v0.36.0)
internal/quiesce: poll /backup/due -> quiesce (stop app stacks) -> POST /backup -> poll /backup/status -> unquiesce (restart exactly those). Crash-safety: persisted marker before stopping, guaranteed unquiesce (defer), max-quiesce guard, startup Recover, single-flight. agentapi BackupDue/StartBackup/BackupStatus; stacks.RunningAppStacks(); config QuiesceConfig; main wiring. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,266 @@
|
||||
// Package quiesce implements the slice-8B app-consistent backup loop (doc 03 §6/§8): the
|
||||
// in-guest controller polls the host agent's GET /backup/due, and when due it QUIESCES (stops its
|
||||
// app stacks) → POST /backup → polls GET /backup/status to completion → UNQUIESCES (restarts
|
||||
// exactly the stacks it stopped). An agent-initiated vzdump is crash-consistent only (an LXC has
|
||||
// no fsfreeze); stopping the stacks first makes the captured state clean-shutdown-consistent.
|
||||
//
|
||||
// The correctness centerpiece is crash-safety: a stranded-down app is worse than a crash-consistent
|
||||
// backup. So: a persisted marker is written BEFORE stopping anything; unquiesce is guaranteed (it
|
||||
// runs even when the backup errors or times out); a max-quiesce bound restarts the app no matter
|
||||
// what; and on controller startup Recover() restarts any stacks left stopped by a mid-quiesce crash.
|
||||
package quiesce
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Backend abstracts the host agent's local backup API as the loop sees it. In production an
// adapter over *agentapi.Client satisfies it; the methods deal only in bools and strings so
// tests can substitute a plain fake.
type Backend interface {
	// Due reports whether a backup is currently due.
	Due(ctx context.Context) (bool, error)
	// StartBackup asks the agent to start a backup and returns the id of the started job.
	StartBackup(ctx context.Context) (jobID string, err error)
	// BackupStatus returns the backup's current phase (see the phase constants).
	BackupStatus(ctx context.Context) (phase string, err error)
}
|
||||
|
||||
// Stacks is the stack-control surface the loop drives; *stacks.Manager satisfies it.
//
// Contract for implementers: RunningAppStacks must list only stacks that are deployed, not
// protected, and currently up. The loop stops exactly that list and later restarts exactly that
// list, so anything that was already down stays down.
type Stacks interface {
	// RunningAppStacks returns the names of the app stacks eligible for quiescing.
	RunningAppStacks() []string
	// StopStack stops the named stack.
	StopStack(name string) error
	// StartStack starts the named stack.
	StartStack(name string) error
}
|
||||
|
||||
// Terminal backup phases, spelled the way the agent reports them. Any other phase string is
// treated by the loop as "still in progress".
const (
	phaseDone   = "done"   // the backup finished successfully
	phaseFailed = "failed" // the backup ended in failure
)
|
||||
|
||||
// Marker is the on-disk record of an in-progress quiesce. It serves two purposes: crash-safety
// (it is persisted BEFORE any stack is stopped, so a controller that dies mid-quiesce leaves
// behind the list of stacks Recover must restart at next startup) and single-flight (a cycle
// never starts while an active marker exists).
type Marker struct {
	// Active is true while a quiesce is in progress.
	Active bool `json:"active"`
	// StartedAt is when the quiesce began.
	StartedAt time.Time `json:"started_at"`
	// StoppedStacks lists the stacks to restart on unquiesce or recovery.
	StoppedStacks []string `json:"stopped_stacks"`
	// JobID is the agent's backup job id, recorded once the backup has been started.
	JobID string `json:"job_id"`
}
|
||||
|
||||
// Options configures a Loop.
|
||||
type Options struct {
|
||||
Backend Backend
|
||||
Stacks Stacks
|
||||
MarkerPath string // persisted marker (e.g. <data_dir>/quiesce-state.json)
|
||||
Poll time.Duration // how often to check /backup/due
|
||||
StatusPoll time.Duration // how often to poll /backup/status while quiesced
|
||||
MaxQuiesce time.Duration // hard bound on app downtime (unquiesce no matter what)
|
||||
Logger *log.Logger
|
||||
}
|
||||
|
||||
// Loop is the quiesce background loop.
|
||||
type Loop struct {
|
||||
backend Backend
|
||||
stacks Stacks
|
||||
markerPath string
|
||||
poll time.Duration
|
||||
statusPoll time.Duration
|
||||
maxQuiesce time.Duration
|
||||
logger *log.Logger
|
||||
now func() time.Time
|
||||
}
|
||||
|
||||
// New builds a Loop with sane defaults for any unset duration.
|
||||
func New(o Options) *Loop {
|
||||
if o.Poll <= 0 {
|
||||
o.Poll = 5 * time.Minute
|
||||
}
|
||||
if o.StatusPoll <= 0 {
|
||||
o.StatusPoll = 10 * time.Second
|
||||
}
|
||||
if o.MaxQuiesce <= 0 {
|
||||
o.MaxQuiesce = 30 * time.Minute
|
||||
}
|
||||
if o.Logger == nil {
|
||||
o.Logger = log.Default()
|
||||
}
|
||||
return &Loop{
|
||||
backend: o.Backend, stacks: o.Stacks, markerPath: o.MarkerPath,
|
||||
poll: o.Poll, statusPoll: o.StatusPoll, maxQuiesce: o.MaxQuiesce,
|
||||
logger: o.Logger, now: time.Now,
|
||||
}
|
||||
}
|
||||
|
||||
// Recover restarts any stacks left stopped by a controller crash mid-quiesce, then clears the
|
||||
// marker. Call ONCE at startup, before Run. Idempotent — StartStack on an already-running stack is
|
||||
// tolerated; an absent/inactive marker is a no-op.
|
||||
func (l *Loop) Recover() {
|
||||
m, ok := l.readMarker()
|
||||
if !ok || !m.Active {
|
||||
return
|
||||
}
|
||||
l.logger.Printf("[WARN] [quiesce] crash recovery: a quiesce was in progress (job %q, %d stack(s) stopped) — restarting them",
|
||||
m.JobID, len(m.StoppedStacks))
|
||||
l.restartAll(m.StoppedStacks)
|
||||
if err := l.clearMarker(); err != nil {
|
||||
l.logger.Printf("[ERROR] [quiesce] crash recovery: clear marker: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Run polls for a due backup and runs the quiesce cycle, until ctx is cancelled.
|
||||
func (l *Loop) Run(ctx context.Context) {
|
||||
l.logger.Printf("[INFO] [quiesce] loop started (poll %s, max-quiesce %s)", l.poll, l.maxQuiesce)
|
||||
ticker := time.NewTicker(l.poll)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
l.logger.Printf("[INFO] [quiesce] loop stopping")
|
||||
return
|
||||
case <-ticker.C:
|
||||
if err := l.runOnce(ctx); err != nil && ctx.Err() == nil {
|
||||
l.logger.Printf("[ERROR] [quiesce] cycle error: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// runOnce performs one due-check → (if due) quiesce → backup → poll → unquiesce cycle. Unquiesce
|
||||
// is guaranteed via the deferred closure: a backup error, a status-poll error, the max-quiesce
|
||||
// bound, or context cancellation all still restart the stacks and clear the marker.
|
||||
func (l *Loop) runOnce(ctx context.Context) error {
|
||||
// Defensive single-flight: never quiesce on top of an active marker (Recover clears one left
|
||||
// by a crash; within a process the single loop goroutine already serializes).
|
||||
if m, ok := l.readMarker(); ok && m.Active {
|
||||
l.logger.Printf("[WARN] [quiesce] a marker is already active — skipping this cycle")
|
||||
return nil
|
||||
}
|
||||
|
||||
due, err := l.backend.Due(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("check due: %w", err)
|
||||
}
|
||||
if !due {
|
||||
return nil
|
||||
}
|
||||
|
||||
running := l.stacks.RunningAppStacks()
|
||||
marker := Marker{Active: true, StartedAt: l.now(), StoppedStacks: running}
|
||||
if err := l.writeMarker(marker); err != nil {
|
||||
return fmt.Errorf("write quiesce marker (refusing to stop stacks unprotected): %w", err)
|
||||
}
|
||||
|
||||
// GUARANTEED unquiesce + marker clear — runs on every exit path below.
|
||||
unquiesced := false
|
||||
unquiesce := func(reason string) {
|
||||
if unquiesced {
|
||||
return
|
||||
}
|
||||
unquiesced = true
|
||||
l.logger.Printf("[INFO] [quiesce] unquiescing (%s): restarting %d stack(s)", reason, len(running))
|
||||
l.restartAll(running)
|
||||
if err := l.clearMarker(); err != nil {
|
||||
l.logger.Printf("[ERROR] [quiesce] clear marker: %v", err)
|
||||
}
|
||||
}
|
||||
defer unquiesce("deferred")
|
||||
|
||||
l.logger.Printf("[INFO] [quiesce] backup due — quiescing %d stack(s): %v", len(running), running)
|
||||
for _, s := range running {
|
||||
if err := l.stacks.StopStack(s); err != nil {
|
||||
l.logger.Printf("[ERROR] [quiesce] stop %s: %v (continuing)", s, err)
|
||||
}
|
||||
}
|
||||
|
||||
jobID, err := l.backend.StartBackup(ctx)
|
||||
if err != nil {
|
||||
unquiesce("backup start failed")
|
||||
return fmt.Errorf("start backup: %w", err)
|
||||
}
|
||||
marker.JobID = jobID
|
||||
_ = l.writeMarker(marker) // best-effort: record the job id for diagnosis
|
||||
l.logger.Printf("[INFO] [quiesce] backup job %s started — polling to completion", jobID)
|
||||
|
||||
deadline := l.now().Add(l.maxQuiesce)
|
||||
for {
|
||||
if !l.now().Before(deadline) {
|
||||
l.logger.Printf("[WARN] [quiesce] max-quiesce-duration (%s) exceeded for job %s — unquiescing while the backup continues on the agent",
|
||||
l.maxQuiesce, jobID)
|
||||
unquiesce("max-quiesce guard")
|
||||
return nil
|
||||
}
|
||||
phase, err := l.backend.BackupStatus(ctx)
|
||||
if err != nil {
|
||||
unquiesce("status poll failed")
|
||||
return fmt.Errorf("poll backup status: %w", err)
|
||||
}
|
||||
switch phase {
|
||||
case phaseDone:
|
||||
l.logger.Printf("[INFO] [quiesce] backup job %s done", jobID)
|
||||
unquiesce("backup done")
|
||||
return nil
|
||||
case phaseFailed:
|
||||
l.logger.Printf("[WARN] [quiesce] backup job %s failed", jobID)
|
||||
unquiesce("backup failed")
|
||||
return nil
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
unquiesce("controller shutting down")
|
||||
return ctx.Err()
|
||||
case <-time.After(l.statusPoll):
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (l *Loop) restartAll(stacks []string) {
|
||||
for _, s := range stacks {
|
||||
if err := l.stacks.StartStack(s); err != nil {
|
||||
l.logger.Printf("[ERROR] [quiesce] restart %s: %v", s, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---- marker persistence (atomic, 0600) --------------------------------------------------
|
||||
|
||||
func (l *Loop) writeMarker(m Marker) error {
|
||||
m.Active = true
|
||||
data, err := json.MarshalIndent(m, "", " ")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Dir(l.markerPath), 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
tmp := l.markerPath + ".tmp"
|
||||
if err := os.WriteFile(tmp, data, 0o600); err != nil {
|
||||
os.Remove(tmp)
|
||||
return err
|
||||
}
|
||||
return os.Rename(tmp, l.markerPath)
|
||||
}
|
||||
|
||||
func (l *Loop) readMarker() (Marker, bool) {
|
||||
data, err := os.ReadFile(l.markerPath)
|
||||
if err != nil {
|
||||
return Marker{}, false
|
||||
}
|
||||
var m Marker
|
||||
if json.Unmarshal(data, &m) != nil {
|
||||
return Marker{}, false
|
||||
}
|
||||
return m, true
|
||||
}
|
||||
|
||||
func (l *Loop) clearMarker() error {
|
||||
err := os.Remove(l.markerPath)
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
@@ -0,0 +1,302 @@
|
||||
package quiesce
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// fakeStacks is a Stacks test double: it reports a fixed set of running stacks and records, in
// call order, every stack it was asked to stop or start. All access goes through mu so a test
// may inspect it while the loop runs on another goroutine.
type fakeStacks struct {
	mu      sync.Mutex
	running []string         // what RunningAppStacks reports
	stopped []string         // names passed to StopStack, in order
	started []string         // names passed to StartStack, in order
	stopErr map[string]error // optional per-stack error returned by StopStack
}

// snapshot returns a copy of *src taken under the lock (nil when *src is empty).
func (f *fakeStacks) snapshot(src *[]string) []string {
	f.mu.Lock()
	defer f.mu.Unlock()
	return append([]string(nil), (*src)...)
}

// RunningAppStacks returns a copy of the configured running set.
func (f *fakeStacks) RunningAppStacks() []string { return f.snapshot(&f.running) }

// StopStack records the call and returns the error configured for name, if any.
func (f *fakeStacks) StopStack(name string) error {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.stopped = append(f.stopped, name)
	// Indexing a nil map yields the zero value, so an unset stopErr reports no error.
	return f.stopErr[name]
}

// StartStack records the call and always succeeds.
func (f *fakeStacks) StartStack(name string) error {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.started = append(f.started, name)
	return nil
}

// startedNames returns a copy of the names started so far, in call order.
func (f *fakeStacks) startedNames() []string { return f.snapshot(&f.started) }

// stoppedNames returns a copy of the names stopped so far, in call order.
func (f *fakeStacks) stoppedNames() []string { return f.snapshot(&f.stopped) }
|
||||
|
||||
const PhaseRunning = "running" // local alias for readability in tests

// fakeBackend is a Backend test double whose responses are scripted through its fields. It also
// counts StartBackup and BackupStatus calls.
type fakeBackend struct {
	due         bool     // value returned by Due
	dueErr      error    // error returned by Due
	startErr    error    // when set, StartBackup fails with it
	jobID       string   // job id returned by StartBackup; defaults to "job-1"
	phases      []string // returned in sequence by BackupStatus; last value repeats
	statusErr   error    // when set, BackupStatus fails with it
	startCalls  int
	statusCalls int
	mu          sync.Mutex
}

// Due returns the scripted due flag and error.
func (b *fakeBackend) Due(context.Context) (bool, error) { return b.due, b.dueErr }

// StartBackup counts the call, then returns either the scripted error or the job id
// (defaulting it to "job-1" when unset).
func (b *fakeBackend) StartBackup(context.Context) (string, error) {
	b.mu.Lock()
	b.startCalls++
	b.mu.Unlock()

	if b.startErr != nil {
		return "", b.startErr
	}
	if b.jobID == "" {
		b.jobID = "job-1"
	}
	return b.jobID, nil
}

// BackupStatus returns the scripted error if one is set (without counting the call). Otherwise
// it walks through phases one call at a time, repeats the final phase once they are exhausted,
// and reports PhaseRunning when no phases were scripted at all.
func (b *fakeBackend) BackupStatus(context.Context) (string, error) {
	if b.statusErr != nil {
		return "", b.statusErr
	}

	b.mu.Lock()
	defer b.mu.Unlock()

	call := b.statusCalls
	b.statusCalls++

	switch n := len(b.phases); {
	case call < n:
		return b.phases[call], nil
	case n == 0:
		return PhaseRunning, nil
	default:
		return b.phases[n-1], nil
	}
}
|
||||
|
||||
func testLoop(t *testing.T, be Backend, st Stacks) *Loop {
|
||||
t.Helper()
|
||||
l := New(Options{
|
||||
Backend: be,
|
||||
Stacks: st,
|
||||
MarkerPath: filepath.Join(t.TempDir(), "quiesce-state.json"),
|
||||
Poll: time.Hour,
|
||||
StatusPoll: time.Millisecond,
|
||||
MaxQuiesce: 5 * time.Second,
|
||||
Logger: log.New(io.Discard, "", 0),
|
||||
})
|
||||
return l
|
||||
}
|
||||
|
||||
// Happy path: due → stop running stacks → start backup → poll to done → restart exactly those → clear marker.
|
||||
func TestRunOnce_HappyPath(t *testing.T) {
|
||||
be := &fakeBackend{due: true, phases: []string{"running", "running", "done"}}
|
||||
st := &fakeStacks{running: []string{"nextcloud", "vaultwarden"}}
|
||||
l := testLoop(t, be, st)
|
||||
|
||||
if err := l.runOnce(context.Background()); err != nil {
|
||||
t.Fatalf("runOnce: %v", err)
|
||||
}
|
||||
if got := st.stoppedNames(); len(got) != 2 || got[0] != "nextcloud" || got[1] != "vaultwarden" {
|
||||
t.Fatalf("stopped wrong/order: %v", got)
|
||||
}
|
||||
if got := st.startedNames(); len(got) != 2 || got[0] != "nextcloud" || got[1] != "vaultwarden" {
|
||||
t.Fatalf("started wrong/order: %v", got)
|
||||
}
|
||||
if be.startCalls != 1 {
|
||||
t.Fatalf("expected 1 backup, got %d", be.startCalls)
|
||||
}
|
||||
if _, ok := l.readMarker(); ok {
|
||||
t.Fatal("marker not cleared after a successful cycle")
|
||||
}
|
||||
}
|
||||
|
||||
// Not due → nothing happens.
|
||||
func TestRunOnce_NotDue(t *testing.T) {
|
||||
be := &fakeBackend{due: false}
|
||||
st := &fakeStacks{running: []string{"a"}}
|
||||
l := testLoop(t, be, st)
|
||||
if err := l.runOnce(context.Background()); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(st.stoppedNames()) != 0 || be.startCalls != 0 {
|
||||
t.Fatal("acted while not due")
|
||||
}
|
||||
}
|
||||
|
||||
// Backup START fails → stacks STILL restarted (guaranteed unquiesce).
|
||||
func TestRunOnce_BackupStartFails_StillRestarts(t *testing.T) {
|
||||
be := &fakeBackend{due: true, startErr: errString("boom")}
|
||||
st := &fakeStacks{running: []string{"a", "b"}}
|
||||
l := testLoop(t, be, st)
|
||||
|
||||
_ = l.runOnce(context.Background())
|
||||
if got := st.startedNames(); len(got) != 2 {
|
||||
t.Fatalf("stacks not restarted after a backup-start failure: %v", got)
|
||||
}
|
||||
if _, ok := l.readMarker(); ok {
|
||||
t.Fatal("marker not cleared after a failed backup")
|
||||
}
|
||||
}
|
||||
|
||||
// Backup reports failed → stacks restarted.
|
||||
func TestRunOnce_BackupFailedPhase_Restarts(t *testing.T) {
|
||||
be := &fakeBackend{due: true, phases: []string{"running", "failed"}}
|
||||
st := &fakeStacks{running: []string{"a"}}
|
||||
l := testLoop(t, be, st)
|
||||
_ = l.runOnce(context.Background())
|
||||
if got := st.startedNames(); len(got) != 1 || got[0] != "a" {
|
||||
t.Fatalf("not restarted after failed phase: %v", got)
|
||||
}
|
||||
}
|
||||
|
||||
// Max-quiesce guard: status never reaches done → stacks restarted at the bound.
|
||||
func TestRunOnce_MaxQuiesceGuard(t *testing.T) {
|
||||
be := &fakeBackend{due: true, phases: []string{"running"}} // never done
|
||||
st := &fakeStacks{running: []string{"a", "b"}}
|
||||
l := testLoop(t, be, st)
|
||||
// shrink the bound + use a controllable clock so the guard fires fast
|
||||
base := time.Now()
|
||||
steps := 0
|
||||
l.now = func() time.Time {
|
||||
steps++
|
||||
return base.Add(time.Duration(steps) * time.Minute) // each call advances 1m
|
||||
}
|
||||
l.maxQuiesce = 2 * time.Minute
|
||||
|
||||
done := make(chan error, 1)
|
||||
go func() { done <- l.runOnce(context.Background()) }()
|
||||
select {
|
||||
case err := <-done:
|
||||
if err != nil {
|
||||
t.Fatalf("runOnce returned error: %v", err)
|
||||
}
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("runOnce did not return — max-quiesce guard did not fire")
|
||||
}
|
||||
if got := st.startedNames(); len(got) != 2 {
|
||||
t.Fatalf("stacks not restarted at the max-quiesce bound: %v", got)
|
||||
}
|
||||
if _, ok := l.readMarker(); ok {
|
||||
t.Fatal("marker not cleared after the guard fired")
|
||||
}
|
||||
}
|
||||
|
||||
// Crash recovery: a marker present at startup → recorded stacks restarted, marker cleared.
|
||||
func TestRecover_RestartsFromMarker(t *testing.T) {
|
||||
st := &fakeStacks{}
|
||||
l := testLoop(t, &fakeBackend{}, st)
|
||||
// simulate a crash mid-quiesce: an active marker with stopped stacks
|
||||
if err := l.writeMarker(Marker{Active: true, StoppedStacks: []string{"nextcloud", "immich"}, JobID: "job-x"}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
l.Recover()
|
||||
if got := st.startedNames(); len(got) != 2 || got[0] != "nextcloud" || got[1] != "immich" {
|
||||
t.Fatalf("recovery did not restart the recorded stacks: %v", got)
|
||||
}
|
||||
if _, ok := l.readMarker(); ok {
|
||||
t.Fatal("recovery did not clear the marker")
|
||||
}
|
||||
}
|
||||
|
||||
// Recover with no marker is a no-op.
|
||||
func TestRecover_NoMarker(t *testing.T) {
|
||||
st := &fakeStacks{}
|
||||
l := testLoop(t, &fakeBackend{}, st)
|
||||
l.Recover()
|
||||
if len(st.startedNames()) != 0 {
|
||||
t.Fatal("recovery restarted stacks with no marker present")
|
||||
}
|
||||
}
|
||||
|
||||
// Single-flight: a cycle that begins with an active marker present is a no-op (no second backup).
|
||||
func TestRunOnce_SingleFlight(t *testing.T) {
|
||||
be := &fakeBackend{due: true, phases: []string{"done"}}
|
||||
st := &fakeStacks{running: []string{"a"}}
|
||||
l := testLoop(t, be, st)
|
||||
if err := l.writeMarker(Marker{Active: true, StoppedStacks: []string{"a"}}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := l.runOnce(context.Background()); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if be.startCalls != 0 {
|
||||
t.Fatal("started a backup while a marker was already active")
|
||||
}
|
||||
}
|
||||
|
||||
// Only the stacks we stopped are restarted: an already-stopped stack is not in RunningAppStacks,
|
||||
// so unquiesce never starts it.
|
||||
func TestRunOnce_OnlyRestartsWhatWeStopped(t *testing.T) {
|
||||
// "db" was already stopped before quiesce → not in running → not restarted.
|
||||
be := &fakeBackend{due: true, phases: []string{"done"}}
|
||||
st := &fakeStacks{running: []string{"web"}} // only web is up
|
||||
l := testLoop(t, be, st)
|
||||
if err := l.runOnce(context.Background()); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for _, s := range st.startedNames() {
|
||||
if s == "db" {
|
||||
t.Fatal("restarted a stack that was already stopped before quiesce")
|
||||
}
|
||||
}
|
||||
if got := st.startedNames(); len(got) != 1 || got[0] != "web" {
|
||||
t.Fatalf("expected only web restarted, got %v", got)
|
||||
}
|
||||
}
|
||||
|
||||
// Marker is written BEFORE stacks are stopped (crash-safety ordering): if stop is observed, the
|
||||
// marker must already exist on disk.
|
||||
func TestRunOnce_MarkerWrittenBeforeStop(t *testing.T) {
|
||||
st := &fakeStacks{running: []string{"a"}}
|
||||
l := testLoop(t, &fakeBackend{due: true, phases: []string{"done"}}, st)
|
||||
// Wrap StopStack via a stacks decorator that checks the marker file exists at stop time.
|
||||
markerSeen := false
|
||||
dec := &stopObserver{inner: st, onStop: func() {
|
||||
if _, err := os.Stat(l.markerPath); err == nil {
|
||||
markerSeen = true
|
||||
}
|
||||
}}
|
||||
l.stacks = dec
|
||||
if err := l.runOnce(context.Background()); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !markerSeen {
|
||||
t.Fatal("marker was not on disk when the first stack was stopped (crash-safety ordering violated)")
|
||||
}
|
||||
}
|
||||
|
||||
type stopObserver struct {
|
||||
inner Stacks
|
||||
onStop func()
|
||||
}
|
||||
|
||||
func (s *stopObserver) RunningAppStacks() []string { return s.inner.RunningAppStacks() }
|
||||
func (s *stopObserver) StopStack(n string) error { s.onStop(); return s.inner.StopStack(n) }
|
||||
func (s *stopObserver) StartStack(n string) error { return s.inner.StartStack(n) }
|
||||
|
||||
// errString is a minimal string-backed error for scripting failures in tests.
type errString string

// Error returns the underlying string, satisfying the error interface.
func (msg errString) Error() string {
	return string(msg)
}
|
||||
Reference in New Issue
Block a user