Files
felhom-controller/controller/internal/quiesce/quiesce_8b2_test.go
T
admin e4b69ac9e5 slice 8B.2 (controller): resume app at snapshotted, keep tracking to done (v0.38.0)
Quiesce loop resumes (StartStack + clear marker) at the snapshotted phase
instead of at done, so downtime shrinks from the whole backup to just until the snapshot, with no consistency loss.
It keeps polling to done/failed (so no overlapping backup is started and a post-snapshot failure is still
observed). Stop-mode fallback to done and crash-safety are preserved.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-10 14:54:19 +02:00

136 lines
3.8 KiB
Go

package quiesce
import (
"context"
"io"
"log"
"path/filepath"
"sync"
"testing"
"time"
)
// eventStacks is a fake stack controller that appends a "RESUME" entry to an ordered event
// log on every StartStack call. The log (and its mutex) is shared with eventBackend, so a
// test can assert that the resume happened at the `snapshotted` poll, before `done`.
type eventStacks struct {
	mu      *sync.Mutex // guards *events
	events  *[]string   // ordered event log, reached through the pointer
	running []string    // stack names reported by RunningAppStacks
}

// RunningAppStacks returns a copy of the configured running stacks, so callers cannot
// modify the fake's own slice. An empty configuration yields nil.
func (s *eventStacks) RunningAppStacks() []string {
	var snapshot []string
	return append(snapshot, s.running...)
}

// StopStack is a no-op that always succeeds; stops are not written to the event log.
func (s *eventStacks) StopStack(_ string) error { return nil }

// StartStack appends "RESUME" to the event log under the mutex and always succeeds.
func (s *eventStacks) StartStack(_ string) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	*s.events = append(*s.events, "RESUME")
	return nil
}
// eventBackend is a fake backup backend that replays a scripted sequence of phases and
// appends every phase it reports to the event log.
type eventBackend struct {
	mu     *sync.Mutex // guards *events
	events *[]string   // ordered event log, reached through the pointer
	phases []string    // scripted phases, handed out one per BackupStatus call
	i      int         // index of the next scripted phase
}

// Due always reports that a backup is due.
func (b *eventBackend) Due(_ context.Context) (bool, error) { return true, nil }

// StartBackup always succeeds and returns the fixed string "job-1".
func (b *eventBackend) StartBackup(_ context.Context) (string, error) { return "job-1", nil }

// BackupStatus returns the next scripted phase and appends it to the event log. Once the
// script is exhausted it keeps returning the final phase. The returned error is always nil.
func (b *eventBackend) BackupStatus(_ context.Context) (string, error) {
	var phase string
	if b.i < len(b.phases) {
		phase = b.phases[b.i]
		b.i++
	} else {
		// Script exhausted: keep repeating the last phase.
		phase = b.phases[len(b.phases)-1]
	}

	b.mu.Lock()
	defer b.mu.Unlock()
	*b.events = append(*b.events, phase)
	return phase, nil
}
// eventLoop builds a Loop wired to an eventBackend that replays phases and to an eventStacks
// that reports a single running stack ("pgapp"). Both fakes append to the same event log
// under the same mutex, so the log preserves the relative order of status polls and resumes.
//
// It returns the loop, a pointer to the shared event log, and the stacks fake. The marker
// file lives in a per-test temporary directory.
func eventLoop(t *testing.T, phases []string) (*Loop, *[]string, *eventStacks) {
	t.Helper() // attribute failures reported from this helper to the calling test

	var mu sync.Mutex
	events := &[]string{}
	st := &eventStacks{mu: &mu, events: events, running: []string{"pgapp"}}
	be := &eventBackend{mu: &mu, events: events, phases: phases}
	l := New(Options{
		Backend:    be,
		Stacks:     st,
		MarkerPath: filepath.Join(t.TempDir(), "q.json"),
		// NOTE(review): presumably a long Poll keeps the outer schedule out of the way while
		// the 1ms StatusPoll lets the scripted phases be consumed quickly — confirm against Options.
		Poll:       time.Hour,
		StatusPoll: time.Millisecond,
		MaxQuiesce: 5 * time.Second,
		Logger:     log.New(io.Discard, "", 0),
	})
	return l, events, st
}
// 8B.2: resume at `snapshotted` (RESUME before `done`), then keep tracking to `done`; marker cleared.
//
// The scripted phases are running → snapshotted → running → done. The test asserts that a
// RESUME event exists, that it precedes the first `done`, that the event immediately before
// it is a `snapshotted` poll, and that no marker is left behind.
func TestRunOnce_ResumesAtSnapshotted(t *testing.T) {
	l, events, _ := eventLoop(t, []string{"running", "snapshotted", "running", "done"})
	if err := l.runOnce(context.Background()); err != nil {
		t.Fatal(err)
	}
	got := *events

	// Locate the first RESUME and the first "done" in the ordered log.
	resumeIdx, doneIdx := -1, -1
	for i, e := range got {
		if e == "RESUME" && resumeIdx < 0 {
			resumeIdx = i
		}
		if e == "done" && doneIdx < 0 {
			doneIdx = i
		}
	}
	if resumeIdx < 0 {
		t.Fatalf("never resumed: %v", got)
	}
	if doneIdx < 0 {
		t.Fatalf("never tracked to done (must keep polling after early resume): %v", got)
	}
	if resumeIdx > doneIdx {
		t.Fatalf("resumed at/after done, not at snapshotted: %v", got)
	}
	// The event right before RESUME should be a snapshotted poll. A RESUME at index 0 has no
	// preceding poll at all; report that as a failure instead of indexing got[-1] and panicking.
	if resumeIdx == 0 || got[resumeIdx-1] != "snapshotted" {
		t.Fatalf("resume not triggered by snapshotted: %v", got)
	}
	if _, ok := l.readMarker(); ok {
		t.Fatal("marker not cleared after resume")
	}
}
// Fallback: stop mode (never snapshotted) → resume at `done` (8B behavior).
//
// The scripted phases are running → running → done. The test asserts that no `snapshotted`
// event shows up, that `done` was observed, and that a RESUME exists and does not precede
// the first `done`.
func TestRunOnce_FallbackResumeAtDone(t *testing.T) {
	l, events, _ := eventLoop(t, []string{"running", "running", "done"})
	if err := l.runOnce(context.Background()); err != nil {
		t.Fatal(err)
	}
	got := *events

	// Locate the first RESUME and the first "done"; any snapshotted event is a failure.
	resumeIdx, doneIdx := -1, -1
	for i, e := range got {
		switch e {
		case "snapshotted":
			t.Fatal("snapshotted appeared in stop-mode stream")
		case "RESUME":
			if resumeIdx < 0 {
				resumeIdx = i
			}
		case "done":
			if doneIdx < 0 {
				doneIdx = i
			}
		}
	}
	// Checking indices (rather than only the last event) also covers an empty log, which
	// previously caused an index-out-of-range panic, and a run that never resumed at all,
	// which previously passed because the last event was "done".
	if doneIdx < 0 {
		t.Fatalf("never tracked to done: %v", got)
	}
	if resumeIdx < 0 {
		t.Fatalf("never resumed: %v", got)
	}
	if resumeIdx < doneIdx {
		t.Fatalf("expected resume at done: %v", got)
	}
}
// A backup that FAILS after `snapshotted`: the scripted phases are snapshotted → failed.
// The test expects runOnce to return nil, exactly one RESUME in the event log (the one
// triggered at snapshotted), and no marker left behind.
func TestRunOnce_FailAfterSnapshotted_AppStaysUp(t *testing.T) {
	lp, evs, _ := eventLoop(t, []string{"snapshotted", "failed"})

	err := lp.runOnce(context.Background())
	if err != nil {
		t.Fatal(err)
	}

	// Count how many times the stacks fake was asked to start the app.
	var resumeCount int
	for i := 0; i < len(*evs); i++ {
		if (*evs)[i] == "RESUME" {
			resumeCount++
		}
	}
	if resumeCount != 1 {
		t.Fatalf("expected exactly one resume (at snapshotted), got %d: %v", resumeCount, *evs)
	}

	if _, present := lp.readMarker(); present {
		t.Fatal("marker not cleared")
	}
}