v0.4.0-rc1: slice 4 Phase A — reconcile engine (structural, runs live unfed)
New internal/reconcile package: the agent-side control core's structural half.

- Per-guest serializer Queue (doc 03 §10): the single choke point all mutation sources funnel through; same-vmid serial in submit order, different vmids parallel (cond-var FIFO lanes).
- Desired-state model + DesiredProvider seam; EmptyProvider is the only live source at slice 4 (no hub serving until slice 10) so the live engine computes an empty action set and performs zero mutations.
- Normalization layer (FieldNormalizers): normalized desired-vs-actual so Proxmox round-trip quirks don't read as drift. normDesc promoted out of main.go to reconcile.NormDescription; selftest uses the shared helper.
- Plan (pure diff): minimal benign action set (Start/Stop/SetConfig) for guests in both desired and actual; provision/destroy out of scope here.
- Engine: dispatches onto the shared queue; honors the dual-mode SetConfig contract (UPID -> WaitTask; empty UPID -> synchronous success).
- Durable op journal + idempotency store (mirrors authz.FileNonceStore): in-flight task ids for crash detection + AlreadyApplied dedupe across restart.
- Wired into runDaemon alongside the hub loop, sharing the queue; runs cleanly with no desired state and no signers.

Full module race-clean and vet-clean on the Linux build server.

CHECKPOINT: Phase A only. Awaiting validation before Phase B (the reversibility gate + signed-op consuming layer, landing v0.4.0).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,154 @@
|
||||
package reconcile
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func appendRaw(t *testing.T, path, line string) {
|
||||
t.Helper()
|
||||
f, err := os.OpenFile(path, os.O_WRONLY|os.O_APPEND, 0o600)
|
||||
if err != nil {
|
||||
t.Fatalf("open for raw append: %v", err)
|
||||
}
|
||||
defer f.Close()
|
||||
if _, err := f.WriteString(line + "\n"); err != nil {
|
||||
t.Fatalf("raw append: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func reopen(t *testing.T, j *Journal, path string) *Journal {
|
||||
t.Helper()
|
||||
if err := j.Close(); err != nil {
|
||||
t.Fatalf("close: %v", err)
|
||||
}
|
||||
nj, err := OpenJournal(path)
|
||||
if err != nil {
|
||||
t.Fatalf("reopen: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { nj.Close() })
|
||||
return nj
|
||||
}
|
||||
|
||||
func TestJournal_LifecycleLatestWins(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "journal.log")
|
||||
j, err := OpenJournal(path)
|
||||
if err != nil {
|
||||
t.Fatalf("open: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { j.Close() })
|
||||
|
||||
now := time.Now().UTC()
|
||||
for _, e := range []JournalEntry{
|
||||
{OpID: "op1", VMID: 100, Kind: "start", State: OpStarted, At: now},
|
||||
{OpID: "op1", VMID: 100, Kind: "start", UPID: "UPID:x:", State: OpTaskRunning, At: now},
|
||||
{OpID: "op1", VMID: 100, Kind: "start", UPID: "UPID:x:", State: OpSucceeded, At: now},
|
||||
} {
|
||||
if err := j.Append(e); err != nil {
|
||||
t.Fatalf("append: %v", err)
|
||||
}
|
||||
}
|
||||
got, ok := j.Latest("op1")
|
||||
if !ok || got.State != OpSucceeded {
|
||||
t.Fatalf("Latest(op1) = %+v ok=%v, want succeeded", got, ok)
|
||||
}
|
||||
if len(j.InFlight()) != 0 {
|
||||
t.Errorf("a succeeded op must not be in-flight: %+v", j.InFlight())
|
||||
}
|
||||
}
|
||||
|
||||
func TestJournal_InFlightSurvivesRestart(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "journal.log")
|
||||
j, err := OpenJournal(path)
|
||||
if err != nil {
|
||||
t.Fatalf("open: %v", err)
|
||||
}
|
||||
now := time.Now().UTC()
|
||||
// op started + got a task id, but NO terminal record — simulates a crash mid-op.
|
||||
mustAppend(t, j, JournalEntry{OpID: "op9", VMID: 100, Kind: "set_config", UPID: "UPID:crash:", State: OpTaskRunning, At: now})
|
||||
|
||||
j2 := reopen(t, j, path)
|
||||
inflight := j2.InFlight()
|
||||
if len(inflight) != 1 || inflight[0].OpID != "op9" || inflight[0].UPID != "UPID:crash:" {
|
||||
t.Fatalf("crash-mid-op should replay as in-flight with its task id, got %+v", inflight)
|
||||
}
|
||||
}
|
||||
|
||||
func TestJournal_IdempotencyDedupeAcrossRestart(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "journal.log")
|
||||
j, err := OpenJournal(path)
|
||||
if err != nil {
|
||||
t.Fatalf("open: %v", err)
|
||||
}
|
||||
now := time.Now().UTC()
|
||||
|
||||
const key = "job-abc-123"
|
||||
if j.AlreadyApplied(key) {
|
||||
t.Fatal("key should not be applied before any record")
|
||||
}
|
||||
// A one-shot op succeeds carrying an idempotency key.
|
||||
mustAppend(t, j, JournalEntry{OpID: "op1", VMID: 100, Kind: "restore", IdempKey: key, State: OpStarted, At: now})
|
||||
mustAppend(t, j, JournalEntry{OpID: "op1", VMID: 100, Kind: "restore", IdempKey: key, State: OpSucceeded, At: now})
|
||||
if !j.AlreadyApplied(key) {
|
||||
t.Fatal("key should be applied after success")
|
||||
}
|
||||
|
||||
// Survives a restart (replayed from the log) — a redelivered job must not re-run.
|
||||
j2 := reopen(t, j, path)
|
||||
if !j2.AlreadyApplied(key) {
|
||||
t.Error("idempotency key must survive an agent restart")
|
||||
}
|
||||
// Empty key is never 'applied'.
|
||||
if j2.AlreadyApplied("") {
|
||||
t.Error("empty idempotency key must never be considered applied")
|
||||
}
|
||||
}
|
||||
|
||||
func TestJournal_FailedKeyNotApplied(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "journal.log")
|
||||
j, err := OpenJournal(path)
|
||||
if err != nil {
|
||||
t.Fatalf("open: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { j.Close() })
|
||||
now := time.Now().UTC()
|
||||
const key = "job-fail"
|
||||
mustAppend(t, j, JournalEntry{OpID: "opF", VMID: 1, Kind: "restore", IdempKey: key, State: OpStarted, At: now})
|
||||
mustAppend(t, j, JournalEntry{OpID: "opF", VMID: 1, Kind: "restore", IdempKey: key, State: OpFailed, At: now})
|
||||
if j.AlreadyApplied(key) {
|
||||
t.Error("a FAILED one-shot op must not mark its key applied (it may be retried)")
|
||||
}
|
||||
}
|
||||
|
||||
func TestJournal_SkipsTornTrailingLine(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "journal.log")
|
||||
j, err := OpenJournal(path)
|
||||
if err != nil {
|
||||
t.Fatalf("open: %v", err)
|
||||
}
|
||||
mustAppend(t, j, JournalEntry{OpID: "ok", VMID: 1, Kind: "start", State: OpSucceeded, At: time.Now().UTC()})
|
||||
j.Close()
|
||||
// Append a torn (partial) JSON line as a crash would leave.
|
||||
appendRaw(t, path, `{"op_id":"torn","state":`)
|
||||
|
||||
j2, err := OpenJournal(path)
|
||||
if err != nil {
|
||||
t.Fatalf("reopen with torn line: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { j2.Close() })
|
||||
if _, ok := j2.Latest("ok"); !ok {
|
||||
t.Error("the good record before the torn line must still load")
|
||||
}
|
||||
if _, ok := j2.Latest("torn"); ok {
|
||||
t.Error("the torn line must be skipped")
|
||||
}
|
||||
}
|
||||
|
||||
func mustAppend(t *testing.T, j *Journal, e JournalEntry) {
|
||||
t.Helper()
|
||||
if err := j.Append(e); err != nil {
|
||||
t.Fatalf("append: %v", err)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user