05c450147c
New internal/reconcile package: the agent-side control core's structural half. - Per-guest serializer Queue (doc 03 §10): the single choke point all mutation sources funnel through; same-vmid serial in submit order, different vmids parallel (cond-var FIFO lanes). - Desired-state model + DesiredProvider seam; EmptyProvider is the only live source at slice 4 (no hub serving until slice 10) so the live engine computes an empty action set and performs zero mutations. - Normalization layer (FieldNormalizers): normalized desired-vs-actual so Proxmox round-trip quirks don't read as drift. normDesc promoted out of main.go to reconcile.NormDescription; selftest uses the shared helper. - Plan (pure diff): minimal benign action set (Start/Stop/SetConfig) for guests in both desired and actual; provision/destroy out of scope here. - Engine: dispatches onto the shared queue; honors the dual-mode SetConfig contract (UPID -> WaitTask; empty UPID -> synchronous success). - Durable op journal + idempotency store (mirrors authz.FileNonceStore): in-flight task ids for crash detection + AlreadyApplied dedupe across restart. - Wired into runDaemon alongside the hub loop, sharing the queue; runs cleanly with no desired state and no signers. Full module race-clean and vet-clean on the Linux build server. CHECKPOINT: Phase A only. Awaiting validation before Phase B (the reversibility gate + signed-op consuming layer, landing v0.4.0). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
213 lines
6.4 KiB
Go
213 lines
6.4 KiB
Go
package reconcile
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"path/filepath"
|
|
"sync"
|
|
"testing"
|
|
|
|
"gitea.dooplex.hu/admin/felhom-agent/internal/hub"
|
|
"gitea.dooplex.hu/admin/felhom-agent/internal/proxmox"
|
|
)
|
|
|
|
// fakeAPI is a configurable GuestAPI for engine tests: it records mutating calls and
|
|
// returns canned UPIDs (""=synchronous, non-empty=async) and WaitTask verdicts.
|
|
type fakeAPI struct {
|
|
mu sync.Mutex
|
|
lxc []proxmox.Guest
|
|
cfg map[int]proxmox.GuestConfig
|
|
|
|
startUPID, stopUPID, setUPID string
|
|
startErr, stopErr, setErr error
|
|
// waitFunc maps a UPID to a (status, err); default = OK. Mirrors the real client,
|
|
// which errors on a non-OK exitstatus.
|
|
waitFunc func(upid string) (proxmox.TaskStatus, error)
|
|
|
|
starts []int
|
|
stops []int
|
|
sets []setCall
|
|
waits []string
|
|
listErr error
|
|
}
|
|
|
|
// setCall captures the arguments of a single SetConfig invocation on fakeAPI.
type setCall struct {
	vmid   int               // guest id the call targeted
	params map[string]string // config parameters passed to the call
}
|
|
|
|
func (f *fakeAPI) ListLXC(context.Context) ([]proxmox.Guest, error) {
|
|
if f.listErr != nil {
|
|
return nil, f.listErr
|
|
}
|
|
return f.lxc, nil
|
|
}
|
|
|
|
func (f *fakeAPI) GuestConfig(_ context.Context, vmid int) (proxmox.GuestConfig, error) {
|
|
c, ok := f.cfg[vmid]
|
|
if !ok {
|
|
return proxmox.GuestConfig{}, errors.New("no config")
|
|
}
|
|
return c, nil
|
|
}
|
|
|
|
func (f *fakeAPI) Start(_ context.Context, vmid int) (string, error) {
|
|
f.mu.Lock()
|
|
f.starts = append(f.starts, vmid)
|
|
f.mu.Unlock()
|
|
return f.startUPID, f.startErr
|
|
}
|
|
|
|
func (f *fakeAPI) Stop(_ context.Context, vmid int) (string, error) {
|
|
f.mu.Lock()
|
|
f.stops = append(f.stops, vmid)
|
|
f.mu.Unlock()
|
|
return f.stopUPID, f.stopErr
|
|
}
|
|
|
|
func (f *fakeAPI) SetConfig(_ context.Context, vmid int, params map[string]string) (string, error) {
|
|
f.mu.Lock()
|
|
f.sets = append(f.sets, setCall{vmid, params})
|
|
f.mu.Unlock()
|
|
return f.setUPID, f.setErr
|
|
}
|
|
|
|
func (f *fakeAPI) WaitTask(_ context.Context, upid string, _ proxmox.WaitOptions) (proxmox.TaskStatus, error) {
|
|
f.mu.Lock()
|
|
f.waits = append(f.waits, upid)
|
|
f.mu.Unlock()
|
|
if f.waitFunc != nil {
|
|
return f.waitFunc(upid)
|
|
}
|
|
return proxmox.TaskStatus{Status: "stopped", ExitStatus: "OK"}, nil
|
|
}
|
|
|
|
func newEngine(t *testing.T, api GuestAPI, provider DesiredProvider) (*Engine, *Journal, *Queue) {
|
|
t.Helper()
|
|
jp := filepath.Join(t.TempDir(), "journal.log")
|
|
j, err := OpenJournal(jp)
|
|
if err != nil {
|
|
t.Fatalf("OpenJournal: %v", err)
|
|
}
|
|
t.Cleanup(func() { j.Close() })
|
|
q := NewQueue()
|
|
t.Cleanup(q.Close)
|
|
e := NewEngine(EngineOptions{API: api, Queue: q, Journal: j, Provider: provider})
|
|
return e, j, q
|
|
}
|
|
|
|
func TestEngine_EmptyProviderNoMutations(t *testing.T) {
|
|
api := &fakeAPI{
|
|
lxc: []proxmox.Guest{{VMID: 100, Status: "running"}},
|
|
cfg: map[int]proxmox.GuestConfig{100: {Cores: 2}},
|
|
}
|
|
e, _, _ := newEngine(t, api, EmptyProvider{})
|
|
res, err := e.Reconcile(context.Background())
|
|
if err != nil {
|
|
t.Fatalf("Reconcile: %v", err)
|
|
}
|
|
if res.Planned != 0 || res.Executed != 0 {
|
|
t.Errorf("EmptyProvider should plan nothing, got %+v", res)
|
|
}
|
|
if len(api.starts)+len(api.stops)+len(api.sets) != 0 {
|
|
t.Errorf("EmptyProvider mutated Proxmox: starts=%v stops=%v sets=%v", api.starts, api.stops, api.sets)
|
|
}
|
|
}
|
|
|
|
func TestEngine_AsyncStartWaitsTask(t *testing.T) {
|
|
api := &fakeAPI{
|
|
lxc: []proxmox.Guest{{VMID: 100, Status: "stopped"}},
|
|
cfg: map[int]proxmox.GuestConfig{100: {Cores: 2}},
|
|
startUPID: "UPID:demo:start:100:",
|
|
}
|
|
e, j, _ := newEngine(t, api, StaticProvider{State: desired(DesiredGuest{VMID: 100, Run: RunRunning})})
|
|
res, err := e.Reconcile(context.Background())
|
|
if err != nil {
|
|
t.Fatalf("Reconcile: %v", err)
|
|
}
|
|
if res.Executed != 1 || res.Failed != 0 {
|
|
t.Fatalf("want 1 executed, got %+v", res)
|
|
}
|
|
if len(api.starts) != 1 || api.starts[0] != 100 {
|
|
t.Errorf("expected Start(100), got %v", api.starts)
|
|
}
|
|
if len(api.waits) != 1 {
|
|
t.Errorf("async op must WaitTask, got waits=%v", api.waits)
|
|
}
|
|
if len(j.InFlight()) != 0 {
|
|
t.Errorf("no ops should be in-flight after success: %+v", j.InFlight())
|
|
}
|
|
}
|
|
|
|
func TestEngine_SynchronousSetConfigNoWait(t *testing.T) {
|
|
// Empty UPID = PVE applied synchronously (slice-4 proven for description). Must be
|
|
// treated as success WITHOUT a WaitTask call.
|
|
api := &fakeAPI{
|
|
lxc: []proxmox.Guest{{VMID: 100, Status: "stopped"}},
|
|
cfg: map[int]proxmox.GuestConfig{100: {Cores: 2}},
|
|
setUPID: "", // synchronous
|
|
}
|
|
e, _, _ := newEngine(t, api, StaticProvider{State: desired(
|
|
DesiredGuest{VMID: 100, Spec: &hub.GuestSpec{Cores: 4, MemoryBytes: mib(2048)}})})
|
|
res, err := e.Reconcile(context.Background())
|
|
if err != nil {
|
|
t.Fatalf("Reconcile: %v", err)
|
|
}
|
|
if res.Executed != 1 {
|
|
t.Fatalf("want 1 executed, got %+v", res)
|
|
}
|
|
if len(api.sets) != 1 || api.sets[0].params["cores"] != "4" {
|
|
t.Errorf("expected SetConfig cores=4, got %v", api.sets)
|
|
}
|
|
if len(api.waits) != 0 {
|
|
t.Errorf("synchronous op must NOT WaitTask, got waits=%v", api.waits)
|
|
}
|
|
}
|
|
|
|
func TestEngine_WaitTaskFailureCountsFailed(t *testing.T) {
|
|
api := &fakeAPI{
|
|
lxc: []proxmox.Guest{{VMID: 100, Status: "stopped"}},
|
|
cfg: map[int]proxmox.GuestConfig{100: {Cores: 2}},
|
|
startUPID: "UPID:demo:start:100:",
|
|
waitFunc: func(string) (proxmox.TaskStatus, error) {
|
|
return proxmox.TaskStatus{Status: "stopped", ExitStatus: "got 403"}, errors.New("task failed: got 403")
|
|
},
|
|
}
|
|
e, j, _ := newEngine(t, api, StaticProvider{State: desired(DesiredGuest{VMID: 100, Run: RunRunning})})
|
|
res, err := e.Reconcile(context.Background())
|
|
if err != nil {
|
|
t.Fatalf("Reconcile (pass): %v", err)
|
|
}
|
|
if res.Failed != 1 || res.Executed != 0 {
|
|
t.Fatalf("want 1 failed, got %+v", res)
|
|
}
|
|
// The failed op is journaled terminal (failed), not left in-flight.
|
|
if len(j.InFlight()) != 0 {
|
|
t.Errorf("failed op should be terminal, in-flight=%+v", j.InFlight())
|
|
}
|
|
}
|
|
|
|
func TestEngine_PostErrorCountsFailed(t *testing.T) {
|
|
api := &fakeAPI{
|
|
lxc: []proxmox.Guest{{VMID: 100, Status: "stopped"}},
|
|
cfg: map[int]proxmox.GuestConfig{100: {Cores: 2}},
|
|
startErr: errors.New("connection refused"),
|
|
}
|
|
e, _, _ := newEngine(t, api, StaticProvider{State: desired(DesiredGuest{VMID: 100, Run: RunRunning})})
|
|
res, _ := e.Reconcile(context.Background())
|
|
if res.Failed != 1 {
|
|
t.Fatalf("want 1 failed on POST error, got %+v", res)
|
|
}
|
|
if len(api.waits) != 0 {
|
|
t.Errorf("POST error must not reach WaitTask, got %v", api.waits)
|
|
}
|
|
}
|
|
|
|
func TestEngine_ListErrorIsPassFailure(t *testing.T) {
|
|
api := &fakeAPI{listErr: errors.New("api down")}
|
|
e, _, _ := newEngine(t, api, StaticProvider{State: desired(DesiredGuest{VMID: 100, Run: RunRunning})})
|
|
if _, err := e.Reconcile(context.Background()); err == nil {
|
|
t.Error("expected a pass-level error when actual state can't be read")
|
|
}
|
|
}
|