1af21a6cac
The security core of slice 4: hub-supplied intent is no longer trusted for destructive change. The gate fronts the per-guest queue's executor, so every mutation passes it. Reuses internal/authz for all crypto (surface untouched). - Classifier (doc 03 §4): benign vs destructive by provenance + data-bearing-ness, NOT by verb. Destroy/overwrite of customer data is destructive unless agent-internal provenance (same-journaled-txn create, or agent-tagged scratch) makes it benign — and that provenance is journal-recorded, NEVER hub-sourced. Unknown op class fails safe to destructive. - Reversibility gate: benign -> allowed unsigned; destructive -> requires a verified, role-scoped, action-bound operator signature, else pending_signature and never executed. Every decision audited (signal, never the guard). - Signed-op consuming layer over authz.Verifier.Verify (locked pipeline untouched): role-scoping (doc 04 §4 — recovery=rotation only, operational=ordinary destructive + planned rotation) + op-to-action binding (op+host+guest+params must match the gated action). - Signed-job orchestration: idempotency dedupe by nonce + journal-wrapped execution via an injected DestructiveExecutor (nil this slice — inert). - Crash recovery (Note 1): Engine.Recover consumes the journal InFlight() set at startup (resume-or-rollback) — covers an op that crashed after the POST and before its terminal record, which idempotency dedupe alone cannot. Added TaskStatusOnce to the GuestAPI seam. Wired into daemon startup. - Note 2: memory comparison canonicalized to MiB (desiredMemoryMiB) so a non-MiB-aligned MemoryBytes converges in one pass, not perpetual drift. - Daemon: builds the verifier from config signers (none = nil verifier, the common slice-4 state), the gate (+SlogAudit), runs Recover before mutating.
Adversarial matrix proven against the REAL authz.Verifier with in-test-minted SSHSIGs (framing replicated in reconcile's test binary; authz untouched, no signing added to the verify-only package): unsigned job + unsigned desired-state delta -> pending_signature; unknown signer/expired/replay-across-restart/wrong host -> typed authz rejections; wrong guest/op/params -> binding_mismatch; recovery key on ordinary destructive -> role_denied; hub-supplied scratch tag ignored -> refused; valid+role+target+fresh nonce -> accepted then replay rejected. Full module race-clean + vet-clean on the Linux build server. Inert this slice: no destructive deltas served until slice 10; the destructive path is classified, gated, and tested but not wired to live execution. CHECKPOINT: Phase B complete (slice 4 done). Awaiting validation. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
222 lines
6.8 KiB
Go
222 lines
6.8 KiB
Go
package reconcile
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"path/filepath"
|
|
"sync"
|
|
"testing"
|
|
|
|
"gitea.dooplex.hu/admin/felhom-agent/internal/hub"
|
|
"gitea.dooplex.hu/admin/felhom-agent/internal/proxmox"
|
|
)
|
|
|
|
// fakeAPI is a configurable GuestAPI for engine tests: it records mutating calls and
|
|
// returns canned UPIDs (""=synchronous, non-empty=async) and WaitTask verdicts.
|
|
type fakeAPI struct {
|
|
mu sync.Mutex
|
|
lxc []proxmox.Guest
|
|
cfg map[int]proxmox.GuestConfig
|
|
|
|
startUPID, stopUPID, setUPID string
|
|
startErr, stopErr, setErr error
|
|
// waitFunc maps a UPID to a (status, err); default = OK. Mirrors the real client,
|
|
// which errors on a non-OK exitstatus.
|
|
waitFunc func(upid string) (proxmox.TaskStatus, error)
|
|
// statusFunc backs TaskStatusOnce (crash recovery); default = stopped/OK.
|
|
statusFunc func(upid string) (proxmox.TaskStatus, error)
|
|
|
|
starts []int
|
|
stops []int
|
|
sets []setCall
|
|
waits []string
|
|
listErr error
|
|
}
|
|
|
|
func (f *fakeAPI) TaskStatusOnce(_ context.Context, upid string) (proxmox.TaskStatus, error) {
|
|
if f.statusFunc != nil {
|
|
return f.statusFunc(upid)
|
|
}
|
|
return proxmox.TaskStatus{UPID: upid, Status: "stopped", ExitStatus: "OK"}, nil
|
|
}
|
|
|
|
// setCall is one recorded SetConfig invocation on fakeAPI.
type setCall struct {
	// vmid is the guest the call targeted.
	vmid int
	// params are the configuration key/value pairs passed to the call.
	params map[string]string
}
|
|
|
|
func (f *fakeAPI) ListLXC(context.Context) ([]proxmox.Guest, error) {
|
|
if f.listErr != nil {
|
|
return nil, f.listErr
|
|
}
|
|
return f.lxc, nil
|
|
}
|
|
|
|
func (f *fakeAPI) GuestConfig(_ context.Context, vmid int) (proxmox.GuestConfig, error) {
|
|
c, ok := f.cfg[vmid]
|
|
if !ok {
|
|
return proxmox.GuestConfig{}, errors.New("no config")
|
|
}
|
|
return c, nil
|
|
}
|
|
|
|
func (f *fakeAPI) Start(_ context.Context, vmid int) (string, error) {
|
|
f.mu.Lock()
|
|
f.starts = append(f.starts, vmid)
|
|
f.mu.Unlock()
|
|
return f.startUPID, f.startErr
|
|
}
|
|
|
|
func (f *fakeAPI) Stop(_ context.Context, vmid int) (string, error) {
|
|
f.mu.Lock()
|
|
f.stops = append(f.stops, vmid)
|
|
f.mu.Unlock()
|
|
return f.stopUPID, f.stopErr
|
|
}
|
|
|
|
func (f *fakeAPI) SetConfig(_ context.Context, vmid int, params map[string]string) (string, error) {
|
|
f.mu.Lock()
|
|
f.sets = append(f.sets, setCall{vmid, params})
|
|
f.mu.Unlock()
|
|
return f.setUPID, f.setErr
|
|
}
|
|
|
|
func (f *fakeAPI) WaitTask(_ context.Context, upid string, _ proxmox.WaitOptions) (proxmox.TaskStatus, error) {
|
|
f.mu.Lock()
|
|
f.waits = append(f.waits, upid)
|
|
f.mu.Unlock()
|
|
if f.waitFunc != nil {
|
|
return f.waitFunc(upid)
|
|
}
|
|
return proxmox.TaskStatus{Status: "stopped", ExitStatus: "OK"}, nil
|
|
}
|
|
|
|
func newEngine(t *testing.T, api GuestAPI, provider DesiredProvider) (*Engine, *Journal, *Queue) {
|
|
t.Helper()
|
|
jp := filepath.Join(t.TempDir(), "journal.log")
|
|
j, err := OpenJournal(jp)
|
|
if err != nil {
|
|
t.Fatalf("OpenJournal: %v", err)
|
|
}
|
|
t.Cleanup(func() { j.Close() })
|
|
q := NewQueue()
|
|
t.Cleanup(q.Close)
|
|
e := NewEngine(EngineOptions{API: api, Queue: q, Journal: j, Provider: provider})
|
|
return e, j, q
|
|
}
|
|
|
|
func TestEngine_EmptyProviderNoMutations(t *testing.T) {
|
|
api := &fakeAPI{
|
|
lxc: []proxmox.Guest{{VMID: 100, Status: "running"}},
|
|
cfg: map[int]proxmox.GuestConfig{100: {Cores: 2}},
|
|
}
|
|
e, _, _ := newEngine(t, api, EmptyProvider{})
|
|
res, err := e.Reconcile(context.Background())
|
|
if err != nil {
|
|
t.Fatalf("Reconcile: %v", err)
|
|
}
|
|
if res.Planned != 0 || res.Executed != 0 {
|
|
t.Errorf("EmptyProvider should plan nothing, got %+v", res)
|
|
}
|
|
if len(api.starts)+len(api.stops)+len(api.sets) != 0 {
|
|
t.Errorf("EmptyProvider mutated Proxmox: starts=%v stops=%v sets=%v", api.starts, api.stops, api.sets)
|
|
}
|
|
}
|
|
|
|
func TestEngine_AsyncStartWaitsTask(t *testing.T) {
|
|
api := &fakeAPI{
|
|
lxc: []proxmox.Guest{{VMID: 100, Status: "stopped"}},
|
|
cfg: map[int]proxmox.GuestConfig{100: {Cores: 2}},
|
|
startUPID: "UPID:demo:start:100:",
|
|
}
|
|
e, j, _ := newEngine(t, api, StaticProvider{State: desired(DesiredGuest{VMID: 100, Run: RunRunning})})
|
|
res, err := e.Reconcile(context.Background())
|
|
if err != nil {
|
|
t.Fatalf("Reconcile: %v", err)
|
|
}
|
|
if res.Executed != 1 || res.Failed != 0 {
|
|
t.Fatalf("want 1 executed, got %+v", res)
|
|
}
|
|
if len(api.starts) != 1 || api.starts[0] != 100 {
|
|
t.Errorf("expected Start(100), got %v", api.starts)
|
|
}
|
|
if len(api.waits) != 1 {
|
|
t.Errorf("async op must WaitTask, got waits=%v", api.waits)
|
|
}
|
|
if len(j.InFlight()) != 0 {
|
|
t.Errorf("no ops should be in-flight after success: %+v", j.InFlight())
|
|
}
|
|
}
|
|
|
|
func TestEngine_SynchronousSetConfigNoWait(t *testing.T) {
|
|
// Empty UPID = PVE applied synchronously (slice-4 proven for description). Must be
|
|
// treated as success WITHOUT a WaitTask call.
|
|
api := &fakeAPI{
|
|
lxc: []proxmox.Guest{{VMID: 100, Status: "stopped"}},
|
|
cfg: map[int]proxmox.GuestConfig{100: {Cores: 2}},
|
|
setUPID: "", // synchronous
|
|
}
|
|
e, _, _ := newEngine(t, api, StaticProvider{State: desired(
|
|
DesiredGuest{VMID: 100, Spec: &hub.GuestSpec{Cores: 4, MemoryBytes: mib(2048)}})})
|
|
res, err := e.Reconcile(context.Background())
|
|
if err != nil {
|
|
t.Fatalf("Reconcile: %v", err)
|
|
}
|
|
if res.Executed != 1 {
|
|
t.Fatalf("want 1 executed, got %+v", res)
|
|
}
|
|
if len(api.sets) != 1 || api.sets[0].params["cores"] != "4" {
|
|
t.Errorf("expected SetConfig cores=4, got %v", api.sets)
|
|
}
|
|
if len(api.waits) != 0 {
|
|
t.Errorf("synchronous op must NOT WaitTask, got waits=%v", api.waits)
|
|
}
|
|
}
|
|
|
|
func TestEngine_WaitTaskFailureCountsFailed(t *testing.T) {
|
|
api := &fakeAPI{
|
|
lxc: []proxmox.Guest{{VMID: 100, Status: "stopped"}},
|
|
cfg: map[int]proxmox.GuestConfig{100: {Cores: 2}},
|
|
startUPID: "UPID:demo:start:100:",
|
|
waitFunc: func(string) (proxmox.TaskStatus, error) {
|
|
return proxmox.TaskStatus{Status: "stopped", ExitStatus: "got 403"}, errors.New("task failed: got 403")
|
|
},
|
|
}
|
|
e, j, _ := newEngine(t, api, StaticProvider{State: desired(DesiredGuest{VMID: 100, Run: RunRunning})})
|
|
res, err := e.Reconcile(context.Background())
|
|
if err != nil {
|
|
t.Fatalf("Reconcile (pass): %v", err)
|
|
}
|
|
if res.Failed != 1 || res.Executed != 0 {
|
|
t.Fatalf("want 1 failed, got %+v", res)
|
|
}
|
|
// The failed op is journaled terminal (failed), not left in-flight.
|
|
if len(j.InFlight()) != 0 {
|
|
t.Errorf("failed op should be terminal, in-flight=%+v", j.InFlight())
|
|
}
|
|
}
|
|
|
|
func TestEngine_PostErrorCountsFailed(t *testing.T) {
|
|
api := &fakeAPI{
|
|
lxc: []proxmox.Guest{{VMID: 100, Status: "stopped"}},
|
|
cfg: map[int]proxmox.GuestConfig{100: {Cores: 2}},
|
|
startErr: errors.New("connection refused"),
|
|
}
|
|
e, _, _ := newEngine(t, api, StaticProvider{State: desired(DesiredGuest{VMID: 100, Run: RunRunning})})
|
|
res, _ := e.Reconcile(context.Background())
|
|
if res.Failed != 1 {
|
|
t.Fatalf("want 1 failed on POST error, got %+v", res)
|
|
}
|
|
if len(api.waits) != 0 {
|
|
t.Errorf("POST error must not reach WaitTask, got %v", api.waits)
|
|
}
|
|
}
|
|
|
|
func TestEngine_ListErrorIsPassFailure(t *testing.T) {
|
|
api := &fakeAPI{listErr: errors.New("api down")}
|
|
e, _, _ := newEngine(t, api, StaticProvider{State: desired(DesiredGuest{VMID: 100, Run: RunRunning})})
|
|
if _, err := e.Reconcile(context.Background()); err == nil {
|
|
t.Error("expected a pass-level error when actual state can't be read")
|
|
}
|
|
}
|