v0.4.0: slice 4 Phase B — reversibility gate + signed-op consuming layer
The security core of slice 4: hub-supplied intent is no longer trusted for destructive change. The gate fronts the per-guest queue's executor, so every mutation passes it. Reuses internal/authz for all crypto (surface untouched).
- Classifier (doc 03 §4): benign vs destructive by provenance + data-bearing-ness, NOT by verb. Destroy/overwrite of customer data is destructive unless agent-internal provenance (same-journaled-txn create, or agent-tagged scratch) makes it benign — and that provenance is journal-recorded, NEVER hub-sourced. Unknown op class fails safe to destructive.
- Reversibility gate: benign -> allowed unsigned; destructive -> requires a verified, role-scoped, action-bound operator signature, else pending_signature and never executed. Every decision audited (signal, never the guard).
- Signed-op consuming layer over authz.Verifier.Verify (locked pipeline untouched): role-scoping (doc 04 §4 — recovery=rotation only, operational=ordinary destructive + planned rotation) + op-to-action binding (op+host+guest+params must match the gated action).
- Signed-job orchestration: idempotency dedupe by nonce + journal-wrapped execution via an injected DestructiveExecutor (nil this slice — inert).
- Crash recovery (Note 1): Engine.Recover consumes the journal InFlight() set at startup (resume-or-rollback) — covers an op that crashed after the POST and before its terminal record, which idempotency dedupe alone cannot. Added TaskStatusOnce to the GuestAPI seam. Wired into daemon startup.
- Note 2: memory comparison canonicalized to MiB (desiredMemoryMiB) so a non-MiB-aligned MemoryBytes converges in one pass, not perpetual drift.
- Daemon: builds the verifier from config signers (none = nil verifier, the common slice-4 state), the gate (+SlogAudit), runs Recover before mutating.
Adversarial matrix proven against the REAL authz.Verifier with in-test-minted SSHSIGs (framing replicated in reconcile's test binary; authz untouched, no signing added to the verify-only package): unsigned job + unsigned desired-state delta -> pending_signature; unknown signer/expired/replay-across-restart/wrong host -> typed authz rejections; wrong guest/op/params -> binding_mismatch; recovery key on ordinary destructive -> role_denied; hub-supplied scratch tag ignored -> refused; valid+role+target+fresh nonce -> accepted then replay rejected. Full module race-clean + vet-clean on the Linux build server. Inert this slice: no destructive deltas served until slice 10; the destructive path is classified, gated, and tested but not wired to live execution. CHECKPOINT: Phase B complete (slice 4 done). Awaiting validation. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,299 @@
|
||||
package reconcile
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gitea.dooplex.hu/admin/felhom-agent/internal/authz"
|
||||
)
|
||||
|
||||
// testHost is the host ID shared by the fixtures in this file: the verifiers and
// gates are built for it, and destroyIntent targets it.
const testHost = "demo-felhom"
|
||||
|
||||
// captureAudit records gate decisions so tests can assert audit is always written
|
||||
// (audit is a signal, never the guard).
|
||||
type captureAudit struct{ recs []AuditRecord }
|
||||
|
||||
func (c *captureAudit) Record(r AuditRecord) { c.recs = append(c.recs, r) }
|
||||
|
||||
// realVerifierAt builds a real authz.Verifier over a durable nonce store at path
|
||||
// (reused across "restart" by reopening the same path), pinning the given signers.
|
||||
func realVerifierAt(t *testing.T, path, hostID string, signers ...authz.AllowedSigner) (*authz.Verifier, *authz.FileNonceStore) {
|
||||
t.Helper()
|
||||
store, err := authz.OpenFileNonceStore(path)
|
||||
if err != nil {
|
||||
t.Fatalf("OpenFileNonceStore: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { store.Close() })
|
||||
return authz.New(signers, store, hostID), store
|
||||
}
|
||||
|
||||
// destroyIntent is the canonical destructive fixture: destroy guest 9001, params
|
||||
// {"purge":true} (mirrors the committed slice-2 op_blob.json shape).
|
||||
func destroyIntent(source SourceKind) Intent {
|
||||
return Intent{
|
||||
Class: ClassGuestDestroy,
|
||||
HostID: testHost,
|
||||
GuestID: "9001",
|
||||
VMID: 9001,
|
||||
ParamsJSON: json.RawMessage(`{"purge":true}`),
|
||||
Source: source,
|
||||
}
|
||||
}
|
||||
|
||||
// freshWindow returns a validity window around the current UTC time: issued one
// minute ago, expiring ten minutes from now.
func freshWindow() (issued, expires time.Time) {
	now := time.Now().UTC()
	issued = now.Add(-time.Minute)
	expires = now.Add(10 * time.Minute)
	return issued, expires
}
|
||||
|
||||
// --- The adversarial matrix: each case must be INDEPENDENTLY rejected (or, the one
|
||||
// positive case, accepted). ---
|
||||
|
||||
func TestGate_DestructiveJobNoSignatureRefused(t *testing.T) {
|
||||
op := newTestSigner(t)
|
||||
v, _ := realVerifierAt(t, filepath.Join(t.TempDir(), "n.log"), testHost, op.allowed(t, "op1", authz.RoleOperational))
|
||||
aud := &captureAudit{}
|
||||
g := NewGate(v, testHost, aud, nil)
|
||||
|
||||
d := g.Authorize(destroyIntent(SourceOneShotJob), nil)
|
||||
if d.Allowed || d.Reason != ReasonPendingSignature {
|
||||
t.Fatalf("unsigned destructive job: got allowed=%v reason=%s, want pending_signature", d.Allowed, d.Reason)
|
||||
}
|
||||
if len(aud.recs) != 1 || aud.recs[0].Allowed {
|
||||
t.Errorf("decision must be audited as refused: %+v", aud.recs)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGate_DestructiveDesiredDeltaNoSignatureRefused(t *testing.T) {
|
||||
// Proves the agent distrusts hub DESIRED STATE for destructive change, not just
|
||||
// jobs — same refusal, different source.
|
||||
op := newTestSigner(t)
|
||||
v, _ := realVerifierAt(t, filepath.Join(t.TempDir(), "n.log"), testHost, op.allowed(t, "op1", authz.RoleOperational))
|
||||
g := NewGate(v, testHost, nil, nil)
|
||||
|
||||
d := g.Authorize(destroyIntent(SourceDesiredDelta), nil)
|
||||
if d.Allowed || d.Reason != ReasonPendingSignature {
|
||||
t.Fatalf("unsigned destructive delta: got allowed=%v reason=%s, want pending_signature", d.Allowed, d.Reason)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGate_UnknownSignerRejected(t *testing.T) {
|
||||
pinned := newTestSigner(t)
|
||||
attacker := newTestSigner(t) // NOT pinned
|
||||
v, _ := realVerifierAt(t, filepath.Join(t.TempDir(), "n.log"), testHost, pinned.allowed(t, "op1", authz.RoleOperational))
|
||||
g := NewGate(v, testHost, nil, nil)
|
||||
|
||||
issued, expires := freshWindow()
|
||||
signed := attacker.mint("guest_destroy", testHost, "9001", "op1", nonce(), `{"purge":true}`, issued, expires)
|
||||
d := g.Authorize(destroyIntent(SourceOneShotJob), signed)
|
||||
if d.Allowed || d.Reason != ReasonRejected || !errors.Is(d.Err, authz.ErrUnknownSigner) {
|
||||
t.Fatalf("forged signer: got allowed=%v reason=%s err=%v, want rejected/ErrUnknownSigner", d.Allowed, d.Reason, d.Err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGate_ExpiredSignatureRejected(t *testing.T) {
|
||||
op := newTestSigner(t)
|
||||
v, _ := realVerifierAt(t, filepath.Join(t.TempDir(), "n.log"), testHost, op.allowed(t, "op1", authz.RoleOperational))
|
||||
g := NewGate(v, testHost, nil, nil)
|
||||
|
||||
past := time.Now().UTC().Add(-2 * time.Hour)
|
||||
signed := op.mint("guest_destroy", testHost, "9001", "op1", nonce(), `{"purge":true}`, past, past.Add(time.Minute))
|
||||
d := g.Authorize(destroyIntent(SourceOneShotJob), signed)
|
||||
if d.Allowed || !errors.Is(d.Err, authz.ErrExpired) {
|
||||
t.Fatalf("expired op: got allowed=%v err=%v, want ErrExpired", d.Allowed, d.Err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGate_WrongHostTargetRejected(t *testing.T) {
|
||||
op := newTestSigner(t)
|
||||
v, _ := realVerifierAt(t, filepath.Join(t.TempDir(), "n.log"), testHost, op.allowed(t, "op1", authz.RoleOperational))
|
||||
g := NewGate(v, testHost, nil, nil)
|
||||
|
||||
issued, expires := freshWindow()
|
||||
signed := op.mint("guest_destroy", "some-other-host", "9001", "op1", nonce(), `{"purge":true}`, issued, expires)
|
||||
d := g.Authorize(destroyIntent(SourceOneShotJob), signed)
|
||||
if d.Allowed || !errors.Is(d.Err, authz.ErrTarget) {
|
||||
t.Fatalf("wrong host: got allowed=%v err=%v, want ErrTarget", d.Allowed, d.Err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGate_WrongGuestBindingMismatch(t *testing.T) {
|
||||
// host matches (verifier passes) but the signature names a DIFFERENT guest than the
|
||||
// action — the op-to-action binding rejects it.
|
||||
op := newTestSigner(t)
|
||||
v, _ := realVerifierAt(t, filepath.Join(t.TempDir(), "n.log"), testHost, op.allowed(t, "op1", authz.RoleOperational))
|
||||
g := NewGate(v, testHost, nil, nil)
|
||||
|
||||
issued, expires := freshWindow()
|
||||
signed := op.mint("guest_destroy", testHost, "9002", "op1", nonce(), `{"purge":true}`, issued, expires)
|
||||
d := g.Authorize(destroyIntent(SourceOneShotJob), signed) // intent targets 9001
|
||||
if d.Allowed || d.Reason != ReasonBindingMismatch {
|
||||
t.Fatalf("guest mismatch: got allowed=%v reason=%s, want binding_mismatch", d.Allowed, d.Reason)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGate_WrongParamsBindingMismatch(t *testing.T) {
|
||||
op := newTestSigner(t)
|
||||
v, _ := realVerifierAt(t, filepath.Join(t.TempDir(), "n.log"), testHost, op.allowed(t, "op1", authz.RoleOperational))
|
||||
g := NewGate(v, testHost, nil, nil)
|
||||
|
||||
issued, expires := freshWindow()
|
||||
// signature authorizes purge=false; the action wants purge=true.
|
||||
signed := op.mint("guest_destroy", testHost, "9001", "op1", nonce(), `{"purge":false}`, issued, expires)
|
||||
d := g.Authorize(destroyIntent(SourceOneShotJob), signed)
|
||||
if d.Allowed || d.Reason != ReasonBindingMismatch {
|
||||
t.Fatalf("params mismatch: got allowed=%v reason=%s, want binding_mismatch", d.Allowed, d.Reason)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGate_WrongOpBindingMismatch(t *testing.T) {
|
||||
op := newTestSigner(t)
|
||||
v, _ := realVerifierAt(t, filepath.Join(t.TempDir(), "n.log"), testHost, op.allowed(t, "op1", authz.RoleOperational))
|
||||
g := NewGate(v, testHost, nil, nil)
|
||||
|
||||
issued, expires := freshWindow()
|
||||
// a valid signature for restore_overwrite cannot authorize a guest_destroy.
|
||||
signed := op.mint("restore_overwrite", testHost, "9001", "op1", nonce(), `{"purge":true}`, issued, expires)
|
||||
d := g.Authorize(destroyIntent(SourceOneShotJob), signed)
|
||||
if d.Allowed || d.Reason != ReasonBindingMismatch {
|
||||
t.Fatalf("op mismatch: got allowed=%v reason=%s, want binding_mismatch", d.Allowed, d.Reason)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGate_RecoveryKeyOnOrdinaryDestructiveRoleDenied(t *testing.T) {
|
||||
// A valid signature from the cold RECOVERY key on an ordinary destructive op is
|
||||
// refused by role-scoping (recovery authorizes ONLY key-rotation).
|
||||
rec := newTestSigner(t)
|
||||
v, _ := realVerifierAt(t, filepath.Join(t.TempDir(), "n.log"), testHost, rec.allowed(t, "rec1", authz.RoleRecovery))
|
||||
g := NewGate(v, testHost, nil, nil)
|
||||
|
||||
issued, expires := freshWindow()
|
||||
signed := rec.mint("guest_destroy", testHost, "9001", "rec1", nonce(), `{"purge":true}`, issued, expires)
|
||||
d := g.Authorize(destroyIntent(SourceOneShotJob), signed)
|
||||
if d.Allowed || d.Reason != ReasonRoleDenied {
|
||||
t.Fatalf("recovery on destroy: got allowed=%v reason=%s, want role_denied", d.Allowed, d.Reason)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGate_HubSuppliedScratchTagIgnored(t *testing.T) {
|
||||
// A compromised hub attaches a "scratch" hint to a data-bearing guest's destroy
|
||||
// delta to try to walk the gate unsigned. The intent built from a hub delta must
|
||||
// NOT carry that as agent-internal provenance — so it stays destructive and is
|
||||
// refused without a signature.
|
||||
intent := intentFromHubDelta(hubDelta{Class: ClassGuestDestroy, HostID: testHost, GuestID: "9001", VMID: 9001, HubSaysScratch: true})
|
||||
if intent.Provenance.AgentTaggedScratch || intent.Provenance.SameTxnCreated {
|
||||
t.Fatal("hub-supplied scratch must NOT become agent-internal provenance")
|
||||
}
|
||||
g := NewGate(nil, testHost, nil, nil) // no verifier even needed
|
||||
d := g.Authorize(intent, nil)
|
||||
if d.Allowed || d.Reason != ReasonPendingSignature {
|
||||
t.Fatalf("hub-scratch destroy: got allowed=%v reason=%s, want pending_signature", d.Allowed, d.Reason)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGate_ValidOpAcceptedThenReplayRejected(t *testing.T) {
|
||||
// The ONE positive case: valid signature, correct role, correct target, fresh
|
||||
// nonce → accepted. A SECOND presentation (same nonce) → rejected (nonce consumed).
|
||||
op := newTestSigner(t)
|
||||
path := filepath.Join(t.TempDir(), "n.log")
|
||||
v, _ := realVerifierAt(t, path, testHost, op.allowed(t, "op1", authz.RoleOperational))
|
||||
aud := &captureAudit{}
|
||||
g := NewGate(v, testHost, aud, nil)
|
||||
|
||||
issued, expires := freshWindow()
|
||||
n := nonce()
|
||||
signed := op.mint("guest_destroy", testHost, "9001", "op1", n, `{"purge":true}`, issued, expires)
|
||||
|
||||
d := g.Authorize(destroyIntent(SourceOneShotJob), signed)
|
||||
if !d.Allowed || d.Reason != ReasonSigned {
|
||||
t.Fatalf("valid op: got allowed=%v reason=%s err=%v, want accepted/signed", d.Allowed, d.Reason, d.Err)
|
||||
}
|
||||
if d.Verified == nil || d.Verified.Nonce != n {
|
||||
t.Fatalf("accepted op should surface the verified op with nonce %s", n)
|
||||
}
|
||||
// Replay the exact same signed op → nonce already consumed.
|
||||
d2 := g.Authorize(destroyIntent(SourceOneShotJob), signed)
|
||||
if d2.Allowed || !errors.Is(d2.Err, authz.ErrReplay) {
|
||||
t.Fatalf("replay: got allowed=%v err=%v, want ErrReplay", d2.Allowed, d2.Err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGate_ReplayAcrossRestartRejected(t *testing.T) {
|
||||
// Replay protection must survive an agent restart (the durable nonce store). Accept
|
||||
// once with verifier A, then reopen the SAME nonce-store path as verifier B (a
|
||||
// restart) and replay → still rejected.
|
||||
op := newTestSigner(t)
|
||||
path := filepath.Join(t.TempDir(), "n.log")
|
||||
signer := op.allowed(t, "op1", authz.RoleOperational)
|
||||
|
||||
issued, expires := freshWindow()
|
||||
n := nonce()
|
||||
signed := op.mint("guest_destroy", testHost, "9001", "op1", n, `{"purge":true}`, issued, expires)
|
||||
|
||||
vA, storeA := realVerifierAt(t, path, testHost, signer)
|
||||
if d := NewGate(vA, testHost, nil, nil).Authorize(destroyIntent(SourceOneShotJob), signed); !d.Allowed {
|
||||
t.Fatalf("first presentation should be accepted: %+v", d)
|
||||
}
|
||||
storeA.Close() // simulate shutdown
|
||||
|
||||
vB, _ := realVerifierAt(t, path, testHost, signer) // restart: reopen same nonce log
|
||||
d := NewGate(vB, testHost, nil, nil).Authorize(destroyIntent(SourceOneShotJob), signed)
|
||||
if d.Allowed || !errors.Is(d.Err, authz.ErrReplay) {
|
||||
t.Fatalf("replay across restart: got allowed=%v err=%v, want ErrReplay", d.Allowed, d.Err)
|
||||
}
|
||||
}
|
||||
|
||||
// --- gate unit tests (benign path, binding, params) ---
|
||||
|
||||
func TestGate_BenignAllowedWithoutVerifier(t *testing.T) {
|
||||
g := NewGate(nil, testHost, nil, nil) // no verifier at all
|
||||
for _, k := range []ActionKind{ActionStart, ActionStop, ActionSetConfig} {
|
||||
d := g.Authorize(intentForAction(testHost, Action{VMID: 100, Kind: k}), nil)
|
||||
if !d.Allowed || d.Reason != ReasonBenign {
|
||||
t.Errorf("benign %s: got allowed=%v reason=%s, want benign", k, d.Allowed, d.Reason)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParamsEqual(t *testing.T) {
|
||||
eq := func(a, b string) bool { return paramsEqual(json.RawMessage(a), json.RawMessage(b)) }
|
||||
if !eq(`{"purge":true}`, `{"purge":true}`) {
|
||||
t.Error("identical params should be equal")
|
||||
}
|
||||
if !eq(`{"a":1,"b":2}`, `{"b":2,"a":1}`) {
|
||||
t.Error("key order must not matter")
|
||||
}
|
||||
if eq(`{"purge":true}`, `{"purge":false}`) {
|
||||
t.Error("different values must differ")
|
||||
}
|
||||
if !eq(``, `{}`) || !eq(`{}`, `null`) {
|
||||
t.Error("absent / empty / null params should all compare equal")
|
||||
}
|
||||
}
|
||||
|
||||
// --- helpers for the hub-scratch test: a stand-in for the slice-10 desired-delta →
|
||||
// intent constructor, proving it never propagates hub-supplied provenance. ---
|
||||
|
||||
type hubDelta struct {
|
||||
Class OpClass
|
||||
HostID string
|
||||
GuestID string
|
||||
VMID int
|
||||
HubSaysScratch bool // a hostile/erroneous hub hint — MUST be ignored
|
||||
}
|
||||
|
||||
func intentFromHubDelta(d hubDelta) Intent {
|
||||
// NOTE: HubSaysScratch is deliberately NOT mapped to Provenance. Agent-internal
|
||||
// provenance (scratch/same-txn) is recorded by the agent's own journal, never taken
|
||||
// from the hub (doc 03 §4).
|
||||
return Intent{
|
||||
Class: d.Class,
|
||||
HostID: d.HostID,
|
||||
GuestID: d.GuestID,
|
||||
VMID: d.VMID,
|
||||
Provenance: Provenance{}, // always zero from an external source
|
||||
Source: SourceDesiredDelta,
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user