1af21a6cac
The security core of slice 4: hub-supplied intent is no longer trusted for destructive change. The gate fronts the per-guest queue's executor, so every mutation passes it. Reuses internal/authz for all crypto (surface untouched). - Classifier (doc 03 §4): benign vs destructive by provenance + data-bearing-ness, NOT by verb. Destroy/overwrite of customer data is destructive unless agent-internal provenance (same-journaled-txn create, or agent-tagged scratch) makes it benign — and that provenance is journal-recorded, NEVER hub-sourced. Unknown op class fails safe to destructive. - Reversibility gate: benign -> allowed unsigned; destructive -> requires a verified, role-scoped, action-bound operator signature, else pending_signature and never executed. Every decision audited (signal, never the guard). - Signed-op consuming layer over authz.Verifier.Verify (locked pipeline untouched): role-scoping (doc 04 §4 — recovery=rotation only, operational=ordinary destructive + planned rotation) + op-to-action binding (op+host+guest+params must match the gated action). - Signed-job orchestration: idempotency dedupe by nonce + journal-wrapped execution via an injected DestructiveExecutor (nil this slice — inert). - Crash recovery (Note 1): Engine.Recover consumes the journal InFlight() set at startup (resume-or-rollback) — covers an op that crashed after the POST and before its terminal record, which idempotency dedupe alone cannot. Added TaskStatusOnce to the GuestAPI seam. Wired into daemon startup. - Note 2: memory comparison canonicalized to MiB (desiredMemoryMiB) so a non-MiB-aligned MemoryBytes converges in one pass, not perpetual drift. - Daemon: builds the verifier from config signers (none = nil verifier, the common slice-4 state), the gate (+SlogAudit), runs Recover before mutating.
Adversarial matrix proven against the REAL authz.Verifier with in-test-minted SSHSIGs (framing replicated in reconcile's test binary; authz untouched, no signing added to the verify-only package): unsigned job + unsigned desired-state delta -> pending_signature; unknown signer/expired/replay-across-restart/wrong host -> typed authz rejections; wrong guest/op/params -> binding_mismatch; recovery key on ordinary destructive -> role_denied; hub-supplied scratch tag ignored -> refused; valid+role+target+fresh nonce -> accepted then replay rejected. Full module race-clean + vet-clean on the Linux build server. Inert this slice: no destructive deltas served until slice 10; the destructive path is classified, gated, and tested but not wired to live execution. CHECKPOINT: Phase B complete (slice 4 done). Awaiting validation. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
136 lines
5.0 KiB
Go
136 lines
5.0 KiB
Go
package reconcile
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"path/filepath"
|
|
"testing"
|
|
|
|
"gitea.dooplex.hu/admin/felhom-agent/internal/authz"
|
|
)
|
|
|
|
// newSignedEngine builds an engine whose gate has a real verifier pinning one
|
|
// operational key — for exercising the signed-job consuming layer end to end.
|
|
func newSignedEngine(t *testing.T, api GuestAPI) (*Engine, *Journal, testSigner) {
|
|
t.Helper()
|
|
j, err := OpenJournal(filepath.Join(t.TempDir(), "journal.log"))
|
|
if err != nil {
|
|
t.Fatalf("OpenJournal: %v", err)
|
|
}
|
|
t.Cleanup(func() { j.Close() })
|
|
q := NewQueue()
|
|
t.Cleanup(q.Close)
|
|
op := newTestSigner(t)
|
|
v, _ := realVerifierAt(t, filepath.Join(t.TempDir(), "n.log"), testHost, op.allowed(t, "op1", authz.RoleOperational))
|
|
g := NewGate(v, testHost, nil, nil)
|
|
e := NewEngine(EngineOptions{API: api, Queue: q, Journal: j, Gate: g, HostID: testHost})
|
|
return e, j, op
|
|
}
|
|
|
|
func TestRunSignedJob_ValidExecutesAndMarksApplied(t *testing.T) {
|
|
e, j, op := newSignedEngine(t, &fakeAPI{})
|
|
issued, expires := freshWindow()
|
|
n := nonce()
|
|
signed := op.mint("guest_destroy", testHost, "9001", "op1", n, `{"purge":true}`, issued, expires)
|
|
|
|
calls := 0
|
|
exec := func(context.Context, Intent, *authz.VerifiedOp) (string, error) { calls++; return "", nil } // synchronous
|
|
|
|
res := e.RunSignedJob(context.Background(), destroyIntent(SourceOneShotJob), signed, exec)
|
|
if !res.Executed || res.Err != nil {
|
|
t.Fatalf("valid job should execute, got %+v", res)
|
|
}
|
|
if calls != 1 {
|
|
t.Errorf("executor should run once, ran %d", calls)
|
|
}
|
|
if !j.AlreadyApplied(n) {
|
|
t.Error("successful job must mark its idempotency key (nonce) applied")
|
|
}
|
|
}
|
|
|
|
func TestRunSignedJob_RedeliveryDedupedByIdempotencyKey(t *testing.T) {
|
|
// After success, a redelivered identical job must NOT re-run — the journal's
|
|
// idempotency key short-circuits BEFORE the verifier (so it reports already-applied,
|
|
// not a confusing replay rejection).
|
|
e, _, op := newSignedEngine(t, &fakeAPI{})
|
|
issued, expires := freshWindow()
|
|
n := nonce()
|
|
signed := op.mint("guest_destroy", testHost, "9001", "op1", n, `{"purge":true}`, issued, expires)
|
|
|
|
calls := 0
|
|
exec := func(context.Context, Intent, *authz.VerifiedOp) (string, error) { calls++; return "", nil }
|
|
|
|
first := e.RunSignedJob(context.Background(), destroyIntent(SourceOneShotJob), signed, exec)
|
|
if !first.Executed {
|
|
t.Fatalf("first delivery should execute: %+v", first)
|
|
}
|
|
second := e.RunSignedJob(context.Background(), destroyIntent(SourceOneShotJob), signed, exec)
|
|
if !second.AlreadyApplied || second.Executed {
|
|
t.Fatalf("redelivery should be deduped (already applied), got %+v", second)
|
|
}
|
|
if calls != 1 {
|
|
t.Errorf("executor must run exactly once across redelivery, ran %d", calls)
|
|
}
|
|
}
|
|
|
|
func TestRunSignedJob_RefusedDoesNotExecute(t *testing.T) {
|
|
e, j, _ := newSignedEngine(t, &fakeAPI{})
|
|
attacker := newTestSigner(t) // not pinned
|
|
issued, expires := freshWindow()
|
|
n := nonce()
|
|
signed := attacker.mint("guest_destroy", testHost, "9001", "op1", n, `{"purge":true}`, issued, expires)
|
|
|
|
calls := 0
|
|
exec := func(context.Context, Intent, *authz.VerifiedOp) (string, error) { calls++; return "", nil }
|
|
|
|
res := e.RunSignedJob(context.Background(), destroyIntent(SourceOneShotJob), signed, exec)
|
|
if res.Executed || res.Decision.Allowed || !errors.Is(res.Decision.Err, authz.ErrUnknownSigner) {
|
|
t.Fatalf("forged job must be refused unexecuted, got %+v", res)
|
|
}
|
|
if calls != 0 {
|
|
t.Errorf("executor must not run for a refused job, ran %d", calls)
|
|
}
|
|
if j.AlreadyApplied(n) {
|
|
t.Error("a refused job must not mark its key applied")
|
|
}
|
|
}
|
|
|
|
func TestRunSignedJob_NoExecutorInert(t *testing.T) {
|
|
// Slice-4 inert state: a VALID authorization with no destructive executor wired
|
|
// returns an error and does NOT mark the key applied (so it is retryable once the
|
|
// executor lands in a later slice).
|
|
e, j, op := newSignedEngine(t, &fakeAPI{})
|
|
issued, expires := freshWindow()
|
|
n := nonce()
|
|
signed := op.mint("guest_destroy", testHost, "9001", "op1", n, `{"purge":true}`, issued, expires)
|
|
|
|
res := e.RunSignedJob(context.Background(), destroyIntent(SourceOneShotJob), signed, nil)
|
|
if !res.Decision.Allowed {
|
|
t.Fatalf("op should authorize even with no executor: %+v", res.Decision)
|
|
}
|
|
if res.Executed || res.Err == nil {
|
|
t.Fatalf("no-executor job should not execute and should error, got %+v", res)
|
|
}
|
|
if j.AlreadyApplied(n) {
|
|
t.Error("an unexecuted (no-executor) job must not mark its key applied")
|
|
}
|
|
}
|
|
|
|
func TestRunSignedJob_ExecutorErrorJournaledFailed(t *testing.T) {
|
|
e, j, op := newSignedEngine(t, &fakeAPI{})
|
|
issued, expires := freshWindow()
|
|
n := nonce()
|
|
signed := op.mint("guest_destroy", testHost, "9001", "op1", n, `{"purge":true}`, issued, expires)
|
|
|
|
exec := func(context.Context, Intent, *authz.VerifiedOp) (string, error) {
|
|
return "", errors.New("destroy failed")
|
|
}
|
|
res := e.RunSignedJob(context.Background(), destroyIntent(SourceOneShotJob), signed, exec)
|
|
if res.Executed || res.Err == nil {
|
|
t.Fatalf("executor error should propagate, got %+v", res)
|
|
}
|
|
if j.AlreadyApplied(n) {
|
|
t.Error("a failed execution must not mark its key applied")
|
|
}
|
|
}
|