Files
felhom-agent/internal/reconcile/job_test.go
T
admin 1af21a6cac v0.4.0: slice 4 Phase B — reversibility gate + signed-op consuming layer
The security core of slice 4: hub-supplied intent is no longer trusted for
destructive change. The gate fronts the per-guest queue's executor, so every
mutation passes it. Reuses internal/authz for all crypto (surface untouched).

- Classifier (doc 03 §4): benign vs destructive by provenance + data-bearingness,
  NOT by verb. Destroy/overwrite of customer data is destructive unless
  agent-internal provenance (same-journaled-txn create, or agent-tagged scratch)
  makes it benign — and that provenance is journal-recorded, NEVER hub-sourced.
  Unknown op class fails safe to destructive.
- Reversibility gate: benign -> allowed unsigned; destructive -> requires a
  verified, role-scoped, action-bound operator signature, else pending_signature
  and never executed. Every decision audited (signal, never the guard).
- Signed-op consuming layer over authz.Verifier.Verify (locked pipeline
  untouched): role-scoping (doc 04 §4 — recovery = rotation only;
  operational = ordinary destructive + planned rotation) + op-to-action binding
  (op + host + guest + params must match the gated action).
- Signed-job orchestration: idempotency dedupe by nonce + journal-wrapped
  execution via an injected DestructiveExecutor (nil this slice — inert).
- Crash recovery (Note 1): Engine.Recover consumes the journal InFlight() set at
  startup (resume-or-rollback) — covers an op that crashed after the POST and
  before its terminal record, which idempotency dedupe alone cannot. Added
  TaskStatusOnce to the GuestAPI seam. Wired into daemon startup.
- Note 2: memory comparison canonicalized to MiB (desiredMemoryMiB) so a
  non-MiB-aligned MemoryBytes converges in one pass, not perpetual drift.
- Daemon: builds the verifier from config signers (none = nil verifier, the
  common slice-4 state), the gate (+SlogAudit), runs Recover before mutating.

Adversarial matrix proven against the REAL authz.Verifier with in-test-minted
SSHSIGs (framing replicated in reconcile's test binary; authz untouched, no
signing added to the verify-only package): unsigned job + unsigned desired-state
delta -> pending_signature; unknown signer/expired/replay-across-restart/wrong
host -> typed authz rejections; wrong guest/op/params -> binding_mismatch;
recovery key on ordinary destructive -> role_denied; hub-supplied scratch tag
ignored -> refused; valid+role+target+fresh nonce -> accepted then replay
rejected. Full module race-clean + vet-clean on the Linux build server.

Inert this slice: no destructive deltas served until slice 10; the destructive
path is classified, gated, and tested but not wired to live execution.

CHECKPOINT: Phase B complete (slice 4 done). Awaiting validation.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-08 23:56:20 +02:00

136 lines
5.0 KiB
Go

package reconcile
import (
"context"
"errors"
"path/filepath"
"testing"
"gitea.dooplex.hu/admin/felhom-agent/internal/authz"
)
// newSignedEngine assembles an Engine for the signed-job tests. The engine's
// gate wraps the verifier returned by realVerifierAt, which is given exactly one
// allowed signer ("op1", authz.RoleOperational). The journal and the verifier's
// log file each live in their own t.TempDir(). It returns the engine, the
// journal backing it, and the signer so callers can mint signed ops.
func newSignedEngine(t *testing.T, api GuestAPI) (*Engine, *Journal, testSigner) {
	t.Helper()

	journal, err := OpenJournal(filepath.Join(t.TempDir(), "journal.log"))
	if err != nil {
		t.Fatalf("OpenJournal: %v", err)
	}
	// Registered first, so it runs after the queue's cleanup (cleanups run
	// last-registered-first). Whatever Close returns is not inspected.
	t.Cleanup(func() { journal.Close() })

	queue := NewQueue()
	t.Cleanup(queue.Close)

	signer := newTestSigner(t)
	logPath := filepath.Join(t.TempDir(), "n.log")
	// The second result of realVerifierAt is not needed by these tests.
	verifier, _ := realVerifierAt(t, logPath, testHost, signer.allowed(t, "op1", authz.RoleOperational))

	opts := EngineOptions{
		API:     api,
		Queue:   queue,
		Journal: journal,
		Gate:    NewGate(verifier, testHost, nil, nil),
		HostID:  testHost,
	}
	return NewEngine(opts), journal, signer
}
// TestRunSignedJob_ValidExecutesAndMarksApplied submits a job minted by the
// engine's allowed signer and expects: the result reports Executed with no
// error, the executor ran exactly once, and the journal reports the job's nonce
// as already applied.
func TestRunSignedJob_ValidExecutesAndMarksApplied(t *testing.T) {
	engine, journal, signer := newSignedEngine(t, &fakeAPI{})

	issued, expires := freshWindow()
	key := nonce()
	signed := signer.mint("guest_destroy", testHost, "9001", "op1", key, `{"purge":true}`, issued, expires)

	var calls int
	// Synchronous executor: counts invocations and succeeds with an empty string.
	exec := func(context.Context, Intent, *authz.VerifiedOp) (string, error) {
		calls++
		return "", nil
	}

	res := engine.RunSignedJob(context.Background(), destroyIntent(SourceOneShotJob), signed, exec)
	if res.Err != nil || !res.Executed {
		t.Fatalf("valid job should execute, got %+v", res)
	}
	if calls != 1 {
		t.Errorf("executor should run once, ran %d", calls)
	}
	if !journal.AlreadyApplied(key) {
		t.Error("successful job must mark its idempotency key (nonce) applied")
	}
}
// TestRunSignedJob_RedeliveryDedupedByIdempotencyKey delivers the same signed
// job twice. The first delivery must execute; the second must come back as
// AlreadyApplied without executing, so the executor runs exactly once overall.
//
// The intent is that the journal's idempotency key short-circuits BEFORE the
// verifier, so a redelivery is reported as already-applied rather than as a
// confusing replay rejection.
func TestRunSignedJob_RedeliveryDedupedByIdempotencyKey(t *testing.T) {
	engine, _, signer := newSignedEngine(t, &fakeAPI{})

	issued, expires := freshWindow()
	key := nonce()
	signed := signer.mint("guest_destroy", testHost, "9001", "op1", key, `{"purge":true}`, issued, expires)

	var calls int
	exec := func(context.Context, Intent, *authz.VerifiedOp) (string, error) {
		calls++
		return "", nil
	}

	ctx := context.Background()

	first := engine.RunSignedJob(ctx, destroyIntent(SourceOneShotJob), signed, exec)
	if !first.Executed {
		t.Fatalf("first delivery should execute: %+v", first)
	}

	// Identical job, delivered again.
	second := engine.RunSignedJob(ctx, destroyIntent(SourceOneShotJob), signed, exec)
	if second.Executed || !second.AlreadyApplied {
		t.Fatalf("redelivery should be deduped (already applied), got %+v", second)
	}

	if calls != 1 {
		t.Errorf("executor must run exactly once across redelivery, ran %d", calls)
	}
}
// TestRunSignedJob_RefusedDoesNotExecute mints a job with a signer that was not
// handed to the engine's verifier. The job must be refused with
// authz.ErrUnknownSigner, the executor must never run, and the journal must not
// report the nonce as applied.
func TestRunSignedJob_RefusedDoesNotExecute(t *testing.T) {
	engine, journal, _ := newSignedEngine(t, &fakeAPI{})

	// A fresh signer that the engine's verifier was never told about (not pinned).
	attacker := newTestSigner(t)
	issued, expires := freshWindow()
	key := nonce()
	signed := attacker.mint("guest_destroy", testHost, "9001", "op1", key, `{"purge":true}`, issued, expires)

	var calls int
	exec := func(context.Context, Intent, *authz.VerifiedOp) (string, error) {
		calls++
		return "", nil
	}

	res := engine.RunSignedJob(context.Background(), destroyIntent(SourceOneShotJob), signed, exec)

	refused := !res.Executed &&
		!res.Decision.Allowed &&
		errors.Is(res.Decision.Err, authz.ErrUnknownSigner)
	if !refused {
		t.Fatalf("forged job must be refused unexecuted, got %+v", res)
	}
	if calls != 0 {
		t.Errorf("executor must not run for a refused job, ran %d", calls)
	}
	if journal.AlreadyApplied(key) {
		t.Error("a refused job must not mark its key applied")
	}
}
// TestRunSignedJob_NoExecutorInert covers the slice-4 inert state: a VALID
// authorization submitted with a nil destructive executor. The decision must
// still be Allowed, but the job must not execute, must surface an error, and
// must NOT mark its key applied — leaving it retryable once an executor is
// wired in a later slice.
func TestRunSignedJob_NoExecutorInert(t *testing.T) {
	engine, journal, signer := newSignedEngine(t, &fakeAPI{})

	issued, expires := freshWindow()
	key := nonce()
	signed := signer.mint("guest_destroy", testHost, "9001", "op1", key, `{"purge":true}`, issued, expires)

	// nil executor: nothing is available to carry out the destructive op.
	res := engine.RunSignedJob(context.Background(), destroyIntent(SourceOneShotJob), signed, nil)

	if !res.Decision.Allowed {
		t.Fatalf("op should authorize even with no executor: %+v", res.Decision)
	}
	if res.Err == nil || res.Executed {
		t.Fatalf("no-executor job should not execute and should error, got %+v", res)
	}
	if journal.AlreadyApplied(key) {
		t.Error("an unexecuted (no-executor) job must not mark its key applied")
	}
}
// TestRunSignedJob_ExecutorErrorJournaledFailed runs a validly signed job whose
// executor always fails. The result must carry a non-nil error, must not be
// reported as Executed, and the journal must not report the nonce as applied.
func TestRunSignedJob_ExecutorErrorJournaledFailed(t *testing.T) {
	engine, journal, signer := newSignedEngine(t, &fakeAPI{})

	issued, expires := freshWindow()
	key := nonce()
	signed := signer.mint("guest_destroy", testHost, "9001", "op1", key, `{"purge":true}`, issued, expires)

	destroyErr := errors.New("destroy failed")
	failing := func(context.Context, Intent, *authz.VerifiedOp) (string, error) {
		return "", destroyErr
	}

	res := engine.RunSignedJob(context.Background(), destroyIntent(SourceOneShotJob), signed, failing)
	if res.Err == nil || res.Executed {
		t.Fatalf("executor error should propagate, got %+v", res)
	}
	if journal.AlreadyApplied(key) {
		t.Error("a failed execution must not mark its key applied")
	}
}