v0.4.0: slice 4 Phase B — reversibility gate + signed-op consuming layer

The security core of slice 4: hub-supplied intent is no longer trusted for
destructive change. The gate fronts the per-guest queue's executor, so every
mutation passes it. Reuses internal/authz for all crypto (surface untouched).

- Classifier (doc 03 §4): benign vs destructive by provenance + data-bearingness,
  NOT by verb. Destroy/overwrite of customer data is destructive unless
  agent-internal provenance (same-journaled-txn create, or agent-tagged scratch)
  makes it benign — and that provenance is journal-recorded, NEVER hub-sourced.
  Unknown op class fails safe to destructive.
- Reversibility gate: benign -> allowed unsigned; destructive -> requires a
  verified, role-scoped, action-bound operator signature, else pending_signature
  and never executed. Every decision is audited (the audit is a signal, never the guard).
- Signed-op consuming layer over authz.Verifier.Verify (locked pipeline
  untouched): role-scoping (doc 04 §4 — recovery=rotation only, operational=
  ordinary destructive + planned rotation) + op-to-action binding (op+host+
  guest+params must match the gated action).
- Signed-job orchestration: idempotency dedupe by nonce + journal-wrapped
  execution via an injected DestructiveExecutor (nil this slice — inert).
- Crash recovery (Note 1): Engine.Recover consumes the journal InFlight() set at
  startup (resume-or-rollback) — covers an op that crashed after the POST and
  before its terminal record, which idempotency dedupe alone cannot. Added
  TaskStatusOnce to the GuestAPI seam. Wired into daemon startup.
- Note 2: memory comparison canonicalized to MiB (desiredMemoryMiB) so a
  non-MiB-aligned MemoryBytes converges in one pass, not perpetual drift.
- Daemon: builds the verifier from config signers (none = nil verifier, the
  common slice-4 state), the gate (+SlogAudit), runs Recover before mutating.

Adversarial matrix proven against the REAL authz.Verifier with in-test-minted
SSHSIGs (framing replicated in reconcile's test binary; authz untouched, no
signing added to the verify-only package): unsigned job + unsigned desired-state
delta -> pending_signature; unknown signer/expired/replay-across-restart/wrong
host -> typed authz rejections; wrong guest/op/params -> binding_mismatch;
recovery key on ordinary destructive -> role_denied; hub-supplied scratch tag
ignored -> refused; valid+role+target+fresh nonce -> accepted then replay
rejected. Full module race-clean + vet-clean on the Linux build server.

Inert this slice: no destructive deltas served until slice 10; the destructive
path is classified, gated, and tested but not wired to live execution.

CHECKPOINT: Phase B complete (slice 4 done). Awaiting validation.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-08 23:56:20 +02:00
parent 05c450147c
commit 1af21a6cac
18 changed files with 1640 additions and 80 deletions
+44 -11
View File
@@ -27,19 +27,23 @@ type Engine struct {
journal *Journal
provider DesiredProvider
norm FieldNormalizers
gate *Gate
hostID string
logger *slog.Logger
opSeq uint64 // atomic; makes each op id unique per attempt
}
// EngineOptions configures a new Engine. Norm defaults to DefaultNormalizers, Logger
// to a discard logger, Gate to a no-verifier gate (benign-allow, destructive-pending).
// A nil Provider is replaced with EmptyProvider by NewEngine.
type EngineOptions struct {
// API is the guest API the engine drives; NewEngine stores it as given.
API GuestAPI
// Queue is the shared per-guest queue that actions are submitted to, keyed by VMID;
// NewEngine stores it as given.
Queue *Queue
// Journal receives the op lifecycle records; NewEngine stores it as given.
Journal *Journal
// Provider supplies the desired state; nil selects EmptyProvider.
Provider DesiredProvider
// Norm holds the field normalizers used when comparing state.
Norm FieldNormalizers
// Gate is the reversibility gate consulted before an action is queued. When nil,
// NewEngine builds one with NewGate(nil, HostID, nil, logger), i.e. with no verifier.
Gate *Gate
// HostID identifies this host; it is handed to the default gate and used to build
// the intent that is presented to the gate for each action.
HostID string
// Logger is the structured logger used by the engine.
Logger *slog.Logger
}
@@ -58,12 +62,20 @@ func NewEngine(opts EngineOptions) *Engine {
if provider == nil {
provider = EmptyProvider{}
}
gate := opts.Gate
if gate == nil {
// No verifier configured: benign actions pass, destructive are pending. This is
// the common slice-4 daemon state (no signers pinned, no desired state).
gate = NewGate(nil, opts.HostID, nil, logger)
}
return &Engine{
api: opts.API,
queue: opts.Queue,
journal: opts.Journal,
provider: provider,
norm: norm,
gate: gate,
hostID: opts.HostID,
logger: logger,
}
}
@@ -97,23 +109,39 @@ func (e *Engine) Reconcile(ctx context.Context) (Result, error) {
return res, nil
}
// Dispatch all actions onto the shared per-guest queue, then await each. Same-vmid
// actions serialize in submit order; different vmids run concurrently.
chans := make([]<-chan error, len(actions))
// Every mutation passes the reversibility gate before the queue (doc 03 §4).
// Reconcile only produces benign actions, so each is allowed unsigned — but the
// gate is genuinely in the path: a destructive class here would be refused
// (pending_signature) and never dispatched. A gate refusal counts as a failed
// action (it should not happen for the benign reconcile set).
type dispatched struct {
act Action
ch <-chan error
}
var sent []dispatched
for i := range actions {
act := actions[i]
chans[i] = e.queue.Submit(act.VMID, func() error { return e.execute(ctx, act) })
dec := e.gate.Authorize(intentForAction(e.hostID, act), nil)
if !dec.Allowed {
res.Failed++
res.Errors = append(res.Errors, fmt.Errorf("reconcile: gate refused %s vmid %d: %s",
act.Kind, act.VMID, dec.Reason))
e.logger.Error("reconcile: gate refused a benign action (unexpected)",
"vmid", act.VMID, "kind", act.Kind, "reason", dec.Reason)
continue
}
sent = append(sent, dispatched{act: act, ch: e.queue.Submit(act.VMID, func() error { return e.execute(ctx, act) })})
}
for i, ch := range chans {
if err := <-ch; err != nil {
for _, d := range sent {
if err := <-d.ch; err != nil {
res.Failed++
res.Errors = append(res.Errors, err)
e.logger.Error("reconcile: action failed",
"vmid", actions[i].VMID, "kind", actions[i].Kind, "err", err)
"vmid", d.act.VMID, "kind", d.act.Kind, "err", err)
} else {
res.Executed++
e.logger.Info("reconcile: action applied",
"vmid", actions[i].VMID, "kind", actions[i].Kind, "reason", actions[i].Reason)
"vmid", d.act.VMID, "kind", d.act.Kind, "reason", d.act.Reason)
}
}
return res, nil
@@ -227,8 +255,13 @@ func (e *Engine) reconcileOnce(ctx context.Context) {
// nextOpID builds a per-attempt unique op id (kind-vmid-seq) for journal correlation.
//
// The seq suffix is produced by nextSeq, which atomically increments e.opSeq exactly
// once per call, so every call observes a distinct sequence number. The counter is
// bumped in one place only: a second, hand-rolled increment-and-return here would
// either be unreachable or consume an extra sequence number.
func (e *Engine) nextOpID(act Action) string {
	return string(act.Kind) + "-" + strconv.Itoa(act.VMID) + "-" + nextSeq(&e.opSeq)
}
// nextSeq bumps the counter behind p by one, atomically, and renders the new value in
// base 10. The result is the unique suffix that tells journal op ids apart across
// attempts.
func nextSeq(p *uint64) string {
	bumped := atomic.AddUint64(p, 1)
	return strconv.FormatUint(bumped, 10)
}
// append journals a lifecycle record, logging (never failing the op on) a journal I/O