v0.4.0: slice 4 Phase B — reversibility gate + signed-op consuming layer
The security core of slice 4: hub-supplied intent is no longer trusted for destructive change. The gate fronts the per-guest queue's executor, so every mutation passes it. Reuses internal/authz for all crypto (surface untouched).
- Classifier (doc 03 §4): benign vs destructive by provenance + data-bearingness, NOT by verb. Destroy/overwrite of customer data is destructive unless agent-internal provenance (same-journaled-txn create, or agent-tagged scratch) makes it benign — and that provenance is journal-recorded, NEVER hub-sourced. Unknown op class fails safe to destructive.
- Reversibility gate: benign -> allowed unsigned; destructive -> requires a verified, role-scoped, action-bound operator signature, else pending_signature and never executed. Every decision audited (signal, never the guard).
- Signed-op consuming layer over authz.Verifier.Verify (locked pipeline untouched): role-scoping (doc 04 §4 — recovery=rotation only, operational=ordinary destructive + planned rotation) + op-to-action binding (op+host+guest+params must match the gated action).
- Signed-job orchestration: idempotency dedupe by nonce + journal-wrapped execution via an injected DestructiveExecutor (nil this slice — inert).
- Crash recovery (Note 1): Engine.Recover consumes the journal InFlight() set at startup (resume-or-rollback) — covers an op that crashed after the POST and before its terminal record, which idempotency dedupe alone cannot. Added TaskStatusOnce to the GuestAPI seam. Wired into daemon startup.
- Note 2: memory comparison canonicalized to MiB (desiredMemoryMiB) so a non-MiB-aligned MemoryBytes converges in one pass, not perpetual drift.
- Daemon: builds the verifier from config signers (none = nil verifier, the common slice-4 state), the gate (+SlogAudit), runs Recover before mutating.
Adversarial matrix proven against the REAL authz.Verifier with in-test-minted SSHSIGs (framing replicated in reconcile's test binary; authz untouched, no signing added to the verify-only package): unsigned job + unsigned desired-state delta -> pending_signature; unknown signer/expired/replay-across-restart/wrong host -> typed authz rejections; wrong guest/op/params -> binding_mismatch; recovery key on ordinary destructive -> role_denied; hub-supplied scratch tag ignored -> refused; valid+role+target+fresh nonce -> accepted then replay rejected. Full module race-clean + vet-clean on the Linux build server. Inert this slice: no destructive deltas served until slice 10; the destructive path is classified, gated, and tested but not wired to live execution. CHECKPOINT: Phase B complete (slice 4 done). Awaiting validation. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -27,19 +27,23 @@ type Engine struct {
|
||||
journal *Journal
|
||||
provider DesiredProvider
|
||||
norm FieldNormalizers
|
||||
gate *Gate
|
||||
hostID string
|
||||
logger *slog.Logger
|
||||
|
||||
opSeq uint64 // atomic; makes each op id unique per attempt
|
||||
}
|
||||
|
||||
// EngineOptions configures a new Engine. Norm defaults to DefaultNormalizers, Logger
|
||||
// to a discard logger.
|
||||
// to a discard logger, Gate to a no-verifier gate (benign-allow, destructive-pending).
|
||||
type EngineOptions struct {
|
||||
API GuestAPI
|
||||
Queue *Queue
|
||||
Journal *Journal
|
||||
Provider DesiredProvider
|
||||
Norm FieldNormalizers
|
||||
Gate *Gate
|
||||
HostID string
|
||||
Logger *slog.Logger
|
||||
}
|
||||
|
||||
@@ -58,12 +62,20 @@ func NewEngine(opts EngineOptions) *Engine {
|
||||
if provider == nil {
|
||||
provider = EmptyProvider{}
|
||||
}
|
||||
gate := opts.Gate
|
||||
if gate == nil {
|
||||
// No verifier configured: benign actions pass, destructive are pending. This is
|
||||
// the common slice-4 daemon state (no signers pinned, no desired state).
|
||||
gate = NewGate(nil, opts.HostID, nil, logger)
|
||||
}
|
||||
return &Engine{
|
||||
api: opts.API,
|
||||
queue: opts.Queue,
|
||||
journal: opts.Journal,
|
||||
provider: provider,
|
||||
norm: norm,
|
||||
gate: gate,
|
||||
hostID: opts.HostID,
|
||||
logger: logger,
|
||||
}
|
||||
}
|
||||
@@ -97,23 +109,39 @@ func (e *Engine) Reconcile(ctx context.Context) (Result, error) {
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// Dispatch all actions onto the shared per-guest queue, then await each. Same-vmid
|
||||
// actions serialize in submit order; different vmids run concurrently.
|
||||
chans := make([]<-chan error, len(actions))
|
||||
// Every mutation passes the reversibility gate before the queue (doc 03 §4).
|
||||
// Reconcile only produces benign actions, so each is allowed unsigned — but the
|
||||
// gate is genuinely in the path: a destructive class here would be refused
|
||||
// (pending_signature) and never dispatched. A gate refusal counts as a failed
|
||||
// action (it should not happen for the benign reconcile set).
|
||||
type dispatched struct {
|
||||
act Action
|
||||
ch <-chan error
|
||||
}
|
||||
var sent []dispatched
|
||||
for i := range actions {
|
||||
act := actions[i]
|
||||
chans[i] = e.queue.Submit(act.VMID, func() error { return e.execute(ctx, act) })
|
||||
dec := e.gate.Authorize(intentForAction(e.hostID, act), nil)
|
||||
if !dec.Allowed {
|
||||
res.Failed++
|
||||
res.Errors = append(res.Errors, fmt.Errorf("reconcile: gate refused %s vmid %d: %s",
|
||||
act.Kind, act.VMID, dec.Reason))
|
||||
e.logger.Error("reconcile: gate refused a benign action (unexpected)",
|
||||
"vmid", act.VMID, "kind", act.Kind, "reason", dec.Reason)
|
||||
continue
|
||||
}
|
||||
sent = append(sent, dispatched{act: act, ch: e.queue.Submit(act.VMID, func() error { return e.execute(ctx, act) })})
|
||||
}
|
||||
for i, ch := range chans {
|
||||
if err := <-ch; err != nil {
|
||||
for _, d := range sent {
|
||||
if err := <-d.ch; err != nil {
|
||||
res.Failed++
|
||||
res.Errors = append(res.Errors, err)
|
||||
e.logger.Error("reconcile: action failed",
|
||||
"vmid", actions[i].VMID, "kind", actions[i].Kind, "err", err)
|
||||
"vmid", d.act.VMID, "kind", d.act.Kind, "err", err)
|
||||
} else {
|
||||
res.Executed++
|
||||
e.logger.Info("reconcile: action applied",
|
||||
"vmid", actions[i].VMID, "kind", actions[i].Kind, "reason", actions[i].Reason)
|
||||
"vmid", d.act.VMID, "kind", d.act.Kind, "reason", d.act.Reason)
|
||||
}
|
||||
}
|
||||
return res, nil
|
||||
@@ -227,8 +255,13 @@ func (e *Engine) reconcileOnce(ctx context.Context) {
|
||||
|
||||
// nextOpID builds a per-attempt unique op id (kind-vmid-seq) for journal correlation.
|
||||
func (e *Engine) nextOpID(act Action) string {
|
||||
n := atomic.AddUint64(&e.opSeq, 1)
|
||||
return string(act.Kind) + "-" + strconv.Itoa(act.VMID) + "-" + strconv.FormatUint(n, 10)
|
||||
return string(act.Kind) + "-" + strconv.Itoa(act.VMID) + "-" + nextSeq(&e.opSeq)
|
||||
}
|
||||
|
||||
// nextSeq bumps the counter behind p by one with an atomic add and renders the
// post-increment value in base 10. The result is the per-attempt suffix appended
// to journal op ids so that repeated attempts never share an id.
func nextSeq(p *uint64) string {
	next := atomic.AddUint64(p, 1)
	return strconv.FormatUint(next, 10)
}
|
||||
|
||||
// append journals a lifecycle record, logging (never failing the op on) a journal I/O
|
||||
|
||||
Reference in New Issue
Block a user