Files
felhom-agent/internal/authz/verifier.go
T
admin f0fee7e193 feat(authz): operator signed-op verifier + durable nonce store (slice 2, v0.2.0)
internal/authz: production form of the Phase-4 SSHSIG signing primitive.

- Verifier.New/Verify with the LOCKED pipeline (namespace → allow-list by key
  material → crypto over RAW bytes → target → time → nonce LAST); each post-crypto
  stage rejects even with a valid sig; an invalid sig never burns a nonce.
- SSHSIG framing via x/crypto/ssh (no hand-rolled crypto); key-type-agnostic
  (ed25519 / sk-ssh-ed25519 / rsa / ecdsa via pub.Verify). Fixed namespace
  felhom-op-v1. Typed errors. OpBlob (fixed host_id/guest_id tags) + VerifiedOp.
- NonceStore: MemoryNonceStore + durable crash-safe FileNonceStore (fsync'd append
  log, replay-on-open, compaction, expiry-only pruning; survives restart).
- config.AuthzConfig (nonce path + pinned operational/recovery signer keys).
- Tests (14): real ssh-keygen fixture, per-stage rejection, nonce-not-burned,
  replay, persistence-across-restart, synthetic sk, byte-exactness.

Dep: golang.org/x/crypto v0.52.0 (declares go 1.25 — the Phase-4 doc's "Go 1.24.4 /
x/crypto v0.52.0" pairing doesn't build; build server upgraded to go1.26.0,
backward-compatible). Version 0.1.0 -> 0.2.0.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-08 15:23:02 +02:00

192 lines
6.6 KiB
Go

package authz
import (
"bytes"
"encoding/json"
"fmt"
"log/slog"
"time"
"golang.org/x/crypto/ssh"
)
// Namespace is the FIXED SSHSIG domain separator. It is a package constant, never
// caller-supplied (phase4 §2.2): a signature minted for any other namespace must
// not verify. Verify compares the namespace carried by the parsed signature
// against this value and rejects any mismatch with ErrNamespace.
const Namespace = "felhom-op-v1"
// DefaultClockSkew tolerates operator/host clock drift on the not-yet-valid check
// only (issued_at may be up to this far in the future). Expiry is NOT extended —
// the validity window stays an honest upper bound. New installs this value as a
// Verifier's initial ClockSkew.
const DefaultClockSkew = 2 * time.Minute
// KeyRole tags a pinned operator key (doc 04 §3 two-key model). The only values
// NewAllowedSigner accepts are RoleOperational and RoleRecovery.
type KeyRole string

const (
	// RoleOperational signs ordinary destructive ops (the "master stamp").
	RoleOperational KeyRole = "operational"
	// RoleRecovery is the cold key; authorizes ONLY key-rotation/break-glass ops.
	// Role-scoping is enforced by the consuming layer (slice 4), not here.
	RoleRecovery KeyRole = "recovery"
)
// AllowedSigner is one pinned operator public key. NewAllowedSigner builds one
// from an authorized_keys line; the Verifier selects a signer by comparing
// PublicKey.Marshal() bytes, never by KeyID.
type AllowedSigner struct {
	KeyID     string        // caller-assigned identifier; only used for the advisory key_id comparison in Verify
	Role      KeyRole       // RoleOperational or RoleRecovery when built via NewAllowedSigner
	PublicKey ssh.PublicKey // parsed; allow-list match is by PublicKey.Marshal()
	Comment   string        // from the authorized_keys line, if any
}
// NewAllowedSigner turns one authorized_keys-format line ("ssh-ed25519 AAAA… [comment]"
// or "sk-ssh-ed25519@openssh.com AAAA… …") into an AllowedSigner carrying the
// supplied keyID and role.
//
// It returns an error wrapping the parser's error when the line cannot be parsed,
// and an error when role is neither RoleOperational nor RoleRecovery. The line is
// parsed first, so a line that is unparsable AND has a bad role reports the parse
// failure. On error the returned AllowedSigner is the zero value.
func NewAllowedSigner(keyID string, role KeyRole, authorizedKeyLine string) (AllowedSigner, error) {
	// Only the key and its comment are kept; the options and any remaining
	// input returned by the parser are discarded.
	key, note, _, _, parseErr := ssh.ParseAuthorizedKey([]byte(authorizedKeyLine))
	if parseErr != nil {
		return AllowedSigner{}, fmt.Errorf("authz: parsing pinned key %q: %w", keyID, parseErr)
	}

	switch role {
	case RoleOperational, RoleRecovery:
		// Recognised role; fall through to construction.
	default:
		return AllowedSigner{}, fmt.Errorf("authz: pinned key %q has invalid role %q", keyID, role)
	}

	signer := AllowedSigner{
		KeyID:     keyID,
		Role:      role,
		PublicKey: key,
		Comment:   note,
	}
	return signer, nil
}
// NonceStore records seen nonces for anti-replay. SeenOrRecord reports whether the
// nonce was already recorded; if not, it records it (durably, in the host impl)
// before returning false. See noncestore.go.
//
// Verify calls it as its final stage with the op's nonce and expires_at, and
// treats a true result as a replay (ErrReplay).
type NonceStore interface {
	// SeenOrRecord returns true when nonce has been recorded before. exp is the
	// op's expiry as passed by Verify — presumably the instant after which the
	// entry may be pruned; confirm against the implementations in noncestore.go.
	SeenOrRecord(nonce string, exp time.Time) (seen bool)
}
// Verifier authenticates operator-signed destructive ops. Construct with New,
// which fills in the unexported fields and the default ClockSkew.
type Verifier struct {
	signers []AllowedSigner // pinned allow-list scanned by matchSigner
	store   NonceStore      // anti-replay store consulted as the last Verify stage
	hostID  string          // this box's host id; the blob's target host_id must equal it

	// ClockSkew tolerance for the not-yet-valid check (default DefaultClockSkew).
	ClockSkew time.Duration

	// Logger, if set, emits a warning when a blob's advisory key_id disagrees with
	// the matched signer. Never affects the verdict.
	Logger *slog.Logger

	now func() time.Time // injectable for tests
}
// New builds a Verifier over the pinned signer set, a nonce store, and this box's
// host id. signers is treated as a set (single signer today; quorum is just sizing).
//
// The signer slice is copied, so mutating the caller's slice after New returns
// cannot add, remove, or swap entries in the verifier's allow-list. store is
// consulted by Verify's final (nonce) stage and hostID is what a blob's target
// host_id must equal. ClockSkew starts at DefaultClockSkew and the clock is the
// UTC wall clock.
func New(signers []AllowedSigner, store NonceStore, hostID string) *Verifier {
	// Defensive copy: the allow-list is security state and must not alias
	// caller-owned memory.
	pinned := append([]AllowedSigner(nil), signers...)
	return &Verifier{
		signers:   pinned,
		store:     store,
		hostID:    hostID,
		ClockSkew: DefaultClockSkew,
		now:       func() time.Time { return time.Now().UTC() },
	}
}
// Verify runs the LOCKED pipeline (phase4 §4 / doc 04 §2.3) and returns the
// authenticated op. Order is load-bearing and each post-crypto stage rejects even
// with an otherwise-valid signature:
//
//	parse armor → namespace → parse pubkey → allow-list (by key MATERIAL, not
//	key_id) → crypto verify (over the RAW received blob bytes) → parse blob →
//	target → time window → nonce SeenOrRecord (LAST)
//
// The nonce is recorded last, so an invalid signature can never consume a nonce
// (DoS / replay-priming safe). Errors wrap the typed sentinels in errors.go.
//
// blob is the op payload exactly as received; sigArmored is the armored SSHSIG
// over it. On success the returned VerifiedOp carries the decoded op fields, the
// matched signer, and whether the blob's advisory key_id agreed with that signer.
// On any failure the result is nil and the error identifies the rejecting stage.
//
// A Verifier that was not built with New fails closed instead of panicking: a
// missing clock falls back to the UTC wall clock, and a missing nonce store is
// reported as an error at the nonce stage (after every earlier stage has run, so
// earlier rejections are unchanged).
func (v *Verifier) Verify(blob, sigArmored []byte) (*VerifiedOp, error) {
	// 1. parse armor
	sb, err := parseArmoredSSHSIG(sigArmored)
	if err != nil {
		return nil, err
	}

	// 2. namespace (fixed domain separator)
	if sb.Namespace != Namespace {
		return nil, fmt.Errorf("%w: got %q want %q", ErrNamespace, sb.Namespace, Namespace)
	}

	// 3. parse the embedded public key
	pub, err := ssh.ParsePublicKey([]byte(sb.PublicKey))
	if err != nil {
		return nil, fmt.Errorf("%w: parsing signature public key: %v", ErrMalformed, err)
	}

	// 4. allow-list match by KEY MATERIAL (pub.Marshal equality) — NOT by key_id
	matched, ok := v.matchSigner(pub)
	if !ok {
		return nil, ErrUnknownSigner
	}

	// 5. crypto verify over the RAW received bytes (never re-serialized)
	signed, err := signedData(sb, blob)
	if err != nil {
		return nil, err
	}
	var inner ssh.Signature
	if err := ssh.Unmarshal([]byte(sb.Signature), &inner); err != nil {
		return nil, fmt.Errorf("%w: %v", ErrMalformed, err)
	}
	if err := pub.Verify(signed, &inner); err != nil { // dispatches on the key's algorithm
		return nil, fmt.Errorf("%w: %v", ErrBadSignature, err)
	}

	// 6. parse the (now authenticated) blob bytes
	var op OpBlob
	if err := json.Unmarshal(blob, &op); err != nil {
		return nil, fmt.Errorf("%w: decoding op blob: %v", ErrMalformed, err)
	}

	// 7. target binding — host must be this box. guest_id is surfaced, not matched
	// here (the verifier doesn't enumerate guests; the caller routes by it).
	if op.Target.HostID != v.hostID {
		return nil, fmt.Errorf("%w: blob host_id=%q this=%q", ErrTarget, op.Target.HostID, v.hostID)
	}

	// 8. time window (clock-skew tolerance on not-yet-valid only). A Verifier
	// built without New has no injected clock; use the same UTC wall clock New
	// would have installed rather than calling a nil func.
	var now time.Time
	if v.now != nil {
		now = v.now()
	} else {
		now = time.Now().UTC()
	}
	if now.Before(op.IssuedAt.Add(-v.ClockSkew)) {
		return nil, fmt.Errorf("%w: issued_at=%s now=%s", ErrNotYetValid, op.IssuedAt, now)
	}
	if now.After(op.ExpiresAt) {
		return nil, fmt.Errorf("%w: expires_at=%s now=%s", ErrExpired, op.ExpiresAt, now)
	}

	// 9. nonce LAST — only now is it durably recorded. Without a store there is
	// no replay protection, so refuse the op instead of dereferencing nil.
	if v.store == nil {
		return nil, fmt.Errorf("authz: verifier has no nonce store configured")
	}
	if v.store.SeenOrRecord(op.Nonce, op.ExpiresAt) {
		return nil, fmt.Errorf("%w: nonce %s", ErrReplay, op.Nonce)
	}

	// advisory key_id audit (never a verdict input)
	keyIDMatches := op.KeyID == matched.KeyID
	if !keyIDMatches && v.Logger != nil {
		v.Logger.Warn("authz: blob key_id does not match the matched signer (advisory)",
			"blob_key_id", op.KeyID, "matched_signer", matched.KeyID)
	}

	return &VerifiedOp{
		Op:                 op.Op,
		HostID:             op.Target.HostID,
		GuestID:            op.Target.GuestID,
		Params:             op.Params,
		Nonce:              op.Nonce,
		IssuedAt:           op.IssuedAt,
		ExpiresAt:          op.ExpiresAt,
		KeyID:              op.KeyID,
		Signer:             matched,
		KeyIDMatchesSigner: keyIDMatches,
	}, nil
}
// matchSigner looks up pub in the pinned allow-list by key material: a signer
// matches when its PublicKey.Marshal() bytes equal pub.Marshal(). Entries with a
// nil PublicKey are skipped. It returns the first matching signer and true, or
// the zero AllowedSigner and false when none matches.
func (v *Verifier) matchSigner(pub ssh.PublicKey) (AllowedSigner, bool) {
	want := pub.Marshal()
	for i := range v.signers {
		candidate := v.signers[i]
		if candidate.PublicKey == nil {
			continue
		}
		if bytes.Equal(candidate.PublicKey.Marshal(), want) {
			return candidate, true
		}
	}
	return AllowedSigner{}, false
}