slice 10D (hub): DR capstone — recovery mode + re-enroll + directive serving (hub v0.11.0)
Recovery-mode toggle (global key, bounded auto-expiry) gates re-enroll + restore-directive serving. Re-enroll rotates the agent<->hub credential to the new box (old key revoked); returns the opaque escrow blobs + non-secret directive. Store gains recovery_mode_until + identity_blob + directive_json. Hub holds no usable secret + no Cloudflare write-power (operator-side rotation). Doc 03 §9: slice 10 CLOSED. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,183 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"gitea.dooplex.hu/admin/felhom-hub/internal/configgen"
|
||||
)
|
||||
|
||||
// Slice 10D — DR capstone, hub side. The hub ORCHESTRATES recovery (recovery-mode toggle, directive
|
||||
// serving, re-enroll + its OWN agent↔hub credential rotation) but holds **no usable secret and no
|
||||
// Cloudflare write-power**: the escrow blobs it serves are opaque (need R, which the hub never has),
|
||||
// and the destructive tunnel/PBS rotation is the operator's step from a trusted environment. A
|
||||
// compromised hub can at most hand out opaque blobs + revoke/rotate its own per-host credential.
|
||||
|
||||
// Bounds for how long an operator-armed recovery window stays open.
const (
	// defaultRecoveryTTL is the bounded auto-expiry used when the request names no TTL.
	defaultRecoveryTTL time.Duration = 30 * time.Minute
	// maxRecoveryTTL is the upper clamp applied to any requested TTL.
	maxRecoveryTTL time.Duration = 4 * time.Hour
)
|
||||
|
||||
// writeJSON serializes v as JSON and sends it on w with the given status code and a
// "application/json" Content-Type. The body is a single JSON value followed by a newline.
//
// v is marshalled BEFORE the status line is committed: if v cannot be encoded, the caller's
// status is not sent and the client receives a 500 "Internal error" instead of a success
// status with an empty body.
func writeJSON(w http.ResponseWriter, code int, v any) {
	payload, err := json.Marshal(v)
	if err != nil {
		http.Error(w, "Internal error", http.StatusInternalServerError)
		return
	}
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(code)
	// Best effort: the status is already on the wire, so a failed write (client went away)
	// cannot be reported to anyone.
	_, _ = w.Write(append(payload, '\n'))
}
|
||||
|
||||
// handleSetRecoveryMode arms recovery mode for a host (GLOBAL/operator key only). Body:
|
||||
// {"ttl_seconds": N} (clamped to [60, maxRecoveryTTL]; default 30m). The directive + re-enroll are
|
||||
// served ONLY while this is active; it auto-expires.
|
||||
func (h *Handler) handleSetRecoveryMode(w http.ResponseWriter, r *http.Request, hostID string) {
|
||||
if _, _, isGlobal, ok := h.checkAuthHost(r); !ok || !isGlobal {
|
||||
http.Error(w, "Forbidden: global key required", http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
if hostID == "" {
|
||||
http.Error(w, "Missing host_id", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
body, _ := io.ReadAll(io.LimitReader(r.Body, 1<<16))
|
||||
var req struct {
|
||||
TTLSeconds int `json:"ttl_seconds"`
|
||||
}
|
||||
json.Unmarshal(body, &req)
|
||||
ttl := defaultRecoveryTTL
|
||||
if req.TTLSeconds > 0 {
|
||||
ttl = time.Duration(req.TTLSeconds) * time.Second
|
||||
}
|
||||
if ttl < time.Minute {
|
||||
ttl = time.Minute
|
||||
}
|
||||
if ttl > maxRecoveryTTL {
|
||||
ttl = maxRecoveryTTL
|
||||
}
|
||||
until := time.Now().UTC().Add(ttl)
|
||||
if err := h.store.SetRecoveryMode(hostID, until); err == sql.ErrNoRows {
|
||||
http.Error(w, "Unknown host_id", http.StatusNotFound)
|
||||
return
|
||||
} else if err != nil {
|
||||
h.logger.Printf("[ERROR] set recovery mode for %s: %v", hostID, err)
|
||||
http.Error(w, "Internal error", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
h.logger.Printf("[INFO] DR: recovery mode ARMED for host %s until %s (auto-expires)", hostID, until.Format(time.RFC3339))
|
||||
writeJSON(w, http.StatusOK, map[string]any{"status": "ok", "recovery_mode_until": until.Format(time.RFC3339)})
|
||||
}
|
||||
|
||||
// handleClearRecoveryMode disables recovery mode (GLOBAL key).
|
||||
func (h *Handler) handleClearRecoveryMode(w http.ResponseWriter, r *http.Request, hostID string) {
|
||||
if _, _, isGlobal, ok := h.checkAuthHost(r); !ok || !isGlobal {
|
||||
http.Error(w, "Forbidden: global key required", http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
if err := h.store.ClearRecoveryMode(hostID); err != nil {
|
||||
h.logger.Printf("[ERROR] clear recovery mode for %s: %v", hostID, err)
|
||||
http.Error(w, "Internal error", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
h.logger.Printf("[INFO] DR: recovery mode DISABLED for host %s", hostID)
|
||||
writeJSON(w, http.StatusOK, map[string]any{"status": "ok"})
|
||||
}
|
||||
|
||||
// reEnrollResponse is the JSON payload shared by the re-enroll and restore-directive
// endpoints (slice 10D). Both escrow fields carry OPAQUE blobs, base64-encoded for transport.
type reEnrollResponse struct {
	// HostID is the host the payload was produced for.
	HostID string `json:"host_id"`

	// APIKeyRotated is set by the caller when the hub credential was rotated for this response.
	APIKeyRotated bool `json:"api_key_rotated"`

	// Directive is the non-secret DR directive, embedded verbatim as raw JSON.
	Directive json.RawMessage `json:"directive"`

	// KEscrowB64 is the opaque PBS-key escrow blob.
	KEscrowB64 string `json:"k_escrow_b64"`

	// IdentityEscrowB64 is the opaque identity escrow blob.
	IdentityEscrowB64 string `json:"identity_escrow_b64"`
}
|
||||
|
||||
// handleReEnroll is the re-enroll handshake (slice 10D.2). Gated ONLY on RECOVERY MODE (the lost box
|
||||
// has no key, so no old-key auth) — the operator armed recovery mode (operational gate) after
|
||||
// out-of-band validation. The new box posts a fresh api_key; the hub ROTATES the host's credential
|
||||
// to it (the old box's hub access is revoked instantly) and returns the DR directive + opaque blobs.
|
||||
// Without recovery mode → 403. The blobs are useless without R (zero-knowledge): even a wrongful
|
||||
// re-enroll within the window leaks nothing recoverable.
|
||||
func (h *Handler) handleReEnroll(w http.ResponseWriter, r *http.Request, hostID string) {
|
||||
if hostID == "" {
|
||||
http.Error(w, "Missing host_id", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
host, err := h.store.GetHost(hostID)
|
||||
if err != nil {
|
||||
http.Error(w, "Internal error", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
if host == nil {
|
||||
http.Error(w, "Unknown host_id", http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
// THE GATE: recovery mode must be active (operator-armed, not expired).
|
||||
if !host.InRecoveryMode(time.Now().UTC()) {
|
||||
h.logger.Printf("[WARN] DR: re-enroll REFUSED for %s — recovery mode not active", hostID)
|
||||
http.Error(w, "Forbidden: host not in recovery mode (operator must arm it)", http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
body, _ := io.ReadAll(io.LimitReader(r.Body, 1<<16))
|
||||
var req struct {
|
||||
NewAPIKey string `json:"new_api_key"`
|
||||
}
|
||||
if json.Unmarshal(body, &req) != nil || req.NewAPIKey == "" {
|
||||
// If the box did not supply one, mint it (still rotates the credential).
|
||||
req.NewAPIKey, _ = configgen.RandomHex(32)
|
||||
}
|
||||
// Rotate the agent↔hub credential to the new box — the old box's key is revoked here.
|
||||
if err := h.store.RotateHostAPIKey(hostID, req.NewAPIKey); err != nil {
|
||||
h.logger.Printf("[ERROR] re-enroll rotate key for %s: %v", hostID, err)
|
||||
http.Error(w, "Internal error", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
resp := reEnrollResponse{HostID: hostID, APIKeyRotated: true, Directive: json.RawMessage("{}")}
|
||||
if bundle, err := h.store.GetHostDRBundle(hostID); err == nil && bundle != nil {
|
||||
resp.KEscrowB64 = base64.StdEncoding.EncodeToString(bundle.KEscrowBlob)
|
||||
resp.IdentityEscrowB64 = base64.StdEncoding.EncodeToString(bundle.IdentityBlob)
|
||||
if bundle.DirectiveJSON != "" {
|
||||
resp.Directive = json.RawMessage(bundle.DirectiveJSON)
|
||||
}
|
||||
}
|
||||
h.logger.Printf("[INFO] DR: host %s RE-ENROLLED (hub credential rotated; old key revoked; directive served)", hostID)
|
||||
// The new key is returned so the box can use it; the operator sees the rotation in the response.
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(w).Encode(map[string]any{
|
||||
"host_id": hostID, "api_key_rotated": true, "new_api_key": req.NewAPIKey,
|
||||
"directive": resp.Directive, "k_escrow_b64": resp.KEscrowB64, "identity_escrow_b64": resp.IdentityEscrowB64,
|
||||
})
|
||||
}
|
||||
|
||||
// handleGetRestoreDirective serves the directive to an already-re-enrolled box (its rotated per-host
|
||||
// key), gated on recovery mode. Lets the box re-fetch without re-rotating.
|
||||
func (h *Handler) handleGetRestoreDirective(w http.ResponseWriter, r *http.Request, hostID string) {
|
||||
authHostID, _, isGlobal, ok := h.checkAuthHost(r)
|
||||
if !ok {
|
||||
http.Error(w, "Unauthorized", http.StatusUnauthorized)
|
||||
return
|
||||
}
|
||||
if !isGlobal && authHostID != hostID {
|
||||
http.Error(w, "Forbidden: host_id mismatch", http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
host, err := h.store.GetHost(hostID)
|
||||
if err != nil || host == nil {
|
||||
http.Error(w, "Unknown host_id", http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
if !host.InRecoveryMode(time.Now().UTC()) {
|
||||
http.Error(w, "Forbidden: host not in recovery mode", http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
resp := reEnrollResponse{HostID: hostID, Directive: json.RawMessage("{}")}
|
||||
if bundle, err := h.store.GetHostDRBundle(hostID); err == nil && bundle != nil {
|
||||
resp.KEscrowB64 = base64.StdEncoding.EncodeToString(bundle.KEscrowBlob)
|
||||
resp.IdentityEscrowB64 = base64.StdEncoding.EncodeToString(bundle.IdentityBlob)
|
||||
if bundle.DirectiveJSON != "" {
|
||||
resp.Directive = json.RawMessage(bundle.DirectiveJSON)
|
||||
}
|
||||
}
|
||||
writeJSON(w, http.StatusOK, resp)
|
||||
}
|
||||
Reference in New Issue
Block a user