feat(hub): host-domain ingest — tables + /host-report + per-host auth + host dead-man's-switch (v0.7.0, slice 3)
Purely additive; the controller path (reports/customer_configs/checkAuthCustomer/existing checkers) is untouched. Cutover remains slice 10.

- store: new hosts/guests/host_reports tables (full schema incl. columns INERT until slice 10, so no later ALTER); GetHostByAPIKey/GetHost/ListHosts/UpsertHost/SaveHostReport/UpsertGuestFromReport (preserves inert cols)/GetHostStaleness/GuestID; Prune also prunes host_reports.
- api: checkAuthHost (sibling of checkAuthCustomer); POST /host-report (per-host Bearer, 4MiB, denorm + guest upsert, control envelope); POST /admin/hosts (PROVISIONAL global-key host mint); host_* event types registered.
- monitor: HostStalenessChecker sibling over host_reports (host_stale/down/recovered), wired on the existing 60s ticker; controller checkers unchanged.
- tests (hermetic): store intent/inert-column preservation, auth, ingest (envelope+denorm, mismatch/unknown/blocked/oversize), admin mint round-trip, host staleness transitions.

CHANGELOG v0.7.0. Contract matches the agent host-report spec field-for-field.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+229
-9
@@ -89,6 +89,30 @@ func (h *Handler) checkAuthCustomer(r *http.Request) (customerID string, isGloba
|
||||
return cfg.CustomerID, false, true
|
||||
}
|
||||
|
||||
// checkAuthHost resolves a Bearer token to a HOST identity (the agent's auth
|
||||
// path). It is a sibling of checkAuthCustomer — the controller path is unchanged.
|
||||
// - global key -> ("", "", true, true) caller trusts body.host_id
|
||||
// - per-host key -> (hostID, customerID, false, true)
|
||||
// - failure -> ("", "", false, false)
|
||||
func (h *Handler) checkAuthHost(r *http.Request) (hostID, customerID string, isGlobal, ok bool) {
|
||||
auth := r.Header.Get("Authorization")
|
||||
if !strings.HasPrefix(auth, "Bearer ") {
|
||||
return "", "", false, false
|
||||
}
|
||||
token := strings.TrimPrefix(auth, "Bearer ")
|
||||
|
||||
// Global key first (same constant-time compare as checkAuthCustomer).
|
||||
if h.apiKey != "" && subtle.ConstantTimeCompare([]byte(token), []byte(h.apiKey)) == 1 {
|
||||
return "", "", true, true
|
||||
}
|
||||
|
||||
host, err := h.store.GetHostByAPIKey(token)
|
||||
if err != nil || host == nil {
|
||||
return "", "", false, false
|
||||
}
|
||||
return host.HostID, host.CustomerID, false, true
|
||||
}
|
||||
|
||||
// ServeHTTP routes API requests.
|
||||
func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
path := strings.TrimPrefix(r.URL.Path, "/api/v1")
|
||||
@@ -96,6 +120,10 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case r.Method == http.MethodPost && path == "/report":
|
||||
h.handleReport(w, r)
|
||||
case r.Method == http.MethodPost && path == "/host-report":
|
||||
h.handleHostReport(w, r)
|
||||
case r.Method == http.MethodPost && path == "/admin/hosts":
|
||||
h.handleAdminCreateHost(w, r)
|
||||
case r.Method == http.MethodPost && path == "/event":
|
||||
h.handleEvent(w, r)
|
||||
case r.Method == http.MethodPost && path == "/notify":
|
||||
@@ -194,6 +222,194 @@ func (h *Handler) handleReport(w http.ResponseWriter, r *http.Request) {
|
||||
json.NewEncoder(w).Encode(resp)
|
||||
}
|
||||
|
||||
// defaultHostPollSeconds is the poll cadence, in seconds, that the hub returns
// to every agent in this slice: 15 minutes. There is no per-host override UI
// yet — that is a later slice.
const defaultHostPollSeconds = 15 * 60
|
||||
|
||||
// hostReportPayload models only the parts of the agent host-report (slice-3
// contract, §3 / agent spec §4) that the hub reads for denormalisation and for
// guest reality. Keys with no matching field here (storage_targets, backups,
// restore_tests, pbs_snapshots, audit_tail) are ignored, so an empty or absent
// collection is accepted without error.
type hostReportPayload struct {
	// Identity of the reporting host and the agent build that produced the report.
	HostID       string `json:"host_id"`
	AgentVersion string `json:"agent_version"`

	// Host holds the host-level utilisation figures.
	Host struct {
		CPUPercent    float64 `json:"cpu_percent"`
		MemoryPercent float64 `json:"memory_percent"`
		DiskPercent   float64 `json:"disk_percent"`
	} `json:"host"`

	// Guests has one entry per guest the agent reported.
	Guests []struct {
		VMID              int    `json:"vmid"`
		Name              string `json:"name"`
		Status            string `json:"status"`
		ControllerVersion string `json:"controller_version"`
	} `json:"guests"`

	// Cloudflared carries the reported cloudflared status string.
	Cloudflared struct {
		Status string `json:"status"`
	} `json:"cloudflared"`
}
|
||||
|
||||
// handleHostReport ingests the agent's host-report (the heartbeat) and returns the
|
||||
// control envelope (agent spec §5).
|
||||
func (h *Handler) handleHostReport(w http.ResponseWriter, r *http.Request) {
|
||||
hostID, custID, isGlobal, ok := h.checkAuthHost(r)
|
||||
if !ok {
|
||||
http.Error(w, "Unauthorized", http.StatusUnauthorized)
|
||||
return
|
||||
}
|
||||
|
||||
// 4 MiB: host reports carry the full guest list + future storage/backup arrays;
|
||||
// the controller path's 1 MiB is too tight here.
|
||||
body, err := io.ReadAll(io.LimitReader(r.Body, 4<<20))
|
||||
if err != nil {
|
||||
http.Error(w, "Bad request", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
var rep hostReportPayload
|
||||
if err := json.Unmarshal(body, &rep); err != nil || rep.HostID == "" {
|
||||
http.Error(w, "Invalid payload: host_id required", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
if isGlobal {
|
||||
// Global-key bootstrap: trust body.host_id but require the host to exist
|
||||
// (it must be minted first) and resolve its customer from the row.
|
||||
host, err := h.store.GetHost(rep.HostID)
|
||||
if err != nil {
|
||||
h.logger.Printf("[ERROR] host lookup failed for %s: %v", rep.HostID, err)
|
||||
http.Error(w, "Internal error", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
if host == nil {
|
||||
http.Error(w, "Unknown host_id (mint via /admin/hosts first)", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
hostID, custID = rep.HostID, host.CustomerID
|
||||
} else if rep.HostID != hostID {
|
||||
http.Error(w, "Forbidden: host_id mismatch", http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
|
||||
running := 0
|
||||
for _, g := range rep.Guests {
|
||||
if g.Status == "running" {
|
||||
running++
|
||||
}
|
||||
}
|
||||
denorm := store.HostReportDenorm{
|
||||
AgentVersion: rep.AgentVersion,
|
||||
CPUPercent: rep.Host.CPUPercent,
|
||||
MemoryPercent: rep.Host.MemoryPercent,
|
||||
DiskPercent: rep.Host.DiskPercent,
|
||||
GuestTotal: len(rep.Guests),
|
||||
GuestRunning: running,
|
||||
CloudflaredStatus: rep.Cloudflared.Status,
|
||||
}
|
||||
if err := h.store.SaveHostReport(hostID, custID, body, denorm); err != nil {
|
||||
h.logger.Printf("[ERROR] Failed to save host-report from %s: %v", hostID, err)
|
||||
http.Error(w, "Internal error", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
for _, g := range rep.Guests {
|
||||
status := g.Status
|
||||
if status == "" {
|
||||
status = "unknown"
|
||||
}
|
||||
guest := &store.Guest{
|
||||
GuestID: store.GuestID(hostID, g.VMID),
|
||||
CustomerID: custID,
|
||||
HostID: hostID,
|
||||
VMID: g.VMID,
|
||||
DisplayName: g.Name,
|
||||
Status: status,
|
||||
ControllerVersion: g.ControllerVersion,
|
||||
}
|
||||
if err := h.store.UpsertGuestFromReport(guest); err != nil {
|
||||
// A guest upsert failure must not drop the whole report (liveness).
|
||||
h.logger.Printf("[WARN] Failed to upsert guest %s: %v", guest.GuestID, err)
|
||||
}
|
||||
}
|
||||
|
||||
h.logger.Printf("[INFO] host-report from %s (%d guests, %d bytes)", hostID, len(rep.Guests), len(body))
|
||||
|
||||
blocked := false
|
||||
if cc, err := h.store.GetCustomerConfig(custID); err == nil && cc != nil && cc.Status == "blocked" {
|
||||
blocked = true
|
||||
}
|
||||
resp := map[string]interface{}{
|
||||
"status": "ok",
|
||||
"poll_interval_seconds": defaultHostPollSeconds,
|
||||
"blocked": blocked,
|
||||
"desired_generation": 0, // reserved (slice 4)
|
||||
"has_signed_ops": false, // reserved (slice 4)
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(w).Encode(resp)
|
||||
}
|
||||
|
||||
// handleAdminCreateHost mints a host identity (host_id + per-host api_key).
|
||||
//
|
||||
// PROVISIONAL (slice-3 bootstrap): global-key only, so the demo agent can
|
||||
// authenticate before enrollment (slices 7–8) exists. Enrollment will mint host
|
||||
// identity + pin signing keys; this endpoint should be removed/locked down then
|
||||
// (tracked under doc 05 §11 auth-tightening at cutover).
|
||||
func (h *Handler) handleAdminCreateHost(w http.ResponseWriter, r *http.Request) {
|
||||
_, _, isGlobal, ok := h.checkAuthHost(r)
|
||||
if !ok || !isGlobal {
|
||||
http.Error(w, "Forbidden: global key required", http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
body, err := io.ReadAll(io.LimitReader(r.Body, 1<<20))
|
||||
if err != nil {
|
||||
http.Error(w, "Bad request", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
var req struct {
|
||||
CustomerID string `json:"customer_id"`
|
||||
HostID string `json:"host_id"`
|
||||
DisplayName string `json:"display_name"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &req); err != nil || req.CustomerID == "" {
|
||||
http.Error(w, "Invalid payload: customer_id required", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
cc, err := h.store.GetCustomerConfig(req.CustomerID)
|
||||
if err != nil {
|
||||
http.Error(w, "Internal error", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
if cc == nil {
|
||||
http.Error(w, "Unknown customer_id", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
hostID := req.HostID
|
||||
if hostID == "" {
|
||||
sfx, err := configgen.RandomHex(3) // 6 hex chars — human-legible for the demo
|
||||
if err != nil {
|
||||
http.Error(w, "Internal error", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
hostID = req.CustomerID + "-" + sfx
|
||||
}
|
||||
apiKey, err := configgen.RandomHex(32)
|
||||
if err != nil {
|
||||
http.Error(w, "Internal error", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
if err := h.store.UpsertHost(&store.Host{HostID: hostID, CustomerID: req.CustomerID, APIKey: apiKey}); err != nil {
|
||||
h.logger.Printf("[ERROR] Failed to mint host for %s: %v", req.CustomerID, err)
|
||||
http.Error(w, "Internal error", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
h.logger.Printf("[INFO] provisional host mint: %s (customer %s)", hostID, req.CustomerID)
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusCreated)
|
||||
json.NewEncoder(w).Encode(map[string]string{"host_id": hostID, "api_key": apiKey})
|
||||
}
|
||||
|
||||
// allowedEventTypes lists all valid event_type values the Hub accepts.
|
||||
var allowedEventTypes = map[string]bool{
|
||||
// Controller-pushed events
|
||||
@@ -219,11 +435,15 @@ var allowedEventTypes = map[string]bool{
|
||||
"disaster_recovery_started": true,
|
||||
"disaster_recovery_completed": true,
|
||||
// Hub-generated events
|
||||
"node_stale": true,
|
||||
"node_down": true,
|
||||
"node_recovered": true,
|
||||
"expected_backup_missed": true,
|
||||
"expected_dbdump_missed": true,
|
||||
"node_stale": true,
|
||||
"node_down": true,
|
||||
"node_recovered": true,
|
||||
// Hub-generated host-domain events (v0.7.0, slice 3)
|
||||
"host_stale": true,
|
||||
"host_down": true,
|
||||
"host_recovered": true,
|
||||
"expected_backup_missed": true,
|
||||
"expected_dbdump_missed": true,
|
||||
// Special
|
||||
"test": true,
|
||||
}
|
||||
@@ -686,10 +906,10 @@ func (h *Handler) handleRecovery(w http.ResponseWriter, r *http.Request, custome
|
||||
}
|
||||
|
||||
resp := struct {
|
||||
CustomerID string `json:"customer_id"`
|
||||
ConfigYAML string `json:"config_yaml"`
|
||||
InfraBackup json.RawMessage `json:"infra_backup"`
|
||||
HasInfraBackup bool `json:"has_infra_backup"`
|
||||
CustomerID string `json:"customer_id"`
|
||||
ConfigYAML string `json:"config_yaml"`
|
||||
InfraBackup json.RawMessage `json:"infra_backup"`
|
||||
HasInfraBackup bool `json:"has_infra_backup"`
|
||||
BackupVersions []store.InfraBackupVersion `json:"backup_versions,omitempty"`
|
||||
}{
|
||||
CustomerID: customerID,
|
||||
|
||||
Reference in New Issue
Block a user