feat(hub): host-domain ingest — tables + /host-report + per-host auth + host dead-man's-switch (v0.7.0, slice 3)

Purely additive; the controller path (reports/customer_configs/checkAuthCustomer/
existing checkers) is untouched. Cutover remains slice 10.

- store: new hosts/guests/host_reports tables (full schema incl. columns INERT
  until slice 10, so no later ALTER); GetHostByAPIKey/GetHost/ListHosts/UpsertHost/
  SaveHostReport/UpsertGuestFromReport (preserves inert cols)/GetHostStaleness/
  GuestID; Prune also prunes host_reports.
- api: checkAuthHost (sibling of checkAuthCustomer); POST /host-report (per-host
  Bearer, 4MiB, denorm + guest upsert, control envelope); POST /admin/hosts
  (PROVISIONAL global-key host mint); host_* event types registered.
- monitor: HostStalenessChecker sibling over host_reports (host_stale/down/
  recovered), wired on the existing 60s ticker; controller checkers unchanged.
- tests (hermetic): store intent/inert-column preservation, auth, ingest
  (envelope+denorm, mismatch/unknown/blocked/oversize), admin mint round-trip,
  host staleness transitions.

CHANGELOG v0.7.0. Contract matches the agent host-report spec field-for-field.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-08 16:36:16 +02:00
parent 0d832def7b
commit 7c0c75457f
12 changed files with 1204 additions and 38 deletions
+122
View File
@@ -0,0 +1,122 @@
package store
import (
"io"
"log"
"path/filepath"
"testing"
)
// newTestStore opens a fresh Store backed by a "test.db" file inside the test's
// temporary directory, with a logger that writes to io.Discard. A failure from
// New aborts the test; the store is closed through t.Cleanup.
func newTestStore(t *testing.T) *Store {
	t.Helper()

	dbPath := filepath.Join(t.TempDir(), "test.db")
	quiet := log.New(io.Discard, "", 0)

	st, err := New(dbPath, quiet)
	if err != nil {
		t.Fatalf("store.New: %v", err)
	}
	t.Cleanup(func() { st.Close() })
	return st
}
// TestGuestID pins the interim guest id format: host id, a slash, then the vmid.
func TestGuestID(t *testing.T) {
	const want = "demo-host-01/100"

	got := GuestID("demo-host-01", 100)
	if got == want {
		return
	}
	t.Errorf("GuestID = %q", got)
}
// TestUpsertHost_AndLookup mints a host and checks that it can be read back by
// id and by API key, and that an unknown API key yields (nil, nil).
func TestUpsertHost_AndLookup(t *testing.T) {
	st := newTestStore(t)

	minted := &Host{HostID: "h1", CustomerID: "c1", APIKey: "k1"}
	if err := st.UpsertHost(minted); err != nil {
		t.Fatalf("UpsertHost: %v", err)
	}

	// By id: identity round-trips, desired_json is the "{}" default, and a
	// host that has never reported has no LastReportAt.
	got, err := st.GetHost("h1")
	if err != nil || got == nil {
		t.Fatalf("GetHost: %v / %v", got, err)
	}
	identityOK := got.CustomerID == "c1" && got.APIKey == "k1"
	defaultsOK := got.DesiredJSON == "{}" && got.LastReportAt == nil
	if !identityOK || !defaultsOK {
		t.Errorf("host = %+v", got)
	}

	// By API key: hit.
	hit, err := st.GetHostByAPIKey("k1")
	if !(err == nil && hit != nil && hit.HostID == "h1") {
		t.Errorf("GetHostByAPIKey hit = %+v / %v", hit, err)
	}

	// By API key: a miss is not an error.
	miss, err := st.GetHostByAPIKey("nope")
	if !(err == nil && miss == nil) {
		t.Errorf("GetHostByAPIKey miss = %+v / %v (want nil,nil)", miss, err)
	}
}
// TestSaveHostReport_BumpsRealityPreservesIntent verifies that saving a report
// updates the host's reality columns (agent_version / last_report_at), leaves the
// operator-owned intent columns untouched, and stores exactly one host_reports row.
func TestSaveHostReport_BumpsRealityPreservesIntent(t *testing.T) {
	s := newTestStore(t)
	if err := s.UpsertHost(&Host{HostID: "h1", CustomerID: "c1", APIKey: "k1"}); err != nil {
		t.Fatal(err)
	}
	// Operator-owned intent columns (inert this slice) set out-of-band.
	if _, err := s.db.Exec(`UPDATE hosts SET desired_json='{"want":1}', desired_generation=7 WHERE host_id='h1'`); err != nil {
		t.Fatal(err)
	}

	denorm := HostReportDenorm{AgentVersion: "0.3.0", CPUPercent: 3.2, MemoryPercent: 25, DiskPercent: 19, GuestTotal: 2, GuestRunning: 1, CloudflaredStatus: "active"}
	if err := s.SaveHostReport("h1", "c1", []byte(`{"host_id":"h1"}`), denorm); err != nil {
		t.Fatalf("SaveHostReport: %v", err)
	}

	// Stop here on a lookup failure: the assertions below dereference h.
	h, err := s.GetHost("h1")
	if err != nil || h == nil {
		t.Fatalf("GetHost: %v / %v", h, err)
	}
	if h.AgentVersion != "0.3.0" || h.LastReportAt == nil {
		t.Errorf("reality not bumped: %+v", h)
	}
	if h.DesiredJSON != `{"want":1}` || h.DesiredGeneration != 7 {
		t.Errorf("a report must NOT clobber intent columns: desired_json=%q gen=%d", h.DesiredJSON, h.DesiredGeneration)
	}

	// A failed count query must surface as such, not as "rows = 0".
	var n int
	if err := s.db.QueryRow(`SELECT COUNT(*) FROM host_reports WHERE host_id='h1'`).Scan(&n); err != nil {
		t.Fatalf("count host_reports: %v", err)
	}
	if n != 1 {
		t.Errorf("host_reports rows = %d, want 1", n)
	}
}
// TestUpsertGuestFromReport_PreservesInertColumns checks that a second report
// upsert for the same guest updates status and display name while leaving
// api_key and desired_spec_json exactly as they were set out-of-band.
func TestUpsertGuestFromReport_PreservesInertColumns(t *testing.T) {
	st := newTestStore(t)
	gid := GuestID("h1", 100)

	// report upserts the guest as a host-report would, varying only reality.
	report := func(name, status string) {
		g := &Guest{GuestID: gid, CustomerID: "c1", HostID: "h1", VMID: 100, DisplayName: name, Status: status}
		if err := st.UpsertGuestFromReport(g); err != nil {
			t.Fatal(err)
		}
	}

	report("acme", "running")

	// Slice-10 columns set out-of-band; a report upsert must not touch them.
	_, err := st.db.Exec(`UPDATE guests SET api_key='controllerkey', desired_spec_json='{"cores":4}' WHERE guest_id=?`, gid)
	if err != nil {
		t.Fatal(err)
	}

	// A later report changes reality (status/name).
	report("acme-renamed", "stopped")

	var got struct{ apiKey, desiredSpec, status, name string }
	row := st.db.QueryRow(`SELECT api_key, desired_spec_json, status, display_name FROM guests WHERE guest_id=?`, gid)
	if err := row.Scan(&got.apiKey, &got.desiredSpec, &got.status, &got.name); err != nil {
		t.Fatal(err)
	}

	inertKept := got.apiKey == "controllerkey" && got.desiredSpec == `{"cores":4}`
	if !inertKept {
		t.Errorf("inert columns clobbered: api_key=%q desired_spec_json=%q", got.apiKey, got.desiredSpec)
	}
	realityUpdated := got.status == "stopped" && got.name == "acme-renamed"
	if !realityUpdated {
		t.Errorf("reality not updated: status=%q name=%q", got.status, got.name)
	}
}
// TestGetHostStaleness_SkipsNeverReported verifies that a host with no report
// is absent from GetHostStaleness and appears once it has reported.
func TestGetHostStaleness_SkipsNeverReported(t *testing.T) {
	s := newTestStore(t)
	if err := s.UpsertHost(&Host{HostID: "h1", CustomerID: "c1", APIKey: "k1"}); err != nil {
		t.Fatalf("UpsertHost: %v", err)
	}

	rows, err := s.GetHostStaleness()
	if err != nil {
		t.Fatal(err)
	}
	if len(rows) != 0 {
		t.Errorf("never-reported host should be skipped, got %d rows", len(rows))
	}

	// Setup and query errors are checked so a store failure is reported as
	// itself rather than as a confusing row-count mismatch.
	if err := s.SaveHostReport("h1", "c1", []byte(`{}`), HostReportDenorm{}); err != nil {
		t.Fatalf("SaveHostReport: %v", err)
	}
	rows, err = s.GetHostStaleness()
	if err != nil {
		t.Fatal(err)
	}
	if len(rows) != 1 || rows[0].HostID != "h1" {
		t.Errorf("after a report expected 1 row, got %+v", rows)
	}
}
+277 -16
View File
@@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"log"
"strconv"
"time"
_ "modernc.org/sqlite"
@@ -18,18 +19,18 @@ type Store struct {
// CustomerSummary holds the latest status for a customer (for dashboard).
type CustomerSummary struct {
CustomerID string
CustomerName string
ControllerVersion string
ReceivedAt time.Time
HealthStatus string
CPUPercent float64
MemoryPercent float64
ContainerTotal int
ContainerRunning int
CustomerID string
CustomerName string
ControllerVersion string
ReceivedAt time.Time
HealthStatus string
CPUPercent float64
MemoryPercent float64
ContainerTotal int
ContainerRunning int
BackupLastSnapshot *time.Time
ReportJSON string
ControllerURL string
ReportJSON string
ControllerURL string
// Computed fields (not stored)
TimeSinceReport time.Duration
@@ -216,6 +217,63 @@ func (s *Store) migrate() error {
WHERE NOT EXISTS (SELECT 1 FROM infra_backup_versions
WHERE infra_backup_versions.customer_id = infra_backups.customer_id)`)
// v0.7.0: host-domain (slice 3). Purely additive — the controller path
// (reports/customer_configs) is untouched; the schema cutover is slice 10.
// Columns marked INERT exist now so slice 10 needs no ALTER; nothing reads or
// writes them this slice.
_, err = s.db.Exec(`
CREATE TABLE IF NOT EXISTS hosts (
host_id TEXT PRIMARY KEY,
customer_id TEXT NOT NULL,
api_key TEXT NOT NULL,
agent_version TEXT NOT NULL DEFAULT '',
last_report_at DATETIME,
desired_json TEXT NOT NULL DEFAULT '{}',
desired_generation INTEGER NOT NULL DEFAULT 0,
dr_record_json TEXT NOT NULL DEFAULT '{}',
created_at DATETIME NOT NULL DEFAULT (datetime('now')),
updated_at DATETIME NOT NULL DEFAULT (datetime('now'))
);
CREATE INDEX IF NOT EXISTS idx_hosts_customer ON hosts(customer_id);
CREATE TABLE IF NOT EXISTS guests (
guest_id TEXT PRIMARY KEY,
customer_id TEXT NOT NULL,
host_id TEXT NOT NULL,
vmid INTEGER NOT NULL,
display_name TEXT NOT NULL DEFAULT '',
status TEXT NOT NULL DEFAULT 'unknown',
controller_version TEXT NOT NULL DEFAULT '',
last_seen_at DATETIME,
api_key TEXT NOT NULL DEFAULT '',
desired_spec_json TEXT NOT NULL DEFAULT '{}',
created_at DATETIME NOT NULL DEFAULT (datetime('now')),
updated_at DATETIME NOT NULL DEFAULT (datetime('now'))
);
CREATE INDEX IF NOT EXISTS idx_guests_host ON guests(host_id);
CREATE INDEX IF NOT EXISTS idx_guests_customer ON guests(customer_id);
CREATE TABLE IF NOT EXISTS host_reports (
id INTEGER PRIMARY KEY AUTOINCREMENT,
host_id TEXT NOT NULL,
customer_id TEXT NOT NULL,
received_at DATETIME NOT NULL DEFAULT (datetime('now')),
report_json TEXT NOT NULL,
agent_version TEXT,
cpu_percent REAL,
memory_percent REAL,
disk_percent REAL,
guest_total INTEGER,
guest_running INTEGER,
cloudflared_status TEXT
);
CREATE INDEX IF NOT EXISTS idx_host_reports_host ON host_reports(host_id, received_at DESC);
CREATE INDEX IF NOT EXISTS idx_host_reports_customer ON host_reports(customer_id, received_at DESC);
`)
if err != nil {
return err
}
return nil
}
@@ -812,7 +870,13 @@ func (s *Store) Prune(maxDays int) (int64, error) {
if err != nil {
return 0, err
}
return res.RowsAffected()
n, _ := res.RowsAffected()
// v0.7.0: prune the parallel host-domain report stream, same retention.
if hres, herr := s.db.Exec("DELETE FROM host_reports WHERE received_at < ?", cutoff); herr == nil {
hn, _ := hres.RowsAffected()
n += hn
}
return n, nil
}
// Close closes the database connection.
@@ -1138,11 +1202,11 @@ func scanEvents(rows *sql.Rows) ([]Event, error) {
// parseSQLiteTime tries multiple formats that modernc.org/sqlite may return.
func parseSQLiteTime(s string) time.Time {
formats := []string{
"2006-01-02 15:04:05", // SQLite datetime('now')
"2006-01-02T15:04:05Z", // RFC3339 without fractional
"2006-01-02 15:04:05", // SQLite datetime('now')
"2006-01-02T15:04:05Z", // RFC3339 without fractional
time.RFC3339, // 2006-01-02T15:04:05Z07:00
time.RFC3339Nano, // with fractional seconds
"2006-01-02 15:04:05+00:00", // with explicit UTC offset
time.RFC3339Nano, // with fractional seconds
"2006-01-02 15:04:05+00:00", // with explicit UTC offset
"2006-01-02 15:04:05.999999999", // with fractional, no TZ
}
for _, f := range formats {
@@ -1180,3 +1244,200 @@ func parseDiskSummary(reportJSON string) string {
}
return result
}
// ---- v0.7.0: host-domain (slice 3) ----
// Additive store surface for the agent's host-report stream. The controller-path
// methods above are untouched.
// Host is one customer agent, mirroring a row of the hosts table. It combines
// operator-intent columns (Desired*, DRRecord — INERT until slice 10) with
// box-reported reality (AgentVersion, LastReportAt).
type Host struct {
	// Identity, written by UpsertHost.
	HostID, CustomerID, APIKey string

	// Reality, bumped by SaveHostReport. LastReportAt is nil while the
	// last_report_at column is NULL.
	AgentVersion string
	LastReportAt *time.Time

	// Operator intent — INERT until slice 10.
	DesiredJSON       string
	DesiredGeneration int64
	DRRecordJSON      string

	// Row timestamps.
	CreatedAt, UpdatedAt time.Time
}
// Guest is one controller LXC, mirroring a row of the guests table. Reality
// columns are report-driven; APIKey and DesiredSpecJSON are INERT until slice 10
// and must survive report upserts.
type Guest struct {
	// Identity.
	GuestID, CustomerID, HostID string
	VMID                        int

	// Reality, written by UpsertGuestFromReport.
	DisplayName, Status, ControllerVersion string
	LastSeenAt                             *time.Time

	// INERT until slice 10.
	APIKey, DesiredSpecJSON string

	// Row timestamps.
	CreatedAt, UpdatedAt time.Time
}
// HostReportDenorm carries the denormalized fields pulled from a host-report for
// the dashboard / staleness, mirroring the reports table's denorm pattern.
// SaveHostReport writes each field to the matching host_reports column.
type HostReportDenorm struct {
	AgentVersion string

	// Resource usage percentages.
	CPUPercent, MemoryPercent, DiskPercent float64

	// Guest counts.
	GuestTotal, GuestRunning int

	CloudflaredStatus string
}
// HostStaleRow is the minimal per-host recency row the dead-man's-switch reads;
// GetHostStaleness produces one per host that has a non-NULL last_report_at.
type HostStaleRow struct {
	HostID, CustomerID string
	LastReportAt       time.Time
}
// GuestID derives the interim guest primary key as "<hostID>/<vmid>", with the
// vmid rendered in base 10. The hub owns the id scheme (locked decision 3) so the
// slice-10 swap to durable ids is hub-only.
func GuestID(hostID string, vmid int) string {
	const sep = "/"
	vmidText := strconv.FormatInt(int64(vmid), 10)
	return hostID + sep + vmidText
}
// scanHost reads one hosts row through scan (a Row.Scan or Rows.Scan method
// value) into a Host. Destinations are passed in hostSelectCols order. A NULL
// last_report_at leaves LastReportAt nil; the timestamp columns are converted
// with parseSQLiteTime. Any scan error is returned unchanged with a nil Host.
func scanHost(scan func(dest ...any) error) (*Host, error) {
	var (
		host             Host
		lastSeen         sql.NullString
		created, updated string
	)

	if err := scan(
		&host.HostID, &host.CustomerID, &host.APIKey, &host.AgentVersion, &lastSeen,
		&host.DesiredJSON, &host.DesiredGeneration, &host.DRRecordJSON, &created, &updated,
	); err != nil {
		return nil, err
	}

	if lastSeen.Valid {
		ts := parseSQLiteTime(lastSeen.String)
		host.LastReportAt = &ts
	}
	host.CreatedAt = parseSQLiteTime(created)
	host.UpdatedAt = parseSQLiteTime(updated)
	return &host, nil
}
// hostSelectCols is the hosts column list used by GetHostByAPIKey, GetHost and
// ListHosts. Its order must stay in step with the destinations scanHost passes
// to its scan function.
const hostSelectCols = `host_id, customer_id, api_key, agent_version, last_report_at,
desired_json, desired_generation, dr_record_json, created_at, updated_at`
// GetHostByAPIKey finds the host whose api_key equals apiKey. It returns
// (nil, nil) when no row matches — parallels GetCustomerConfigByAPIKey — and
// (nil, err) for any other failure.
func (s *Store) GetHostByAPIKey(apiKey string) (*Host, error) {
	row := s.db.QueryRow(`SELECT `+hostSelectCols+` FROM hosts WHERE api_key = ?`, apiKey)

	host, err := scanHost(row.Scan)
	switch {
	case err == sql.ErrNoRows:
		return nil, nil
	case err != nil:
		return nil, err
	}
	return host, nil
}
// GetHost finds the host whose host_id equals hostID. It returns (nil, nil)
// when no row matches and (nil, err) for any other failure.
func (s *Store) GetHost(hostID string) (*Host, error) {
	row := s.db.QueryRow(`SELECT `+hostSelectCols+` FROM hosts WHERE host_id = ?`, hostID)

	host, err := scanHost(row.Scan)
	switch {
	case err == sql.ErrNoRows:
		return nil, nil
	case err != nil:
		return nil, err
	}
	return host, nil
}
// ListHosts returns every host ordered by host_id (debug / host-domain views).
// The slice is nil when the table is empty. A scan failure aborts the listing
// and returns (nil, err); an iteration error is returned alongside whatever
// rows were read before it.
func (s *Store) ListHosts() ([]Host, error) {
	query := `SELECT ` + hostSelectCols + ` FROM hosts ORDER BY host_id`

	cursor, err := s.db.Query(query)
	if err != nil {
		return nil, err
	}
	defer cursor.Close()

	var all []Host
	for cursor.Next() {
		host, scanErr := scanHost(cursor.Scan)
		if scanErr != nil {
			return nil, scanErr
		}
		all = append(all, *host)
	}
	return all, cursor.Err()
}
// UpsertHost creates a host identity or, when host_id already exists, updates it
// (used by the admin mint). Only HostID, CustomerID and APIKey of h are read. On
// conflict just customer_id, api_key and updated_at change; the reality columns
// (agent_version/last_report_at) and the inert intent columns
// (desired_*/dr_record_json) are owned elsewhere and are left alone.
func (s *Store) UpsertHost(h *Host) error {
	const upsert = `
INSERT INTO hosts (host_id, customer_id, api_key, updated_at)
VALUES (?, ?, ?, datetime('now'))
ON CONFLICT(host_id) DO UPDATE SET
customer_id = excluded.customer_id,
api_key = excluded.api_key,
updated_at = datetime('now')`

	if _, err := s.db.Exec(upsert, h.HostID, h.CustomerID, h.APIKey); err != nil {
		return err
	}
	return nil
}
// SaveHostReport inserts a host_reports row and bumps the host's reality columns
// (agent_version/last_report_at/updated_at) — never the inert intent columns.
//
// Both statements run in a single transaction, so a failure of the hosts UPDATE
// cannot leave a stored report whose host still carries a stale last_report_at
// (the column the host staleness check reads). On any error nothing is persisted
// and the underlying database error is returned unchanged.
//
// An UPDATE that matches no hosts row is not treated as an error, as before.
func (s *Store) SaveHostReport(hostID, customerID string, reportJSON []byte, d HostReportDenorm) error {
	tx, err := s.db.Begin()
	if err != nil {
		return err
	}
	// After a successful Commit this Rollback only returns sql.ErrTxDone, which
	// is deliberately discarded; on the error paths it undoes the INSERT.
	defer func() { _ = tx.Rollback() }()

	if _, err := tx.Exec(`
INSERT INTO host_reports (host_id, customer_id, report_json, agent_version,
cpu_percent, memory_percent, disk_percent, guest_total, guest_running, cloudflared_status)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
		hostID, customerID, string(reportJSON), d.AgentVersion,
		d.CPUPercent, d.MemoryPercent, d.DiskPercent, d.GuestTotal, d.GuestRunning, d.CloudflaredStatus,
	); err != nil {
		return err
	}

	if _, err := tx.Exec(`
UPDATE hosts SET agent_version = ?, last_report_at = datetime('now'), updated_at = datetime('now')
WHERE host_id = ?`, d.AgentVersion, hostID); err != nil {
		return err
	}

	return tx.Commit()
}
// UpsertGuestFromReport writes the REALITY columns of a guest from a report. A
// new guest_id is inserted with its identity columns; an existing one has only
// vmid, display_name, status, controller_version, last_seen_at and updated_at
// refreshed. The inert columns (api_key / desired_spec_json) are not part of
// either statement, so a report can never clobber them.
func (s *Store) UpsertGuestFromReport(g *Guest) error {
	const upsert = `
INSERT INTO guests (guest_id, customer_id, host_id, vmid, display_name, status,
controller_version, last_seen_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))
ON CONFLICT(guest_id) DO UPDATE SET
vmid = excluded.vmid,
display_name = excluded.display_name,
status = excluded.status,
controller_version = excluded.controller_version,
last_seen_at = datetime('now'),
updated_at = datetime('now')`

	if _, err := s.db.Exec(upsert,
		g.GuestID, g.CustomerID, g.HostID, g.VMID, g.DisplayName, g.Status,
		g.ControllerVersion,
	); err != nil {
		return err
	}
	return nil
}
// GetHostStaleness returns per-host recency for the dead-man's-switch. Only hosts
// with a non-NULL last_report_at are selected — a freshly-minted host is not
// "down" until it has checked in at least once. The slice is nil when no host
// qualifies; a scan failure returns (nil, err).
func (s *Store) GetHostStaleness() ([]HostStaleRow, error) {
	cursor, err := s.db.Query(`SELECT host_id, customer_id, last_report_at FROM hosts WHERE last_report_at IS NOT NULL`)
	if err != nil {
		return nil, err
	}
	defer cursor.Close()

	var stale []HostStaleRow
	for cursor.Next() {
		var hostID, customerID, rawLast string
		if scanErr := cursor.Scan(&hostID, &customerID, &rawLast); scanErr != nil {
			return nil, scanErr
		}
		stale = append(stale, HostStaleRow{
			HostID:       hostID,
			CustomerID:   customerID,
			LastReportAt: parseSQLiteTime(rawLast),
		})
	}
	return stale, cursor.Err()
}
+1 -1
View File
@@ -10,7 +10,7 @@ import (
var (
reANSI = regexp.MustCompile(`\x1b\[[0-9;]*m`)
reTimestamp = regexp.MustCompile(`\d{4}[-/]\d{2}[-/]\d{2}[T ]\d{2}:\d{2}:\d{2}[.\d]*([+-]\d{2}:?\d{2})?[Z ]?:? ?`)
reTimestamp = regexp.MustCompile(`\d{4}[-/]\d{2}[-/]\d{2}[T ]\d{2}:\d{2}:\d{2}[.\d]*([+-]\d{2}:?\d{2})?[Z ]?:? ?`)
reSyslog = regexp.MustCompile(`[A-Z][a-z]{2}\s+\d{1,2} \d{2}:\d{2}:\d{2} `)
)