feat(hub): host-domain ingest — tables + /host-report + per-host auth + host dead-man's-switch (v0.7.0, slice 3)

Purely additive; the controller path (reports/customer_configs/checkAuthCustomer/
existing checkers) is untouched. Cutover remains slice 10.

- store: new hosts/guests/host_reports tables (full schema incl. columns INERT
  until slice 10, so no later ALTER); GetHostByAPIKey/GetHost/ListHosts/UpsertHost/
  SaveHostReport/UpsertGuestFromReport (preserves inert cols)/GetHostStaleness/
  GuestID; Prune also prunes host_reports.
- api: checkAuthHost (sibling of checkAuthCustomer); POST /host-report (per-host
  Bearer, 4MiB, denorm + guest upsert, control envelope); POST /admin/hosts
  (PROVISIONAL global-key host mint); host_* event types registered.
- monitor: HostStalenessChecker sibling over host_reports (host_stale/down/
  recovered), wired on the existing 60s ticker; controller checkers unchanged.
- tests (hermetic): store intent/inert-column preservation, auth, ingest
  (envelope+denorm, mismatch/unknown/blocked/oversize), admin mint round-trip,
  host staleness transitions.

CHANGELOG v0.7.0. Contract matches the agent host-report spec field-for-field.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-08 16:36:16 +02:00
parent 0d832def7b
commit 7c0c75457f
12 changed files with 1204 additions and 38 deletions
@@ -0,0 +1,88 @@
package monitor
import (
"database/sql"
"fmt"
"io"
"log"
"path/filepath"
"testing"
"time"
"gitea.dooplex.hu/admin/felhom-hub/internal/store"
_ "modernc.org/sqlite"
)
// backdate rewrites last_report_at on the hosts row matching hostID so that it
// reads as minutesAgo minutes before the database's current time. This lets a
// test simulate elapsed time without sleeping. The update is issued through db,
// which the caller opens as a separate handle from the store under test. Any
// execution error aborts the test.
func backdate(t *testing.T, db *sql.DB, hostID string, minutesAgo int) {
	t.Helper()

	const stmt = `UPDATE hosts SET last_report_at = datetime('now', ?) WHERE host_id = ?`

	// The datetime() modifier is passed as a bound parameter, e.g. "-40 minutes".
	offset := fmt.Sprintf("-%d minutes", minutesAgo)

	_, err := db.Exec(stmt, offset, hostID)
	if err != nil {
		t.Fatal(err)
	}
}
// TestHostStalenessChecker drives a single host through the checker's state
// transitions by rewriting its last_report_at timestamp, and asserts on the
// event types delivered to the callback after each step.
//
// Sequence asserted, with a 30-minute threshold:
//   - constructing the checker while the last report is 40 minutes old yields
//     state "stale" and emits no event;
//   - Check at the same age emits nothing;
//   - a report 2 minutes old makes host_recovered the latest event;
//   - 40 minutes old makes host_stale the latest event;
//   - 130 minutes old makes host_down the latest event.
func TestHostStalenessChecker(t *testing.T) {
	path := filepath.Join(t.TempDir(), "test.db")
	st, err := store.New(path, log.New(io.Discard, "", 0))
	if err != nil {
		t.Fatal(err)
	}
	defer st.Close()

	// Second handle on the same database file, used only by backdate to rewrite
	// timestamps. Fail fast if it cannot be opened: a discarded error here would
	// leave db nil and surface later as a panic instead of a clear failure.
	db, err := sql.Open("sqlite", path)
	if err != nil {
		t.Fatal(err)
	}
	defer db.Close()

	// NOTE(review): the results of these setup calls are not checked because
	// their signatures are not visible from this file — confirm whether they
	// return errors and, if so, fail the test on them.
	st.SaveCustomerConfig(&store.CustomerConfig{CustomerID: "c1", APIKey: "ck", RetrievalPassword: "p"})
	st.UpsertHost(&store.Host{HostID: "h1", CustomerID: "c1", APIKey: "k1"})
	st.SaveHostReport("h1", "c1", []byte(`{}`), store.HostReportDenorm{}) // sets last_report_at

	// Record only the event type of every callback invocation, in order.
	var events []string
	onEvent := func(customerID, eventType, severity, message, detailsJSON, source string) {
		events = append(events, eventType)
	}

	// Seed already-stale (40m) → state stale, but NO event on init.
	backdate(t, db, "h1", 40)
	sc := NewHostStalenessChecker(st, 30*time.Minute, onEvent, log.New(io.Discard, "", 0))
	if len(events) != 0 {
		t.Fatalf("seed must not emit events, got %v", events)
	}
	if sc.GetState("h1") != "stale" {
		t.Fatalf("seeded state = %q, want stale", sc.GetState("h1"))
	}

	// Same age → no transition.
	sc.Check()
	if len(events) != 0 {
		t.Fatalf("no transition expected, got %v", events)
	}

	// Fresh report → host_recovered.
	backdate(t, db, "h1", 2)
	sc.Check()
	if last(events) != "host_recovered" {
		t.Fatalf("events = %v, want last host_recovered", events)
	}

	// Aged to stale → host_stale.
	backdate(t, db, "h1", 40)
	sc.Check()
	if last(events) != "host_stale" {
		t.Fatalf("events = %v, want last host_stale", events)
	}

	// Aged past 2× → host_down.
	backdate(t, db, "h1", 130)
	sc.Check()
	if last(events) != "host_down" {
		t.Fatalf("events = %v, want last host_down", events)
	}
}
// last returns the final element of s, or the empty string when s has no
// elements (nil or zero-length).
func last(s []string) string {
	if n := len(s); n > 0 {
		return s[n-1]
	}
	return ""
}