feat(hub): host-report client + collector + first daemon loop (slice 3, v0.3.0)

internal/hub: the agent's first daemon — a periodic read-only host-report POSTed to
the hub (the heartbeat; no separate ping).

- HostReport wire contract (shared field-for-field with the hub ingest): host
  metrics, guests (vmid + spec), cloudflared status; storage/backups/restore-tests/
  pbs/audit collections are DEFINED but emitted empty (slices 5/6 will fill them).
- Collector over a read-only proxmoxReader (adapted to the real proxmox surface;
  no proxmox changes) + a CloudflaredProber. Partial failure: a NodeStatus failure is
  hard (the POST is skipped); a per-guest GuestConfig failure sets that guest's status
  to "unknown" and the report is still sent.
- Client: Bearer-auth POST, standard TLS (system roots / optional ca_file), typed
  TransportError/HTTPError, token never in errors.
- Loop: immediate first report, adopt hub poll_interval (clamp [60,3600]), resilient
  to collect/report errors, clean ctx-cancel shutdown.
- ControlEnvelope: only poll_interval_seconds acted on; blocked/desired_generation/
  has_signed_ops parsed-but-ignored (slice 4).
- config: HubConfig + FELHOM_AGENT_HUB_* overlay + mode-aware HubConfig.Validate +
  WithDefaults + hub-key redaction; example config updated.
- main: no-selftest mode is now the daemon; added --selftest=hub. Version -> 0.3.0.

Tests: report serialization, client (incl. token-redaction), collector partial-
failure, loop continuation+interval adoption, config. internal/proxmox + internal/
authz untouched.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-08 16:20:09 +02:00
parent f0fee7e193
commit ab77fa3544
16 changed files with 1352 additions and 91 deletions
+55
View File
@@ -36,6 +36,61 @@ func TestValidate(t *testing.T) {
}
}
// TestRedactedMasksHubKey verifies that Redacted masks the hub API key in the
// value it returns and leaves the key on the original config untouched.
func TestRedactedMasksHubKey(t *testing.T) {
	const secret = "hub-secret-abcdef"

	c := Default()
	c.Hub.APIKey = secret

	// The masked value must be non-empty and must not embed the full secret
	// anywhere; an exact-equality check alone would accept a value that merely
	// adds a prefix or suffix to the secret.
	if got := c.Redacted().Hub.APIKey; got == "" || strings.Contains(got, secret) {
		t.Fatalf("hub key not masked: %q", got)
	}

	// Compare exactly: a substring check would miss a partial overwrite of the
	// original key (for example, only its leading characters being masked).
	if c.Hub.APIKey != secret {
		t.Error("Redacted mutated the original hub key")
	}
}
// TestHubConfigValidate exercises HubConfig.Validate with one configuration
// that must be accepted, a list of configurations that must be rejected, and
// the plain-http loopback case that must be accepted.
func TestHubConfigValidate(t *testing.T) {
	valid := HubConfig{URL: "https://hub.felhom.eu", HostID: "h1", APIKey: "k"}
	if err := valid.Validate(); err != nil {
		t.Fatalf("valid hub config rejected: %v", err)
	}

	// The index reported in a failure message is the position in this slice.
	rejected := []HubConfig{
		{HostID: "h", APIKey: "k"},                              // URL missing
		{URL: "https://x", APIKey: "k"},                         // host ID missing
		{URL: "https://x", HostID: "h"},                         // API key missing
		{URL: "http://hub.felhom.eu", HostID: "h", APIKey: "k"}, // http to a non-loopback host
		{URL: "ftp://x", HostID: "h", APIKey: "k"},              // unsupported scheme
	}
	for idx := range rejected {
		cfg := rejected[idx]
		if err := cfg.Validate(); err != nil {
			continue
		}
		t.Errorf("case %d: expected validation error for %+v", idx, cfg)
	}

	// Plain http is accepted for a loopback address (used by tests).
	loopback := HubConfig{URL: "http://127.0.0.1:8443", HostID: "h", APIKey: "k"}
	if err := loopback.Validate(); err != nil {
		t.Errorf("http loopback should be allowed: %v", err)
	}
}
// TestHubEnvOverlayAndDefaults checks that the FELHOM_AGENT_HUB_* environment
// variables show up in the hub section of the config returned by Load(""),
// and that WithDefaults on a zero HubConfig sets TimeoutSeconds to 30.
func TestHubEnvOverlayAndDefaults(t *testing.T) {
	env := [][2]string{
		{"FELHOM_AGENT_HUB_URL", "https://hub.example"},
		{"FELHOM_AGENT_HUB_HOST_ID", "env-host"},
		{"FELHOM_AGENT_HUB_API_KEY", "env-key"},
		{"FELHOM_AGENT_HUB_POLL_SECONDS", "120"},
	}
	for _, kv := range env {
		t.Setenv(kv[0], kv[1])
	}

	loaded, err := Load("")
	if err != nil {
		t.Fatal(err)
	}

	overlaid := loaded.Hub.URL == "https://hub.example" &&
		loaded.Hub.HostID == "env-host" &&
		loaded.Hub.APIKey == "env-key"
	if !overlaid {
		t.Errorf("hub env overlay failed: %+v", loaded.Hub)
	}
	if got := loaded.Hub.PollSeconds; got != 120 {
		t.Errorf("poll seconds = %d, want 120", got)
	}

	// WithDefaults fills in a zero timeout.
	zero := HubConfig{}
	if zero.WithDefaults().TimeoutSeconds != 30 {
		t.Error("WithDefaults should set TimeoutSeconds=30")
	}
}
func TestLoadFileThenEnvOverride(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "agent.json")