Files
felhom-agent/internal/hub/collect_test.go
T
admin ab77fa3544 feat(hub): host-report client + collector + first daemon loop (slice 3, v0.3.0)
internal/hub: the agent's first daemon — a periodic read-only host-report POSTed to
the hub (the heartbeat; no separate ping).

- HostReport wire contract (shared field-for-field with the hub ingest): host
  metrics, guests (vmid + spec), cloudflared status; storage/backups/restore-tests/
  pbs/audit collections DEFINED but emitted empty (slices 5/6 fill).
- Collector over a read-only proxmoxReader (adapted to the real proxmox surface;
  no proxmox changes) + a CloudflaredProber. Partial-failure policy: a NodeStatus
  failure is a hard error (the POST is skipped); a per-guest GuestConfig failure
  marks that guest's status "unknown" and the report is still sent.
- Client: Bearer-auth POST, standard TLS (system roots / optional ca_file), typed
  TransportError/HTTPError, token never in errors.
- Loop: immediate first report, adopt the hub's poll_interval (clamped to
  [60, 3600] seconds), resilient to collect/report errors, clean ctx-cancel shutdown.
- ControlEnvelope: only poll_interval_seconds acted on; blocked/desired_generation/
  has_signed_ops parsed-but-ignored (slice 4).
- config: HubConfig + FELHOM_AGENT_HUB_* overlay + mode-aware HubConfig.Validate +
  WithDefaults + hub-key redaction; example config updated.
- main: no-selftest mode is now the daemon; added --selftest=hub. Version -> 0.3.0.

Tests: report serialization, client (incl. token-redaction), collector partial-
failure, loop continuation+interval adoption, config. internal/proxmox + internal/
authz untouched.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-08 16:20:09 +02:00

109 lines
3.4 KiB
Go

package hub
import (
"context"
"errors"
"testing"
"gitea.dooplex.hu/admin/felhom-agent/internal/proxmox"
)
// newTestNodeStatus returns the canned proxmox.NodeStatus shared by the
// collector tests in this file. The numbers are chosen so the derived
// percentages are round: TestCollect_HostAndGuests expects 5% CPU, 25% memory
// and 20% disk from this fixture.
func newTestNodeStatus() proxmox.NodeStatus {
	const (
		memTotal  = 16_000_000_000
		memUsed   = 4_000_000_000
		rootTotal = 100_000_000_000
		rootUsed  = 20_000_000_000
	)

	status := proxmox.NodeStatus{}

	// Capacity figures: a quarter of memory and a fifth of the root
	// filesystem are in use.
	status.Memory.Total, status.Memory.Used = memTotal, memUsed
	status.RootFS.Total, status.RootFS.Used = rootTotal, rootUsed

	// Load figures. CPU is a fraction here (0.05 → 5%).
	status.CPU = 0.05
	status.LoadAvg = []string{"0.10", "0.20", "0.15"}
	status.Uptime = 86400 // presumably seconds (one day) — confirm against proxmox.NodeStatus

	return status
}
// TestCollect_HostAndGuests covers the happy path: one LXC guest whose config
// is readable and a cloudflared prober reporting "active". It checks the
// top-level identity fields, the host percentages derived from
// newTestNodeStatus, the single guest's spec, and the cloudflared status.
func TestCollect_HostAndGuests(t *testing.T) {
	const (
		hostID       = "demo-host-01"
		agentVersion = "0.3.0"
		nodeName     = "demo-felhom"
		guestMem     = 2 << 30  // 2147483648
		guestDisk    = 20 << 30 // 21474836480
	)

	reader := &fakePx{
		node: nodeName,
		ns:   newTestNodeStatus(),
		lxc: []proxmox.Guest{{
			VMID:    100,
			Name:    "acme",
			Status:  "running",
			MaxMem:  guestMem,
			MaxDisk: guestDisk,
		}},
		cfg: map[int]proxmox.GuestConfig{
			100: {Cores: 2, Memory: 2048},
		},
	}

	collector := NewCollector(reader, fakeProber{status: "active"}, hostID, agentVersion, quietLogger())
	report, err := collector.Collect(context.Background())
	if err != nil {
		t.Fatalf("Collect: %v", err)
	}

	// Identity fields are passed straight through from the constructor.
	if !(report.HostID == hostID && report.AgentVersion == agentVersion) {
		t.Errorf("top-level wrong: %+v", report)
	}

	// Host metrics: node name plus the percentages derived from the fixture.
	if !(report.Host.Node == nodeName && report.Host.CPUPercent == 5) {
		t.Errorf("host = %+v", report.Host)
	}
	if memPct, diskPct := report.Host.MemoryPercent, report.Host.DiskPercent; !(memPct == 25 && diskPct == 20) {
		t.Errorf("percents = mem %v disk %v", memPct, diskPct)
	}

	// Exactly one guest; anything else makes the indexing below meaningless,
	// so stop the test here.
	if n := len(report.Guests); n != 1 {
		t.Fatalf("guests = %d", n)
	}
	guest := report.Guests[0]
	switch {
	case guest.VMID != 100, guest.Status != "running", guest.Spec == nil:
		// Fatal rather than Error: the spec checks below dereference guest.Spec.
		t.Fatalf("guest = %+v", guest)
	}

	spec := guest.Spec
	if !(spec.Cores == 2 && spec.MemoryBytes == guestMem && spec.DiskBytes == guestDisk) {
		t.Errorf("spec = %+v", spec)
	}

	if got := report.Cloudflared.Status; got != "active" {
		t.Errorf("cloudflared = %q", got)
	}
}
func TestCollect_GuestConfigFailureDegradesButStillReports(t *testing.T) {
px := &fakePx{
node: "demo-felhom",
ns: newTestNodeStatus(),
lxc: []proxmox.Guest{
{VMID: 100, Name: "ok", Status: "running", MaxMem: 1 << 31, MaxDisk: 1 << 34},
{VMID: 200, Name: "bad", Status: "running"},
},
cfg: map[int]proxmox.GuestConfig{100: {Cores: 2}},
cfgErr: map[int]error{200: errors.New("config read failed")},
}
c := NewCollector(px, fakeProber{status: "active"}, "h", "0.3.0", quietLogger())
r, err := c.Collect(context.Background())
if err != nil {
t.Fatalf("a per-guest failure must NOT fail the whole report: %v", err)
}
if len(r.Guests) != 2 {
t.Fatalf("guests = %d", len(r.Guests))
}
bad := r.Guests[1]
if bad.Status != "unknown" || bad.Spec != nil {
t.Errorf("degraded guest = %+v (want status=unknown, spec=nil)", bad)
}
}
// TestCollect_NodeStatusFailureIsHardError verifies that Collect returns a
// non-nil error when the fake reader is configured with a node-status error.
func TestCollect_NodeStatusFailureIsHardError(t *testing.T) {
	reader := &fakePx{node: "n", nsErr: errors.New("proxmox down")}
	collector := NewCollector(reader, fakeProber{status: "active"}, "h", "0.3.0", quietLogger())

	_, err := collector.Collect(context.Background())
	if err == nil {
		t.Fatal("NodeStatus failure must be a hard error (no useful report)")
	}
}
func TestCollect_CloudflaredProbeErrorIsUnknown(t *testing.T) {
px := &fakePx{node: "n", ns: newTestNodeStatus()}
c := NewCollector(px, fakeProber{err: errors.New("no systemctl")}, "h", "0.3.0", quietLogger())
r, err := c.Collect(context.Background())
if err != nil {
t.Fatalf("cloudflared failure must not be fatal: %v", err)
}
if r.Cloudflared.Status != "unknown" {
t.Errorf("cloudflared = %q, want unknown", r.Cloudflared.Status)
}
// Empty collections still present as non-nil.
if r.Guests == nil || r.StorageTargets == nil || r.AuditTail == nil {
t.Error("empty collections must be non-nil")
}
}