feat(hub): host-report client + collector + first daemon loop (slice 3, v0.3.0)
internal/hub: the agent's first daemon — a periodic read-only host-report POSTed to the hub (the heartbeat; no separate ping).
- HostReport wire contract (shared field-for-field with the hub ingest): host metrics, guests (vmid + spec), cloudflared status; storage/backups/restore-tests/pbs/audit collections DEFINED but emitted empty (slices 5/6 fill).
- Collector over a read-only proxmoxReader (adapted to the real proxmox surface; no proxmox changes) + a CloudflaredProber. Partial-failure: NodeStatus fail = hard (skip POST); per-guest GuestConfig fail = status "unknown", still report.
- Client: Bearer-auth POST, standard TLS (system roots / optional ca_file), typed TransportError/HTTPError, token never in errors.
- Loop: immediate first report, adopt hub poll_interval (clamp [60,3600]), resilient to collect/report errors, clean ctx-cancel shutdown.
- ControlEnvelope: only poll_interval_seconds acted on; blocked/desired_generation/has_signed_ops parsed-but-ignored (slice 4).
- config: HubConfig + FELHOM_AGENT_HUB_* overlay + mode-aware HubConfig.Validate + WithDefaults + hub-key redaction; example config updated.
- main: no-selftest mode is now the daemon; added --selftest=hub. Version -> 0.3.0.
Tests: report serialization, client (incl. token-redaction), collector partial-failure, loop continuation+interval adoption, config. internal/proxmox + internal/authz untouched.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,108 @@
|
||||
package hub
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
"gitea.dooplex.hu/admin/felhom-agent/internal/proxmox"
|
||||
)
|
||||
|
||||
func newTestNodeStatus() proxmox.NodeStatus {
|
||||
var ns proxmox.NodeStatus
|
||||
ns.CPU = 0.05 // → 5%
|
||||
ns.Uptime = 86400
|
||||
ns.LoadAvg = []string{"0.10", "0.20", "0.15"}
|
||||
ns.Memory.Total = 16000000000
|
||||
ns.Memory.Used = 4000000000
|
||||
ns.RootFS.Total = 100000000000
|
||||
ns.RootFS.Used = 20000000000
|
||||
return ns
|
||||
}
|
||||
|
||||
func TestCollect_HostAndGuests(t *testing.T) {
|
||||
px := &fakePx{
|
||||
node: "demo-felhom",
|
||||
ns: newTestNodeStatus(),
|
||||
lxc: []proxmox.Guest{
|
||||
{VMID: 100, Name: "acme", Status: "running", MaxMem: 2147483648, MaxDisk: 21474836480},
|
||||
},
|
||||
cfg: map[int]proxmox.GuestConfig{100: {Cores: 2, Memory: 2048}},
|
||||
}
|
||||
c := NewCollector(px, fakeProber{status: "active"}, "demo-host-01", "0.3.0", quietLogger())
|
||||
r, err := c.Collect(context.Background())
|
||||
if err != nil {
|
||||
t.Fatalf("Collect: %v", err)
|
||||
}
|
||||
if r.HostID != "demo-host-01" || r.AgentVersion != "0.3.0" {
|
||||
t.Errorf("top-level wrong: %+v", r)
|
||||
}
|
||||
if r.Host.Node != "demo-felhom" || r.Host.CPUPercent != 5 {
|
||||
t.Errorf("host = %+v", r.Host)
|
||||
}
|
||||
if r.Host.MemoryPercent != 25 || r.Host.DiskPercent != 20 {
|
||||
t.Errorf("percents = mem %v disk %v", r.Host.MemoryPercent, r.Host.DiskPercent)
|
||||
}
|
||||
if len(r.Guests) != 1 {
|
||||
t.Fatalf("guests = %d", len(r.Guests))
|
||||
}
|
||||
g := r.Guests[0]
|
||||
if g.VMID != 100 || g.Status != "running" || g.Spec == nil {
|
||||
t.Fatalf("guest = %+v", g)
|
||||
}
|
||||
if g.Spec.Cores != 2 || g.Spec.MemoryBytes != 2147483648 || g.Spec.DiskBytes != 21474836480 {
|
||||
t.Errorf("spec = %+v", g.Spec)
|
||||
}
|
||||
if r.Cloudflared.Status != "active" {
|
||||
t.Errorf("cloudflared = %q", r.Cloudflared.Status)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCollect_GuestConfigFailureDegradesButStillReports(t *testing.T) {
|
||||
px := &fakePx{
|
||||
node: "demo-felhom",
|
||||
ns: newTestNodeStatus(),
|
||||
lxc: []proxmox.Guest{
|
||||
{VMID: 100, Name: "ok", Status: "running", MaxMem: 1 << 31, MaxDisk: 1 << 34},
|
||||
{VMID: 200, Name: "bad", Status: "running"},
|
||||
},
|
||||
cfg: map[int]proxmox.GuestConfig{100: {Cores: 2}},
|
||||
cfgErr: map[int]error{200: errors.New("config read failed")},
|
||||
}
|
||||
c := NewCollector(px, fakeProber{status: "active"}, "h", "0.3.0", quietLogger())
|
||||
r, err := c.Collect(context.Background())
|
||||
if err != nil {
|
||||
t.Fatalf("a per-guest failure must NOT fail the whole report: %v", err)
|
||||
}
|
||||
if len(r.Guests) != 2 {
|
||||
t.Fatalf("guests = %d", len(r.Guests))
|
||||
}
|
||||
bad := r.Guests[1]
|
||||
if bad.Status != "unknown" || bad.Spec != nil {
|
||||
t.Errorf("degraded guest = %+v (want status=unknown, spec=nil)", bad)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCollect_NodeStatusFailureIsHardError(t *testing.T) {
|
||||
px := &fakePx{node: "n", nsErr: errors.New("proxmox down")}
|
||||
c := NewCollector(px, fakeProber{status: "active"}, "h", "0.3.0", quietLogger())
|
||||
if _, err := c.Collect(context.Background()); err == nil {
|
||||
t.Fatal("NodeStatus failure must be a hard error (no useful report)")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCollect_CloudflaredProbeErrorIsUnknown(t *testing.T) {
|
||||
px := &fakePx{node: "n", ns: newTestNodeStatus()}
|
||||
c := NewCollector(px, fakeProber{err: errors.New("no systemctl")}, "h", "0.3.0", quietLogger())
|
||||
r, err := c.Collect(context.Background())
|
||||
if err != nil {
|
||||
t.Fatalf("cloudflared failure must not be fatal: %v", err)
|
||||
}
|
||||
if r.Cloudflared.Status != "unknown" {
|
||||
t.Errorf("cloudflared = %q, want unknown", r.Cloudflared.Status)
|
||||
}
|
||||
// Empty collections still present as non-nil.
|
||||
if r.Guests == nil || r.StorageTargets == nil || r.AuditTail == nil {
|
||||
t.Error("empty collections must be non-nil")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user