fix(agent): slice-3 follow-ups — keep run-status on config fail, selftest usage, contract golden (v0.3.1)

- collect: a per-guest GuestConfig failure preserves the ListLXC run-status (only
  spec dropped); empty status normalized to "unknown". Test asserts preserved
  "running" + nil spec.
- main: --selftest usage error now reads (want read|task|hub).
- contract: testdata/host-report.golden.json + TestHostReport_ContractMatchesGolden
  (field-name key-set check vs golden; byte-identical with the hub copy).
- version 0.3.0 -> 0.3.1.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-08 18:29:05 +02:00
parent ab77fa3544
commit e68a7af4d3
7 changed files with 182 additions and 67 deletions
+8 -3
View File
@@ -105,13 +105,18 @@ func (c *Collector) collectGuests(ctx context.Context) []Guest {
guests := make([]Guest, 0, len(lxc))
for _, g := range lxc {
entry := Guest{VMID: g.VMID, Name: g.Name, Status: g.Status, ControllerVersion: ""}
// Normalize an empty run-status to "unknown" so the wire value is always one
// of running|stopped|unknown (matches the hub handler's empty→unknown default).
if entry.Status == "" {
entry.Status = "unknown"
}
// GuestConfig supplies cores; memory/disk come from the list entry (bytes).
// On failure, KEEP the known run-status from ListLXC — only the spec is lost.
cfg, err := c.px.GuestConfig(ctx, g.VMID)
if err != nil {
c.logger.Warn("hub: GuestConfig failed; guest degraded to unknown",
c.logger.Warn("hub: GuestConfig failed; spec omitted (run-status kept)",
"vmid", g.VMID, "err", err)
entry.Status = "unknown"
entry.Spec = nil // omitted
entry.Spec = nil
} else {
entry.Spec = &GuestSpec{
Cores: cfg.Cores,
+9 -4
View File
@@ -58,7 +58,7 @@ func TestCollect_HostAndGuests(t *testing.T) {
}
}
func TestCollect_GuestConfigFailureDegradesButStillReports(t *testing.T) {
func TestCollect_GuestConfigFailureKeepsStatusOmitsSpec(t *testing.T) {
px := &fakePx{
node: "demo-felhom",
ns: newTestNodeStatus(),
@@ -69,7 +69,7 @@ func TestCollect_GuestConfigFailureDegradesButStillReports(t *testing.T) {
cfg: map[int]proxmox.GuestConfig{100: {Cores: 2}},
cfgErr: map[int]error{200: errors.New("config read failed")},
}
c := NewCollector(px, fakeProber{status: "active"}, "h", "0.3.0", quietLogger())
c := NewCollector(px, fakeProber{status: "active"}, "h", "0.3.1", quietLogger())
r, err := c.Collect(context.Background())
if err != nil {
t.Fatalf("a per-guest failure must NOT fail the whole report: %v", err)
@@ -77,9 +77,14 @@ func TestCollect_GuestConfigFailureDegradesButStillReports(t *testing.T) {
if len(r.Guests) != 2 {
t.Fatalf("guests = %d", len(r.Guests))
}
// GuestConfig failed for vmid 200, but its run-status (from ListLXC) is known and
// must be PRESERVED — only the spec is dropped.
bad := r.Guests[1]
if bad.Status != "unknown" || bad.Spec != nil {
t.Errorf("degraded guest = %+v (want status=unknown, spec=nil)", bad)
if bad.Status != "running" {
t.Errorf("status = %q, want preserved \"running\" (not forced to unknown)", bad.Status)
}
if bad.Spec != nil {
t.Errorf("spec = %+v, want nil (omitted on config failure)", bad.Spec)
}
}
+76
View File
@@ -0,0 +1,76 @@
package hub
import (
"encoding/json"
"os"
"reflect"
"sort"
"testing"
)
// The host-report shape is a contract DUPLICATED across two repos (no shared types
// module yet). testdata/host-report.golden.json MUST be kept byte-identical with
// felhom-hub's hub/internal/api/testdata/host-report.golden.json. This test fails
// if a json tag on HostReport/HostMetrics/Guest is renamed/added/removed relative
// to the golden, catching silent drift before slices 5/6 populate the empty
// collections. (Promote to a shared types module when those land.)
//
// Only key SETS are compared, at three levels: the top-level object, "host", and
// the first element of "guests". Values are never compared.
func TestHostReport_ContractMatchesGolden(t *testing.T) {
	raw, err := os.ReadFile("testdata/host-report.golden.json")
	if err != nil {
		t.Fatalf("reading golden: %v", err)
	}
	var golden map[string]any
	if err := json.Unmarshal(raw, &golden); err != nil {
		t.Fatalf("golden is not valid JSON: %v", err)
	}

	// A constructed report mirroring the golden's populated shape (guests[0] has spec).
	report := &HostReport{
		HostID: "demo-host-01", ReportedAt: "2026-06-08T12:00:00Z", AgentVersion: "0.3.1",
		Host: HostMetrics{Node: "demo-felhom", LoadAvg: []string{"0.10"}},
		Guests: []Guest{
			{VMID: 100, Name: "a", Status: "running", ControllerVersion: "", Spec: &GuestSpec{Cores: 2}},
			{VMID: 101, Name: "b", Status: "stopped", ControllerVersion: ""},
		},
		StorageTargets: []StorageTarget{}, Backups: []Backup{}, RestoreTests: []RestoreTest{},
		PBSSnapshots: []PBSSnapshot{}, AuditTail: []AuditEntry{},
		Cloudflared: Cloudflared{Status: "active"},
	}

	// Round-trip through JSON so the struct's json tags become map keys. Both steps
	// are checked: a silent failure here would leave got nil and be misreported
	// below as contract drift instead of the real encode/decode error.
	b, err := json.Marshal(report)
	if err != nil {
		t.Fatalf("marshalling constructed report: %v", err)
	}
	var got map[string]any
	if err := json.Unmarshal(b, &got); err != nil {
		t.Fatalf("constructed report did not round-trip as a JSON object: %v", err)
	}

	assertSameKeys(t, "<top>", golden, got)
	assertSameKeys(t, "host", golden["host"], got["host"])
	assertSameKeys(t, "guests[0]",
		firstElem(golden["guests"]), firstElem(got["guests"]))
}
// firstElem returns the first element of v when v is a non-empty []any (the
// shape encoding/json gives a decoded JSON array). For anything else — a
// non-slice value, nil, or an empty slice — it returns an empty map[string]any
// so callers can treat the result uniformly as "an object with no keys".
func firstElem(v any) any {
	if list, isList := v.([]any); isList && len(list) > 0 {
		return list[0]
	}
	return map[string]any{}
}
// assertSameKeys records a test error (without stopping the test) when the
// sorted key lists of a and b, as computed by keysOf, are not deeply equal.
// a is reported as the golden side and b as the struct side; where labels the
// location in the failure message.
func assertSameKeys(t *testing.T, where string, a, b any) {
	t.Helper()
	goldenKeys := keysOf(a)
	structKeys := keysOf(b)
	if reflect.DeepEqual(goldenKeys, structKeys) {
		return
	}
	t.Errorf("contract drift at %s:\n golden keys = %v\n struct keys = %v", where, goldenKeys, structKeys)
}
// keysOf returns the key names of v in ascending order when v is a
// map[string]any. Any other value, including untyped nil, yields a nil slice;
// a map with no entries yields an empty but non-nil slice.
func keysOf(v any) []string {
	obj, isObject := v.(map[string]any)
	if !isObject {
		return nil
	}
	names := make([]string, len(obj))
	i := 0
	for name := range obj {
		names[i] = name
		i++
	}
	// Map iteration order is random; sort so results are comparable.
	sort.Strings(names)
	return names
}
+38
View File
@@ -0,0 +1,38 @@
{
"host_id": "demo-host-01",
"reported_at": "2026-06-08T12:00:00Z",
"agent_version": "0.3.1",
"host": {
"node": "demo-felhom",
"cpu_percent": 3.2,
"memory_total_bytes": 16777216000,
"memory_used_bytes": 4194304000,
"memory_percent": 25,
"disk_total_bytes": 152000000000,
"disk_used_bytes": 30000000000,
"disk_percent": 19.7,
"loadavg": ["0.10", "0.20", "0.15"],
"uptime_seconds": 86400
},
"guests": [
{
"vmid": 100,
"name": "felhom-cust-acme",
"status": "running",
"controller_version": "",
"spec": { "cores": 2, "memory_bytes": 2147483648, "disk_bytes": 21474836480 }
},
{
"vmid": 101,
"name": "felhom-cust-beta",
"status": "stopped",
"controller_version": ""
}
],
"storage_targets": [],
"backups": [],
"restore_tests": [],
"pbs_snapshots": [],
"cloudflared": { "status": "active" },
"audit_tail": []
}