e68a7af4d3
- collect: a per-guest GuestConfig failure preserves the ListLXC run-status (only spec dropped); empty status normalized to "unknown". Test asserts preserved "running" + nil spec. - main: --selftest usage error now reads (want read|task|hub). - contract: testdata/host-report.golden.json + TestHostReport_ContractMatchesGolden (field-name key-set check vs golden; byte-identical with the hub copy). - version 0.3.0 -> 0.3.1. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
150 lines
4.7 KiB
Go
150 lines
4.7 KiB
Go
package hub
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
"time"
|
|
|
|
"gitea.dooplex.hu/admin/felhom-agent/internal/proxmox"
|
|
)
|
|
|
|
// proxmoxReader is the read-only subset the collector needs. Signatures match the
|
|
// REAL internal/proxmox.Client surface (slice 1): the node is held by the Client
|
|
// (no per-call node arg), reads return values (not pointers), and the guest type is
|
|
// proxmox.Guest. (The task's sketch used node-arg/pointer/LXC shapes; adapted to
|
|
// the actual exports per its instruction — no proxmox changes were needed: ListLXC
|
|
// already carries status/maxmem/maxdisk, GuestConfig carries cores.)
|
|
type proxmoxReader interface {
|
|
Node() string
|
|
NodeStatus(ctx context.Context) (proxmox.NodeStatus, error)
|
|
ListLXC(ctx context.Context) ([]proxmox.Guest, error)
|
|
GuestConfig(ctx context.Context, vmid int) (proxmox.GuestConfig, error)
|
|
}
|
|
|
|
// Collector builds a HostReport from read-only sources. All deps are behind narrow
|
|
// interfaces for unit testing.
|
|
type Collector struct {
|
|
px proxmoxReader
|
|
cf CloudflaredProber
|
|
hostID string
|
|
agentVersion string
|
|
logger *slog.Logger
|
|
now func() time.Time
|
|
}
|
|
|
|
// NewCollector builds a collector. hostID echoes config.Hub.HostID; agentVersion is
|
|
// the binary version.
|
|
func NewCollector(px proxmoxReader, cf CloudflaredProber, hostID, agentVersion string, logger *slog.Logger) *Collector {
|
|
if logger == nil {
|
|
logger = slog.Default()
|
|
}
|
|
return &Collector{
|
|
px: px,
|
|
cf: cf,
|
|
hostID: hostID,
|
|
agentVersion: agentVersion,
|
|
logger: logger,
|
|
now: func() time.Time { return time.Now().UTC() },
|
|
}
|
|
}
|
|
|
|
// Collect builds the report. Best-effort liveness: a failed NodeStatus is a hard
|
|
// error (no useful report — the cycle skips the POST); a failed per-guest
|
|
// GuestConfig degrades that guest to status="unknown" without spec but still sends;
|
|
// a cloudflared probe failure yields status="unknown" and is never fatal.
|
|
func (c *Collector) Collect(ctx context.Context) (*HostReport, error) {
|
|
ns, err := c.px.NodeStatus(ctx)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("hub: NodeStatus failed (no useful report): %w", err)
|
|
}
|
|
|
|
report := &HostReport{
|
|
HostID: c.hostID,
|
|
ReportedAt: c.now().Format(time.RFC3339),
|
|
AgentVersion: c.agentVersion,
|
|
Host: hostMetrics(c.px.Node(), ns),
|
|
Guests: c.collectGuests(ctx),
|
|
// Defined-but-empty this slice (slices 5/6). Non-nil so they marshal as [].
|
|
StorageTargets: []StorageTarget{},
|
|
Backups: []Backup{},
|
|
RestoreTests: []RestoreTest{},
|
|
PBSSnapshots: []PBSSnapshot{},
|
|
AuditTail: []AuditEntry{},
|
|
Cloudflared: Cloudflared{Status: c.cloudflaredStatus(ctx)},
|
|
}
|
|
return report, nil
|
|
}
|
|
|
|
func hostMetrics(node string, ns proxmox.NodeStatus) HostMetrics {
|
|
h := HostMetrics{
|
|
Node: node,
|
|
CPUPercent: ns.CPU * 100, // PVE cpu is a 0..1 fraction
|
|
MemoryTotalBytes: ns.Memory.Total,
|
|
MemoryUsedBytes: ns.Memory.Used,
|
|
DiskTotalBytes: ns.RootFS.Total,
|
|
DiskUsedBytes: ns.RootFS.Used,
|
|
LoadAvg: ns.LoadAvg,
|
|
UptimeSeconds: ns.Uptime,
|
|
}
|
|
h.MemoryPercent = percent(ns.Memory.Used, ns.Memory.Total)
|
|
h.DiskPercent = percent(ns.RootFS.Used, ns.RootFS.Total)
|
|
if h.LoadAvg == nil {
|
|
h.LoadAvg = []string{}
|
|
}
|
|
return h
|
|
}
|
|
|
|
func (c *Collector) collectGuests(ctx context.Context) []Guest {
|
|
lxc, err := c.px.ListLXC(ctx)
|
|
if err != nil {
|
|
// Not fatal: a report with no guest list still carries host liveness.
|
|
c.logger.Warn("hub: ListLXC failed; reporting no guests", "err", err)
|
|
return []Guest{}
|
|
}
|
|
guests := make([]Guest, 0, len(lxc))
|
|
for _, g := range lxc {
|
|
entry := Guest{VMID: g.VMID, Name: g.Name, Status: g.Status, ControllerVersion: ""}
|
|
// Normalize an empty run-status to "unknown" so the wire value is always one
|
|
// of running|stopped|unknown (matches the hub handler's empty→unknown default).
|
|
if entry.Status == "" {
|
|
entry.Status = "unknown"
|
|
}
|
|
// GuestConfig supplies cores; memory/disk come from the list entry (bytes).
|
|
// On failure, KEEP the known run-status from ListLXC — only the spec is lost.
|
|
cfg, err := c.px.GuestConfig(ctx, g.VMID)
|
|
if err != nil {
|
|
c.logger.Warn("hub: GuestConfig failed; spec omitted (run-status kept)",
|
|
"vmid", g.VMID, "err", err)
|
|
entry.Spec = nil
|
|
} else {
|
|
entry.Spec = &GuestSpec{
|
|
Cores: cfg.Cores,
|
|
MemoryBytes: g.MaxMem,
|
|
DiskBytes: g.MaxDisk,
|
|
}
|
|
}
|
|
guests = append(guests, entry)
|
|
}
|
|
return guests
|
|
}
|
|
|
|
func (c *Collector) cloudflaredStatus(ctx context.Context) string {
|
|
if c.cf == nil {
|
|
return "unknown"
|
|
}
|
|
st, err := c.cf.Status(ctx)
|
|
if err != nil || st == "" {
|
|
c.logger.Warn("hub: cloudflared probe failed", "err", err)
|
|
return "unknown"
|
|
}
|
|
return st
|
|
}
|
|
|
|
// percent returns used as a percentage of total (used/total*100). A zero or
// negative total yields 0, so callers never divide by zero.
func percent(used, total int64) float64 {
	var pct float64
	if total > 0 {
		pct = float64(used) / float64(total) * 100
	}
	return pct
}
|