feat(hub): host-report client + collector + first daemon loop (slice 3, v0.3.0)
internal/hub: the agent's first daemon — a periodic read-only host-report POSTed to the hub (the heartbeat; no separate ping). - HostReport wire contract (shared field-for-field with the hub ingest): host metrics, guests (vmid + spec), cloudflared status; storage/backups/restore-tests/pbs/audit collections DEFINED but emitted empty (slices 5/6 fill). - Collector over a read-only proxmoxReader (adapted to the real proxmox surface; no proxmox changes) + a CloudflaredProber. Partial-failure: NodeStatus fail = hard (skip POST); per-guest GuestConfig fail = status "unknown", still report. - Client: Bearer-auth POST, standard TLS (system roots / optional ca_file), typed TransportError/HTTPError, token never in errors. - Loop: immediate first report, adopt hub poll_interval (clamp [60,3600]), resilient to collect/report errors, clean ctx-cancel shutdown. - ControlEnvelope: only poll_interval_seconds acted on; blocked/desired_generation/has_signed_ops parsed-but-ignored (slice 4). - config: HubConfig + FELHOM_AGENT_HUB_* overlay + mode-aware HubConfig.Validate + WithDefaults + hub-key redaction; example config updated. - main: no-selftest mode is now the daemon; added --selftest=hub. Version -> 0.3.0. Tests: report serialization, client (incl. token-redaction), collector partial-failure, loop continuation+interval adoption, config. internal/proxmox + internal/authz untouched. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,144 @@
|
||||
package hub
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// intPtr returns a pointer to a fresh copy of i, so tests can populate
// optional *int fields inline.
func intPtr(i int) *int {
	v := i
	return &v
}
|
||||
|
||||
type fakeCollector struct {
|
||||
report *HostReport
|
||||
err error
|
||||
n *int32
|
||||
}
|
||||
|
||||
func (c *fakeCollector) Collect(ctx context.Context) (*HostReport, error) {
|
||||
atomic.AddInt32(c.n, 1)
|
||||
return c.report, c.err
|
||||
}
|
||||
|
||||
type fakeReporter struct {
|
||||
env *ControlEnvelope
|
||||
errSeq []error // per-call error (nil = ok); calls past the slice are ok
|
||||
n *int32
|
||||
}
|
||||
|
||||
func (r *fakeReporter) Report(ctx context.Context, _ *HostReport) (*ControlEnvelope, error) {
|
||||
i := int(atomic.AddInt32(r.n, 1) - 1)
|
||||
if i < len(r.errSeq) && r.errSeq[i] != nil {
|
||||
return nil, r.errSeq[i]
|
||||
}
|
||||
return r.env, nil
|
||||
}
|
||||
|
||||
func TestClampInterval(t *testing.T) {
|
||||
cases := []struct {
|
||||
in int
|
||||
wantSec int
|
||||
clamped bool
|
||||
}{
|
||||
{10, 60, true}, {59, 60, true}, {60, 60, false}, {120, 120, false},
|
||||
{3600, 3600, false}, {99999, 3600, true},
|
||||
}
|
||||
for _, c := range cases {
|
||||
d, clamped := clampInterval(c.in)
|
||||
if int(d.Seconds()) != c.wantSec || clamped != c.clamped {
|
||||
t.Errorf("clampInterval(%d) = %v,%v want %ds,%v", c.in, d, clamped, c.wantSec, c.clamped)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoop_CycleAdoptsAndClamps(t *testing.T) {
|
||||
current := 900 * time.Second
|
||||
mk := func(env *ControlEnvelope, collErr, repErr error) *Loop {
|
||||
var cn, rn int32
|
||||
return NewLoop(
|
||||
&fakeCollector{report: &HostReport{}, err: collErr, n: &cn},
|
||||
&fakeReporter{env: env, errSeq: []error{repErr}, n: &rn},
|
||||
current, quietLogger())
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
env *ControlEnvelope
|
||||
want time.Duration
|
||||
}{
|
||||
{"adopt in-range", &ControlEnvelope{PollIntervalSeconds: intPtr(120)}, 120 * time.Second},
|
||||
{"clamp low", &ControlEnvelope{PollIntervalSeconds: intPtr(10)}, 60 * time.Second},
|
||||
{"clamp high", &ControlEnvelope{PollIntervalSeconds: intPtr(99999)}, 3600 * time.Second},
|
||||
{"missing keeps current", &ControlEnvelope{PollIntervalSeconds: nil}, current},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := mk(tt.env, nil, nil).cycle(context.Background(), current)
|
||||
if got != tt.want {
|
||||
t.Errorf("cycle adopted %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
t.Run("collect error keeps current", func(t *testing.T) {
|
||||
got := mk(&ControlEnvelope{PollIntervalSeconds: intPtr(120)}, errors.New("x"), nil).cycle(context.Background(), current)
|
||||
if got != current {
|
||||
t.Errorf("got %v, want current %v", got, current)
|
||||
}
|
||||
})
|
||||
t.Run("report error keeps current", func(t *testing.T) {
|
||||
got := mk(nil, nil, errors.New("x")).cycle(context.Background(), current)
|
||||
if got != current {
|
||||
t.Errorf("got %v, want current %v", got, current)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestLoop_RunImmediateAndResilientAfterError(t *testing.T) {
|
||||
var cn, rn int32
|
||||
loop := NewLoop(
|
||||
&fakeCollector{report: &HostReport{}, n: &cn},
|
||||
// first report errors; subsequent ok; no interval override (keeps fast tick)
|
||||
&fakeReporter{env: &ControlEnvelope{}, errSeq: []error{errors.New("hub 5xx")}, n: &rn},
|
||||
10*time.Millisecond, quietLogger())
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
done := make(chan error, 1)
|
||||
go func() { done <- loop.Run(ctx) }()
|
||||
time.Sleep(90 * time.Millisecond)
|
||||
cancel()
|
||||
|
||||
select {
|
||||
case err := <-done:
|
||||
if err != nil {
|
||||
t.Fatalf("Run returned error: %v", err)
|
||||
}
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("Run did not return after cancel")
|
||||
}
|
||||
// Immediate report + several ticks despite the first error → ≥3 collect calls.
|
||||
if got := atomic.LoadInt32(&cn); got < 3 {
|
||||
t.Errorf("collect calls = %d, want ≥3 (immediate + continuation after error)", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoop_RunAdoptsSlowerInterval(t *testing.T) {
|
||||
var cn, rn int32
|
||||
loop := NewLoop(
|
||||
&fakeCollector{report: &HostReport{}, n: &cn},
|
||||
// every report tells the agent to slow to 60s → after the immediate report,
|
||||
// the ticker resets to 60s and no further ticks fire within the test window.
|
||||
&fakeReporter{env: &ControlEnvelope{PollIntervalSeconds: intPtr(60)}, n: &rn},
|
||||
10*time.Millisecond, quietLogger())
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
done := make(chan error, 1)
|
||||
go func() { done <- loop.Run(ctx) }()
|
||||
time.Sleep(120 * time.Millisecond)
|
||||
cancel()
|
||||
<-done
|
||||
|
||||
if got := atomic.LoadInt32(&cn); got != 1 {
|
||||
t.Errorf("collect calls = %d, want 1 (immediate report adopted 60s, ticker slowed)", got)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user