v0.40.0: bootstrap pull+merge onboarding (controller pulls config from hub)

Fix the onboarding 401: instead of seeding controller.yaml from the agent's
HOST hub key (which the hub's customer-scoped /api/v1/report rejects), the
controller now PULLS its full controller.yaml from the hub on first boot using
the bootstrap's retrieval passphrase (yielding the customer-scoped key) and
MERGES in the per-guest local_api block.

- internal/bootstrap: contract v1->v2 (customer.id + hub.url +
  hub.retrieval_password + local_api; drop host key/identity). MaybeIngest gains
  an injected PullFunc (keeps bootstrap free of the heavy report package),
  pulls with bounded transient-only retry, merges local_api at YAML-map level
  (preserves all hub-emitted fields), idempotent + fail-safe + never-crash.
- main.go: wire report.PullConfig as the pull adapter (maps ErrHubUnreachable
  -> ErrPullTransient; auth and not-found failures are treated as permanent).
- Lockstep with felhom-agent v0.19.0.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-11 13:22:37 +02:00
parent b76d8b298c
commit 6a594f9ec2
4 changed files with 347 additions and 132 deletions
+177 -69
View File
@@ -1,133 +1,241 @@
package bootstrap
import (
"errors"
"fmt"
"io"
"log"
"os"
"path/filepath"
"strings"
"testing"
"time"
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
)
// testLogger builds a logger for tests: no prefix, no flags, and all output
// sent to io.Discard so the code under test can log without polluting the
// test output.
func testLogger() *log.Logger {
	const (
		noPrefix = ""
		noFlags  = 0
	)
	silent := log.New(io.Discard, noPrefix, noFlags)
	return silent
}
const goodBootstrap = `{
"schema": "felhom.bootstrap/v1",
"customer": {"id": "cust-8200", "name": "Teszt", "domain": "cust8200.felhom.eu", "email": "a@b.hu"},
"hub": {"url": "https://hub.felhom.eu", "api_key": "HUBKEY", "host_id": "demo-felhom-01"},
// A valid v2 bootstrap: only customer.id + hub.url + hub.retrieval_password + the per-guest local_api.
const goodBootstrapV2 = `{
"schema": "felhom.bootstrap/v2",
"customer": {"id": "cust-8200"},
"hub": {"url": "https://hub.felhom.eu", "retrieval_password": "five-word-passphrase-here"},
"local_api": {"endpoint": "192.168.0.162:8443", "fingerprint": "ab12", "token": "PERGUESTTOKEN"}
}`
// A present bootstrap on an unconfigured controller seeds controller.yaml and skips setup.
func TestMaybeIngest_SeedsWhenUnconfigured(t *testing.T) {
dir := t.TempDir()
bpath := filepath.Join(dir, "bootstrap.json")
cfgPath := filepath.Join(dir, "controller.yaml")
if err := os.WriteFile(bpath, []byte(goodBootstrap), 0o600); err != nil {
// hubYAML is the fixture the fake pull functions return in place of the hub's
// /api/v1/config/{id} response: a full controller.yaml carrying the
// CUSTOMER-scoped hub key, the customer identity and the assets settings, but
// NO local_api section (the hub can't know per-guest Proxmox internals).
//
// The document must stay a nested YAML mapping: the tests read
// customer.id / customer.domain, hub.enabled / hub.url / hub.api_key and
// assets.source_url out of the merged result, so those keys have to live
// under their parent sections rather than at the top level.
// `assets.source_url` is included as a field the bootstrap itself never sets,
// to prove that the map-level merge preserves hub-emitted fields.
const hubYAML = `# Felhom Controller Configuration
customer:
  id: cust-8200
  name: Teszt Ügyfél
  domain: cust8200.felhom.eu
  email: a@b.hu
hub:
  enabled: true
  url: https://hub.felhom.eu
  api_key: CUSTKEY_FROM_HUB
assets:
  source_url: https://hub.felhom.eu/assets
  sync_enabled: true
web:
  session_secret: deadbeef
`
// writeBootstrap is a test helper that stages a bootstrap file for one test.
//
// It writes content to <dir>/bootstrap.json with mode 0600 and sets the
// FELHOM_BOOTSTRAP_PATH environment variable to that path via t.Setenv, so the
// variable is restored when the test finishes. Any write error fails the test
// immediately.
//
// It returns the bootstrap path and the path <dir>/controller.yaml. The
// controller.yaml file itself is NOT created here; only its path is computed.
func writeBootstrap(t *testing.T, dir, content string) (bpath, cfgPath string) {
	t.Helper()

	bootstrapFile := filepath.Join(dir, "bootstrap.json")
	writeErr := os.WriteFile(bootstrapFile, []byte(content), 0o600)
	if writeErr != nil {
		t.Fatal(writeErr)
	}
	t.Setenv("FELHOM_BOOTSTRAP_PATH", bootstrapFile)

	return bootstrapFile, filepath.Join(dir, "controller.yaml")
}
got := MaybeIngest(cfgPath, config.Default(), testLogger())
// PULL+MERGE: an unconfigured controller pulls the hub yaml and merges in the per-guest local_api.
// The written controller.yaml must carry BOTH the hub's customer key/identity/assets AND the
// bootstrap's local_api — and must NOT contain a host key.
func TestMaybeIngest_PullsAndMerges(t *testing.T) {
dir := t.TempDir()
_, cfgPath := writeBootstrap(t, dir, goodBootstrapV2)
var calls int
var gotURL, gotID, gotPass string
pull := func(hubURL, customerID, pass string) (string, error) {
calls++
gotURL, gotID, gotPass = hubURL, customerID, pass
return hubYAML, nil
}
got := MaybeIngest(cfgPath, config.Default(), testLogger(), pull)
// pull was called once with the bootstrap's values
if calls != 1 || gotURL != "https://hub.felhom.eu" || gotID != "cust-8200" || gotPass != "five-word-passphrase-here" {
t.Fatalf("pull args wrong: calls=%d url=%q id=%q pass=%q", calls, gotURL, gotID, gotPass)
}
// returned cfg carries the hub's CUSTOMER key + identity (from the pull)
if got.Hub.APIKey != "CUSTKEY_FROM_HUB" || !got.Hub.Enabled || got.Hub.URL != "https://hub.felhom.eu" {
t.Fatalf("hub not from pulled config: %+v", got.Hub)
}
if got.Customer.ID != "cust-8200" || got.Customer.Domain != "cust8200.felhom.eu" {
t.Fatalf("customer not seeded: %+v", got.Customer)
t.Fatalf("customer not from pulled config: %+v", got.Customer)
}
// AND the per-guest local_api merged in from the bootstrap
if got.LocalAPI.Endpoint != "192.168.0.162:8443" || got.LocalAPI.Token != "PERGUESTTOKEN" || got.LocalAPI.Fingerprint != "ab12" {
t.Fatalf("local_api not seeded: %+v", got.LocalAPI)
t.Fatalf("local_api not merged from bootstrap: %+v", got.LocalAPI)
}
if !got.Hub.Enabled || got.Hub.URL != "https://hub.felhom.eu" || got.Hub.APIKey != "HUBKEY" {
t.Fatalf("hub not seeded: %+v", got.Hub)
// unmodeled hub field preserved (forward-compat: map-level merge)
if got.Assets.SourceURL != "https://hub.felhom.eu/assets" {
t.Fatalf("assets.source_url not preserved through merge: %+v", got.Assets)
}
// controller.yaml must now exist on disk (so a restart reads it directly).
if _, err := os.Stat(cfgPath); err != nil {
// the written file must reload configured, carry the customer key, and NOT carry a host key
raw, err := os.ReadFile(cfgPath)
if err != nil {
t.Fatalf("controller.yaml not written: %v", err)
}
// And it must reload as configured (not setup).
s := string(raw)
if !strings.Contains(s, "CUSTKEY_FROM_HUB") {
t.Fatalf("written controller.yaml missing customer key:\n%s", s)
}
if !strings.Contains(s, "PERGUESTTOKEN") || !strings.Contains(s, "192.168.0.162:8443") {
t.Fatalf("written controller.yaml missing merged local_api:\n%s", s)
}
if strings.Contains(s, "host_id") || strings.Contains(s, "HOSTKEY") {
t.Fatalf("written controller.yaml leaked a host key/id:\n%s", s)
}
reloaded, err := config.LoadPermissive(cfgPath)
if err != nil || reloaded.Customer.ID != "cust-8200" {
t.Fatalf("seeded controller.yaml does not reload configured: %v / %+v", err, reloaded.Customer)
if err != nil || reloaded.Customer.ID != "cust-8200" || reloaded.Hub.APIKey != "CUSTKEY_FROM_HUB" {
t.Fatalf("written controller.yaml does not reload configured: %v / %+v", err, reloaded)
}
}
// An already-configured controller is NEVER clobbered (idempotent).
func TestMaybeIngest_DoesNotClobberConfigured(t *testing.T) {
// IDEMPOTENT: an already-configured controller is never clobbered, and pull is NEVER invoked.
func TestMaybeIngest_DoesNotClobberConfigured_NoPull(t *testing.T) {
dir := t.TempDir()
bpath := filepath.Join(dir, "bootstrap.json")
cfgPath := filepath.Join(dir, "controller.yaml")
if err := os.WriteFile(bpath, []byte(goodBootstrap), 0o600); err != nil {
t.Fatal(err)
}
t.Setenv("FELHOM_BOOTSTRAP_PATH", bpath)
_, cfgPath := writeBootstrap(t, dir, goodBootstrapV2)
existing := config.Default()
existing.Customer.ID = "already-here"
existing.Customer.Domain = "existing.felhom.eu"
got := MaybeIngest(cfgPath, existing, testLogger())
pulled := false
pull := func(string, string, string) (string, error) { pulled = true; return hubYAML, nil }
got := MaybeIngest(cfgPath, existing, testLogger(), pull)
if pulled {
t.Fatal("pull was invoked on an already-configured controller")
}
if got.Customer.ID != "already-here" {
t.Fatalf("configured controller was clobbered by bootstrap: %+v", got.Customer)
t.Fatalf("configured controller was clobbered: %+v", got.Customer)
}
if _, err := os.Stat(cfgPath); err == nil {
t.Fatal("controller.yaml was written despite an already-configured controller")
t.Fatal("controller.yaml written despite an already-configured controller")
}
}
// A malformed bootstrap leaves the controller in setup mode (cfg unchanged), no crash.
func TestMaybeIngest_MalformedStaysInSetup(t *testing.T) {
// FAIL-SAFE (transient): a persistently-unreachable hub is retried, then leaves cfg in setup mode
// (no controller.yaml). Asserts the retry count (1 initial + len(pullRetryDelays)).
func TestMaybeIngest_TransientRetriesThenSetup(t *testing.T) {
dir := t.TempDir()
bpath := filepath.Join(dir, "bootstrap.json")
cfgPath := filepath.Join(dir, "controller.yaml")
if err := os.WriteFile(bpath, []byte("{not json"), 0o600); err != nil {
t.Fatal(err)
}
t.Setenv("FELHOM_BOOTSTRAP_PATH", bpath)
_, cfgPath := writeBootstrap(t, dir, goodBootstrapV2)
got := MaybeIngest(cfgPath, config.Default(), testLogger())
// shrink the backoff so the test is fast
orig := pullRetryDelays
pullRetryDelays = []time.Duration{time.Millisecond, time.Millisecond, time.Millisecond}
defer func() { pullRetryDelays = orig }()
calls := 0
pull := func(string, string, string) (string, error) {
calls++
return "", fmt.Errorf("%w: dial tcp: timeout", ErrPullTransient)
}
got := MaybeIngest(cfgPath, config.Default(), testLogger(), pull)
if got.Customer.ID != "" {
t.Fatalf("malformed bootstrap seeded a config: %+v", got.Customer)
t.Fatalf("seeded despite a failing hub pull: %+v", got.Customer)
}
if _, err := os.Stat(cfgPath); err == nil {
t.Fatal("controller.yaml written from malformed bootstrap")
t.Fatal("controller.yaml written despite a failing pull")
}
if want := 1 + len(pullRetryDelays); calls != want {
t.Fatalf("transient retry count: got %d, want %d", calls, want)
}
}
// A bootstrap missing the minimum identity is rejected (stays in setup).
func TestMaybeIngest_MissingIdentityStaysInSetup(t *testing.T) {
// FAIL-SAFE (permanent): an auth/not-found failure is NOT retried (fail fast), setup mode.
func TestMaybeIngest_PermanentNoRetry(t *testing.T) {
dir := t.TempDir()
bpath := filepath.Join(dir, "bootstrap.json")
cfgPath := filepath.Join(dir, "controller.yaml")
if err := os.WriteFile(bpath, []byte(`{"schema":"felhom.bootstrap/v1","local_api":{"endpoint":"x:1"}}`), 0o600); err != nil {
t.Fatal(err)
}
t.Setenv("FELHOM_BOOTSTRAP_PATH", bpath)
_, cfgPath := writeBootstrap(t, dir, goodBootstrapV2)
got := MaybeIngest(cfgPath, config.Default(), testLogger())
calls := 0
pull := func(string, string, string) (string, error) {
calls++
return "", errors.New("authentication failed") // permanent (not wrapped with ErrPullTransient)
}
got := MaybeIngest(cfgPath, config.Default(), testLogger(), pull)
if got.Customer.ID != "" {
t.Fatal("seeded despite missing customer identity")
t.Fatalf("seeded despite a permanent pull failure: %+v", got.Customer)
}
if calls != 1 {
t.Fatalf("permanent failure was retried: %d calls", calls)
}
}
// An absent bootstrap is a no-op (normal setup).
func TestMaybeIngest_AbsentIsNoop(t *testing.T) {
// SCHEMA REJECT: a bootstrap that declares the v1 schema is refused even when
// every field the test supplies (customer.id, hub.url, hub.retrieval_password,
// local_api) is filled in. The pull function must never be called and the
// returned config must still have an empty customer ID.
func TestMaybeIngest_RejectsNonV2Schema(t *testing.T) {
	const v1Bootstrap = `{"schema":"felhom.bootstrap/v1","customer":{"id":"x"},"hub":{"url":"u","retrieval_password":"p"},"local_api":{"endpoint":"e","fingerprint":"f","token":"t"}}`
	_, controllerPath := writeBootstrap(t, t.TempDir(), v1Bootstrap)

	// Spy pull: records that it ran and would hand back a valid hub config.
	var pullCalled bool
	spy := func(_, _, _ string) (string, error) {
		pullCalled = true
		return hubYAML, nil
	}

	result := MaybeIngest(controllerPath, config.Default(), testLogger(), spy)

	switch {
	case pullCalled:
		t.Fatal("pull invoked for a non-v2 schema")
	case result.Customer.ID != "":
		t.Fatal("seeded from a non-v2 schema")
	}
}
// MISSING REQUIRED FIELDS: a v2 bootstrap whose hub section has a url but no
// retrieval_password is refused. The pull function must never be called and
// the returned config must still have an empty customer ID.
func TestMaybeIngest_MissingRequiredStaysInSetup(t *testing.T) {
	const noPassphrase = `{"schema":"felhom.bootstrap/v2","customer":{"id":"x"},"hub":{"url":"u"},"local_api":{"endpoint":"e","fingerprint":"f","token":"t"}}`
	_, controllerPath := writeBootstrap(t, t.TempDir(), noPassphrase)

	// Spy pull: records that it ran and would hand back a valid hub config.
	var pullCalled bool
	spy := func(_, _, _ string) (string, error) {
		pullCalled = true
		return hubYAML, nil
	}

	result := MaybeIngest(controllerPath, config.Default(), testLogger(), spy)

	switch {
	case pullCalled:
		t.Fatal("pull invoked despite a missing retrieval_password")
	case result.Customer.ID != "":
		t.Fatal("seeded despite missing required fields")
	}
}
// MALFORMED / ABSENT: never crash, stay in setup, no pull.
func TestMaybeIngest_MalformedAndAbsent(t *testing.T) {
dir := t.TempDir()
pulled := false
pull := func(string, string, string) (string, error) { pulled = true; return hubYAML, nil }
// malformed
_, cfgPath := writeBootstrap(t, dir, "{not json")
if got := MaybeIngest(cfgPath, config.Default(), testLogger(), pull); got.Customer.ID != "" {
t.Fatal("seeded from malformed bootstrap")
}
// absent
t.Setenv("FELHOM_BOOTSTRAP_PATH", filepath.Join(dir, "nope.json"))
got := MaybeIngest(filepath.Join(dir, "controller.yaml"), config.Default(), testLogger())
if got.Customer.ID != "" {
if got := MaybeIngest(filepath.Join(dir, "c2.yaml"), config.Default(), testLogger(), pull); got.Customer.ID != "" {
t.Fatal("seeded with no bootstrap present")
}
}
// An unsupported schema is rejected.
func TestMaybeIngest_UnsupportedSchema(t *testing.T) {
dir := t.TempDir()
bpath := filepath.Join(dir, "bootstrap.json")
if err := os.WriteFile(bpath, []byte(`{"schema":"felhom.bootstrap/v999","customer":{"id":"x","domain":"y"}}`), 0o600); err != nil {
t.Fatal(err)
}
t.Setenv("FELHOM_BOOTSTRAP_PATH", bpath)
got := MaybeIngest(filepath.Join(dir, "controller.yaml"), config.Default(), testLogger())
if got.Customer.ID != "" {
t.Fatal("seeded from an unsupported schema")
if pulled {
t.Fatal("pull invoked for malformed/absent bootstrap")
}
}