diff --git a/CHANGELOG.md b/CHANGELOG.md index 0253a18..f5487b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,25 @@ ## Changelog +### v0.41.1 — wire the controller dashboard into traefik (`felhom.<domain>` routing) (2026-06-11) + +Completes v0.41.0: the base-infra bring-up stood up traefik/cloudflared/filebrowser but nothing routed +the **controller itself** through traefik, so `felhom.<domain>` 404'd (live-confirmed: controller on +`bridge` only, no traefik labels, empty `dynamic/`). filebrowser self-registers via Docker labels + +network membership baked into its compose; the controller can't — it's started by the golden bootstrap +*before* `traefik-public` exists, and the v2 `bootstrap.json` carries no domain (it comes from the hub +pull). So the wiring must happen post-pull. + +- `infra.RenderControllerRoute(domain)` — a traefik file-provider dynamic route: + `Host(felhom.<domain>)` → `http://felhom-controller:8080` on websecure (`tls: {}` inherits the + entrypoint's default `letsencrypt` resolver when ACME is configured, else self-signed). +- `EnsureBaseStack` now calls `wireController`: writes `dynamic/controller.yml` (write-if-changed, so the + traefik file watcher doesn't reload every health tick) and `docker network connect traefik-public + felhom-controller` (idempotent — skipped when already attached) so traefik can resolve the controller + by name. Runs on first boot and every self-heal tick. The Section-G shared `/opt/docker/stacks` mount + means traefik picks up the dynamic file live. +- Diagnostic confirmed the tunnel chain was already healthy (token tunnel-id matches the DNS tunnel; + CF ingress `*.<domain> → https://traefik`); the only gap was this controller wiring. + ### v0.41.0 — first-boot base-infrastructure bring-up + self-heal (+ Section-G mount fix) (2026-06-11) Lockstep with `felhom-agent` v0.20.0 + a golden rebake. 
// RenderControllerRoute builds the traefik file-provider dynamic configuration that exposes the
// controller's own dashboard: one router matching Host(`felhom.<domain>`) on the websecure
// entrypoint, backed by a service that forwards to http://felhom-controller:8080.
//
// The domain is only known after the hub config pull (the v2 bootstrap.json carries none), which is
// why this route is rendered at bring-up rather than expressed as a static Docker label. The router
// uses an empty `tls: {}` block, so no certificate resolver is named in this file; certificate
// selection is left to the entrypoint/traefik defaults.
func RenderControllerRoute(domain string) string {
	// The template is a single raw string, so the backticks traefik expects around the host name
	// are supplied with the argument instead of being spliced into the literal.
	const routeTemplate = `# Traefik dynamic route for the felhom-controller dashboard — managed by felhom-controller.
# WARNING: auto-generated at base-infra bring-up. Manual edits are overwritten.
http:
  routers:
    felhom-controller:
      rule: "Host(%s)"
      entryPoints:
        - websecure
      service: felhom-controller
      tls: {}
  services:
    felhom-controller:
      loadBalancer:
        servers:
          - url: "http://felhom-controller:8080"
`
	quotedHost := "`felhom." + domain + "`"
	return fmt.Sprintf(routeTemplate, quotedHost)
}
diff --git a/controller/internal/infra/infra_test.go b/controller/internal/infra/infra_test.go index cdd2c79..e68bb26 100644 --- a/controller/internal/infra/infra_test.go +++ b/controller/internal/infra/infra_test.go @@ -162,6 +162,23 @@ func TestCloudflaredRender(t *testing.T) { } } +func TestControllerRoute(t *testing.T) { + r := RenderControllerRoute("demo-felhom.eu") + if !strings.Contains(r, "Host(`felhom.demo-felhom.eu`)") { + t.Errorf("domain not wired into controller route rule: %q", r) + } + if !strings.Contains(r, "http://felhom-controller:8080") { + t.Errorf("controller service URL missing: %q", r) + } + if !strings.Contains(r, "websecure") { + t.Error("controller route must be on the websecure entrypoint") + } + var v any + if err := yaml.Unmarshal([]byte(r), &v); err != nil { + t.Fatalf("controller route is not valid YAML: %v\n%s", err, r) + } +} + func TestFileBrowserRender(t *testing.T) { compose := RenderFileBrowserCompose("demo-felhom.eu", nil) if !strings.Contains(compose, "Host(`files.demo-felhom.eu`)") { diff --git a/controller/internal/stacks/infra.go b/controller/internal/stacks/infra.go index 493d570..d1cd90a 100644 --- a/controller/internal/stacks/infra.go +++ b/controller/internal/stacks/infra.go @@ -36,12 +36,22 @@ func (m *Manager) EnsureBaseStack() error { } base := m.cfg.Paths.StacksDir + traefikDir := filepath.Join(base, "traefik") var errs []string - if err := m.ensureTraefik(filepath.Join(base, "traefik")); err != nil { + if err := m.ensureTraefik(traefikDir); err != nil { errs = append(errs, fmt.Sprintf("traefik: %v", err)) } + // Wire the controller's OWN dashboard route into traefik. Unlike filebrowser (which self-registers + // via Docker labels + network membership baked into its compose), the controller is started by the + // golden bootstrap before traefik-public exists and the v2 bootstrap carries no domain — so it can't + // self-label. 
We do it here, post-pull, where the domain is known: drop a file-provider route and + // join the controller to traefik-public so traefik can resolve felhom-controller:8080. + if err := m.wireController(traefikDir); err != nil { + errs = append(errs, fmt.Sprintf("controller-route: %v", err)) + } + if m.cfg.Infrastructure.CFTunnelToken != "" { if err := m.ensureCloudflared(filepath.Join(base, "cloudflared")); err != nil { errs = append(errs, fmt.Sprintf("cloudflared: %v", err)) @@ -137,6 +147,57 @@ func (m *Manager) ensureFileBrowser(dir string) error { return m.composeUp(dir) } +// controllerContainer is the fixed name of the in-guest controller container (set by the golden +// bootstrap `docker run --name`). traefik resolves it by this name once both share traefik-public. +const controllerContainer = "felhom-controller" + +// wireController makes the controller dashboard reachable through traefik: it writes the file-provider +// route (Host(felhom.<domain>) → http://felhom-controller:8080) and connects the controller container +// to traefik-public. Both are idempotent — the route is written only when its content changes (so the +// traefik file watcher doesn't reload every health tick), and the network connect is skipped when the +// controller is already attached. Domain is required (it comes from the hub pull); a missing domain is +// a no-op (logged) rather than an error. 
+func (m *Manager) wireController(traefikDir string) error { + domain := m.cfg.Customer.Domain + if domain == "" { + m.logger.Printf("[WARN] [infra] controller route skipped — no customer domain configured") + return nil + } + + dynDir := filepath.Join(traefikDir, "dynamic") + if err := os.MkdirAll(dynDir, 0o755); err != nil { + return fmt.Errorf("mkdir dynamic: %w", err) + } + routePath := filepath.Join(dynDir, "controller.yml") + want := infra.RenderControllerRoute(domain) + if cur, err := os.ReadFile(routePath); err != nil || string(cur) != want { + if err := os.WriteFile(routePath, []byte(want), 0o644); err != nil { + return fmt.Errorf("write controller route: %w", err) + } + m.logger.Printf("[INFO] [infra] wrote controller route → %s (Host felhom.%s → felhom-controller:8080)", routePath, domain) + } + + if !containerOnNetwork(controllerContainer, traefikNetwork) { + out, err := exec.Command("docker", "network", "connect", traefikNetwork, controllerContainer).CombinedOutput() + if err != nil && !strings.Contains(string(out), "already exists") { + return fmt.Errorf("network connect %s: %s: %w", controllerContainer, strings.TrimSpace(string(out)), err) + } + m.logger.Printf("[INFO] [infra] connected %s to %s", controllerContainer, traefikNetwork) + } + return nil +} + +// containerOnNetwork reports whether the named container is attached to the given docker network. +func containerOnNetwork(name, network string) bool { + out, err := exec.Command("docker", "inspect", "--format", + fmt.Sprintf("{{index .NetworkSettings.Networks %q}}", network), name).Output() + if err != nil { + return false + } + s := strings.TrimSpace(string(out)) + return s != "" && s != "" +} + // ensureTraefikNetwork creates the external traefik-public docker network if absent (idempotent; // tolerates a create/inspect race). Uses the docker CLI directly — it's a network op, not compose. func (m *Manager) ensureTraefikNetwork() error {