From e61e7dd8fcbfc71c76f3ca7dec0c3355be6d9aa7 Mon Sep 17 00:00:00 2001 From: kisfenyo Date: Thu, 11 Jun 2026 18:04:39 +0200 Subject: [PATCH] v0.42.1: wildcard cert via controller route (entrypoint domains don't issue) Empirically (staging on 9201): traefik v3 issues a cert from a router-level tls.domains but NOT from the entrypoint http.tls.domains. So the wildcard moves to RenderControllerRoute (the always-present anchor): when DNS-01 ACME is configured it carries tls.certResolver+domains *.<domain>+apex, and every other router serves that wildcard by SNI (no per-app labels). Reverts v0.42.0's dead entrypoint-domains + TraefikData.Domain. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 23 ++++++----- controller/internal/infra/infra.go | 38 +++++++++++++------ controller/internal/infra/infra_test.go | 35 +++++++++++------ .../internal/infra/templates/traefik.yml.tmpl | 9 ----- controller/internal/stacks/infra.go | 5 ++- 5 files changed, 66 insertions(+), 44 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 580a667..75dd0fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,21 +1,24 @@ ## Changelog -### v0.42.0 — real Let's Encrypt cert: wildcard proactive issuance (2026-06-11) +### v0.42.1 — real Let's Encrypt cert: wildcard proactive issuance via the controller route (2026-06-11) The base-infra traefik obtained **no** real cert (acme.json empty) — both routers relied on the websecure entrypoint-default `certResolver`, which does not trigger proactive DNS-01 issuance, so everything ran on traefik's self-signed default (masked externally by the tunnel's `noTLSVerify`). This blocks LAN-direct (a LAN client TLS-handshakes straight to traefik and needs the real cert). -- **`internal/infra/templates/traefik.yml.tmpl`** — the websecure entrypoint's `http.tls` now declares - `domains: [{main: "*.<domain>", sans: ["<domain>"]}]` so traefik **proactively obtains the wildcard - `*.<domain>` + apex at startup** (via Cloudflare DNS-01).
Every router then serves the real cert by - SNI match — no per-app `certresolver` labels to forget, cert ready before the first client connects. - Gated on `.CFAPIToken` (wildcards require DNS-01; HTTP-01 can't issue them). -- **`infra.TraefikData`** gains a `Domain` field; **`stacks.ensureTraefik`** now wires - `Domain: cfg.Customer.Domain` into `RenderTraefik` (previously unset). -- Validated staging→prod on guest 9201 (Fake LE → real LE), then GATE: `felhom.<domain>` + - `files.<domain>` return `200 0` (real cert, TLS verify OK) from a real LAN host. +- **`infra.RenderControllerRoute(domain, wildcardTLS)`** — the always-present controller route is now + the **wildcard-issuance anchor**: when DNS-01 ACME is configured it carries router-level + `tls.certResolver: letsencrypt` + `tls.domains: [{main: "*.<domain>", sans: ["<domain>"]}]`, so + traefik **proactively obtains `*.<domain>` + apex at startup** via Cloudflare DNS-01. Every other + router (filebrowser, future apps) then serves that one wildcard by SNI match — **no per-app + certresolver labels**, real cert ready before the first client connects. `stacks.wireController` + passes `wildcardTLS = (CFAPIToken != "" && Email != "")`. +- **Empirically established (staging on 9201):** traefik v3 issues from a **router-level** `tls.domains` + but **NOT** from the entrypoint-level `http.tls.domains` (acme.json stayed empty with the latter). The + v0.42.0 attempt (entrypoint `domains` + `TraefikData.Domain`) was reverted accordingly. +- Validated staging→prod on guest 9201 (Fake LE wildcard → real LE wildcard), then GATE: `felhom.<domain>` + + `files.<domain>` return `200 0` (real wildcard cert, TLS verify OK) direct-to-guest from a real LAN host.
### v0.41.2 — fix controller-route auto-connect + dead dashboard cross-drive block (2026-06-11) diff --git a/controller/internal/infra/infra.go b/controller/internal/infra/infra.go index 0bd16c5..a4f15e9 100644 --- a/controller/internal/infra/infra.go +++ b/controller/internal/infra/infra.go @@ -40,11 +40,11 @@ type FileSpec struct { } // TraefikData is the per-customer input for the traefik stack. ACMEEmail empty → no Let's Encrypt -// (traefik serves self-signed); CFAPIToken empty → HTTP-01 instead of Cloudflare DNS-01, and no .env -// (and no wildcard — HTTP-01 can't issue wildcards). Domain drives the wildcard proactive-issuance -// SAN (`*.<domain>` + apex) when DNS-01 is in use. +// (traefik serves self-signed); CFAPIToken empty → HTTP-01 instead of Cloudflare DNS-01, and no .env. +// (Wildcard proactive issuance is driven by the controller route, NOT here — see RenderControllerRoute: +// the entrypoint-level `http.tls.domains` does NOT trigger issuance in traefik v3, a router-level +// `tls.domains` does.) type TraefikData struct { - Domain string ACMEEmail string CFAPIToken string } @@ -167,10 +167,27 @@ networks: // RenderControllerRoute returns a traefik file-provider dynamic config routing the controller's own // dashboard — Host(felhom.<domain>) → http://felhom-controller:8080 on websecure. This can only be // produced POST config-pull (the v2 bootstrap.json carries no domain), which is why the controller -// wires its OWN route at bring-up instead of via a static Docker label at bootstrap time. `tls: {}` -// inherits the websecure entrypoint's default certResolver (letsencrypt) when ACME is configured, and -// otherwise falls back to traefik's default self-signed cert. -func RenderControllerRoute(domain string) string { +// wires its OWN route at bring-up instead of via a static Docker label at bootstrap time.
+// +// When wildcardTLS is true (DNS-01 ACME configured = CF API token + email), this route is ALSO the +// **wildcard-issuance anchor**: its router-level `tls.domains` makes traefik proactively obtain +// `*.<domain>` + apex via Cloudflare DNS-01 at startup. Every other router (filebrowser, future apps) +// then serves that one wildcard by SNI match — no per-app certresolver labels, real cert before the +// first client connects. (Empirically, traefik v3 issues from a router-level `tls.domains` but NOT +// from the entrypoint-level `http.tls.domains` — hence this lives here, not in traefik.yml.) +// When wildcardTLS is false (no DNS-01: HTTP-01 or no ACME — wildcards need DNS-01), it emits a plain +// TLS router (traefik's self-signed default until/unless a cert exists). +func RenderControllerRoute(domain string, wildcardTLS bool) string { + tlsBlock := " tls: {}\n" + if wildcardTLS { + tlsBlock = fmt.Sprintf(` tls: + certResolver: letsencrypt + domains: + - main: "*.%s" + sans: + - "%s" +`, domain, domain) + } return fmt.Sprintf(`# Traefik dynamic route for the felhom-controller dashboard — managed by felhom-controller. # WARNING: auto-generated at base-infra bring-up. Manual edits are overwritten.
http: @@ -180,13 +197,12 @@ http: entryPoints: - websecure service: felhom-controller - tls: {} - services: +%s services: felhom-controller: loadBalancer: servers: - url: "http://felhom-controller:8080" -`, domain) +`, domain, tlsBlock) } // RenderFileBrowserConfig returns a FileBrowser Quantum config.yaml with one source per registered diff --git a/controller/internal/infra/infra_test.go b/controller/internal/infra/infra_test.go index 6f70c81..d19d62f 100644 --- a/controller/internal/infra/infra_test.go +++ b/controller/internal/infra/infra_test.go @@ -25,9 +25,9 @@ func allRendered(t *testing.T) []string { t.Helper() var out []string for _, td := range []TraefikData{ - {Domain: "example.com", ACMEEmail: "admin@example.com", CFAPIToken: "cf-api-tok"}, - {Domain: "example.com", ACMEEmail: "admin@example.com"}, // email, no CF token → HTTP-01 - {Domain: "example.com"}, // token-less / LAN-only + {ACMEEmail: "admin@example.com", CFAPIToken: "cf-api-tok"}, + {ACMEEmail: "admin@example.com"}, // email, no CF token → HTTP-01 + {}, // token-less / LAN-only } { files, err := RenderTraefik(td) if err != nil { @@ -64,7 +64,7 @@ func TestNoLatestTagSurvives(t *testing.T) { } func TestTraefikWithCloudflareToken(t *testing.T) { - files, err := RenderTraefik(TraefikData{Domain: "example.com", ACMEEmail: "admin@example.com", CFAPIToken: "cf-api-tok"}) + files, err := RenderTraefik(TraefikData{ACMEEmail: "admin@example.com", CFAPIToken: "cf-api-tok"}) if err != nil { t.Fatal(err) } @@ -75,9 +75,9 @@ func TestTraefikWithCloudflareToken(t *testing.T) { if !strings.Contains(yml, "dnsChallenge") || !strings.Contains(yml, "provider: cloudflare") { t.Error("expected Cloudflare DNS-01 challenge when CF API token set") } - // Wildcard proactive issuance (DNS-01 path): the entrypoint must request *.<domain> + apex.
- if !strings.Contains(yml, `main: "*.example.com"`) || !strings.Contains(yml, `- "example.com"`) { - t.Errorf("expected wildcard domains block (*.example.com + apex) on the DNS-01 path:\n%s", yml) + // The wildcard is NOT in traefik.yml — the entrypoint-level domains doesn't trigger issuance. + if strings.Contains(yml, "domains:") { + t.Error("traefik.yml must not carry the entrypoint domains block (proven not to issue)") } if strings.Contains(yml, "httpChallenge") { t.Error("HTTP-01 must NOT appear when a CF API token is set") @@ -121,9 +121,6 @@ func TestTraefikEmailNoCloudflareToken(t *testing.T) { if strings.Contains(yml, "dnsChallenge") { t.Error("DNS-01 must NOT appear without a CF token") } - if strings.Contains(yml, "main: \"*.") { - t.Error("wildcard domains block must NOT appear on the HTTP-01 path (wildcards need DNS-01)") - } if _, ok := files[".env"]; ok { t.Error("no .env should be emitted without a CF API token") } @@ -170,7 +167,8 @@ func TestCloudflaredRender(t *testing.T) { } func TestControllerRoute(t *testing.T) { - r := RenderControllerRoute("demo-felhom.eu") + // Wildcard path (DNS-01 ACME): the route anchors *.<domain> + apex proactive issuance.
+ r := RenderControllerRoute("demo-felhom.eu", true) if !strings.Contains(r, "Host(`felhom.demo-felhom.eu`)") { t.Errorf("domain not wired into controller route rule: %q", r) } @@ -180,9 +178,22 @@ func TestControllerRoute(t *testing.T) { if !strings.Contains(r, "websecure") { t.Error("controller route must be on the websecure entrypoint") } + if !strings.Contains(r, "certResolver: letsencrypt") || + !strings.Contains(r, `main: "*.demo-felhom.eu"`) || !strings.Contains(r, `- "demo-felhom.eu"`) { + t.Errorf("wildcard issuance anchor missing on the DNS-01 controller route:\n%s", r) + } var v any if err := yaml.Unmarshal([]byte(r), &v); err != nil { - t.Fatalf("controller route is not valid YAML: %v\n%s", err, r) + t.Fatalf("controller route (wildcard) is not valid YAML: %v\n%s", err, r) + } + + // Non-ACME path: plain TLS, no resolver/domains, still valid YAML. + plain := RenderControllerRoute("demo-felhom.eu", false) + if strings.Contains(plain, "certResolver") || strings.Contains(plain, "domains:") { + t.Errorf("non-ACME route must not carry certResolver/domains:\n%s", plain) + } + if err := yaml.Unmarshal([]byte(plain), &v); err != nil { + t.Fatalf("controller route (plain) is not valid YAML: %v\n%s", err, plain) } } diff --git a/controller/internal/infra/templates/traefik.yml.tmpl b/controller/internal/infra/templates/traefik.yml.tmpl index d777ca6..fd00199 100644 --- a/controller/internal/infra/templates/traefik.yml.tmpl +++ b/controller/internal/infra/templates/traefik.yml.tmpl @@ -19,15 +19,6 @@ entryPoints: http: tls: certResolver: letsencrypt -{{- if .CFAPIToken}} - # Wildcard proactive issuance (DNS-01 only — HTTP-01 can't do wildcards): traefik obtains - # *.<domain> (+ apex) at startup, so every router serves the real cert by SNI match with no - # per-app labels and the cert is ready before the first client connects.
- domains: - - main: "*.{{.Domain}}" - sans: - - "{{.Domain}}" -{{- end}} {{- end}} providers: diff --git a/controller/internal/stacks/infra.go b/controller/internal/stacks/infra.go index 47e50ba..1894470 100644 --- a/controller/internal/stacks/infra.go +++ b/controller/internal/stacks/infra.go @@ -92,7 +92,6 @@ func (m *Manager) ensureTraefik(dir string) error { return fmt.Errorf("chmod acme.json: %w", err) } files, err := infra.RenderTraefik(infra.TraefikData{ - Domain: m.cfg.Customer.Domain, ACMEEmail: m.cfg.Customer.Email, CFAPIToken: m.cfg.Infrastructure.CFAPIToken, }) @@ -170,7 +169,9 @@ func (m *Manager) wireController(traefikDir string) error { return fmt.Errorf("mkdir dynamic: %w", err) } routePath := filepath.Join(dynDir, "controller.yml") - want := infra.RenderControllerRoute(domain) + // DNS-01 ACME configured (CF token + email) → this route anchors wildcard proactive issuance. + wildcardTLS := m.cfg.Infrastructure.CFAPIToken != "" && m.cfg.Customer.Email != "" + want := infra.RenderControllerRoute(domain, wildcardTLS) if cur, err := os.ReadFile(routePath); err != nil || string(cur) != want { if err := os.WriteFile(routePath, []byte(want), 0o644); err != nil { return fmt.Errorf("write controller route: %w", err)