v0.41.0: first-boot base-infra bring-up + self-heal (+ Section-G mount fix)

New internal/infra package renders traefik/cloudflared/filebrowser from config
(pinned images, single source of truth; web filebrowser path delegates here).
stacks.EnsureBaseStack deploys the traefik-public network + the three stacks,
single-flight + idempotent + non-fatal; wired to first boot and every health
tick. monitor.EffectiveProtected drops cloudflared when no tunnel token.
Section-G fix lives in felhom-agent build-golden.sh (same-path stacks bind).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-11 14:56:42 +02:00
parent ba0e1eb04a
commit abbd9488c6
13 changed files with 873 additions and 111 deletions
+221
View File
@@ -0,0 +1,221 @@
// Package infra renders the base-infrastructure stacks (traefik, cloudflared, filebrowser) from the
// controller's config. It is PURE: templates in, file contents out — no docker, no filesystem, no IO.
// The orchestration (write the files, create the network, compose-up) lives in
// internal/stacks/infra.go (EnsureBaseStack), which owns the side effects.
//
// The templates are lifted verbatim from scripts/docker-setup.sh (the bare-metal installer, the
// historical source of truth for these stacks); bash `${VAR}` became Go template `{{.Field}}` and the
// heredoc conditionals became `{{if}}`. Image tags are PINNED here as the single source of truth — the
// web FileBrowser sync path (internal/web/handlers.go) delegates here so the pins can never diverge.
package infra
import (
"embed"
"fmt"
"path/filepath"
"strings"
"text/template"
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
)
// Pinned image tags — NEVER ":latest" (a floating tag breaks reproducible golden bakes and lets the
// deployed version drift). Verified to resolve on Docker Hub before baking.
const (
// TraefikImage is the image reference rendered into the traefik compose file.
TraefikImage = "traefik:v3.6.7"
// CloudflaredImage is the image reference rendered into the cloudflared compose file.
CloudflaredImage = "cloudflare/cloudflared:2026.6.0"
// FileBrowserImage is the image reference rendered into the FileBrowser compose file.
FileBrowserImage = "gtstef/filebrowser:1.3.3-stable"
)
// templateFS holds the stack templates compiled into the binary by the embed directive below, so
// rendering never reads from the host filesystem.
//
//go:embed templates/*.tmpl
var templateFS embed.FS
// tmpl is the parsed set of all embedded templates; each is looked up by its file name
// (e.g. "traefik.yml.tmpl"). template.Must panics during package initialisation if parsing fails.
var tmpl = template.Must(template.New("infra").ParseFS(templateFS, "templates/*.tmpl"))
// FileSpec is one rendered file: its content and the mode it must be written with. The mode matters —
// secret-bearing files such as the traefik .env (which carries the Cloudflare API token) are 0600,
// while plain config is emitted as 0644. This package only describes the mode; it never writes files.
type FileSpec struct {
// Content is the complete rendered text of the file.
Content string
Mode uint32 // os.FileMode bits (e.g. 0o600); uint32 keeps this package IO-free
}
// TraefikData is the per-customer input for the traefik stack. ACMEEmail empty → no Let's Encrypt
// (traefik serves self-signed); CFAPIToken empty → HTTP-01 instead of Cloudflare DNS-01, and no .env.
type TraefikData struct {
// ACMEEmail is the Let's Encrypt account email; when empty the templates emit no cert resolver.
ACMEEmail string
// CFAPIToken is the Cloudflare API token for the DNS-01 challenge. The templates only test whether
// it is set; its value is written solely to the .env file.
CFAPIToken string
}
// traefikTmpl is the value handed to the traefik templates: the caller's TraefikData (embedded, so
// the templates address its fields directly as .ACMEEmail / .CFAPIToken) plus the image reference.
type traefikTmpl struct {
TraefikData
// Image is the container image reference rendered as the service's image.
Image string
}
// CloudflaredData is the per-customer input for the cloudflared stack (just the tunnel token).
type CloudflaredData struct {
// CFTunnelToken is the Cloudflare Tunnel token; the compose template passes it to the container
// as the TUNNEL_TOKEN environment variable.
CFTunnelToken string
}
// cloudflaredTmpl is the value handed to the cloudflared compose template: the caller's
// CloudflaredData (embedded, so .CFTunnelToken is addressable directly) plus the image reference.
type cloudflaredTmpl struct {
CloudflaredData
// Image is the container image reference rendered as the service's image.
Image string
}
// render executes the embedded template called name against data and returns the produced text.
// On failure it returns an empty string and the template error wrapped with the template name.
func render(name string, data any) (string, error) {
	var out strings.Builder
	err := tmpl.ExecuteTemplate(&out, name, data)
	if err != nil {
		return "", fmt.Errorf("render %s: %w", name, err)
	}
	return out.String(), nil
}
// RenderTraefik renders the traefik stack for d and returns the files keyed by name:
//
//   - "traefik.yml" (static config) and "docker-compose.yml", both 0644;
//   - ".env" (0600) carrying CF_DNS_API_TOKEN — only when d.CFAPIToken is non-empty, which keeps
//     the token out of the compose file.
//
// The first template that fails to render aborts the call with a nil map and that error. The
// orchestrator additionally creates dynamic/, certs/ and an empty 0600 acme.json.
func RenderTraefik(d TraefikData) (map[string]FileSpec, error) {
	data := traefikTmpl{TraefikData: d, Image: TraefikImage}

	// Render in a fixed order so the reported error is always that of the first failing template.
	out := make(map[string]FileSpec, 3)
	for _, item := range []struct{ file, src string }{
		{file: "traefik.yml", src: "traefik.yml.tmpl"},
		{file: "docker-compose.yml", src: "traefik-compose.yml.tmpl"},
	} {
		content, err := render(item.src, data)
		if err != nil {
			return nil, err
		}
		out[item.file] = FileSpec{Content: content, Mode: 0o644}
	}

	if d.CFAPIToken == "" {
		return out, nil
	}

	const envHeader = "# Cloudflare API token for Let's Encrypt DNS-01 challenge (Zone:DNS:Edit).\n" +
		"# Managed by felhom-controller — do not edit.\n"
	out[".env"] = FileSpec{
		Content: envHeader + "CF_DNS_API_TOKEN=" + d.CFAPIToken + "\n",
		Mode:    0o600,
	}
	return out, nil
}
// RenderCloudflared returns the cloudflared stack files (compose only — no bind mounts; the tunnel
// token is the entire config). Caller deploys this only when a tunnel token is configured.
//
// The compose template inlines the tunnel token as TUNNEL_TOKEN, so the rendered file is itself a
// secret. It is therefore emitted with mode 0600 — the same protection the traefik .env gets for
// the Cloudflare API token — instead of the world-readable 0644 used for plain config.
//
// On a template error the returned map is nil.
func RenderCloudflared(d CloudflaredData) (map[string]FileSpec, error) {
	cd := cloudflaredTmpl{CloudflaredData: d, Image: CloudflaredImage}
	compose, err := render("cloudflared-compose.yml.tmpl", cd)
	if err != nil {
		return nil, err
	}
	return map[string]FileSpec{
		// 0600: the file embeds the tunnel token in clear text.
		"docker-compose.yml": {Content: compose, Mode: 0o600},
	}, nil
}
// RenderFileBrowserCompose returns FileBrowser's docker-compose.yml for the given domain and storage
// volume-mount lines. Ported verbatim from internal/web/handlers.go (the single source of truth now
// lives here so the pinned image can't diverge between bring-up and the web storage-sync path).
//
// domain is interpolated twice: into the header comment and into the Traefik Host() rule, both as
// files.<domain>. storageMounts are joined with newlines and appended verbatim after the
// config.yaml mount, so each entry must already be a complete YAML list item carrying its own
// indentation (the package tests pass entries of the form "- /mnt/hdd_1:/srv/hdd_1"). Neither
// argument is escaped or validated here. The image is always FileBrowserImage.
func RenderFileBrowserCompose(domain string, storageMounts []string) string {
// With no mounts the volumes list ends at the config.yaml entry. Otherwise a marker comment and the
// caller's lines follow it; the leading "\n" terminates the config.yaml line in the template.
storageSection := ""
if len(storageMounts) > 0 {
storageSection = "\n # Storage paths (auto-generated by felhom-controller)\n" +
strings.Join(storageMounts, "\n")
}
// Argument order below matches the four %s verbs: header domain, image, storage section, Host() domain.
return fmt.Sprintf(`# FileBrowser Quantum — Infrastructure file manager
# Domain: files.%s
# Managed by felhom-controller. WARNING: Volume mounts are auto-generated; manual edits are overwritten.
services:
filebrowser:
image: %s
container_name: filebrowser
restart: unless-stopped
environment:
- TZ=Europe/Budapest
- FILEBROWSER_CONFIG=/home/filebrowser/config.yaml
volumes:
- filebrowser_data:/home/filebrowser/data
- ./config.yaml:/home/filebrowser/config.yaml:ro%s
networks:
- traefik-public
deploy:
resources:
limits:
memory: 256M
healthcheck:
test: ["CMD", "wget", "--spider", "-q", "http://localhost:80/"]
interval: 30s
timeout: 5s
retries: 3
start_period: 15s
labels:
- "traefik.enable=true"
- "traefik.http.routers.filebrowser.rule=Host(`+"`"+`files.%s`+"`"+`)"
- "traefik.http.routers.filebrowser.entrypoints=websecure"
- "traefik.http.routers.filebrowser.tls=true"
- "traefik.http.services.filebrowser.loadbalancer.server.port=80"
- "traefik.docker.network=traefik-public"
volumes:
filebrowser_data:
networks:
traefik-public:
external: true
`, domain, FileBrowserImage, storageSection, domain)
}
// RenderFileBrowserConfig returns a FileBrowser Quantum config.yaml with one source per registered
// storage path (each a named sidebar entry). Empty paths → a single default /srv source. Ported
// verbatim from internal/web/handlers.go.
//
// For every entry the source path is /srv/<base name of sp.Path>; the sidebar name is sp.Label, or
// that base name when the label is empty. The name is written with %q (Go-quoted); the path is
// interpolated as-is between double quotes. Entries are emitted in the order given.
func RenderFileBrowserConfig(paths []settings.StoragePath) string {
	// Accumulate the source list in a builder rather than by repeated string concatenation.
	var sources strings.Builder
	if len(paths) == 0 {
		sources.WriteString(` - path: "/srv"
`)
	}
	for _, sp := range paths {
		mountName := filepath.Base(sp.Path)
		label := sp.Label
		if label == "" {
			label = mountName
		}
		fmt.Fprintf(&sources, " - path: \"/srv/%s\"\n name: %q\n config:\n defaultEnabled: true\n", mountName, label)
	}
	// The source list ends with a newline, so "userDefaults:" follows the %s verb on the same
	// template line.
	return fmt.Sprintf(`# FileBrowser Quantum — managed by felhom-controller
# WARNING: This file is auto-generated. Manual edits will be overwritten.
server:
port: 80
baseURL: "/"
database: "/home/filebrowser/data/database.db"
logging:
- levels: "info|warning|error"
sources:
%suserDefaults:
stickySidebar: true
darkMode: true
viewMode: "normal"
showHidden: false
dateFormat: false
gallerySize: 3
themeColor: "var(--blue)"
preview:
disableHideSidebar: false
highQuality: true
image: true
video: true
motionVideoPreview: true
office: true
popup: true
autoplayMedia: true
folder: true
permissions:
api: false
admin: false
modify: false
share: false
realtime: false
delete: false
create: false
download: true
`, sources.String())
}
+191
View File
@@ -0,0 +1,191 @@
package infra
import (
"strings"
"testing"
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
"gopkg.in/yaml.v3"
)
// TestRenderedYAMLParses checks that every rendered output produced by allRendered unmarshals as
// YAML, which catches indentation damage caused by template whitespace handling. It stops at the
// first document that fails to parse and prints that document.
func TestRenderedYAMLParses(t *testing.T) {
	docs := allRendered(t)
	for idx := range docs {
		var parsed any
		err := yaml.Unmarshal([]byte(docs[idx]), &parsed)
		if err == nil {
			continue
		}
		t.Fatalf("rendered output #%d is not valid YAML: %v\n---\n%s", idx, err, docs[idx])
	}
}
// allComposeStrings renders every compose/config we emit, across the token/token-less matrix, so a
// single ":latest" anywhere is caught.
func allRendered(t *testing.T) []string {
t.Helper()
var out []string
for _, td := range []TraefikData{
{ACMEEmail: "admin@example.com", CFAPIToken: "cf-api-tok"},
{ACMEEmail: "admin@example.com"}, // email, no CF token → HTTP-01
{}, // token-less / LAN-only
} {
files, err := RenderTraefik(td)
if err != nil {
t.Fatalf("RenderTraefik(%+v): %v", td, err)
}
for _, f := range files {
out = append(out, f.Content)
}
}
cf, err := RenderCloudflared(CloudflaredData{CFTunnelToken: "tunnel-tok"})
if err != nil {
t.Fatalf("RenderCloudflared: %v", err)
}
for _, f := range cf {
out = append(out, f.Content)
}
out = append(out, RenderFileBrowserCompose("example.com", nil))
out = append(out, RenderFileBrowserCompose("example.com", []string{" - /mnt/hdd_1:/srv/hdd_1"}))
out = append(out, RenderFileBrowserConfig(nil))
return out
}
// TestNoLatestTagSurvives enforces image pinning in two places: each image constant must carry an
// explicit tag that is not "latest", and no rendered output may contain ":latest" at all.
func TestNoLatestTagSurvives(t *testing.T) {
	images := [...]string{TraefikImage, CloudflaredImage, FileBrowserImage}
	for i := range images {
		ref := images[i]
		pinned := strings.Contains(ref, ":") && !strings.HasSuffix(ref, ":latest")
		if !pinned {
			t.Fatalf("image constant is not pinned: %q", ref)
		}
	}

	rendered := allRendered(t)
	for i := range rendered {
		if !strings.Contains(rendered[i], ":latest") {
			continue
		}
		t.Fatalf(":latest survived in rendered output:\n%s", rendered[i])
	}
}
func TestTraefikWithCloudflareToken(t *testing.T) {
files, err := RenderTraefik(TraefikData{ACMEEmail: "admin@example.com", CFAPIToken: "cf-api-tok"})
if err != nil {
t.Fatal(err)
}
yml := files["traefik.yml"].Content
if !strings.Contains(yml, "certResolver: letsencrypt") {
t.Error("expected certResolver on websecure when ACME email set")
}
if !strings.Contains(yml, "dnsChallenge") || !strings.Contains(yml, "provider: cloudflare") {
t.Error("expected Cloudflare DNS-01 challenge when CF API token set")
}
if strings.Contains(yml, "httpChallenge") {
t.Error("HTTP-01 must NOT appear when a CF API token is set")
}
if !strings.Contains(yml, "email: admin@example.com") {
t.Error("ACME email must appear in the cert-resolver block")
}
compose := files["docker-compose.yml"].Content
if !strings.Contains(compose, "env_file") {
t.Error("expected env_file in traefik compose when CF API token set")
}
if !strings.Contains(compose, TraefikImage) {
t.Errorf("expected pinned traefik image %q in compose", TraefikImage)
}
env, ok := files[".env"]
if !ok {
t.Fatal("expected a .env file when CF API token is set")
}
if !strings.Contains(env.Content, "CF_DNS_API_TOKEN=cf-api-tok") {
t.Errorf("CF API token not wired into .env: %q", env.Content)
}
if env.Mode != 0o600 {
t.Errorf(".env must be 0600 (carries the CF token), got %o", env.Mode)
}
if files["traefik.yml"].Mode != 0o644 || files["docker-compose.yml"].Mode != 0o644 {
t.Error("traefik.yml and docker-compose.yml must be 0644")
}
}
// TestTraefikEmailNoCloudflareToken renders traefik with an ACME email but no Cloudflare token and
// checks the fallback: the HTTP challenge is configured, the DNS challenge is absent, and no .env
// is produced.
func TestTraefikEmailNoCloudflareToken(t *testing.T) {
	files, err := RenderTraefik(TraefikData{ACMEEmail: "admin@example.com"})
	if err != nil {
		t.Fatal(err)
	}

	static := files["traefik.yml"].Content
	usesHTTP01 := strings.Contains(static, "httpChallenge")
	usesDNS01 := strings.Contains(static, "dnsChallenge")
	_, hasEnv := files[".env"]

	if !usesHTTP01 {
		t.Error("expected HTTP-01 challenge when email set but no CF token")
	}
	if usesDNS01 {
		t.Error("DNS-01 must NOT appear without a CF token")
	}
	if hasEnv {
		t.Error("no .env should be emitted without a CF API token")
	}
}
// TestTraefikTokenless renders traefik from the zero TraefikData (no email, no token) and checks
// that nothing certificate- or token-related is emitted: no cert resolver in traefik.yml, no
// env_file in the compose file, and no .env. It then confirms the static config really differs
// from the with-token rendering.
func TestTraefikTokenless(t *testing.T) {
	files, err := RenderTraefik(TraefikData{})
	if err != nil {
		t.Fatal(err)
	}
	yml := files["traefik.yml"].Content
	if strings.Contains(yml, "certificatesResolvers") || strings.Contains(yml, "certResolver") {
		t.Error("token-less node must emit no cert resolver (traefik serves self-signed)")
	}
	compose := files["docker-compose.yml"].Content
	if strings.Contains(compose, "env_file") {
		t.Error("token-less compose must not reference env_file")
	}
	if _, ok := files[".env"]; ok {
		t.Error("token-less node must emit no .env")
	}
	// Structural difference vs the with-token case is the whole point: the resolver section is absent.
	// A failed reference render must fail the test; otherwise the comparison below would run against
	// a nil map and pass trivially.
	withTok, err := RenderTraefik(TraefikData{ACMEEmail: "admin@example.com", CFAPIToken: "x"})
	if err != nil {
		t.Fatalf("RenderTraefik (with token): %v", err)
	}
	if withTok["traefik.yml"].Content == yml {
		t.Error("token-less and with-token traefik.yml must differ structurally")
	}
}
// TestCloudflaredRender renders the cloudflared compose file and checks that it carries the tunnel
// token as TUNNEL_TOKEN, the pinned cloudflared image, and the `tunnel run` command.
func TestCloudflaredRender(t *testing.T) {
	const token = "tunnel-tok-123"
	files, err := RenderCloudflared(CloudflaredData{CFTunnelToken: token})
	if err != nil {
		t.Fatal(err)
	}

	got := files["docker-compose.yml"].Content
	if want := "TUNNEL_TOKEN=" + token; !strings.Contains(got, want) {
		t.Errorf("tunnel token not wired into cloudflared env: %q", got)
	}
	if !strings.Contains(got, CloudflaredImage) {
		t.Errorf("expected pinned cloudflared image %q", CloudflaredImage)
	}
	if !strings.Contains(got, "command: tunnel run") {
		t.Error("expected `command: tunnel run`")
	}
}
// TestFileBrowserRender covers both FileBrowser renderers: the compose file must route
// files.<domain> and use the pinned image; the config must fall back to a single /srv source when
// no storage paths exist and emit a named per-drive source otherwise; and storage mount lines must
// appear in the compose output.
func TestFileBrowserRender(t *testing.T) {
	const domain = "demo-felhom.eu"

	plain := RenderFileBrowserCompose(domain, nil)
	if want := "Host(`files." + domain + "`)"; !strings.Contains(plain, want) {
		t.Errorf("domain not wired into filebrowser routing label: %q", plain)
	}
	if !strings.Contains(plain, FileBrowserImage) {
		t.Errorf("expected pinned filebrowser image %q", FileBrowserImage)
	}

	// No storage paths: the config falls back to one /srv source.
	fallback := RenderFileBrowserConfig(nil)
	if !strings.Contains(fallback, `- path: "/srv"`) {
		t.Errorf("empty config must default to a /srv source: %q", fallback)
	}

	// One labelled storage path: a per-drive source with that label as its name.
	drives := []settings.StoragePath{{Path: "/mnt/hdd_1", Label: "Media"}}
	named := RenderFileBrowserConfig(drives)
	hasPath := strings.Contains(named, `- path: "/srv/hdd_1"`)
	hasName := strings.Contains(named, `name: "Media"`)
	if !(hasPath && hasName) {
		t.Errorf("storage path not wired into filebrowser config: %q", named)
	}

	// Mount lines end up in the compose volumes section.
	mounted := RenderFileBrowserCompose(domain, []string{" - /mnt/hdd_1:/srv/hdd_1"})
	if !strings.Contains(mounted, "/mnt/hdd_1:/srv/hdd_1") {
		t.Errorf("storage mount not wired into filebrowser compose: %q", mounted)
	}
}
@@ -0,0 +1,22 @@
# Cloudflare Tunnel — external access connector — managed by felhom-controller (base-infra bring-up).
# Routes are configured in the Cloudflare dashboard (Zero Trust > Networks > Tunnels > Public Hostname);
# the tunnel connects Cloudflare's edge to Traefik, which handles TLS + routing internally.
# NOTE: the tunnel token is written inline below, so the rendered file is itself a secret.
services:
cloudflared:
# Image reference is injected by the renderer.
image: {{.Image}}
container_name: cloudflared
restart: unless-stopped
command: tunnel run
# The token is the only per-customer value in this stack.
environment:
- TUNNEL_TOKEN={{.CFTunnelToken}}
# Explicit public DNS resolvers for the container.
dns:
- 1.1.1.1
- 8.8.8.8
security_opt:
- no-new-privileges:true
networks:
- traefik-public
# traefik-public is declared external: compose attaches to it but does not create it.
networks:
traefik-public:
external: true
@@ -0,0 +1,30 @@
# Traefik Reverse Proxy — managed by felhom-controller (base-infra bring-up).
services:
traefik:
# Image reference is injected by the renderer.
image: {{.Image}}
container_name: traefik
restart: unless-stopped
# Explicit public DNS resolvers for the container.
dns:
- 1.1.1.1
- 8.8.8.8
security_opt:
- no-new-privileges:true
# HTTP and HTTPS published on the host; they correspond to the web/websecure entry points in traefik.yml.
ports:
- "80:80"
- "443:443"
{{- if .CFAPIToken}}
# Only rendered when a Cloudflare API token is configured: loads the token from the .env beside this file.
env_file:
- .env
{{- end}}
# All mounts are read-only except acme.json, which traefik.yml names as the ACME storage file.
volumes:
- /var/run/docker.sock:/var/run/docker.sock:ro
- ./traefik.yml:/etc/traefik/traefik.yml:ro
- ./dynamic:/etc/traefik/dynamic:ro
- ./acme.json:/etc/traefik/acme.json
- ./certs:/etc/traefik/certs:ro
networks:
- traefik-public
# traefik-public is declared external: compose attaches to it but does not create it.
networks:
traefik-public:
external: true
@@ -0,0 +1,54 @@
# Traefik Static Configuration
# Generated by felhom-controller (base-infra bring-up). Do not edit — regenerated on bring-up.
# Dashboard enabled; insecure mode stays off.
api:
dashboard: true
insecure: false
# web (:80) only redirects to websecure (:443) over https.
entryPoints:
web:
address: ":80"
http:
redirections:
entryPoint:
to: websecure
scheme: https
websecure:
address: ":443"
{{- if .ACMEEmail}}
# Only rendered when an ACME email is configured: attach the Let's Encrypt resolver to this entry point.
http:
tls:
certResolver: letsencrypt
{{- end}}
# Two providers: containers discovered over the Docker socket (opt-in only, since exposedByDefault
# is false) and watched files from the dynamic directory.
providers:
docker:
endpoint: "unix:///var/run/docker.sock"
exposedByDefault: false
network: traefik-public
file:
directory: /etc/traefik/dynamic
watch: true
log:
level: INFO
# Access log enabled with default settings.
accessLog: {}
{{- if .ACMEEmail}}
# Only rendered when an ACME email is configured. Certificates are stored in acme.json.
certificatesResolvers:
letsencrypt:
acme:
email: {{.ACMEEmail}}
storage: /etc/traefik/acme.json
{{- if .CFAPIToken}}
# Cloudflare API token configured: DNS-01 through the cloudflare provider.
dnsChallenge:
provider: cloudflare
resolvers:
- "1.1.1.1:53"
- "8.8.8.8:53"
{{- else}}
# No Cloudflare API token: HTTP-01 answered on the web entry point.
httpChallenge:
entryPoint: web
{{- end}}
{{- end}}