v0.41.0: first-boot base-infra bring-up + self-heal (+ Section-G mount fix)

New internal/infra package renders traefik/cloudflared/filebrowser from config
(pinned images, single source of truth; web filebrowser path delegates here).
stacks.EnsureBaseStack deploys the traefik-public network + the three stacks,
single-flight + idempotent + non-fatal; wired to first boot and every health
tick. monitor.EffectiveProtected drops cloudflared from the protected set when
no tunnel token is configured.
Section-G fix lives in felhom-agent build-golden.sh (same-path stacks bind).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-11 14:56:42 +02:00
parent ba0e1eb04a
commit abbd9488c6
13 changed files with 873 additions and 111 deletions
@@ -0,0 +1,39 @@
package monitor
import (
"testing"
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
)
// contains reports whether want is equal to at least one element of ss.
// A nil or empty slice never contains anything.
func contains(ss []string, want string) bool {
	found := false
	// Stop scanning as soon as the first match is seen.
	for i := 0; i < len(ss) && !found; i++ {
		found = ss[i] == want
	}
	return found
}
// TestEffectiveProtectedDropsCloudflaredWithoutToken covers both sides of the tunnel-token rule:
// with an empty CFTunnelToken (LAN-only node) cloudflared must be removed from the protected set
// while every other configured name stays, and with a non-empty token cloudflared must be kept.
func TestEffectiveProtectedDropsCloudflaredWithoutToken(t *testing.T) {
	stacksCfg := config.StacksConfig{
		Protected: []string{"traefik", "cloudflared", "felhom-controller", "filebrowser"},
	}

	// Case 1: no tunnel token configured.
	lanOnly := &config.Config{Stacks: stacksCfg}
	effective := EffectiveProtected(lanOnly)
	if contains(effective, "cloudflared") {
		t.Errorf("cloudflared must be dropped when no tunnel token: %v", effective)
	}
	for _, name := range []string{"traefik", "felhom-controller", "filebrowser"} {
		if contains(effective, name) {
			continue
		}
		t.Errorf("%s must remain protected: %v", name, effective)
	}

	// Case 2: a tunnel token is configured, so cloudflared stays protected.
	tunneled := &config.Config{Stacks: stacksCfg}
	tunneled.Infrastructure.CFTunnelToken = "tok"
	if withToken := EffectiveProtected(tunneled); !contains(withToken, "cloudflared") {
		t.Error("cloudflared must remain protected when a tunnel token is configured")
	}
}
+21 -3
View File
@@ -152,11 +152,13 @@ func RunHealthCheck(cfg *config.Config, cpuCollector *system.CPUCollector, stora
}
}
// 6. Protected containers
// 6. Protected containers (effective set: cloudflared only counts when a tunnel token is
// configured, so a LAN-only node doesn't report FAIL forever for a stack it intentionally skips).
protected := EffectiveProtected(cfg)
if debug {
logger.Printf("[DEBUG] [monitor] Checking %d protected containers: %v", len(cfg.Stacks.Protected), cfg.Stacks.Protected)
logger.Printf("[DEBUG] [monitor] Checking %d protected containers: %v", len(protected), protected)
}
missingProtected := checkProtectedContainers(cfg.Stacks.Protected)
missingProtected := checkProtectedContainers(protected)
for _, name := range missingProtected {
report.Issues = append(report.Issues, fmt.Sprintf("Protected container not running: %s", name))
}
@@ -237,6 +239,22 @@ func checkDocker() error {
return nil
}
// EffectiveProtected returns the subset of cfg.Stacks.Protected that applies to this node.
//
// Every configured name is kept in its original order, except "cloudflared", which is left out
// when cfg.Infrastructure.CFTunnelToken is empty: a LAN-only node runs without a tunnel on
// purpose, so it should not be flagged as a missing protected container. The bring-up in
// stacks.EnsureBaseStack is meant to use the same cloudflared condition so that detection and
// deployment agree.
//
// The result is always a freshly allocated, non-nil slice; the configured slice is not modified.
func EffectiveProtected(cfg *config.Config) []string {
	configured := cfg.Stacks.Protected
	tunnelConfigured := cfg.Infrastructure.CFTunnelToken != ""

	effective := make([]string, 0, len(configured))
	for i := range configured {
		// Keep everything that is not cloudflared; keep cloudflared only with a tunnel token.
		if configured[i] != "cloudflared" || tunnelConfigured {
			effective = append(effective, configured[i])
		}
	}
	return effective
}
func checkProtectedContainers(protected []string) []string {
var missing []string
for _, name := range protected {