From 62d26be8ae23a2c31ed8f78377cf05510ea7ba15 Mon Sep 17 00:00:00 2001 From: kisfenyo Date: Fri, 27 Feb 2026 09:19:27 +0100 Subject: [PATCH] feat: include controller in app telemetry reports Add the felhom-controller container as a special entry in the app_telemetry array sent to the hub. This reuses all existing hub infrastructure (storage, aggregation, UI) with zero hub-side changes. The controller's memory/CPU metrics and log warnings/errors are now collected alongside app telemetry, giving the hub visibility into controller health, memory trends, and known issues. Co-Authored-By: Claude Opus 4.6 --- controller/internal/report/telemetry.go | 70 +++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 4 deletions(-) diff --git a/controller/internal/report/telemetry.go b/controller/internal/report/telemetry.go index 04498d5..4f54d48 100644 --- a/controller/internal/report/telemetry.go +++ b/controller/internal/report/telemetry.go @@ -9,8 +9,12 @@ import ( "gitea.dooplex.hu/admin/felhom-controller/internal/stacks" ) +// controllerContainerName is the Docker container name for the controller itself. +const controllerContainerName = "felhom-controller" + // buildAppTelemetrySection collects metrics telemetry and log scans for all -// non-protected, deployed stacks and returns per-app telemetry data. +// non-protected, deployed stacks plus the controller itself, and returns +// per-app telemetry data. func buildAppTelemetrySection(stackMgr *stacks.Manager, metricsStore *metrics.MetricsStore, logger *log.Logger) []AppTelemetry { allStacks := stackMgr.GetStacks() @@ -35,11 +39,27 @@ func buildAppTelemetrySection(stackMgr *stacks.Manager, metricsStore *metrics.Me } } - // 3. Scan logs + // 3. Include controller container in log scan + containerNames = append(containerNames, controllerContainerName) + + // 4. Scan logs (includes controller) logs := metrics.ScanContainerLogs(containerNames, 15*time.Minute, logger) - // 4. Build per-app telemetry - return buildAppTelemetry(allStacks, telemetry, logs) + // 5. Build per-app telemetry (stacks only) + result := buildAppTelemetry(allStacks, telemetry, logs) + + // 6. Append controller telemetry entry + if ctrl := buildControllerTelemetry(telemetry, logs); ctrl != nil { + result = append(result, *ctrl) + sort.Slice(result, func(i, j int) bool { + return result[i].AppName < result[j].AppName + }) + } + + if result == nil { + result = []AppTelemetry{} + } + return result } // buildAppTelemetry aggregates container-level telemetry and log data into per-stack AppTelemetry entries. @@ -118,6 +138,48 @@ func buildAppTelemetry(allStacks []stacks.Stack, telemetry []metrics.ContainerTe return result } +// buildControllerTelemetry creates a telemetry entry for the controller container. +// Returns nil if no metrics or log data is available. +func buildControllerTelemetry(telemetry []metrics.ContainerTelemetry, logs []metrics.ContainerLogSummary) *AppTelemetry { + app := AppTelemetry{ + AppName: controllerContainerName, + DisplayName: "Felhom Controller", + Containers: []string{controllerContainerName}, + } + + // Find metrics for the controller container + for _, ct := range telemetry { + if ct.ContainerName == controllerContainerName { + app.MemoryCurrentMB = ct.MemoryCurrentMB + app.MemoryAvgMB = ct.MemoryAvgMB + app.MemoryPeakMB = ct.MemoryPeakMB + app.CPUAvgPercent = ct.CPUAvgPercent + break + } + } + + // Find log scan results for the controller container + for _, ls := range logs { + if ls.ContainerName == controllerContainerName { + app.LogErrors = ls.ErrorCount + app.LogWarnings = ls.WarnCount + issues := ls.RecentIssues + if len(issues) > 10 { + issues = issues[:10] + } + app.Issues = issues + break + } + } + + // Only include if we have at least metrics or log data + if app.MemoryCurrentMB == 0 && app.MemoryAvgMB == 0 && app.LogErrors == 0 && app.LogWarnings == 0 { + return nil + } + + return &app +} + // isStackRunning returns true if the stack has containers actively running // (running, starting, or unhealthy but still up). Stopped, exited, deploying // etc. are excluded to avoid sending zero-value telemetry to the hub.