Files
deploy-felhom-compose/controller/internal/report/builder.go
T
admin 05ecd65412 feat(telemetry): add per-app metrics and log telemetry to hub reports (v0.28.0)
- New internal/metrics/telemetry.go: MetricsStore.GetContainerTelemetry()
  aggregates container memory/CPU from SQLite over the last 15 min
- New internal/metrics/logscanner.go: ScanContainerLogs() scans docker logs
  for errors/warnings, deduplicates via fingerprinting (strips timestamps,
  replaces 6+ digit numbers, hex strings, UUIDs)
- New internal/report/telemetry.go: buildAppTelemetrySection() assembles
  per-stack AppTelemetry by aggregating container metrics and log summaries
- internal/report/types.go: added AppTelemetry field to Report struct plus
  AppTelemetry type with memory/CPU/log fields and LogIssue references
- internal/report/builder.go: calls buildAppTelemetrySection() in BuildReport()
- Backward-compatible: old Hub versions silently ignore app_telemetry field

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-23 10:46:27 +01:00

306 lines
7.6 KiB
Go

package report
import (
"crypto/sha256"
"encoding/hex"
"fmt"
"log"
"os"
"strconv"
"strings"
"time"
"gitea.dooplex.hu/admin/felhom-controller/internal/backup"
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
"gitea.dooplex.hu/admin/felhom-controller/internal/metrics"
"gitea.dooplex.hu/admin/felhom-controller/internal/monitor"
"gitea.dooplex.hu/admin/felhom-controller/internal/scheduler"
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
"gitea.dooplex.hu/admin/felhom-controller/internal/stacks"
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
)
// BuildReport collects current state from all subsystems and returns a Report.
//
// Parameters:
//   - cfg: controller configuration; supplies the customer identity, the
//     logging level, the default HDD path and the backup settings.
//   - configPath: file whose SHA-256 is reported as ConfigHash. An empty path
//     skips hashing; a read failure is logged and leaves ConfigHash empty.
//   - stackMgr, backupMgr, cpuCollector, metricsStore: subsystem handles that
//     are passed on to the section builders and system probes.
//   - version: controller version string copied into the report.
//   - storagePaths: registered storage paths. When non-empty, the first entry
//     replaces cfg.Paths.HDDPath as the path handed to system.GetInfo.
//   - logger: optional; nothing is logged when it is nil.
//
// The returned Report is never nil. Health.Issues and Health.Warnings are
// replaced by empty slices when the health check leaves them nil.
func BuildReport(
	cfg *config.Config,
	configPath string,
	stackMgr *stacks.Manager,
	backupMgr *backup.Manager,
	cpuCollector *system.CPUCollector,
	metricsStore *metrics.MetricsStore,
	version string,
	storagePaths []settings.StoragePath,
	logger *log.Logger,
) *Report {
	// verbose gates every [DEBUG] line: the configured level must be "debug"
	// and a logger must have been supplied.
	verbose := cfg.Logging.Level == "debug" && logger != nil

	if verbose {
		logger.Printf("[DEBUG] BuildReport: starting — version=%s, storagePaths=%d", version, len(storagePaths))
	}

	r := &Report{
		Version:           1,
		CustomerID:        cfg.Customer.ID,
		CustomerName:      cfg.Customer.Name,
		ControllerVersion: version,
		Timestamp:         time.Now().UTC(),
	}

	// Controller URL for hub callbacks (self-update trigger, etc.)
	if cfg.Customer.Domain != "" {
		r.ControllerURL = fmt.Sprintf("https://felhom.%s", cfg.Customer.Domain)
	}

	// Config hash for Hub comparison
	if configPath != "" {
		data, err := os.ReadFile(configPath)
		if err != nil {
			// Hashing stays best-effort (the report is still built), but the
			// failure is surfaced instead of being dropped silently so an
			// empty ConfigHash can be diagnosed.
			if logger != nil {
				logger.Printf("[WARN] BuildReport: cannot read config %q for hash: %v", configPath, err)
			}
		} else {
			h := sha256.Sum256(data)
			r.ConfigHash = hex.EncodeToString(h[:])
			if verbose {
				// Only a 12-character prefix of the hex digest is logged.
				logger.Printf("[DEBUG] BuildReport: configHash=%s (%d bytes)", r.ConfigHash[:12]+"...", len(data))
			}
		}
	}

	// System info
	staticInfo := metrics.GetStaticInfo()
	hddPath := cfg.Paths.HDDPath
	if len(storagePaths) > 0 {
		// The first registered storage path takes precedence over the config value.
		hddPath = storagePaths[0].Path
	}
	sysInfo := system.GetInfo(hddPath, cpuCollector)
	r.System = SystemReport{
		Hostname:           staticInfo.Hostname,
		OS:                 staticInfo.OS,
		Kernel:             staticInfo.Kernel,
		CPUModel:           staticInfo.CPUModel,
		CPUCores:           staticInfo.CPUCores,
		UptimeSeconds:      staticInfo.UptimeSeconds,
		CPUPercent:         sysInfo.CPUPercent,
		MemoryTotalMB:      sysInfo.TotalMemMB,
		MemoryUsedMB:       sysInfo.UsedMemMB,
		MemoryPercent:      sysInfo.MemPercent,
		TemperatureCelsius: sysInfo.TemperatureCelsius,
		LoadAvg1:           sysInfo.LoadAvg1,
		LoadAvg5:           sysInfo.LoadAvg5,
		LoadAvg15:          sysInfo.LoadAvg15,
	}

	// Storage — root filesystem + all registered storage paths
	r.Storage = []StorageReport{
		{Mount: "/", Label: "SSD", TotalGB: sysInfo.DiskTotalGB, UsedGB: sysInfo.DiskUsedGB, Percent: sysInfo.DiskPercent},
	}
	for _, sp := range storagePaths {
		if sp.Decommissioned {
			// Decommissioned paths are reported as markers without usage figures.
			r.Storage = append(r.Storage, StorageReport{
				Mount:          sp.Path,
				Label:          sp.Label,
				Decommissioned: true,
				MigratedTo:     sp.MigratedTo,
			})
			continue
		}
		if sp.Disconnected {
			// Disconnected paths are reported as markers; disk usage is not queried.
			r.Storage = append(r.Storage, StorageReport{
				Mount:        sp.Path,
				Label:        sp.Label,
				Disconnected: true,
			})
			continue
		}
		di := system.GetDiskUsage(sp.Path)
		if di == nil {
			// No usage data for this path: it is left out of the report.
			continue
		}
		r.Storage = append(r.Storage, StorageReport{
			Mount:   sp.Path,
			Label:   sp.Label,
			TotalGB: di.TotalGB,
			UsedGB:  di.UsedGB,
			Percent: di.UsedPercent,
		})
	}
	if verbose {
		logger.Printf("[DEBUG] BuildReport: system info collected — cpu=%.1f%%, mem=%d/%dMB, temp=%.1fC",
			sysInfo.CPUPercent, sysInfo.UsedMemMB, sysInfo.TotalMemMB, sysInfo.TemperatureCelsius)
		logger.Printf("[DEBUG] BuildReport: storage entries=%d", len(r.Storage))
	}

	// Containers
	r.Containers = buildContainerReport(stackMgr, metricsStore)

	// Backup
	r.Backup = buildBackupReport(cfg, backupMgr)

	// Health
	healthReport := monitor.RunHealthCheck(cfg, cpuCollector, storagePaths, logger)
	r.Health = HealthReport{
		Status:   healthReport.Status,
		Issues:   healthReport.Issues,
		Warnings: healthReport.Warnings,
	}
	// Replace nil lists with empty ones so the report never carries nil slices here.
	if r.Health.Issues == nil {
		r.Health.Issues = []string{}
	}
	if r.Health.Warnings == nil {
		r.Health.Warnings = []string{}
	}

	// Stacks
	r.Stacks = buildStacksReport(stackMgr)

	// App telemetry (metrics + log scan)
	r.AppTelemetry = buildAppTelemetrySection(stackMgr, metricsStore, logger)

	if verbose {
		logger.Printf("[DEBUG] BuildReport: complete — containers=%d, health=%s, deployed=%d, available=%d, app_telemetry=%d",
			r.Containers.Total, r.Health.Status, len(r.Stacks.Deployed), len(r.Stacks.Available), len(r.AppTelemetry))
	}
	return r
}
// buildContainerReport tallies the containers of every deployed stack and
// attaches CPU/memory figures from the metrics store where a matching entry
// exists.
//
// Counting rules: running and starting containers count as Running; unhealthy
// containers count as both Unhealthy and Running; any other state counts as
// Stopped. Stacks that are not deployed are ignored. A nil metricsStore or a
// failed summary query only means no CPU/memory figures are attached. The
// returned List is never nil.
func buildContainerReport(stackMgr *stacks.Manager, metricsStore *metrics.MetricsStore) ContainerReport {
	// Start from an empty, non-nil list so callers never see a nil slice.
	report := ContainerReport{List: []ContainerDetailReport{}}

	knownStacks := stackMgr.GetStacks()

	// Index the current per-container stats by container name.
	byName := map[string]metrics.ContainerCurrentStats{}
	if metricsStore != nil {
		summary, err := metricsStore.QueryContainerSummary()
		if err == nil {
			for _, entry := range summary {
				byName[entry.ContainerName] = entry
			}
		}
	}

	for _, stack := range knownStacks {
		if !stack.Deployed {
			continue
		}
		for _, ctr := range stack.Containers {
			state := ctr.State

			report.Total++
			switch {
			case state == stacks.StateUnhealthy:
				// An unhealthy container is still a running one.
				report.Unhealthy++
				report.Running++
			case state == stacks.StateRunning || state == stacks.StateStarting:
				report.Running++
			default:
				report.Stopped++
			}

			item := ContainerDetailReport{Name: ctr.Name, State: string(state)}
			if stats, found := byName[ctr.Name]; found {
				item.CPUPercent = stats.CPUPercent
				item.MemoryMB = stats.MemUsageMB
			}
			report.List = append(report.List, item)
		}
	}

	return report
}
// buildBackupReport summarises the backup subsystem for the hub report.
//
// Enabled always mirrors cfg.Backup.Enabled. When backupMgr is nil nothing
// else is filled in. Otherwise the last DB dump and snapshot times, the
// repository statistics, the last integrity check and the restic password
// are copied from the manager; each optional value is set only when the
// manager provides it.
func buildBackupReport(cfg *config.Config, backupMgr *backup.Manager) BackupReport {
	report := BackupReport{Enabled: cfg.Backup.Enabled}
	if backupMgr == nil {
		// Without a manager only the enabled flag can be reported.
		return report
	}

	// Call arguments are evaluated left to right: the DB dump schedule first,
	// then the restic schedule.
	st := backupMgr.GetFullStatus(
		scheduler.NextDailyRun(cfg.Backup.DBDumpSchedule),
		scheduler.NextDailyRun(cfg.Backup.ResticSchedule),
	)

	// Each timestamp is copied into its own variable so the report holds a
	// pointer to the copy rather than into the status value.
	if dump := st.LastDBDump; dump != nil {
		ranAt := dump.LastRun
		report.LastDBDump = &ranAt
	}
	if snap := st.LastBackup; snap != nil {
		ranAt := snap.LastRun
		report.LastSnapshot = &ranAt
	}

	if repo := st.RepoStats; repo != nil {
		report.SnapshotCount = repo.SnapshotCount
		report.RepoSizeMB = parseSizeToMB(repo.TotalSize)
	}

	if checkedAt := st.LastCheckTime; !checkedAt.IsZero() {
		report.LastIntegrityCheck = &checkedAt
	}
	report.IntegrityOK = st.LastCheckOK

	// Include restic password for hub-side disaster recovery. A lookup error
	// just leaves the field empty.
	// NOTE(review): this places a secret in the report payload — confirm the
	// transport and hub-side storage are protected accordingly.
	password, err := backupMgr.GetResticPassword()
	if err == nil {
		report.ResticPassword = password
	}

	return report
}
// buildStacksReport splits the stack names into deployed and available
// (not deployed) lists. Protected stacks appear in neither list. Both lists
// are always non-nil, even when empty.
func buildStacksReport(stackMgr *stacks.Manager) StacksReport {
	// Begin with empty, non-nil lists so no nil slice is ever returned.
	report := StacksReport{
		Deployed:  []string{},
		Available: []string{},
	}

	for _, stack := range stackMgr.GetStacks() {
		switch {
		case stack.Protected:
			// Protected stacks are not reported.
		case stack.Deployed:
			report.Deployed = append(report.Deployed, stack.Name)
		default:
			report.Available = append(report.Available, stack.Name)
		}
	}

	return report
}
// parseSizeToMB parses a formatted size string like "1.5 GB", "512.0 MB" into
// whole megabytes, truncating any fractional part.
//
// The input must have the form "<number> <unit>" with whitespace between the
// two parts; surrounding whitespace is ignored. Units are matched
// case-insensitively. B, KB, MB, GB and TB are accepted, as are the IEC
// spellings KiB, MiB, GiB and TiB; all of them use 1024-based factors.
//
// Empty or malformed input (wrong number of fields, unparsable number)
// returns 0. An unrecognised unit falls back to treating the number as MB.
func parseSizeToMB(s string) int64 {
	// strings.Fields ignores leading/trailing whitespace and yields no fields
	// for an empty string, so no separate trim or empty check is needed.
	parts := strings.Fields(s)
	if len(parts) != 2 {
		return 0
	}

	val, err := strconv.ParseFloat(parts[0], 64)
	if err != nil {
		return 0
	}

	switch strings.ToUpper(parts[1]) {
	case "TB", "TIB":
		return int64(val * 1024 * 1024)
	case "GB", "GIB":
		return int64(val * 1024)
	case "MB", "MIB":
		return int64(val)
	case "KB", "KIB":
		return int64(val / 1024)
	case "B":
		return int64(val / (1024 * 1024))
	default:
		// Unknown unit: keep the historical fallback of treating the value as MB.
		return int64(val)
	}
}