Files
deploy-felhom-compose/controller/internal/metrics/logscanner.go
T
admin 95c821deb2 feat: comprehensive debug logging across all controller modules
Add detailed [DEBUG] logging to every controller module when
logging.level is set to "debug". Each module with stateful debug
uses SetDebug(bool) wired from main.go. Covers stacks, backup,
cloudflare, integrations, system, monitor, settings, scheduler,
web handlers, storage, metrics, API, selfupdate, and assets.

Also includes the app export/import (.fab bundles) feature from
v0.32.0 and its debug page integration.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 18:14:43 +01:00

224 lines
6.2 KiB
Go

package metrics
import (
"context"
"fmt"
"log"
"os/exec"
"regexp"
"sort"
"strings"
"time"
"unicode/utf8"
)
// ContainerLogSummary holds log analysis results for one container.
// It is produced by scanOneContainer and serialized to JSON with the tags below.
type ContainerLogSummary struct {
// ContainerName is the container name that was passed to `docker logs`.
ContainerName string `json:"container_name"`
// ErrorCount is the number of scanned lines classified as "error".
ErrorCount int `json:"error_count"`
// WarnCount is the number of scanned lines classified as "warn".
WarnCount int `json:"warn_count"`
// RecentIssues lists deduplicated issues, sorted by Count descending and then
// LastSeen descending, capped at 10 entries. Omitted from JSON when empty.
RecentIssues []LogIssue `json:"recent_issues,omitempty"`
}
// LogIssue represents a deduplicated log issue: all lines of one container
// that share the same fingerprint are collapsed into a single LogIssue.
type LogIssue struct {
// Severity is "error" or "warn", as returned by classifyLine.
Severity string `json:"severity"`
// Message is the first matching line after cleanLine, truncated to 200 bytes.
Message string `json:"message"`
// Count is how many scanned lines shared this issue's fingerprint.
Count int `json:"count"`
// LastSeen is the wall-clock time at which the scanner processed the most
// recent matching line (time.Now() during the scan), not a timestamp parsed
// from the log line itself.
LastSeen time.Time `json:"last_seen"`
}
var (
// reANSI matches ANSI SGR escape sequences of the form ESC [ <digits;...> m
// (color, bold, etc.) so they can be stripped.
reANSI = regexp.MustCompile(`\x1b\[[0-9;]*m`)
// reTimestamp matches an ISO-like timestamp such as 2006-01-02T15:04:05 or
// 2006/01/02 15:04:05, with optional fractional seconds, optional tz offset,
// and an optional trailing "Z"/space and ": " separator.
reTimestamp = regexp.MustCompile(`\d{4}[-/]\d{2}[-/]\d{2}[T ]\d{2}:\d{2}:\d{2}[.\d]*([+-]\d{2}:?\d{2})?[Z ]?:? ?`)
// reSyslog matches a syslog-style timestamp such as "Jan 2 15:04:05 "
// (including the trailing space).
reSyslog = regexp.MustCompile(`[A-Z][a-z]{2}\s+\d{1,2} \d{2}:\d{2}:\d{2} `)
// reNumbers matches standalone runs of 6 or more digits; fingerprint replaces
// them with <N>. Shorter numbers (e.g. HTTP status codes, ports) are left intact.
reNumbers = regexp.MustCompile(`\b\d{6,}\b`)
// reHex matches standalone runs of 8 or more hex characters; fingerprint
// replaces them with <HEX>. Note that this also matches pure-digit runs of
// 8+ characters, and fingerprint applies it before reNumbers.
reHex = regexp.MustCompile(`\b[0-9a-fA-F]{8,}\b`)
// reUUID matches UUIDs in 8-4-4-4-12 hex form; fingerprint replaces them with
// <UUID> before applying reHex.
reUUID = regexp.MustCompile(`[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}`)
// errorKeywords are matched by classifyLine as substrings of the lower-cased
// first five words of a line; any hit classifies the line as "error".
errorKeywords = []string{"error", "fatal", "panic", "crit", "oom", "killed", "exception", "traceback"}
// warnKeywords are matched the same way (after errorKeywords) and classify the
// line as "warn". Because matching is by substring, "warn" already covers "warning".
warnKeywords = []string{"warn", "warning"}
)
// ScanContainerLogs inspects the docker logs of every named container, one
// after another (sequentially, to avoid load spikes), and returns one summary
// per container in input order.
//
// since is the look-back window handed to each per-container scan. logger may
// be nil; when set it receives [DEBUG] progress lines and a [WARN] line if the
// whole scan takes longer than five minutes. An empty input yields an empty,
// non-nil slice. Callers are expected to have removed infrastructure/protected
// containers from containerNames beforehand.
func ScanContainerLogs(containerNames []string, since time.Duration, logger *log.Logger) []ContainerLogSummary {
	total := len(containerNames)
	if total == 0 {
		return []ContainerLogSummary{}
	}

	// debugf is a no-op without a logger, so call sites stay unconditional.
	debugf := func(format string, args ...interface{}) {
		if logger == nil {
			return
		}
		logger.Printf("[DEBUG] [metrics] logscanner: "+format, args...)
	}

	debugf("starting log scan for %d containers (since %s)", total, since)

	scanStart := time.Now()
	summaries := make([]ContainerLogSummary, 0, total)
	for i := range containerNames {
		container := containerNames[i]
		began := time.Now()
		s := scanOneContainer(container, since, logger)
		took := time.Since(began).Round(time.Millisecond)
		debugf("scanned %s: errors=%d warnings=%d issues=%d (took %s)",
			container, s.ErrorCount, s.WarnCount, len(s.RecentIssues), took)
		summaries = append(summaries, s)
	}

	total_elapsed := time.Since(scanStart)
	debugf("log scan completed: %d containers in %s", total, total_elapsed.Round(time.Millisecond))

	// Flag pathologically slow scans so operators can notice them.
	if logger != nil && total_elapsed > 5*time.Minute {
		logger.Printf("[WARN] Log scan took %s (>5min) for %d containers", total_elapsed.Round(time.Second), total)
	}
	return summaries
}
// scanOneContainer runs `docker logs` for a single container and summarizes
// the error/warning lines found in its output.
//
// The command is limited to the last 1000 lines within the since window and to
// a 10-second timeout. If the command fails (including on timeout), the error
// is written to logger (when non-nil) and a summary carrying only the
// container name is returned.
//
// Each non-empty, valid-UTF-8 line is capped at 500 bytes, classified with
// classifyLine, counted, and deduplicated by fingerprint. The 10 most frequent
// issues (ties broken by most recently processed) end up in RecentIssues.
func scanOneContainer(name string, since time.Duration, logger *log.Logger) ContainerLogSummary {
	const (
		maxLineBytes    = 500 // cap on how much of a raw line is analyzed
		maxMessageBytes = 200 // cap on the stored, cleaned message
		maxIssues       = 10  // cap on RecentIssues
	)

	summary := ContainerLogSummary{ContainerName: name}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	sinceStr := formatSinceDuration(since)
	cmd := exec.CommandContext(ctx, "docker", "logs", "--since="+sinceStr, "--tail=1000", name)
	// CombinedOutput captures both stdout and stderr of the docker command.
	output, err := cmd.CombinedOutput()
	if err != nil {
		if logger != nil {
			logger.Printf("[DEBUG] logscanner: docker logs %s: %v", name, err)
		}
		return summary
	}

	// fingerprint → issue tracking
	type issueEntry struct {
		severity string
		message  string
		count    int
		lastSeen time.Time
	}
	fingerprints := make(map[string]*issueEntry)

	for _, line := range strings.Split(string(output), "\n") {
		if line == "" || !utf8.ValidString(line) {
			continue
		}
		// Truncate on a rune boundary: a plain byte slice could split a
		// multi-byte character and turn a valid line into invalid UTF-8.
		line = truncateAtRuneBoundary(line, maxLineBytes)

		severity := classifyLine(line)
		if severity == "" {
			continue
		}
		if severity == "error" {
			summary.ErrorCount++
		} else {
			summary.WarnCount++
		}

		fp := fingerprint(line)
		if e, ok := fingerprints[fp]; ok {
			e.count++
			e.lastSeen = time.Now()
			continue
		}
		fingerprints[fp] = &issueEntry{
			severity: severity,
			message:  truncateAtRuneBoundary(cleanLine(line), maxMessageBytes),
			count:    1,
			lastSeen: time.Now(),
		}
	}

	// Convert map to slice, sort by count DESC then lastSeen DESC, cap at maxIssues.
	issues := make([]LogIssue, 0, len(fingerprints))
	for _, e := range fingerprints {
		issues = append(issues, LogIssue{
			Severity: e.severity,
			Message:  e.message,
			Count:    e.count,
			LastSeen: e.lastSeen,
		})
	}
	sort.Slice(issues, func(i, j int) bool {
		if issues[i].Count != issues[j].Count {
			return issues[i].Count > issues[j].Count
		}
		return issues[i].LastSeen.After(issues[j].LastSeen)
	})
	if len(issues) > maxIssues {
		issues = issues[:maxIssues]
	}
	summary.RecentIssues = issues
	return summary
}

// truncateAtRuneBoundary returns s cut to at most limit bytes, backing up as
// needed so the cut never lands inside a multi-byte UTF-8 sequence.
func truncateAtRuneBoundary(s string, limit int) string {
	if limit < 0 {
		limit = 0
	}
	if len(s) <= limit {
		return s
	}
	// s[limit] exists because len(s) > limit; if it is a continuation byte the
	// cut point is mid-rune, so step back to the start of that rune.
	for limit > 0 && !utf8.RuneStart(s[limit]) {
		limit--
	}
	return s[:limit]
}
// cleanLine removes presentation noise from a log line: ANSI escape codes
// first, then ISO-style timestamps, then syslog-style timestamps. The result
// is returned with surrounding whitespace trimmed.
func cleanLine(line string) string {
	withoutANSI := reANSI.ReplaceAllString(line, "")
	withoutISO := reTimestamp.ReplaceAllString(withoutANSI, "")
	stripped := reSyslog.ReplaceAllString(withoutISO, "")
	return strings.TrimSpace(stripped)
}
// classifyLine decides the severity of a log line by looking only at its
// first five whitespace-separated words (ANSI codes removed, lower-cased).
// It returns "error" if any error keyword occurs as a substring there,
// otherwise "warn" if any warning keyword does, otherwise "".
func classifyLine(line string) string {
	fields := strings.Fields(strings.ToLower(reANSI.ReplaceAllString(line, "")))
	if len(fields) > 5 {
		fields = fields[:5]
	}
	head := strings.Join(fields, " ")

	// containsAny reports whether head contains at least one of the keywords.
	containsAny := func(keywords []string) bool {
		for _, k := range keywords {
			if strings.Contains(head, k) {
				return true
			}
		}
		return false
	}

	// Error keywords take precedence over warning keywords.
	switch {
	case containsAny(errorKeywords):
		return "error"
	case containsAny(warnKeywords):
		return "warn"
	default:
		return ""
	}
}
// fingerprint produces a deduplication key for a log line.
func fingerprint(line string) string {
s := cleanLine(line)
// Replace UUIDs before hex to avoid partial matches
s = reUUID.ReplaceAllString(s, "<UUID>")
s = reHex.ReplaceAllString(s, "<HEX>")
s = reNumbers.ReplaceAllString(s, "<N>")
s = strings.TrimSpace(s)
return strings.ToLower(s)
}
// formatSinceDuration converts a duration to the relative form accepted by
// `docker logs --since`, expressed in whole minutes (e.g. "15m").
//
// Zero or negative durations fall back to "15m". Positive durations are
// rounded UP to the next whole minute, so a sub-minute window such as 30s
// becomes "1m" rather than silently turning into the 15-minute fallback, and
// 90s becomes "2m" so the requested window is never under-scanned.
func formatSinceDuration(d time.Duration) string {
	if d <= 0 {
		return "15m"
	}
	minutes := int64(d / time.Minute)
	// Use the remainder instead of adding (time.Minute - 1) to avoid overflow
	// for very large durations.
	if d%time.Minute != 0 {
		minutes++
	}
	return fmt.Sprintf("%dm", minutes)
}