package metrics import ( "context" "fmt" "log" "os/exec" "regexp" "sort" "strings" "time" "unicode/utf8" ) // ContainerLogSummary holds log analysis results for one container. type ContainerLogSummary struct { ContainerName string `json:"container_name"` ErrorCount int `json:"error_count"` WarnCount int `json:"warn_count"` RecentIssues []LogIssue `json:"recent_issues,omitempty"` } // LogIssue represents a deduplicated log issue. type LogIssue struct { Severity string `json:"severity"` Message string `json:"message"` Count int `json:"count"` LastSeen time.Time `json:"last_seen"` } var ( // Strip ANSI escape codes (color, bold, etc.) reANSI = regexp.MustCompile(`\x1b\[[0-9;]*m`) // Strip ISO timestamp: 2006-01-02T15:04:05 or 2006/01/02 15:04:05, with optional tz offset reTimestamp = regexp.MustCompile(`\d{4}[-/]\d{2}[-/]\d{2}[T ]\d{2}:\d{2}:\d{2}[.\d]*([+-]\d{2}:?\d{2})?[Z ]?:? ?`) // Strip syslog-style timestamp: Jan 2 15:04:05 reSyslog = regexp.MustCompile(`[A-Z][a-z]{2}\s+\d{1,2} \d{2}:\d{2}:\d{2} `) // Replace 6+ digit sequences with (avoids mangling 4-digit HTTP codes/ports) reNumbers = regexp.MustCompile(`\b\d{6,}\b`) // Replace 8+ char hex strings reHex = regexp.MustCompile(`\b[0-9a-fA-F]{8,}\b`) // Replace UUIDs reUUID = regexp.MustCompile(`[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}`) errorKeywords = []string{"error", "fatal", "panic", "crit", "oom", "killed", "exception", "traceback"} warnKeywords = []string{"warn", "warning"} ) // ScanContainerLogs scans docker logs for the given containers and returns a summary // of errors/warnings found. Containers are scanned sequentially to avoid load spikes. // The caller should filter out infrastructure/protected containers before calling this. func ScanContainerLogs(containerNames []string, since time.Duration, logger *log.Logger) []ContainerLogSummary { if len(containerNames) == 0 { return []ContainerLogSummary{} } start := time.Now() results := make([]ContainerLogSummary, 0, len(containerNames)) for _, name := range containerNames { summary := scanOneContainer(name, since, logger) results = append(results, summary) } elapsed := time.Since(start) if elapsed > 5*time.Minute && logger != nil { logger.Printf("[WARN] Log scan took %s (>5min) for %d containers", elapsed.Round(time.Second), len(containerNames)) } return results } func scanOneContainer(name string, since time.Duration, logger *log.Logger) ContainerLogSummary { summary := ContainerLogSummary{ContainerName: name} ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() sinceStr := formatSinceDuration(since) cmd := exec.CommandContext(ctx, "docker", "logs", "--since="+sinceStr, "--tail=1000", name) output, err := cmd.CombinedOutput() if err != nil { if logger != nil { logger.Printf("[DEBUG] logscanner: docker logs %s: %v", name, err) } return summary } // fingerprint → issue tracking type issueEntry struct { severity string message string count int lastSeen time.Time } fingerprints := make(map[string]*issueEntry) lines := strings.Split(string(output), "\n") for _, line := range lines { if !utf8.Valid([]byte(line)) { continue } if len(line) > 500 { line = line[:500] } if line == "" { continue } severity := classifyLine(line) if severity == "" { continue } if severity == "error" { summary.ErrorCount++ } else { summary.WarnCount++ } fp := fingerprint(line) if e, ok := fingerprints[fp]; ok { e.count++ e.lastSeen = time.Now() } else { msg := cleanLine(line) if len(msg) > 200 { msg = msg[:200] } fingerprints[fp] = &issueEntry{ severity: severity, message: msg, count: 1, lastSeen: time.Now(), } } } // Convert map to slice, sort by count DESC then lastSeen DESC, cap at 10 issues := make([]LogIssue, 0, len(fingerprints)) for _, e := range fingerprints { issues = append(issues, LogIssue{ Severity: e.severity, Message: e.message, Count: e.count, LastSeen: e.lastSeen, }) } sort.Slice(issues, func(i, j int) bool { if issues[i].Count != issues[j].Count { return issues[i].Count > issues[j].Count } return issues[i].LastSeen.After(issues[j].LastSeen) }) if len(issues) > 10 { issues = issues[:10] } summary.RecentIssues = issues return summary } // cleanLine strips ANSI escape codes and timestamps from a log line. func cleanLine(line string) string { s := reANSI.ReplaceAllString(line, "") s = reTimestamp.ReplaceAllString(s, "") s = reSyslog.ReplaceAllString(s, "") return strings.TrimSpace(s) } // classifyLine returns "error", "warn", or "" based on first 5 words of the line. func classifyLine(line string) string { cleaned := reANSI.ReplaceAllString(line, "") lower := strings.ToLower(cleaned) words := strings.Fields(lower) if len(words) > 5 { words = words[:5] } prefix := strings.Join(words, " ") for _, kw := range errorKeywords { if strings.Contains(prefix, kw) { return "error" } } for _, kw := range warnKeywords { if strings.Contains(prefix, kw) { return "warn" } } return "" } // fingerprint produces a deduplication key for a log line. func fingerprint(line string) string { s := cleanLine(line) // Replace UUIDs before hex to avoid partial matches s = reUUID.ReplaceAllString(s, "") s = reHex.ReplaceAllString(s, "") s = reNumbers.ReplaceAllString(s, "") s = strings.TrimSpace(s) return strings.ToLower(s) } // formatSinceDuration converts a duration to docker logs --since format (e.g. "15m"). func formatSinceDuration(d time.Duration) string { minutes := int(d.Minutes()) if minutes <= 0 { minutes = 15 } return fmt.Sprintf("%dm", minutes) }