telemetry: fix log deduplication — strip ANSI codes, tz offsets, mid-line timestamps (v0.30.6)
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -29,10 +29,12 @@ type LogIssue struct {
|
||||
}
|
||||
|
||||
var (
|
||||
// Strip leading ISO timestamp: 2006-01-02T15:04:05 or 2006/01/02 15:04:05 etc.
|
||||
reTimestamp = regexp.MustCompile(`^\d{4}[-/]\d{2}[-/]\d{2}[T ]\d{2}:\d{2}:\d{2}[.\d]*[Z ]?`)
|
||||
// Strip ANSI escape codes (color, bold, etc.)
|
||||
reANSI = regexp.MustCompile(`\x1b\[[0-9;]*m`)
|
||||
// Strip ISO timestamp: 2006-01-02T15:04:05 or 2006/01/02 15:04:05, with optional tz offset
|
||||
reTimestamp = regexp.MustCompile(`\d{4}[-/]\d{2}[-/]\d{2}[T ]\d{2}:\d{2}:\d{2}[.\d]*([+-]\d{2}:?\d{2})?[Z ]?:? ?`)
|
||||
// Strip syslog-style timestamp: Jan 2 15:04:05
|
||||
reSyslog = regexp.MustCompile(`^[A-Z][a-z]{2}\s+\d{1,2} \d{2}:\d{2}:\d{2} `)
|
||||
reSyslog = regexp.MustCompile(`[A-Z][a-z]{2}\s+\d{1,2} \d{2}:\d{2}:\d{2} `)
|
||||
// Replace 6+ digit sequences with <N> (avoids mangling shorter numbers such as 3-digit HTTP status codes and ports of up to 5 digits)
|
||||
reNumbers = regexp.MustCompile(`\b\d{6,}\b`)
|
||||
// Replace 8+ char hex strings
|
||||
@@ -121,10 +123,7 @@ func scanOneContainer(name string, since time.Duration, logger *log.Logger) Cont
|
||||
e.count++
|
||||
e.lastSeen = time.Now()
|
||||
} else {
|
||||
// Use original line trimmed as message (strip timestamp)
|
||||
msg := reTimestamp.ReplaceAllString(line, "")
|
||||
msg = reSyslog.ReplaceAllString(msg, "")
|
||||
msg = strings.TrimSpace(msg)
|
||||
msg := cleanLine(line)
|
||||
if len(msg) > 200 {
|
||||
msg = msg[:200]
|
||||
}
|
||||
@@ -161,9 +160,18 @@ func scanOneContainer(name string, since time.Duration, logger *log.Logger) Cont
|
||||
return summary
|
||||
}
|
||||
|
||||
// cleanLine strips ANSI escape codes and timestamps from a log line.
|
||||
func cleanLine(line string) string {
|
||||
s := reANSI.ReplaceAllString(line, "")
|
||||
s = reTimestamp.ReplaceAllString(s, "")
|
||||
s = reSyslog.ReplaceAllString(s, "")
|
||||
return strings.TrimSpace(s)
|
||||
}
|
||||
|
||||
// classifyLine returns "error", "warn", or "" based on first 5 words of the line.
|
||||
func classifyLine(line string) string {
|
||||
lower := strings.ToLower(line)
|
||||
cleaned := reANSI.ReplaceAllString(line, "")
|
||||
lower := strings.ToLower(cleaned)
|
||||
words := strings.Fields(lower)
|
||||
if len(words) > 5 {
|
||||
words = words[:5]
|
||||
@@ -185,9 +193,7 @@ func classifyLine(line string) string {
|
||||
|
||||
// fingerprint produces a deduplication key for a log line.
|
||||
func fingerprint(line string) string {
|
||||
// Strip leading timestamp
|
||||
s := reTimestamp.ReplaceAllString(line, "")
|
||||
s = reSyslog.ReplaceAllString(s, "")
|
||||
s := cleanLine(line)
|
||||
// Replace UUIDs before hex to avoid partial matches
|
||||
s = reUUID.ReplaceAllString(s, "<UUID>")
|
||||
s = reHex.ReplaceAllString(s, "<HEX>")
|
||||
|
||||
Reference in New Issue
Block a user