telemetry: fix log deduplication — strip ANSI codes, tz offsets, mid-line timestamps (v0.30.6)
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,15 @@
|
||||
## Changelog
|
||||
|
||||
### v0.30.6 — Telemetry: Better Log Deduplication (2026-02-25)
|
||||
|
||||
#### Fixed
|
||||
- **ANSI escape code stripping** — Log scanner now strips ANSI color codes (e.g. `\x1b[35m`) before classifying and fingerprinting lines, preventing color codes from polluting error messages and breaking deduplication
|
||||
- **Timezone offset in timestamps** — ISO timestamp regex now handles `+01:00`/`-0500` timezone offsets and optional trailing colons (fixes Vikunja-style log entries)
|
||||
- **Mid-line timestamps** — Removed `^` anchor from both ISO and syslog timestamp regexes, so timestamps embedded after log-level keywords (e.g. `ERROR 2026-02-24T21:27:05`) are now stripped correctly
|
||||
|
||||
#### Improved
|
||||
- **`cleanLine()` helper** — Consolidated ANSI + timestamp stripping into a single reusable function used by both message display and fingerprint deduplication
|
||||
|
||||
### v0.30.5 — Health Probe: Fast Initial Checking (2026-02-25)
|
||||
|
||||
#### Improved
|
||||
|
||||
@@ -892,7 +892,7 @@ Each report push now includes per-app telemetry data:
|
||||
**Log scanning** (`logscanner.go`):
|
||||
- `ScanContainerLogs(containerNames, since, logger)` runs `docker logs --since=15m --tail=1000` sequentially on all non-protected deployed containers.
|
||||
- Classifies lines by keyword match (errors: `error`, `fatal`, `panic`, `crit`, `oom`, `killed`, `exception`, `traceback`; warnings: `warn`, `warning`) on the first 5 words of each line (case-insensitive, after ANSI escape codes are stripped).
|
||||
- Deduplicates via fingerprinting: strips timestamps, replaces 6+ digit numbers with `<N>`, 8+ char hex with `<HEX>`, UUIDs with `<UUID>`. Groups identical fingerprints, keeps top 10 per container.
|
||||
- Deduplicates via fingerprinting: strips ANSI escape codes, then ISO timestamps (with optional timezone offset) and syslog-style timestamps wherever they appear in the line; replaces 6+ digit numbers with `<N>`, 8+ char hex with `<HEX>`, UUIDs with `<UUID>`. Groups identical fingerprints, keeps top 10 per container.
|
||||
- Returns `[]ContainerLogSummary` with `ErrorCount`, `WarnCount`, `RecentIssues []LogIssue`.
|
||||
|
||||
**Report integration** (`report/telemetry.go`):
|
||||
|
||||
@@ -29,10 +29,12 @@ type LogIssue struct {
|
||||
}
|
||||
|
||||
var (
|
||||
// Strip leading ISO timestamp: 2006-01-02T15:04:05 or 2006/01/02 15:04:05 etc.
|
||||
reTimestamp = regexp.MustCompile(`^\d{4}[-/]\d{2}[-/]\d{2}[T ]\d{2}:\d{2}:\d{2}[.\d]*[Z ]?`)
|
||||
// Strip ANSI escape codes (color, bold, etc.)
|
||||
reANSI = regexp.MustCompile(`\x1b\[[0-9;]*m`)
|
||||
// Strip ISO timestamp anywhere in the line: 2006-01-02T15:04:05 or 2006/01/02 15:04:05, with optional fractional seconds, tz offset (+01:00 or -0500), trailing Z, and trailing colon
|
||||
reTimestamp = regexp.MustCompile(`\d{4}[-/]\d{2}[-/]\d{2}[T ]\d{2}:\d{2}:\d{2}[.\d]*([+-]\d{2}:?\d{2})?[Z ]?:? ?`)
|
||||
// Strip syslog-style timestamp: Jan 2 15:04:05
|
||||
reSyslog = regexp.MustCompile(`^[A-Z][a-z]{2}\s+\d{1,2} \d{2}:\d{2}:\d{2} `)
|
||||
reSyslog = regexp.MustCompile(`[A-Z][a-z]{2}\s+\d{1,2} \d{2}:\d{2}:\d{2} `)
|
||||
// Replace 6+ digit sequences with <N> (avoids mangling 4-digit HTTP codes/ports)
|
||||
reNumbers = regexp.MustCompile(`\b\d{6,}\b`)
|
||||
// Replace 8+ char hex strings
|
||||
@@ -121,10 +123,7 @@ func scanOneContainer(name string, since time.Duration, logger *log.Logger) Cont
|
||||
e.count++
|
||||
e.lastSeen = time.Now()
|
||||
} else {
|
||||
// Use original line trimmed as message (strip timestamp)
|
||||
msg := reTimestamp.ReplaceAllString(line, "")
|
||||
msg = reSyslog.ReplaceAllString(msg, "")
|
||||
msg = strings.TrimSpace(msg)
|
||||
msg := cleanLine(line)
|
||||
if len(msg) > 200 {
|
||||
msg = msg[:200]
|
||||
}
|
||||
@@ -161,9 +160,18 @@ func scanOneContainer(name string, since time.Duration, logger *log.Logger) Cont
|
||||
return summary
|
||||
}
|
||||
|
||||
// cleanLine strips ANSI escape codes and timestamps from a log line.
//
// Every match of reANSI, then reTimestamp, then reSyslog is replaced with the
// empty string, in that order, and the result is returned with leading and
// trailing whitespace trimmed. No number, hex, or UUID normalization happens
// here; fingerprint applies those replacements to the value returned by
// cleanLine. scanOneContainer also uses the returned value as the issue
// message text.
|
||||
func cleanLine(line string) string {
|
||||
s := reANSI.ReplaceAllString(line, "")
|
||||
s = reTimestamp.ReplaceAllString(s, "")
|
||||
s = reSyslog.ReplaceAllString(s, "")
|
||||
return strings.TrimSpace(s)
|
||||
}
|
||||
|
||||
// classifyLine returns "error", "warn", or "" based on first 5 words of the line.
|
||||
func classifyLine(line string) string {
|
||||
lower := strings.ToLower(line)
|
||||
cleaned := reANSI.ReplaceAllString(line, "")
|
||||
lower := strings.ToLower(cleaned)
|
||||
words := strings.Fields(lower)
|
||||
if len(words) > 5 {
|
||||
words = words[:5]
|
||||
@@ -185,9 +193,7 @@ func classifyLine(line string) string {
|
||||
|
||||
// fingerprint produces a deduplication key for a log line.
|
||||
func fingerprint(line string) string {
|
||||
// Strip leading timestamp
|
||||
s := reTimestamp.ReplaceAllString(line, "")
|
||||
s = reSyslog.ReplaceAllString(s, "")
|
||||
s := cleanLine(line)
|
||||
// Replace UUIDs before hex to avoid partial matches
|
||||
s = reUUID.ReplaceAllString(s, "<UUID>")
|
||||
s = reHex.ReplaceAllString(s, "<HEX>")
|
||||
|
||||
Reference in New Issue
Block a user