telemetry: fix log deduplication — strip ANSI codes, tz offsets, mid-line timestamps (v0.30.6)
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,15 @@
|
|||||||
## Changelog
|
## Changelog
|
||||||
|
|
||||||
|
### v0.30.6 — Telemetry: Better Log Deduplication (2026-02-25)
|
||||||
|
|
||||||
|
#### Fixed
|
||||||
|
- **ANSI escape code stripping** — Log scanner now strips ANSI color codes (e.g. `\x1b[35m`) before classifying and fingerprinting lines, preventing color codes from polluting error messages and breaking deduplication
|
||||||
|
- **Timezone offset in timestamps** — ISO timestamp regex now handles `+01:00`/`-0500` timezone offsets and an optional trailing colon (fixes Vikunja-style log entries)
|
||||||
|
- **Mid-line timestamps** — Removed the `^` anchor from both the ISO and syslog timestamp regexes, so timestamps embedded after log-level keywords (e.g. `ERROR 2026-02-24T21:27:05`) are now stripped correctly
|
||||||
|
|
||||||
|
#### Improved
|
||||||
|
- **`cleanLine()` helper** — Consolidated ANSI + timestamp stripping into a single reusable function used by both message display and fingerprint deduplication
|
||||||
|
|
||||||
### v0.30.5 — Health Probe: Fast Initial Checking (2026-02-25)
|
### v0.30.5 — Health Probe: Fast Initial Checking (2026-02-25)
|
||||||
|
|
||||||
#### Improved
|
#### Improved
|
||||||
|
|||||||
@@ -892,7 +892,7 @@ Each report push now includes per-app telemetry data:
|
|||||||
**Log scanning** (`logscanner.go`):
|
**Log scanning** (`logscanner.go`):
|
||||||
- `ScanContainerLogs(containerNames, since, logger)` runs `docker logs --since=15m --tail=1000` sequentially on all non-protected deployed containers.
|
- `ScanContainerLogs(containerNames, since, logger)` runs `docker logs --since=15m --tail=1000` sequentially on all non-protected deployed containers.
|
||||||
- Classifies lines by keyword match (errors: `error`, `fatal`, `panic`, `crit`, `oom`, `killed`, `exception`, `traceback`; warnings: `warn`, `warning`) on the first 5 words (case-insensitive).
|
- Classifies lines by keyword match (errors: `error`, `fatal`, `panic`, `crit`, `oom`, `killed`, `exception`, `traceback`; warnings: `warn`, `warning`) on the first 5 words (case-insensitive).
|
||||||
- Deduplicates via fingerprinting: strips timestamps, replaces 6+ digit numbers with `<N>`, 8+ char hex with `<HEX>`, UUIDs with `<UUID>`. Groups identical fingerprints, keeps top 10 per container.
|
- Deduplicates via fingerprinting: strips ANSI escape codes first, then ISO timestamps (with optional timezone offsets) and syslog timestamps wherever they appear in the line, not only at the start; replaces 6+ digit numbers with `<N>`, 8+ char hex with `<HEX>`, UUIDs with `<UUID>`. Groups identical fingerprints, keeps top 10 per container.
|
||||||
- Returns `[]ContainerLogSummary` with `ErrorCount`, `WarnCount`, `RecentIssues []LogIssue`.
|
- Returns `[]ContainerLogSummary` with `ErrorCount`, `WarnCount`, `RecentIssues []LogIssue`.
|
||||||
|
|
||||||
**Report integration** (`report/telemetry.go`):
|
**Report integration** (`report/telemetry.go`):
|
||||||
|
|||||||
@@ -29,10 +29,12 @@ type LogIssue struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
// Strip leading ISO timestamp: 2006-01-02T15:04:05 or 2006/01/02 15:04:05 etc.
|
// Strip ANSI escape codes (color, bold, etc.)
|
||||||
reTimestamp = regexp.MustCompile(`^\d{4}[-/]\d{2}[-/]\d{2}[T ]\d{2}:\d{2}:\d{2}[.\d]*[Z ]?`)
|
reANSI = regexp.MustCompile(`\x1b\[[0-9;]*m`)
|
||||||
|
// Strip ISO timestamp: 2006-01-02T15:04:05 or 2006/01/02 15:04:05, with optional tz offset
|
||||||
|
reTimestamp = regexp.MustCompile(`\d{4}[-/]\d{2}[-/]\d{2}[T ]\d{2}:\d{2}:\d{2}[.\d]*([+-]\d{2}:?\d{2})?[Z ]?:? ?`)
|
||||||
// Strip syslog-style timestamp: Jan 2 15:04:05
|
// Strip syslog-style timestamp: Jan 2 15:04:05
|
||||||
reSyslog = regexp.MustCompile(`^[A-Z][a-z]{2}\s+\d{1,2} \d{2}:\d{2}:\d{2} `)
|
reSyslog = regexp.MustCompile(`[A-Z][a-z]{2}\s+\d{1,2} \d{2}:\d{2}:\d{2} `)
|
||||||
// Replace 6+ digit sequences with <N> (avoids mangling 4-digit HTTP codes/ports)
|
// Replace 6+ digit sequences with <N> (avoids mangling 4-digit HTTP codes/ports)
|
||||||
reNumbers = regexp.MustCompile(`\b\d{6,}\b`)
|
reNumbers = regexp.MustCompile(`\b\d{6,}\b`)
|
||||||
// Replace 8+ char hex strings
|
// Replace 8+ char hex strings
|
||||||
@@ -121,10 +123,7 @@ func scanOneContainer(name string, since time.Duration, logger *log.Logger) Cont
|
|||||||
e.count++
|
e.count++
|
||||||
e.lastSeen = time.Now()
|
e.lastSeen = time.Now()
|
||||||
} else {
|
} else {
|
||||||
// Use original line trimmed as message (strip timestamp)
|
msg := cleanLine(line)
|
||||||
msg := reTimestamp.ReplaceAllString(line, "")
|
|
||||||
msg = reSyslog.ReplaceAllString(msg, "")
|
|
||||||
msg = strings.TrimSpace(msg)
|
|
||||||
if len(msg) > 200 {
|
if len(msg) > 200 {
|
||||||
msg = msg[:200]
|
msg = msg[:200]
|
||||||
}
|
}
|
||||||
@@ -161,9 +160,18 @@ func scanOneContainer(name string, since time.Duration, logger *log.Logger) Cont
|
|||||||
return summary
|
return summary
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cleanLine strips ANSI escape codes and timestamps from a log line.
|
||||||
|
func cleanLine(line string) string {
|
||||||
|
s := reANSI.ReplaceAllString(line, "")
|
||||||
|
s = reTimestamp.ReplaceAllString(s, "")
|
||||||
|
s = reSyslog.ReplaceAllString(s, "")
|
||||||
|
return strings.TrimSpace(s)
|
||||||
|
}
|
||||||
|
|
||||||
// classifyLine returns "error", "warn", or "" based on first 5 words of the line.
|
// classifyLine returns "error", "warn", or "" based on first 5 words of the line.
|
||||||
func classifyLine(line string) string {
|
func classifyLine(line string) string {
|
||||||
lower := strings.ToLower(line)
|
cleaned := reANSI.ReplaceAllString(line, "")
|
||||||
|
lower := strings.ToLower(cleaned)
|
||||||
words := strings.Fields(lower)
|
words := strings.Fields(lower)
|
||||||
if len(words) > 5 {
|
if len(words) > 5 {
|
||||||
words = words[:5]
|
words = words[:5]
|
||||||
@@ -185,9 +193,7 @@ func classifyLine(line string) string {
|
|||||||
|
|
||||||
// fingerprint produces a deduplication key for a log line.
|
// fingerprint produces a deduplication key for a log line.
|
||||||
func fingerprint(line string) string {
|
func fingerprint(line string) string {
|
||||||
// Strip leading timestamp
|
s := cleanLine(line)
|
||||||
s := reTimestamp.ReplaceAllString(line, "")
|
|
||||||
s = reSyslog.ReplaceAllString(s, "")
|
|
||||||
// Replace UUIDs before hex to avoid partial matches
|
// Replace UUIDs before hex to avoid partial matches
|
||||||
s = reUUID.ReplaceAllString(s, "<UUID>")
|
s = reUUID.ReplaceAllString(s, "<UUID>")
|
||||||
s = reHex.ReplaceAllString(s, "<HEX>")
|
s = reHex.ReplaceAllString(s, "<HEX>")
|
||||||
|
|||||||
Reference in New Issue
Block a user