fix: handle oversized lines in DB dump validation

Replace bufio.Scanner with bufio.Reader.ReadLine(), which flags lines
exceeding the buffer (isPrefix=true) so the caller can skip them instead
of the whole scan failing with bufio.ErrTooLong.
Fixes validation of Immich's PostgreSQL dump, which contains COPY lines
with binary-encoded image data exceeding the 256 KiB scanner limit.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-19 07:58:31 +01:00
parent debab0f38b
commit 0c0cacbe7c
+25 -11
View File
@@ -4,6 +4,7 @@ import (
"bufio"
"context"
"fmt"
"io"
"log"
"os"
"os/exec"
@@ -259,15 +260,34 @@ func ValidateDump(filePath string, dbType DBType) DumpValidation {
}
defer f.Close()
scanner := bufio.NewScanner(f)
// Increase token buffer for very long lines (some SQL lines can be large)
scanner.Buffer(make([]byte, 256*1024), 256*1024)
// Use bufio.Reader instead of Scanner: ReadLine gracefully handles lines
// longer than the buffer (isPrefix=true) so we can skip them. Only short
// lines matter (headers, CREATE TABLE). Long COPY/INSERT data lines
// (e.g., Immich's binary-encoded image data) are skipped without allocating.
reader := bufio.NewReaderSize(f, 256*1024)
lineNum := 0
headerFound := false
tableCount := 0
for scanner.Scan() {
line := scanner.Text()
for {
lineBytes, isPrefix, err := reader.ReadLine()
if err != nil {
if err != io.EOF {
v.Error = fmt.Sprintf("hiba az olvasás közben: %v", err)
log.Printf("[WARN] ValidateDump FAIL: %s — read error: %v", filePath, err)
return v
}
break // EOF
}
if isPrefix {
// Line exceeds buffer — skip remainder (COPY data, large INSERTs)
for isPrefix && err == nil {
_, isPrefix, err = reader.ReadLine()
}
continue
}
line := string(lineBytes)
lineNum++
// Header check — scan first 10 lines for expected dump header
@@ -294,12 +314,6 @@ func ValidateDump(filePath string, dbType DBType) DumpValidation {
}
}
if err := scanner.Err(); err != nil {
v.Error = fmt.Sprintf("hiba az olvasás közben: %v", err)
log.Printf("[WARN] ValidateDump FAIL: %s — scanner error: %v", filePath, err)
return v
}
v.TableCount = tableCount
if !headerFound {