package backup

import (
	"bufio"
	"context"
	"fmt"
	"log"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"time"
)

// DBType represents a database engine type.
// Its string value is also used as the suffix of dump file names
// ("<stack>-<dbtype>.sql").
type DBType string

// Database engine types recognised by discovery, dumping and validation.
const (
	DBTypePostgres DBType = "postgres"
	DBTypeMariaDB DBType = "mariadb" // also assigned to containers whose image name contains "mysql"
)

// DiscoveredDB holds metadata about a running database container.
type DiscoveredDB struct {
	ContainerName string // container name as reported by "docker ps"
	ContainerID string // container ID as reported by "docker ps"
	DBType DBType // engine, detected from the container's image name
	DBUser string // user the dump connects as (from the container environment, or a default)
	DBName string // database to dump (from the container environment, or a default)
	StackName string // container name with a trailing well-known DB suffix removed; used in the dump file name
}

// DumpResult holds the outcome of a single database dump.
type DumpResult struct {
	DB DiscoveredDB // the database that was dumped
	FilePath string // final path of the dump file; set only when the dump succeeded
	Size int64 // size of the dump file in bytes; set only when the dump succeeded
	Duration time.Duration // time spent producing the dump
	Error error // non-nil when the dump failed
	Validation DumpValidation // structural check of the written file; populated only when the dump succeeded
}

// DumpValidation holds the result of a dump file structural check.
type DumpValidation struct {
	Valid bool // true only when the expected header and at least one CREATE TABLE were found
	TableCount int // number of lines that start with CREATE TABLE
	Error string // human-readable reason the check failed, if any
	FileSize int64 // file size in bytes at validation time
	ModTime time.Time // file modification time at validation time
}

// DumpFileInfo holds info about a dump file on disk.
type DumpFileInfo struct {
	FileName string // file name within the dump directory
	StackName string // stack name parsed from the file name
	DBType DBType // engine parsed from the file name; empty when the name carries no known suffix
	Size int64 // file size in bytes
	ModTime time.Time // file modification time
	Validation DumpValidation // structural check of the file
}

// DiscoverDatabases finds running database containers via docker ps.
func DiscoverDatabases(ctx context.Context, logger *log.Logger) ([]DiscoveredDB, error) { cmd := exec.CommandContext(ctx, "docker", "ps", "--format", "{{.ID}}\t{{.Names}}\t{{.Image}}", "--filter", "status=running") out, err := cmd.Output() if err != nil { return nil, fmt.Errorf("docker ps failed: %w", err) } var dbs []DiscoveredDB for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") { if line == "" { continue } parts := strings.SplitN(line, "\t", 3) if len(parts) < 3 { continue } id, name, image := parts[0], parts[1], strings.ToLower(parts[2]) var dbType DBType if strings.Contains(image, "postgres") { dbType = DBTypePostgres } else if strings.Contains(image, "mariadb") || strings.Contains(image, "mysql") { dbType = DBTypeMariaDB } else { continue } db := DiscoveredDB{ ContainerID: id, ContainerName: name, DBType: dbType, StackName: deriveStackName(name), } // Get env vars from container if err := populateDBEnv(ctx, &db); err != nil { logger.Printf("[WARN] Could not read env vars for %s: %v", name, err) continue } dbs = append(dbs, db) } return dbs, nil } // DumpAll dumps all discovered databases. func DumpAll(ctx context.Context, dbs []DiscoveredDB, dumpDir string, logger *log.Logger) []DumpResult { // Clean up old .tmp files (older than 1 hour) cleanupTmpFiles(dumpDir, logger) var results []DumpResult for _, db := range dbs { result := DumpOne(ctx, db, dumpDir, logger) results = append(results, result) } return results } // DumpOne dumps a single database. 
func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.Logger) DumpResult { start := time.Now() result := DumpResult{DB: db} // Ensure dump directory exists if err := os.MkdirAll(dumpDir, 0755); err != nil { result.Error = fmt.Errorf("creating dump dir: %w", err) result.Duration = time.Since(start) return result } filename := fmt.Sprintf("%s-%s.sql", db.StackName, db.DBType) tmpPath := filepath.Join(dumpDir, filename+".tmp") finalPath := filepath.Join(dumpDir, filename) // 5-minute timeout per dump dumpCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) defer cancel() // Verify container is still running checkCmd := exec.CommandContext(dumpCtx, "docker", "inspect", "--format", "{{.State.Running}}", db.ContainerID) checkOut, err := checkCmd.Output() if err != nil || strings.TrimSpace(string(checkOut)) != "true" { result.Error = fmt.Errorf("container %s no longer running", db.ContainerName) result.Duration = time.Since(start) return result } // Build dump command var cmd *exec.Cmd switch db.DBType { case DBTypePostgres: cmd = exec.CommandContext(dumpCtx, "docker", "exec", db.ContainerID, "pg_dump", "-U", db.DBUser, "-d", db.DBName, "--clean", "--if-exists", "--no-owner", "--no-privileges") case DBTypeMariaDB: // Get root password from container env password := getMariaDBPassword(dumpCtx, db.ContainerID) if password == "" { result.Error = fmt.Errorf("could not determine MariaDB root password for %s", db.ContainerName) result.Duration = time.Since(start) return result } cmd = exec.CommandContext(dumpCtx, "docker", "exec", db.ContainerID, "mariadb-dump", "-u", "root", "-p"+password, "--single-transaction", "--routines", "--triggers", db.DBName) default: result.Error = fmt.Errorf("unsupported DB type: %s", db.DBType) result.Duration = time.Since(start) return result } // Write output to tmp file tmpFile, err := os.Create(tmpPath) if err != nil { result.Error = fmt.Errorf("creating tmp file: %w", err) result.Duration = time.Since(start) return 
result } cmd.Stdout = tmpFile var stderr strings.Builder cmd.Stderr = &stderr err = cmd.Run() tmpFile.Close() if err != nil { os.Remove(tmpPath) errMsg := stderr.String() if len(errMsg) > 200 { errMsg = errMsg[:200] } result.Error = fmt.Errorf("dump failed: %v — %s", err, errMsg) result.Duration = time.Since(start) return result } // Check file size stat, err := os.Stat(tmpPath) if err != nil || stat.Size() == 0 { os.Remove(tmpPath) result.Error = fmt.Errorf("dump produced empty file for %s", db.ContainerName) result.Duration = time.Since(start) return result } // Rename tmp to final if err := os.Rename(tmpPath, finalPath); err != nil { os.Remove(tmpPath) result.Error = fmt.Errorf("renaming dump file: %w", err) result.Duration = time.Since(start) return result } result.FilePath = finalPath result.Size = stat.Size() result.Duration = time.Since(start) // Run validation on the dump file result.Validation = ValidateDump(finalPath, db.DBType) logger.Printf("[INFO] DB dump: %s → %s (%s, %s, %d tables)", db.ContainerName, filename, humanizeBytes(stat.Size()), result.Duration.Round(time.Millisecond), result.Validation.TableCount) return result } // ValidateDump checks a SQL dump file for basic structural integrity. func ValidateDump(filePath string, dbType DBType) DumpValidation { log.Printf("[DEBUG] ValidateDump: %s (type=%s)", filePath, dbType) stat, err := os.Stat(filePath) if err != nil { return DumpValidation{Error: fmt.Sprintf("stat failed: %v", err)} } v := DumpValidation{ FileSize: stat.Size(), ModTime: stat.ModTime(), } if stat.Size() < 100 { v.Error = "dump file too small (< 100 bytes)" log.Printf("[WARN] ValidateDump FAIL: %s — %s", filePath, v.Error) return v } // H1: Use bufio.Scanner to read line-by-line instead of loading entire file into memory. // Large dumps (500MB+) would cause massive allocations on every 5-min cache refresh. 
f, err := os.Open(filePath) if err != nil { v.Error = fmt.Sprintf("read failed: %v", err) log.Printf("[WARN] ValidateDump FAIL: %s — %s", filePath, v.Error) return v } defer f.Close() scanner := bufio.NewScanner(f) // Increase token buffer for very long lines (some SQL lines can be large) scanner.Buffer(make([]byte, 256*1024), 256*1024) lineNum := 0 headerFound := false tableCount := 0 for scanner.Scan() { line := scanner.Text() lineNum++ // Header check — scan first 10 lines for expected dump header // MariaDB 11.4+ prepends a sandbox comment before the header line if lineNum <= 10 && !headerFound { switch dbType { case DBTypeMariaDB: if strings.HasPrefix(line, "-- MariaDB dump") || strings.HasPrefix(line, "-- MySQL dump") || strings.HasPrefix(line, "-- mysqldump") { headerFound = true } case DBTypePostgres: if strings.HasPrefix(line, "-- PostgreSQL database dump") { headerFound = true } } } // Count CREATE TABLE statements upper := strings.ToUpper(strings.TrimSpace(line)) if strings.HasPrefix(upper, "CREATE TABLE") { tableCount++ } } v.TableCount = tableCount if !headerFound { switch dbType { case DBTypeMariaDB: v.Error = "MariaDB dump missing comment header" case DBTypePostgres: v.Error = "PostgreSQL dump missing comment header" } log.Printf("[WARN] ValidateDump FAIL: %s — %s", filePath, v.Error) return v } if tableCount == 0 { v.Error = "no CREATE TABLE statements found" log.Printf("[WARN] ValidateDump FAIL: %s — %s (header was found, scanned %d lines)", filePath, v.Error, lineNum) return v } v.Valid = true log.Printf("[DEBUG] ValidateDump OK: %s — %d tables, header found", filePath, tableCount) return v } // ListDumpFiles returns info about SQL dump files on disk. 
func ListDumpFiles(dumpDir string) ([]DumpFileInfo, error) { entries, err := os.ReadDir(dumpDir) if err != nil { if os.IsNotExist(err) { return nil, nil } return nil, fmt.Errorf("reading dump dir: %w", err) } var files []DumpFileInfo for _, e := range entries { // M2: Check .tmp before .sql to correctly skip ".sql.tmp" temp files (was dead code before). if e.IsDir() || strings.HasSuffix(e.Name(), ".tmp") { continue } if !strings.HasSuffix(e.Name(), ".sql") { continue } info, err := e.Info() if err != nil { continue } f := DumpFileInfo{ FileName: e.Name(), Size: info.Size(), ModTime: info.ModTime(), } // Parse stack name and DB type from filename: "paperless-ngx-postgres.sql" base := strings.TrimSuffix(e.Name(), ".sql") if strings.HasSuffix(base, "-postgres") { f.StackName = strings.TrimSuffix(base, "-postgres") f.DBType = DBTypePostgres } else if strings.HasSuffix(base, "-mariadb") { f.StackName = strings.TrimSuffix(base, "-mariadb") f.DBType = DBTypeMariaDB } else { f.StackName = base } // Run validation on the file fullPath := filepath.Join(dumpDir, e.Name()) f.Validation = ValidateDump(fullPath, f.DBType) files = append(files, f) } return files, nil } func populateDBEnv(ctx context.Context, db *DiscoveredDB) error { cmd := exec.CommandContext(ctx, "docker", "inspect", db.ContainerID, "--format", "{{range .Config.Env}}{{println .}}{{end}}") out, err := cmd.Output() if err != nil { return err } env := make(map[string]string) for _, line := range strings.Split(string(out), "\n") { if idx := strings.IndexByte(line, '='); idx > 0 { env[line[:idx]] = line[idx+1:] } } switch db.DBType { case DBTypePostgres: db.DBUser = env["POSTGRES_USER"] if db.DBUser == "" { db.DBUser = "postgres" } db.DBName = env["POSTGRES_DB"] if db.DBName == "" { db.DBName = db.DBUser } case DBTypeMariaDB: db.DBName = env["MYSQL_DATABASE"] if db.DBName == "" { db.DBName = env["MARIADB_DATABASE"] } if db.DBName == "" { db.DBName = "mysql" // fallback to dump all } db.DBUser = "root" } return nil } 
// getMariaDBPassword returns the MariaDB/MySQL root password configured in
// the environment of the given container, or "" when "docker inspect" fails
// or no non-empty MYSQL_ROOT_PASSWORD / MARIADB_ROOT_PASSWORD is set.
// Variables are examined in the order docker reports them; an entry with an
// empty value is skipped so it cannot hide a later, usable one.
func getMariaDBPassword(ctx context.Context, containerID string) string {
	// The template prints one KEY=VALUE pair per line.
	cmd := exec.CommandContext(ctx, "docker", "inspect", containerID,
		"--format", "{{range .Config.Env}}{{println .}}{{end}}")
	out, err := cmd.Output()
	if err != nil {
		return ""
	}

	prefixes := []string{"MYSQL_ROOT_PASSWORD=", "MARIADB_ROOT_PASSWORD="}
	for _, line := range strings.Split(string(out), "\n") {
		for _, prefix := range prefixes {
			if !strings.HasPrefix(line, prefix) {
				continue
			}
			if password := strings.TrimPrefix(line, prefix); password != "" {
				return password
			}
		}
	}
	return ""
}

// deriveStackName strips known DB suffixes from container name.
// Only the last "-"-separated component is considered, compared
// case-insensitively; e.g. "paperless-ngx-postgres" becomes "paperless-ngx".
// Names without a "-" or without a known suffix are returned unchanged.
func deriveStackName(containerName string) string {
	cut := strings.LastIndex(containerName, "-")
	if cut < 0 {
		return containerName
	}
	switch strings.ToLower(containerName[cut+1:]) {
	case "postgres", "db", "mariadb", "mysql", "database", "redis", "cache":
		return containerName[:cut]
	}
	return containerName
}

// cleanupTmpFiles removes entries of dumpDir whose name ends in ".tmp" and
// whose modification time is more than one hour in the past — leftovers of
// dumps that never completed. It is best-effort: an unreadable directory is
// silently ignored, and an entry that cannot be removed is logged as a
// warning and left in place.
func cleanupTmpFiles(dumpDir string, logger *log.Logger) {
	entries, err := os.ReadDir(dumpDir)
	if err != nil {
		// Nothing to clean (directory missing or unreadable).
		return
	}

	cutoff := time.Now().Add(-1 * time.Hour)
	for _, e := range entries {
		if !strings.HasSuffix(e.Name(), ".tmp") {
			continue
		}
		info, err := e.Info()
		if err != nil || !info.ModTime().Before(cutoff) {
			// Vanished meanwhile, or recent enough to belong to a running dump.
			continue
		}
		// Only report success when the entry is actually gone.
		if err := os.Remove(filepath.Join(dumpDir, e.Name())); err != nil {
			logger.Printf("[WARN] Could not remove stale tmp file %s: %v", e.Name(), err)
			continue
		}
		logger.Printf("[INFO] Cleaned up stale tmp file: %s", e.Name())
	}
}

// M1: formatBytes removed — use humanizeBytes() from appdata.go (same package, no duplication).