package backup

import (
	"bufio"
	"context"
	"fmt"
	"io"
	"log"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"time"

	"gitea.dooplex.hu/admin/felhom-controller/internal/util"
)

// DBType represents a database engine type.
//
// Its string value is also used as the suffix of dump file names
// ("<stack>-<dbtype>.sql").
type DBType string

// Database engines that discovery and dumping know how to handle.
const (
	// DBTypePostgres marks containers whose image name contains "postgres".
	DBTypePostgres DBType = "postgres"
	// DBTypeMariaDB marks containers whose image name contains "mariadb" or
	// "mysql"; both are dumped with mariadb-dump.
	DBTypeMariaDB DBType = "mariadb"
)

// DiscoveredDB holds metadata about a running database container.
type DiscoveredDB struct {
	// ContainerName is the container name as printed by `docker ps`.
	ContainerName string
	// ContainerID is the container ID as printed by `docker ps`.
	ContainerID string
	// DBType is the engine inferred from the container's image name.
	DBType DBType
	// DBUser is the database user taken from the container environment
	// (with an engine-specific default).
	DBUser string
	// DBName is the database to dump, taken from the container environment
	// (with an engine-specific default).
	DBName string
	// StackName is ContainerName with a known database suffix stripped; it
	// forms the first part of the dump file name.
	StackName string
}

// DumpResult holds the outcome of a single database dump.
type DumpResult struct {
	// DB is the database this result belongs to.
	DB DiscoveredDB
	// FilePath is the final path of the dump file; it is set only on success.
	FilePath string
	// Size is the dump file size in bytes; it is set only on success.
	Size int64
	// Duration is the time spent on the dump attempt.
	Duration time.Duration
	// Error is non-nil when the dump failed.
	Error error
	// Validation is the structural check of the written dump file; it is
	// populated only on success.
	Validation DumpValidation
}

// DumpValidation holds the result of a dump file structural check.
type DumpValidation struct {
	// Valid is true only when the expected header comment and at least one
	// CREATE TABLE statement were found.
	Valid bool
	// TableCount is the number of lines that start with CREATE TABLE.
	TableCount int
	// Error describes why validation failed.
	Error string
	// FileSize is the size of the checked file in bytes.
	FileSize int64
	// ModTime is the modification time of the checked file.
	ModTime time.Time
}

// DumpFileInfo holds info about a dump file on disk.
type DumpFileInfo struct {
	// FileName is the file's base name inside the dump directory.
	FileName string
	// StackName is parsed from the file name ("<stack>-<dbtype>.sql"); when the
	// type suffix is not recognized it is the file name without ".sql".
	StackName string
	// DBType is parsed from the file name; it is empty when the suffix is not
	// recognized.
	DBType DBType
	// Size is the file size in bytes.
	Size int64
	// ModTime is the file's modification time.
	ModTime time.Time
	// Validation is the structural check result for the file.
	Validation DumpValidation
}

// DiscoverDatabases finds running database containers via docker ps.
func DiscoverDatabases(ctx context.Context, logger *log.Logger, debug bool) ([]DiscoveredDB, error) { if debug { logger.Printf("[DEBUG] DiscoverDatabases: running docker ps to find database containers") } cmd := exec.CommandContext(ctx, "docker", "ps", "--format", "{{.ID}}\t{{.Names}}\t{{.Image}}", "--filter", "status=running") out, err := cmd.Output() if err != nil { return nil, fmt.Errorf("docker ps failed: %w", err) } if debug { logger.Printf("[DEBUG] DiscoverDatabases: docker ps output: %s", util.TruncateStr(strings.TrimSpace(string(out)), 500)) } var dbs []DiscoveredDB var skipped int for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") { if line == "" { continue } parts := strings.SplitN(line, "\t", 3) if len(parts) < 3 { continue } id, name, image := parts[0], parts[1], strings.ToLower(parts[2]) var dbType DBType if strings.Contains(image, "postgres") { dbType = DBTypePostgres } else if strings.Contains(image, "mariadb") || strings.Contains(image, "mysql") { dbType = DBTypeMariaDB } else { if debug { logger.Printf("[DEBUG] DiscoverDatabases: skipping container %s (image=%s, not a database)", name, image) } skipped++ continue } if debug { logger.Printf("[DEBUG] DiscoverDatabases: found %s container: %s (id=%s)", dbType, name, id[:12]) } db := DiscoveredDB{ ContainerID: id, ContainerName: name, DBType: dbType, StackName: deriveStackName(name), } // Get env vars from container if err := populateDBEnv(ctx, &db); err != nil { logger.Printf("[WARN] Could not read env vars for %s: %v", name, err) if debug { logger.Printf("[DEBUG] DiscoverDatabases: skipping %s — env read failed", name) } continue } if debug { logger.Printf("[DEBUG] DiscoverDatabases: %s → stack=%s, dbUser=%s, dbName=%s", name, db.StackName, db.DBUser, db.DBName) } dbs = append(dbs, db) } if debug { logger.Printf("[DEBUG] DiscoverDatabases: found %d database(s), skipped %d non-DB container(s)", len(dbs), skipped) } return dbs, nil } // DumpAll dumps all discovered databases. 
func DumpAll(ctx context.Context, dbs []DiscoveredDB, dumpDir string, logger *log.Logger, debug bool) []DumpResult { // Clean up old .tmp files (older than 1 hour) cleanupTmpFiles(dumpDir, logger) var results []DumpResult for _, db := range dbs { result := DumpOne(ctx, db, dumpDir, logger, debug) results = append(results, result) } return results } // DumpOne dumps a single database. func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.Logger, debug bool) DumpResult { start := time.Now() result := DumpResult{DB: db} if debug { logger.Printf("[DEBUG] DumpOne: starting dump for container=%s, stack=%s, dbType=%s, dumpDir=%s", db.ContainerName, db.StackName, db.DBType, dumpDir) } // Ensure dump directory exists if err := os.MkdirAll(dumpDir, 0755); err != nil { result.Error = fmt.Errorf("creating dump dir: %w", err) result.Duration = time.Since(start) return result } filename := fmt.Sprintf("%s-%s.sql", db.StackName, db.DBType) tmpPath := filepath.Join(dumpDir, filename+".tmp") finalPath := filepath.Join(dumpDir, filename) // 5-minute timeout per dump dumpCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) defer cancel() // Verify container is still running checkCmd := exec.CommandContext(dumpCtx, "docker", "inspect", "--format", "{{.State.Running}}", db.ContainerID) checkOut, err := checkCmd.Output() if err != nil || strings.TrimSpace(string(checkOut)) != "true" { result.Error = fmt.Errorf("container %s no longer running", db.ContainerName) result.Duration = time.Since(start) if debug { logger.Printf("[DEBUG] DumpOne: container %s is no longer running — skipping", db.ContainerName) } return result } // Build dump command var cmd *exec.Cmd switch db.DBType { case DBTypePostgres: cmd = exec.CommandContext(dumpCtx, "docker", "exec", db.ContainerID, "pg_dump", "-U", db.DBUser, "-d", db.DBName, "--clean", "--if-exists", "--no-owner", "--no-privileges") if debug { logger.Printf("[DEBUG] DumpOne: pg_dump command: docker exec %s pg_dump -U %s -d 
%s --clean --if-exists --no-owner --no-privileges", db.ContainerID[:12], db.DBUser, db.DBName) } case DBTypeMariaDB: // Get root password from container env password := getMariaDBPassword(dumpCtx, db.ContainerID) if password == "" { result.Error = fmt.Errorf("could not determine MariaDB root password for %s", db.ContainerName) result.Duration = time.Since(start) if debug { logger.Printf("[DEBUG] DumpOne: MariaDB root password not found for %s — skipping", db.ContainerName) } return result } cmd = exec.CommandContext(dumpCtx, "docker", "exec", db.ContainerID, "mariadb-dump", "-u", "root", "-p***", "--single-transaction", "--routines", "--triggers", db.DBName) if debug { logger.Printf("[DEBUG] DumpOne: mariadb-dump command: docker exec %s mariadb-dump -u root -p*** --single-transaction --routines --triggers %s", db.ContainerID[:12], db.DBName) } // Actual command with real password (not logged) cmd = exec.CommandContext(dumpCtx, "docker", "exec", db.ContainerID, "mariadb-dump", "-u", "root", "-p"+password, "--single-transaction", "--routines", "--triggers", db.DBName) default: result.Error = fmt.Errorf("unsupported DB type: %s", db.DBType) result.Duration = time.Since(start) return result } // Write output to tmp file tmpFile, err := os.Create(tmpPath) if err != nil { result.Error = fmt.Errorf("creating tmp file: %w", err) result.Duration = time.Since(start) return result } defer tmpFile.Close() cmd.Stdout = tmpFile var stderr strings.Builder cmd.Stderr = &stderr err = cmd.Run() if err != nil { os.Remove(tmpPath) errMsg := stderr.String() if len(errMsg) > 200 { errMsg = errMsg[:200] } result.Error = fmt.Errorf("dump failed: %v — %s", err, errMsg) result.Duration = time.Since(start) if debug { logger.Printf("[DEBUG] DumpOne: dump command failed for %s: %v", db.ContainerName, result.Error) } return result } // Close and sync tmpFile before rename to ensure data is flushed to disk (H8 fix). 
if err := tmpFile.Sync(); err != nil { os.Remove(tmpPath) result.Error = fmt.Errorf("syncing dump file: %w", err) result.Duration = time.Since(start) return result } if err := tmpFile.Close(); err != nil { os.Remove(tmpPath) result.Error = fmt.Errorf("closing dump file: %w", err) result.Duration = time.Since(start) return result } // Check file size stat, err := os.Stat(tmpPath) if err != nil || stat.Size() == 0 { os.Remove(tmpPath) result.Error = fmt.Errorf("dump produced empty file for %s", db.ContainerName) result.Duration = time.Since(start) if debug { logger.Printf("[DEBUG] DumpOne: dump produced empty file for %s", db.ContainerName) } return result } // Rename tmp to final if err := os.Rename(tmpPath, finalPath); err != nil { os.Remove(tmpPath) result.Error = fmt.Errorf("renaming dump file: %w", err) result.Duration = time.Since(start) return result } result.FilePath = finalPath result.Size = stat.Size() result.Duration = time.Since(start) // Run validation on the dump file result.Validation = ValidateDump(finalPath, db.DBType) if debug { logger.Printf("[DEBUG] DumpOne: completed %s → %s (size=%s, valid=%v, tables=%d, duration=%s)", db.ContainerName, filename, humanizeBytes(stat.Size()), result.Validation.Valid, result.Validation.TableCount, result.Duration.Round(time.Millisecond)) } logger.Printf("[INFO] DB dump: %s → %s (%s, %s, %d tables)", db.ContainerName, filename, humanizeBytes(stat.Size()), result.Duration.Round(time.Millisecond), result.Validation.TableCount) return result } // ValidateDump checks a SQL dump file for basic structural integrity. 
func ValidateDump(filePath string, dbType DBType) DumpValidation { log.Printf("[DEBUG] ValidateDump: %s (type=%s)", filePath, dbType) stat, err := os.Stat(filePath) if err != nil { return DumpValidation{Error: fmt.Sprintf("stat failed: %v", err)} } v := DumpValidation{ FileSize: stat.Size(), ModTime: stat.ModTime(), } if stat.Size() < 100 { v.Error = "dump file too small (< 100 bytes)" log.Printf("[WARN] ValidateDump FAIL: %s — %s", filePath, v.Error) return v } // H1: Use bufio.Scanner to read line-by-line instead of loading entire file into memory. // Large dumps (500MB+) would cause massive allocations on every 5-min cache refresh. f, err := os.Open(filePath) if err != nil { v.Error = fmt.Sprintf("read failed: %v", err) log.Printf("[WARN] ValidateDump FAIL: %s — %s", filePath, v.Error) return v } defer f.Close() // Use bufio.Reader instead of Scanner: ReadLine gracefully handles lines // longer than the buffer (isPrefix=true) so we can skip them. Only short // lines matter (headers, CREATE TABLE). Long COPY/INSERT data lines // (e.g., Immich's binary-encoded image data) are skipped without allocating. 
reader := bufio.NewReaderSize(f, 256*1024) lineNum := 0 headerFound := false tableCount := 0 for { lineBytes, isPrefix, err := reader.ReadLine() if err != nil { if err != io.EOF { v.Error = fmt.Sprintf("hiba az olvasás közben: %v", err) log.Printf("[WARN] ValidateDump FAIL: %s — read error: %v", filePath, err) return v } break // EOF } if isPrefix { // Line exceeds buffer — skip remainder (COPY data, large INSERTs) for isPrefix && err == nil { _, isPrefix, err = reader.ReadLine() } continue } line := string(lineBytes) lineNum++ // Header check — scan first 10 lines for expected dump header // MariaDB 11.4+ prepends a sandbox comment before the header line if lineNum <= 10 && !headerFound { switch dbType { case DBTypeMariaDB: if strings.HasPrefix(line, "-- MariaDB dump") || strings.HasPrefix(line, "-- MySQL dump") || strings.HasPrefix(line, "-- mysqldump") { headerFound = true } case DBTypePostgres: if strings.HasPrefix(line, "-- PostgreSQL database dump") { headerFound = true } } } // Count CREATE TABLE statements upper := strings.ToUpper(strings.TrimSpace(line)) if strings.HasPrefix(upper, "CREATE TABLE") { tableCount++ } } v.TableCount = tableCount if !headerFound { switch dbType { case DBTypeMariaDB: v.Error = "MariaDB dump missing comment header" case DBTypePostgres: v.Error = "PostgreSQL dump missing comment header" } log.Printf("[WARN] ValidateDump FAIL: %s — %s", filePath, v.Error) return v } if tableCount == 0 { v.Error = "no CREATE TABLE statements found" log.Printf("[WARN] ValidateDump FAIL: %s — %s (header was found, scanned %d lines)", filePath, v.Error, lineNum) return v } v.Valid = true log.Printf("[DEBUG] ValidateDump OK: %s — %d tables, header found", filePath, tableCount) return v } // ListDumpFiles returns info about SQL dump files on disk. 
func ListDumpFiles(dumpDir string) ([]DumpFileInfo, error) { entries, err := os.ReadDir(dumpDir) if err != nil { if os.IsNotExist(err) { return nil, nil } return nil, fmt.Errorf("reading dump dir: %w", err) } var files []DumpFileInfo for _, e := range entries { // M2: Check .tmp before .sql to correctly skip ".sql.tmp" temp files (was dead code before). if e.IsDir() || strings.HasSuffix(e.Name(), ".tmp") { continue } if !strings.HasSuffix(e.Name(), ".sql") { continue } info, err := e.Info() if err != nil { continue } f := DumpFileInfo{ FileName: e.Name(), Size: info.Size(), ModTime: info.ModTime(), } // Parse stack name and DB type from filename: "paperless-ngx-postgres.sql" base := strings.TrimSuffix(e.Name(), ".sql") if strings.HasSuffix(base, "-postgres") { f.StackName = strings.TrimSuffix(base, "-postgres") f.DBType = DBTypePostgres } else if strings.HasSuffix(base, "-mariadb") { f.StackName = strings.TrimSuffix(base, "-mariadb") f.DBType = DBTypeMariaDB } else { f.StackName = base } // Run validation on the file fullPath := filepath.Join(dumpDir, e.Name()) f.Validation = ValidateDump(fullPath, f.DBType) files = append(files, f) } return files, nil } func populateDBEnv(ctx context.Context, db *DiscoveredDB) error { cmd := exec.CommandContext(ctx, "docker", "inspect", db.ContainerID, "--format", "{{range .Config.Env}}{{println .}}{{end}}") out, err := cmd.Output() if err != nil { return err } env := make(map[string]string) for _, line := range strings.Split(string(out), "\n") { if idx := strings.IndexByte(line, '='); idx > 0 { env[line[:idx]] = line[idx+1:] } } switch db.DBType { case DBTypePostgres: db.DBUser = env["POSTGRES_USER"] if db.DBUser == "" { db.DBUser = "postgres" } db.DBName = env["POSTGRES_DB"] if db.DBName == "" { db.DBName = db.DBUser } case DBTypeMariaDB: db.DBName = env["MYSQL_DATABASE"] if db.DBName == "" { db.DBName = env["MARIADB_DATABASE"] } if db.DBName == "" { db.DBName = "mysql" // fallback to dump all } db.DBUser = "root" } return nil } 
// getMariaDBPassword returns the MariaDB/MySQL root password configured in the
// environment of the given container, as reported by `docker inspect`.
// It returns "" when the container cannot be inspected or no non-empty root
// password variable is set.
func getMariaDBPassword(ctx context.Context, containerID string) string {
	cmd := exec.CommandContext(ctx, "docker", "inspect", containerID, "--format", "{{range .Config.Env}}{{println .}}{{end}}")
	out, err := cmd.Output()
	if err != nil {
		return ""
	}
	return rootPasswordFromEnv(string(out))
}

// rootPasswordFromEnv scans newline-separated "KEY=value" lines and returns the
// value of the first MYSQL_ROOT_PASSWORD or MARIADB_ROOT_PASSWORD entry that is
// not empty. Empty entries are skipped so that a blank variable does not hide
// a usable one further down.
func rootPasswordFromEnv(envOutput string) string {
	for _, line := range strings.Split(envOutput, "\n") {
		for _, prefix := range []string{"MYSQL_ROOT_PASSWORD=", "MARIADB_ROOT_PASSWORD="} {
			if !strings.HasPrefix(line, prefix) {
				continue
			}
			if password := strings.TrimPrefix(line, prefix); password != "" {
				return password
			}
		}
	}
	return ""
}

// deriveStackName strips known DB suffixes from container name.
//
// Only the last "-"-separated segment is considered, and it is compared
// case-insensitively: "paperless-ngx-postgres" becomes "paperless-ngx", while
// "gitea-server" and names without a hyphen are returned unchanged.
func deriveStackName(containerName string) string {
	idx := strings.LastIndexByte(containerName, '-')
	if idx < 0 {
		return containerName
	}
	switch strings.ToLower(containerName[idx+1:]) {
	case "postgres", "db", "mariadb", "mysql", "database", "redis", "cache":
		return containerName[:idx]
	}
	return containerName
}

// cleanupTmpFiles removes ".tmp" files in dumpDir that were last modified more
// than one hour ago, i.e. leftovers of dumps that never completed. Younger
// files are kept because a dump may still be writing to them. Directories are
// never touched. Each removal is logged; a failed removal is logged as a
// warning instead of being reported as cleaned up. An unreadable dumpDir is
// silently ignored (best effort).
func cleanupTmpFiles(dumpDir string, logger *log.Logger) {
	entries, err := os.ReadDir(dumpDir)
	if err != nil {
		return
	}
	cutoff := time.Now().Add(-1 * time.Hour)
	for _, e := range entries {
		if e.IsDir() || !strings.HasSuffix(e.Name(), ".tmp") {
			continue
		}
		info, err := e.Info()
		if err != nil {
			continue
		}
		if !info.ModTime().Before(cutoff) {
			continue
		}
		if err := os.Remove(filepath.Join(dumpDir, e.Name())); err != nil {
			logger.Printf("[WARN] Could not remove stale tmp file %s: %v", e.Name(), err)
			continue
		}
		logger.Printf("[INFO] Cleaned up stale tmp file: %s", e.Name())
	}
}

// M1: formatBytes removed — use humanizeBytes() from appdata.go (same package, no duplication).