v0.24.0 — Pre-testing observability: debug logging, diagnostic dump, startup self-test

- Add [DEBUG] logging across all modules (backup, storage, sync, selfupdate,
  monitor, notify, report, assets, setup) gated behind logging.level: "debug"
- Add /api/debug/dump endpoint returning full controller state JSON (debug only)
- Add startup self-test validating 9 subsystems (including Docker, dirs,
  storage, hub, restic repos, metrics DB) with pass/warn/fail summary
- New packages: internal/selftest, internal/util
- Constructor/signature changes: new debug bool params, and new logger params
  on RunHealthCheck and BuildReport; also adds smart watchdog probe logging

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-21 18:32:26 +01:00
parent 6f02536243
commit be7803c0ac
30 changed files with 1281 additions and 67 deletions
+68 -4
View File
@@ -11,6 +11,8 @@ import (
"path/filepath"
"strings"
"time"
"gitea.dooplex.hu/admin/felhom-controller/internal/util"
)
// DBType represents a database engine type.
@@ -61,14 +63,22 @@ type DumpFileInfo struct {
}
// DiscoverDatabases finds running database containers via docker ps.
func DiscoverDatabases(ctx context.Context, logger *log.Logger) ([]DiscoveredDB, error) {
func DiscoverDatabases(ctx context.Context, logger *log.Logger, debug bool) ([]DiscoveredDB, error) {
if debug {
logger.Printf("[DEBUG] DiscoverDatabases: running docker ps to find database containers")
}
cmd := exec.CommandContext(ctx, "docker", "ps", "--format", "{{.ID}}\t{{.Names}}\t{{.Image}}", "--filter", "status=running")
out, err := cmd.Output()
if err != nil {
return nil, fmt.Errorf("docker ps failed: %w", err)
}
if debug {
logger.Printf("[DEBUG] DiscoverDatabases: docker ps output: %s", util.TruncateStr(strings.TrimSpace(string(out)), 500))
}
var dbs []DiscoveredDB
var skipped int
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
if line == "" {
@@ -87,9 +97,17 @@ func DiscoverDatabases(ctx context.Context, logger *log.Logger) ([]DiscoveredDB,
} else if strings.Contains(image, "mariadb") || strings.Contains(image, "mysql") {
dbType = DBTypeMariaDB
} else {
if debug {
logger.Printf("[DEBUG] DiscoverDatabases: skipping container %s (image=%s, not a database)", name, image)
}
skipped++
continue
}
if debug {
logger.Printf("[DEBUG] DiscoverDatabases: found %s container: %s (id=%s)", dbType, name, id[:12])
}
db := DiscoveredDB{
ContainerID: id,
ContainerName: name,
@@ -100,33 +118,49 @@ func DiscoverDatabases(ctx context.Context, logger *log.Logger) ([]DiscoveredDB,
// Get env vars from container
if err := populateDBEnv(ctx, &db); err != nil {
logger.Printf("[WARN] Could not read env vars for %s: %v", name, err)
if debug {
logger.Printf("[DEBUG] DiscoverDatabases: skipping %s — env read failed", name)
}
continue
}
if debug {
logger.Printf("[DEBUG] DiscoverDatabases: %s → stack=%s, dbUser=%s, dbName=%s", name, db.StackName, db.DBUser, db.DBName)
}
dbs = append(dbs, db)
}
if debug {
logger.Printf("[DEBUG] DiscoverDatabases: found %d database(s), skipped %d non-DB container(s)", len(dbs), skipped)
}
return dbs, nil
}
// DumpAll dumps all discovered databases.
func DumpAll(ctx context.Context, dbs []DiscoveredDB, dumpDir string, logger *log.Logger) []DumpResult {
func DumpAll(ctx context.Context, dbs []DiscoveredDB, dumpDir string, logger *log.Logger, debug bool) []DumpResult {
// Clean up old .tmp files (older than 1 hour)
cleanupTmpFiles(dumpDir, logger)
var results []DumpResult
for _, db := range dbs {
result := DumpOne(ctx, db, dumpDir, logger)
result := DumpOne(ctx, db, dumpDir, logger, debug)
results = append(results, result)
}
return results
}
// DumpOne dumps a single database.
func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.Logger) DumpResult {
func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.Logger, debug bool) DumpResult {
start := time.Now()
result := DumpResult{DB: db}
if debug {
logger.Printf("[DEBUG] DumpOne: starting dump for container=%s, stack=%s, dbType=%s, dumpDir=%s",
db.ContainerName, db.StackName, db.DBType, dumpDir)
}
// Ensure dump directory exists
if err := os.MkdirAll(dumpDir, 0755); err != nil {
result.Error = fmt.Errorf("creating dump dir: %w", err)
@@ -148,6 +182,9 @@ func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.L
if err != nil || strings.TrimSpace(string(checkOut)) != "true" {
result.Error = fmt.Errorf("container %s no longer running", db.ContainerName)
result.Duration = time.Since(start)
if debug {
logger.Printf("[DEBUG] DumpOne: container %s is no longer running — skipping", db.ContainerName)
}
return result
}
@@ -158,14 +195,29 @@ func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.L
cmd = exec.CommandContext(dumpCtx, "docker", "exec", db.ContainerID,
"pg_dump", "-U", db.DBUser, "-d", db.DBName,
"--clean", "--if-exists", "--no-owner", "--no-privileges")
if debug {
logger.Printf("[DEBUG] DumpOne: pg_dump command: docker exec %s pg_dump -U %s -d %s --clean --if-exists --no-owner --no-privileges",
db.ContainerID[:12], db.DBUser, db.DBName)
}
case DBTypeMariaDB:
// Get root password from container env
password := getMariaDBPassword(dumpCtx, db.ContainerID)
if password == "" {
result.Error = fmt.Errorf("could not determine MariaDB root password for %s", db.ContainerName)
result.Duration = time.Since(start)
if debug {
logger.Printf("[DEBUG] DumpOne: MariaDB root password not found for %s — skipping", db.ContainerName)
}
return result
}
cmd = exec.CommandContext(dumpCtx, "docker", "exec", db.ContainerID,
"mariadb-dump", "-u", "root", "-p***",
"--single-transaction", "--routines", "--triggers", db.DBName)
if debug {
logger.Printf("[DEBUG] DumpOne: mariadb-dump command: docker exec %s mariadb-dump -u root -p*** --single-transaction --routines --triggers %s",
db.ContainerID[:12], db.DBName)
}
// Actual command with real password (not logged)
cmd = exec.CommandContext(dumpCtx, "docker", "exec", db.ContainerID,
"mariadb-dump", "-u", "root", "-p"+password,
"--single-transaction", "--routines", "--triggers", db.DBName)
@@ -198,6 +250,9 @@ func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.L
}
result.Error = fmt.Errorf("dump failed: %v — %s", err, errMsg)
result.Duration = time.Since(start)
if debug {
logger.Printf("[DEBUG] DumpOne: dump command failed for %s: %v", db.ContainerName, result.Error)
}
return result
}
@@ -207,6 +262,9 @@ func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.L
os.Remove(tmpPath)
result.Error = fmt.Errorf("dump produced empty file for %s", db.ContainerName)
result.Duration = time.Since(start)
if debug {
logger.Printf("[DEBUG] DumpOne: dump produced empty file for %s", db.ContainerName)
}
return result
}
@@ -225,6 +283,12 @@ func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.L
// Run validation on the dump file
result.Validation = ValidateDump(finalPath, db.DBType)
if debug {
logger.Printf("[DEBUG] DumpOne: completed %s → %s (size=%s, valid=%v, tables=%d, duration=%s)",
db.ContainerName, filename, humanizeBytes(stat.Size()),
result.Validation.Valid, result.Validation.TableCount, result.Duration.Round(time.Millisecond))
}
logger.Printf("[INFO] DB dump: %s → %s (%s, %s, %d tables)", db.ContainerName, filename,
humanizeBytes(stat.Size()), result.Duration.Round(time.Millisecond), result.Validation.TableCount)