v0.24.0 — Pre-testing observability: debug logging, diagnostic dump, startup self-test

- Add [DEBUG] logging across all modules (backup, storage, sync, selfupdate,
  monitor, notify, report, assets, setup) gated behind logging.level: "debug"
- Add /api/debug/dump endpoint returning full controller state JSON (debug only)
- Add startup self-test validating 9 subsystems (Docker, dirs, storage, hub,
  restic repos, metrics DB) with pass/warn/fail summary
- New packages: internal/selftest, internal/util
- Constructor/signature changes: debug bool params, logger params on
  RunHealthCheck and BuildReport, smart watchdog probe logging

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-21 18:32:26 +01:00
parent 6f02536243
commit be7803c0ac
30 changed files with 1281 additions and 67 deletions
+65 -5
View File
@@ -190,6 +190,15 @@ func (m *Manager) groupStacksByDrive() map[string][]StackSummary {
drive := m.GetAppDrivePath(stack.Name)
result[drive] = append(result[drive], stack)
}
if m.isDebug() {
for drive, stacks := range result {
names := make([]string, len(stacks))
for i, s := range stacks {
names[i] = s.Name
}
m.logger.Printf("[DEBUG] groupStacksByDrive: %s → [%s]", drive, strings.Join(names, ", "))
}
}
return result
}
@@ -197,10 +206,18 @@ func (m *Manager) groupStacksByDrive() map[string][]StackSummary {
// activeDrives returns the sorted drive paths that host at least one stack,
// i.e. the keys of the map produced by groupStacksByDrive.
//
// Despite the name, disconnected or decommissioned drives are NOT filtered
// out of the result; they are only tallied for the debug log line. The tally
// is computed inside the debug branch so that normal (non-debug) runs do not
// pay for settings lookups whose result would be thrown away.
func (m *Manager) activeDrives() []string {
	groups := m.groupStacksByDrive()

	// Keep returning a nil slice for an empty grouping, exactly as before.
	var drives []string
	for d := range groups {
		drives = append(drives, d)
	}
	sort.Strings(drives)

	if m.isDebug() {
		unavailable := 0
		// settings may be nil; in that case nothing is reported as unavailable.
		if m.settings != nil {
			for _, d := range drives {
				if m.settings.IsDisconnected(d) || m.settings.IsDecommissioned(d) {
					unavailable++
				}
			}
		}
		m.logger.Printf("[DEBUG] activeDrives: %d total (%s), %d disconnected/decommissioned",
			len(drives), strings.Join(drives, ", "), unavailable)
	}
	return drives
}
@@ -218,7 +235,7 @@ func (m *Manager) runDBDumpsInternal(ctx context.Context) error {
start := time.Now()
m.logger.Printf("[INFO] Starting database dump run")
dbs, err := DiscoverDatabases(ctx, m.logger)
dbs, err := DiscoverDatabases(ctx, m.logger, m.isDebug())
if err != nil {
m.logger.Printf("[ERROR] Database discovery failed: %v", err)
return err
@@ -261,7 +278,7 @@ func (m *Manager) runDBDumpsInternal(ctx context.Context) error {
dumpDir := AppDBDumpPath(drivePath, db.StackName)
result := DumpOne(ctx, db, dumpDir, m.logger)
result := DumpOne(ctx, db, dumpDir, m.logger, m.isDebug())
results = append(results, result)
if result.Error != nil {
@@ -354,6 +371,9 @@ func (m *Manager) runBackupInternal(ctx context.Context) error {
driveCount := 0
for drivePath, stacks := range driveStacks {
if m.isDebug() {
m.logger.Printf("[DEBUG] runBackupInternal: processing drive %s (%d stacks)", drivePath, len(stacks))
}
result, err := m.backupDrive(ctx, drivePath, stacks, infraPaths)
if err != nil {
anyErr = err
@@ -473,6 +493,13 @@ func (m *Manager) backupDrive(ctx context.Context, drivePath string, stacks []St
// Deduplicate paths
paths = dedup(paths)
if m.isDebug() {
m.logger.Printf("[DEBUG] backupDrive %s: repo=%s, %d include paths:", drivePath, repoPath, len(paths))
for _, p := range paths {
m.logger.Printf("[DEBUG] %s", p)
}
}
tags := []string{"felhom", m.cfg.Customer.ID, filepath.Base(drivePath)}
m.logger.Printf("[INFO] Backing up drive %s (%d apps, %d paths)", drivePath, len(stacks), len(paths))
@@ -549,15 +576,27 @@ func (m *Manager) RunIntegrityCheck(ctx context.Context) error {
return nil
}
if m.isDebug() {
m.logger.Printf("[DEBUG] RunIntegrityCheck: checking %d drives", len(drives))
}
var checkErr error
for _, drive := range drives {
repoPath := PrimaryResticRepoPath(drive)
if !m.restic.RepoExists(repoPath) {
if m.isDebug() {
m.logger.Printf("[DEBUG] RunIntegrityCheck: skipping %s (repo does not exist)", repoPath)
}
continue
}
if m.isDebug() {
m.logger.Printf("[DEBUG] RunIntegrityCheck: checking repo %s", repoPath)
}
if err := m.restic.Check(repoPath); err != nil {
m.logger.Printf("[ERROR] Restic check failed for %s: %v", repoPath, err)
checkErr = err
} else if m.isDebug() {
m.logger.Printf("[DEBUG] RunIntegrityCheck: repo %s OK", repoPath)
}
}
@@ -587,12 +626,28 @@ func (m *Manager) RunFullBackup(ctx context.Context) error {
}
defer m.releaseRunning()
if m.isDebug() {
drives := m.activeDrives()
driveStacks := m.groupStacksByDrive()
totalStacks := 0
for _, s := range driveStacks {
totalStacks += len(s)
}
m.logger.Printf("[DEBUG] RunFullBackup: starting full backup — %d active drives, %d stacks", len(drives), totalStacks)
}
// Step 1: DB dumps
if m.isDebug() {
m.logger.Printf("[DEBUG] RunFullBackup: phase 1 — database dumps")
}
if err := m.runDBDumpsInternal(ctx); err != nil {
m.logger.Printf("[WARN] DB dump had errors, continuing with backup anyway")
}
// Step 2: Restic backup
if m.isDebug() {
m.logger.Printf("[DEBUG] RunFullBackup: phase 2 — restic snapshots")
}
return m.runBackupInternal(ctx)
}
@@ -737,7 +792,7 @@ func (m *Manager) GetStackHDDMounts(name string) []string {
// DumpStackDB runs a database dump for containers belonging to a specific stack.
// Dumps to the stack's home drive: <drive>/backups/primary/<stack>/db-dumps/.
func (m *Manager) DumpStackDB(ctx context.Context, stackName string) error {
dbs, err := DiscoverDatabases(ctx, m.logger)
dbs, err := DiscoverDatabases(ctx, m.logger, m.isDebug())
if err != nil {
return fmt.Errorf("database discovery failed: %w", err)
}
@@ -762,7 +817,7 @@ func (m *Manager) DumpStackDB(ctx context.Context, stackName string) error {
m.logger.Printf("[INFO] Running pre-backup DB dump for %s (%d database(s)) → %s", stackName, len(stackDBs), dumpDir)
for _, db := range stackDBs {
result := DumpOne(ctx, db, dumpDir, m.logger)
result := DumpOne(ctx, db, dumpDir, m.logger, m.isDebug())
if result.Error != nil {
return fmt.Errorf("DB dump failed for %s: %w", result.DB.ContainerName, result.Error)
}
@@ -1019,7 +1074,7 @@ func (m *Manager) RefreshCache(nextDBDump, nextBackup time.Time) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
if dbs, err := DiscoverDatabases(ctx, m.logger); err == nil {
if dbs, err := DiscoverDatabases(ctx, m.logger, m.isDebug()); err == nil {
status.DiscoveredDBs = dbs
}
@@ -1172,6 +1227,11 @@ func (m *Manager) GetFullStatus(nextDBDump, nextBackup time.Time) *FullBackupSta
}
}
// isDebug reports whether the configured logging level is exactly "debug".
// Manager methods use it to gate their [DEBUG] log lines.
func (m *Manager) isDebug() bool {
	const debugLevel = "debug"
	return m.cfg.Logging.Level == debugLevel
}
func dbNames(dbs []DiscoveredDB) string {
var names []string
for _, db := range dbs {
+101 -2
View File
@@ -13,6 +13,7 @@ import (
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
"gitea.dooplex.hu/admin/felhom-controller/internal/util"
)
// DBDumper can run a database dump for a specific stack.
@@ -29,12 +30,13 @@ type CrossDriveRunner struct {
stacksDir string // path to stacks dir (for infra backup)
controllerYAMLPath string // path to controller.yaml (for infra backup)
logger *log.Logger
debug bool
mu sync.Mutex
running map[string]bool // per-app running state
}
// NewCrossDriveRunner creates a new CrossDriveRunner.
func NewCrossDriveRunner(sett *settings.Settings, provider StackDataProvider, systemDataPath, stacksDir string, logger *log.Logger) *CrossDriveRunner {
func NewCrossDriveRunner(sett *settings.Settings, provider StackDataProvider, systemDataPath, stacksDir string, logger *log.Logger, debug bool) *CrossDriveRunner {
return &CrossDriveRunner{
sett: sett,
stackProvider: provider,
@@ -42,6 +44,7 @@ func NewCrossDriveRunner(sett *settings.Settings, provider StackDataProvider, sy
stacksDir: stacksDir,
controllerYAMLPath: "/opt/docker/felhom-controller/controller.yaml",
logger: logger,
debug: debug,
running: make(map[string]bool),
}
}
@@ -67,6 +70,11 @@ func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) e
return fmt.Errorf("cross-drive backup not configured or disabled for %s", stackName)
}
if r.debug {
r.logger.Printf("[DEBUG] RunAppBackup: starting for %s, dest=%s, schedule=%s, method=%s",
stackName, cfg.DestinationPath, cfg.Schedule, cfg.Method)
}
// Prevent concurrent runs for the same app
r.mu.Lock()
if r.running[stackName] {
@@ -84,12 +92,18 @@ func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) e
// Check if source or destination drive is disconnected
srcDrive := r.stackProvider.GetStackHDDPath(stackName)
if srcDrive != "" && r.sett.IsDisconnected(srcDrive) {
if r.debug {
r.logger.Printf("[DEBUG] RunAppBackup: source drive disconnected for %s: %s", stackName, srcDrive)
}
r.mu.Lock()
r.running[stackName] = false
r.mu.Unlock()
return fmt.Errorf("source drive disconnected: %s", srcDrive)
}
if r.sett.IsDisconnected(cfg.DestinationPath) {
if r.debug {
r.logger.Printf("[DEBUG] RunAppBackup: destination drive disconnected for %s: %s", stackName, cfg.DestinationPath)
}
r.mu.Lock()
r.running[stackName] = false
r.mu.Unlock()
@@ -107,6 +121,9 @@ func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) e
// Trigger fresh DB dump for this app before cross-drive backup
if r.dbDumper != nil {
if r.debug {
r.logger.Printf("[DEBUG] RunAppBackup: triggering pre-backup DB dump for %s", stackName)
}
if err := r.dbDumper.DumpStackDB(ctx, stackName); err != nil {
r.logger.Printf("[WARN] Pre-backup DB dump failed for %s: %v — proceeding with user data backup", stackName, err)
// Non-fatal: user data backup is still valuable without fresh dump
@@ -120,6 +137,9 @@ func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) e
// Resolve HDD mounts for this app (may be empty for config-only apps)
mounts := r.stackProvider.GetStackHDDMounts(stackName)
if r.debug {
r.logger.Printf("[DEBUG] RunAppBackup: %s has %d HDD mount(s): %v", stackName, len(mounts), mounts)
}
// Safety: destination must not overlap with any source
for _, m := range mounts {
@@ -145,6 +165,9 @@ func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) e
destDir := AppSecondaryRsyncPath(cfg.DestinationPath, stackName)
if sz, err := dirSizeBytes(destDir); err == nil {
sizeHuman = humanizeBytes(sz)
if r.debug {
r.logger.Printf("[DEBUG] RunAppBackup: %s backup size at destination: %s", stackName, sizeHuman)
}
}
r.logger.Printf("[INFO] Cross-drive backup completed: %s (%s)", stackName, duration.Round(time.Second))
@@ -155,6 +178,10 @@ func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) e
// RunAllScheduled runs cross-drive backups for all apps matching the schedule.
// Runs sequentially (disk I/O bound).
func (r *CrossDriveRunner) RunAllScheduled(ctx context.Context, schedule string) error {
if r.debug {
r.logger.Printf("[DEBUG] RunAllScheduled: starting for schedule=%s", schedule)
}
// Auto-enable Tier 2 for small apps (no HDD mounts) before running backups
r.AutoEnableSmallApps()
@@ -163,18 +190,39 @@ func (r *CrossDriveRunner) RunAllScheduled(ctx context.Context, schedule string)
configs := r.sett.GetAllCrossDriveConfigs()
if len(configs) == 0 {
if r.debug {
r.logger.Printf("[DEBUG] RunAllScheduled: no cross-drive configs found")
}
return nil
}
if r.debug {
r.logger.Printf("[DEBUG] RunAllScheduled: %d total cross-drive config(s) found", len(configs))
}
var errs []string
var scheduled, skippedDisabled, skippedWrongSchedule int
for stackName, cfg := range configs {
if !cfg.Enabled {
if r.debug {
r.logger.Printf("[DEBUG] RunAllScheduled: skipping %s — disabled", stackName)
}
skippedDisabled++
continue
}
if cfg.Schedule != schedule {
if r.debug {
r.logger.Printf("[DEBUG] RunAllScheduled: skipping %s — schedule mismatch (has=%s, want=%s)", stackName, cfg.Schedule, schedule)
}
skippedWrongSchedule++
continue
}
if r.debug {
r.logger.Printf("[DEBUG] RunAllScheduled: queuing %s for backup (dest=%s)", stackName, cfg.DestinationPath)
}
scheduled++
select {
case <-ctx.Done():
return ctx.Err()
@@ -186,6 +234,11 @@ func (r *CrossDriveRunner) RunAllScheduled(ctx context.Context, schedule string)
}
}
if r.debug {
r.logger.Printf("[DEBUG] RunAllScheduled: done — %d scheduled, %d disabled, %d wrong schedule, %d errors",
scheduled, skippedDisabled, skippedWrongSchedule, len(errs))
}
if len(errs) > 0 {
return fmt.Errorf("cross-drive backup errors: %s", strings.Join(errs, "; "))
}
@@ -216,6 +269,9 @@ func (r *CrossDriveRunner) AnyRunning() bool {
// (≥10 GB free, <90% used) to protect OS stability; external drives just need
// ≥100 MB. Non-mount-point destinations are allowed with a logged warning.
func (r *CrossDriveRunner) ValidateDestination(path string) error {
if r.debug {
r.logger.Printf("[DEBUG] ValidateDestination: checking path=%s", path)
}
if path == "" {
return fmt.Errorf("destination path is empty")
}
@@ -226,6 +282,9 @@ func (r *CrossDriveRunner) ValidateDestination(path string) error {
return fmt.Errorf("destination %s does not exist", path)
}
onSystemDrive := !system.IsMountPoint(path)
if r.debug {
r.logger.Printf("[DEBUG] ValidateDestination: path=%s, isMountPoint=%v", path, !onSystemDrive)
}
if onSystemDrive {
r.logger.Printf("[WARN] Destination %s is not a separate mount point (system drive) — backup will proceed but data is not protected against drive failure", path)
}
@@ -237,6 +296,10 @@ func (r *CrossDriveRunner) ValidateDestination(path string) error {
r.logger.Printf("[WARN] Cannot determine disk usage for %s — proceeding without space verification", path)
return nil
}
if r.debug {
r.logger.Printf("[DEBUG] ValidateDestination: path=%s, availGB=%.1f, usedPct=%.0f%%, onSystemDrive=%v",
path, di.AvailGB, di.UsedPercent, onSystemDrive)
}
if onSystemDrive {
// System drive: protect OS stability — require ≥10 GB free and <90% used
if di.AvailGB < 10 {
@@ -251,6 +314,9 @@ func (r *CrossDriveRunner) ValidateDestination(path string) error {
return fmt.Errorf("destination %s has insufficient free space (%.1f GB free)", path, di.AvailGB)
}
}
if r.debug {
r.logger.Printf("[DEBUG] ValidateDestination: path=%s passed all checks", path)
}
return nil
}
@@ -258,6 +324,9 @@ func (r *CrossDriveRunner) ValidateDestination(path string) error {
func (r *CrossDriveRunner) runRsyncBackup(ctx context.Context, stackName, destBase string, mounts []string) error {
destDir := AppSecondaryRsyncPath(destBase, stackName)
if r.debug {
r.logger.Printf("[DEBUG] runRsyncBackup: stack=%s, destBase=%s, destDir=%s, %d mount(s)", stackName, destBase, destDir, len(mounts))
}
if err := os.MkdirAll(destDir, 0755); err != nil {
return fmt.Errorf("creating rsync dest dir: %w", err)
}
@@ -296,9 +365,16 @@ func (r *CrossDriveRunner) runRsyncBackup(ctx context.Context, stackName, destBa
"--exclude", "backups/*.dump",
src, dst)
r.logger.Printf("[DEBUG] rsync: %s → %s", src, dst)
if out, err := cmd.CombinedOutput(); err != nil {
out, err := cmd.CombinedOutput()
if err != nil {
if r.debug {
r.logger.Printf("[DEBUG] runRsyncBackup: rsync failed for %s: %s", srcMount, util.TruncateStr(strings.TrimSpace(string(out)), 500))
}
return fmt.Errorf("rsync failed for %s: %v (%s)", srcMount, err, strings.TrimSpace(string(out)))
}
if r.debug {
r.logger.Printf("[DEBUG] runRsyncBackup: rsync OK for mount %s → %s", src, dst)
}
}
// --- Copy DB dumps for this stack from its home drive ---
@@ -423,20 +499,35 @@ func (r *CrossDriveRunner) syncInfraConfig(ctx context.Context) {
func (r *CrossDriveRunner) AutoEnableSmallApps() {
storagePaths := r.sett.GetStoragePaths()
if len(storagePaths) < 2 {
if r.debug {
r.logger.Printf("[DEBUG] AutoEnableSmallApps: fewer than 2 storage paths (%d) — skipping", len(storagePaths))
}
return // no secondary drive available
}
deployed := r.stackProvider.ListDeployedStacks()
existingConfigs := r.sett.GetAllCrossDriveConfigs()
if r.debug {
r.logger.Printf("[DEBUG] AutoEnableSmallApps: %d deployed stacks, %d existing configs, %d storage paths",
len(deployed), len(existingConfigs), len(storagePaths))
}
var autoEnabled int
for _, stack := range deployed {
// Skip if already has cross-drive config (user has touched it)
if _, exists := existingConfigs[stack.Name]; exists {
if r.debug {
r.logger.Printf("[DEBUG] AutoEnableSmallApps: skipping %s — already has cross-drive config", stack.Name)
}
continue
}
// Skip if app has HDD mounts (large user data — needs manual config)
if mounts := r.stackProvider.GetStackHDDMounts(stack.Name); len(mounts) > 0 {
if r.debug {
r.logger.Printf("[DEBUG] AutoEnableSmallApps: skipping %s — has %d HDD mount(s)", stack.Name, len(mounts))
}
continue
}
@@ -450,6 +541,9 @@ func (r *CrossDriveRunner) AutoEnableSmallApps() {
}
}
if destPath == "" {
if r.debug {
r.logger.Printf("[DEBUG] AutoEnableSmallApps: skipping %s — no suitable destination found", stack.Name)
}
continue // no suitable destination found
}
@@ -464,8 +558,13 @@ func (r *CrossDriveRunner) AutoEnableSmallApps() {
r.logger.Printf("[WARN] Auto-enable Tier 2 failed for %s: %v", stack.Name, err)
continue
}
autoEnabled++
r.logger.Printf("[INFO] Auto-enabled Tier 2 backup for %s → %s (no HDD mounts, daily rsync)", stack.Name, destPath)
}
if r.debug && autoEnabled > 0 {
r.logger.Printf("[DEBUG] AutoEnableSmallApps: auto-enabled %d app(s)", autoEnabled)
}
}
// --- helpers ---
+68 -4
View File
@@ -11,6 +11,8 @@ import (
"path/filepath"
"strings"
"time"
"gitea.dooplex.hu/admin/felhom-controller/internal/util"
)
// DBType represents a database engine type.
@@ -61,14 +63,22 @@ type DumpFileInfo struct {
}
// DiscoverDatabases finds running database containers via docker ps.
func DiscoverDatabases(ctx context.Context, logger *log.Logger) ([]DiscoveredDB, error) {
func DiscoverDatabases(ctx context.Context, logger *log.Logger, debug bool) ([]DiscoveredDB, error) {
if debug {
logger.Printf("[DEBUG] DiscoverDatabases: running docker ps to find database containers")
}
cmd := exec.CommandContext(ctx, "docker", "ps", "--format", "{{.ID}}\t{{.Names}}\t{{.Image}}", "--filter", "status=running")
out, err := cmd.Output()
if err != nil {
return nil, fmt.Errorf("docker ps failed: %w", err)
}
if debug {
logger.Printf("[DEBUG] DiscoverDatabases: docker ps output: %s", util.TruncateStr(strings.TrimSpace(string(out)), 500))
}
var dbs []DiscoveredDB
var skipped int
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
if line == "" {
@@ -87,9 +97,17 @@ func DiscoverDatabases(ctx context.Context, logger *log.Logger) ([]DiscoveredDB,
} else if strings.Contains(image, "mariadb") || strings.Contains(image, "mysql") {
dbType = DBTypeMariaDB
} else {
if debug {
logger.Printf("[DEBUG] DiscoverDatabases: skipping container %s (image=%s, not a database)", name, image)
}
skipped++
continue
}
if debug {
logger.Printf("[DEBUG] DiscoverDatabases: found %s container: %s (id=%s)", dbType, name, id[:12])
}
db := DiscoveredDB{
ContainerID: id,
ContainerName: name,
@@ -100,33 +118,49 @@ func DiscoverDatabases(ctx context.Context, logger *log.Logger) ([]DiscoveredDB,
// Get env vars from container
if err := populateDBEnv(ctx, &db); err != nil {
logger.Printf("[WARN] Could not read env vars for %s: %v", name, err)
if debug {
logger.Printf("[DEBUG] DiscoverDatabases: skipping %s — env read failed", name)
}
continue
}
if debug {
logger.Printf("[DEBUG] DiscoverDatabases: %s → stack=%s, dbUser=%s, dbName=%s", name, db.StackName, db.DBUser, db.DBName)
}
dbs = append(dbs, db)
}
if debug {
logger.Printf("[DEBUG] DiscoverDatabases: found %d database(s), skipped %d non-DB container(s)", len(dbs), skipped)
}
return dbs, nil
}
// DumpAll dumps all discovered databases.
//
// It first removes stale temporary files from dumpDir, then calls DumpOne for
// every entry of dbs in order and collects one DumpResult per database. The
// loop never stops early: a failed dump is only visible through the Error
// field of its DumpResult. ctx, logger and debug are passed through to
// DumpOne unchanged. The returned slice is nil when dbs is empty.
func DumpAll(ctx context.Context, dbs []DiscoveredDB, dumpDir string, logger *log.Logger) []DumpResult {
func DumpAll(ctx context.Context, dbs []DiscoveredDB, dumpDir string, logger *log.Logger, debug bool) []DumpResult {
// Clean up old .tmp files (older than 1 hour)
cleanupTmpFiles(dumpDir, logger)
var results []DumpResult
for _, db := range dbs {
result := DumpOne(ctx, db, dumpDir, logger)
result := DumpOne(ctx, db, dumpDir, logger, debug)
results = append(results, result)
}
return results
}
// DumpOne dumps a single database.
func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.Logger) DumpResult {
func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.Logger, debug bool) DumpResult {
start := time.Now()
result := DumpResult{DB: db}
if debug {
logger.Printf("[DEBUG] DumpOne: starting dump for container=%s, stack=%s, dbType=%s, dumpDir=%s",
db.ContainerName, db.StackName, db.DBType, dumpDir)
}
// Ensure dump directory exists
if err := os.MkdirAll(dumpDir, 0755); err != nil {
result.Error = fmt.Errorf("creating dump dir: %w", err)
@@ -148,6 +182,9 @@ func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.L
if err != nil || strings.TrimSpace(string(checkOut)) != "true" {
result.Error = fmt.Errorf("container %s no longer running", db.ContainerName)
result.Duration = time.Since(start)
if debug {
logger.Printf("[DEBUG] DumpOne: container %s is no longer running — skipping", db.ContainerName)
}
return result
}
@@ -158,14 +195,29 @@ func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.L
cmd = exec.CommandContext(dumpCtx, "docker", "exec", db.ContainerID,
"pg_dump", "-U", db.DBUser, "-d", db.DBName,
"--clean", "--if-exists", "--no-owner", "--no-privileges")
if debug {
logger.Printf("[DEBUG] DumpOne: pg_dump command: docker exec %s pg_dump -U %s -d %s --clean --if-exists --no-owner --no-privileges",
db.ContainerID[:12], db.DBUser, db.DBName)
}
case DBTypeMariaDB:
// Get root password from container env
password := getMariaDBPassword(dumpCtx, db.ContainerID)
if password == "" {
result.Error = fmt.Errorf("could not determine MariaDB root password for %s", db.ContainerName)
result.Duration = time.Since(start)
if debug {
logger.Printf("[DEBUG] DumpOne: MariaDB root password not found for %s — skipping", db.ContainerName)
}
return result
}
cmd = exec.CommandContext(dumpCtx, "docker", "exec", db.ContainerID,
"mariadb-dump", "-u", "root", "-p***",
"--single-transaction", "--routines", "--triggers", db.DBName)
if debug {
logger.Printf("[DEBUG] DumpOne: mariadb-dump command: docker exec %s mariadb-dump -u root -p*** --single-transaction --routines --triggers %s",
db.ContainerID[:12], db.DBName)
}
// Actual command with real password (not logged)
cmd = exec.CommandContext(dumpCtx, "docker", "exec", db.ContainerID,
"mariadb-dump", "-u", "root", "-p"+password,
"--single-transaction", "--routines", "--triggers", db.DBName)
@@ -198,6 +250,9 @@ func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.L
}
result.Error = fmt.Errorf("dump failed: %v — %s", err, errMsg)
result.Duration = time.Since(start)
if debug {
logger.Printf("[DEBUG] DumpOne: dump command failed for %s: %v", db.ContainerName, result.Error)
}
return result
}
@@ -207,6 +262,9 @@ func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.L
os.Remove(tmpPath)
result.Error = fmt.Errorf("dump produced empty file for %s", db.ContainerName)
result.Duration = time.Since(start)
if debug {
logger.Printf("[DEBUG] DumpOne: dump produced empty file for %s", db.ContainerName)
}
return result
}
@@ -225,6 +283,12 @@ func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.L
// Run validation on the dump file
result.Validation = ValidateDump(finalPath, db.DBType)
if debug {
logger.Printf("[DEBUG] DumpOne: completed %s → %s (size=%s, valid=%v, tables=%d, duration=%s)",
db.ContainerName, filename, humanizeBytes(stat.Size()),
result.Validation.Valid, result.Validation.TableCount, result.Duration.Round(time.Millisecond))
}
logger.Printf("[INFO] DB dump: %s → %s (%s, %s, %d tables)", db.ContainerName, filename,
humanizeBytes(stat.Size()), result.Duration.Round(time.Millisecond), result.Validation.TableCount)
+11 -1
View File
@@ -24,12 +24,16 @@ type InfraMetadata struct {
// WriteLocalInfraBackup writes the infra backup to .felhom-infra-backup/ on each drive.
// Individual drive failures are logged but not returned — the function is best-effort.
func WriteLocalInfraBackup(backupJSON []byte, customerID, controllerVersion, timestamp string, drives []string, logger *log.Logger) {
func WriteLocalInfraBackup(backupJSON []byte, customerID, controllerVersion, timestamp string, drives []string, logger *log.Logger, debug bool) {
if len(drives) == 0 {
logger.Printf("[DEBUG] No drives configured for local infra backup")
return
}
if debug {
logger.Printf("[DEBUG] WriteLocalInfraBackup: payload size=%d bytes, %d target drive(s): %v", len(backupJSON), len(drives), drives)
}
// Compute checksum of backup data
hash := sha256.Sum256(backupJSON)
checksum := hex.EncodeToString(hash[:])
@@ -51,10 +55,16 @@ func WriteLocalInfraBackup(backupJSON []byte, customerID, controllerVersion, tim
written := 0
for _, drive := range drives {
dir := InfraBackupDir(drive)
if debug {
logger.Printf("[DEBUG] WriteLocalInfraBackup: writing to drive=%s, dir=%s", drive, dir)
}
if err := writeInfraToDir(dir, backupJSON, metaJSON); err != nil {
logger.Printf("[WARN] Local infra backup: failed to write to %s: %v", drive, err)
continue
}
if debug {
logger.Printf("[DEBUG] WriteLocalInfraBackup: write OK to %s", drive)
}
written++
}
@@ -20,7 +20,7 @@ func TestWriteAndReadLocalInfraBackup(t *testing.T) {
backupJSON := []byte(`{"customer_id":"test-123","domain":"test.hu","controller_version":"v0.21.0","timestamp":"2026-02-21T10:00:00Z"}`)
logger := testLogger(t)
WriteLocalInfraBackup(backupJSON, "test-123", "v0.21.0", "2026-02-21T10:00:00Z", []string{drive}, logger)
WriteLocalInfraBackup(backupJSON, "test-123", "v0.21.0", "2026-02-21T10:00:00Z", []string{drive}, logger, false)
// Verify files exist
dir := InfraBackupDir(drive)
@@ -124,7 +124,7 @@ func TestWriteLocalInfraBackup_MultipleDrives(t *testing.T) {
backupJSON := []byte(`{"test":"multi"}`)
logger := testLogger(t)
WriteLocalInfraBackup(backupJSON, "multi-test", "v1.0", "2026-01-01T00:00:00Z", drives, logger)
WriteLocalInfraBackup(backupJSON, "multi-test", "v1.0", "2026-01-01T00:00:00Z", drives, logger, false)
// All 3 should succeed
for _, d := range drives {
@@ -142,7 +142,7 @@ func TestWriteLocalInfraBackup_MultipleDrives(t *testing.T) {
// TestWriteLocalInfraBackup_NoDrives calls WriteLocalInfraBackup with a nil
// drive list. The test has no assertions; it passes as long as the call
// returns without panicking.
func TestWriteLocalInfraBackup_NoDrives(t *testing.T) {
logger := testLogger(t)
// Should not panic: with zero drives the function logs a message and returns early.
WriteLocalInfraBackup([]byte(`{}`), "test", "v1.0", "2026-01-01T00:00:00Z", nil, logger)
WriteLocalInfraBackup([]byte(`{}`), "test", "v1.0", "2026-01-01T00:00:00Z", nil, logger, false)
}
func contains(s, substr string) bool {
+36
View File
@@ -21,6 +21,10 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error {
return fmt.Errorf("invalid snapshot ID: must be 8-64 lowercase hex characters")
}
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: stack=%s, snapshotID=%s", stackName, snapshotID)
}
// Prevent concurrent operations
m.mu.Lock()
if m.running {
@@ -39,6 +43,10 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error {
hddMounts := m.stackProvider.GetStackHDDMounts(stackName)
hasHDD := len(hddMounts) > 0
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: %s has %d HDD mount(s), hasHDD=%v", stackName, len(hddMounts), hasHDD)
}
// Build list of paths to restore from the snapshot
var restorePaths []string
@@ -47,16 +55,25 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error {
if ok {
stackDir := filepath.Dir(composePath)
restorePaths = append(restorePaths, stackDir)
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: will restore config dir: %s", stackDir)
}
}
// Restore DB dump files for this stack (per-drive path)
drivePath := m.GetAppDrivePath(stackName)
dumpDir := AppDBDumpPath(drivePath, stackName)
restorePaths = append(restorePaths, dumpDir)
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: will restore DB dump dir: %s", dumpDir)
}
// Restore HDD data (always included for apps that have it — backup is mandatory)
if hasHDD {
restorePaths = append(restorePaths, hddMounts...)
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: will restore HDD data: %v", hddMounts)
}
}
if len(restorePaths) == 0 {
@@ -66,17 +83,30 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error {
// Use the app's primary restic repo
repoPath := PrimaryResticRepoPath(drivePath)
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: using repo=%s, %d restore path(s)", repoPath, len(restorePaths))
}
m.logger.Printf("[WARN] RESTORE starting: stack=%s, snapshot=%s, repo=%s, paths=%v, hasHDD=%v",
stackName, snapshotID, repoPath, restorePaths, hasHDD)
// Stop the app before restore
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: step 1/4 — stopping app %s", stackName)
}
if err := m.stackProvider.StopStack(stackName); err != nil {
m.logger.Printf("[WARN] RESTORE could not stop %s: %v (proceeding anyway)", stackName, err)
}
// Execute restore via restic
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: step 2/4 — restoring data from snapshot %s", snapshotID)
}
if err := m.restic.RestoreAppData(repoPath, snapshotID, restorePaths); err != nil {
m.logger.Printf("[ERROR] RESTORE failed for %s: %v", stackName, err)
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: step 3/4 — restarting app %s after failure", stackName)
}
if startErr := m.stackProvider.StartStack(stackName); startErr != nil {
m.logger.Printf("[WARN] RESTORE could not restart %s after failure: %v", stackName, startErr)
}
@@ -84,6 +114,9 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error {
}
// Restart the app
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: step 3/4 — restarting app %s after successful restore", stackName)
}
if err := m.stackProvider.StartStack(stackName); err != nil {
m.logger.Printf("[WARN] RESTORE could not restart %s after restore: %v", stackName, err)
}
@@ -92,6 +125,9 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error {
if hasHDD {
restoreType = "full (config+DB+userdata)"
}
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: step 4/4 — restore completed, type=%s", restoreType)
}
m.logger.Printf("[INFO] RESTORE completed: stack=%s, snapshot=%s, type=%s", stackName, snapshotID, restoreType)
return nil
}