v0.24.0 — Pre-testing observability: debug logging, diagnostic dump, startup self-test

- Add [DEBUG] logging across all modules (backup, storage, sync, selfupdate,
  monitor, notify, report, assets, setup) gated behind logging.level: "debug"
- Add /api/debug/dump endpoint returning full controller state JSON (debug only)
- Add startup self-test validating 9 subsystems (including Docker, dirs,
  storage, hub, restic repos, metrics DB) with pass/warn/fail summary
- New packages: internal/selftest, internal/util
- Constructor/signature changes: debug bool params, logger params on
  RunHealthCheck and BuildReport, smart watchdog probe logging

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-21 18:32:26 +01:00
parent 6f02536243
commit be7803c0ac
30 changed files with 1281 additions and 67 deletions
+65 -5
View File
@@ -190,6 +190,15 @@ func (m *Manager) groupStacksByDrive() map[string][]StackSummary {
drive := m.GetAppDrivePath(stack.Name)
result[drive] = append(result[drive], stack)
}
if m.isDebug() {
for drive, stacks := range result {
names := make([]string, len(stacks))
for i, s := range stacks {
names[i] = s.Name
}
m.logger.Printf("[DEBUG] groupStacksByDrive: %s → [%s]", drive, strings.Join(names, ", "))
}
}
return result
}
@@ -197,10 +206,18 @@ func (m *Manager) groupStacksByDrive() map[string][]StackSummary {
// activeDrives returns, in sorted order, every drive path that appears as a
// key in the grouping produced by groupStacksByDrive.
//
// Drives that settings reports as disconnected or decommissioned are only
// counted for the debug log line; they are still part of the returned slice.
// NOTE(review): despite the name, nothing is filtered out here — confirm that
// callers expect disconnected/decommissioned drives in the result.
//
// The result is nil when there are no drives.
func (m *Manager) activeDrives() []string {
	var (
		drives   []string
		inactive int // drives flagged disconnected or decommissioned
	)
	for drive := range m.groupStacksByDrive() {
		drives = append(drives, drive)
		if m.settings == nil {
			continue
		}
		if m.settings.IsDisconnected(drive) || m.settings.IsDecommissioned(drive) {
			inactive++
		}
	}
	// Map iteration order is random; sort so callers and logs are deterministic.
	sort.Strings(drives)

	if !m.isDebug() {
		return drives
	}
	m.logger.Printf("[DEBUG] activeDrives: %d total (%s), %d disconnected/decommissioned",
		len(drives), strings.Join(drives, ", "), inactive)
	return drives
}
@@ -218,7 +235,7 @@ func (m *Manager) runDBDumpsInternal(ctx context.Context) error {
start := time.Now()
m.logger.Printf("[INFO] Starting database dump run")
dbs, err := DiscoverDatabases(ctx, m.logger)
dbs, err := DiscoverDatabases(ctx, m.logger, m.isDebug())
if err != nil {
m.logger.Printf("[ERROR] Database discovery failed: %v", err)
return err
@@ -261,7 +278,7 @@ func (m *Manager) runDBDumpsInternal(ctx context.Context) error {
dumpDir := AppDBDumpPath(drivePath, db.StackName)
result := DumpOne(ctx, db, dumpDir, m.logger)
result := DumpOne(ctx, db, dumpDir, m.logger, m.isDebug())
results = append(results, result)
if result.Error != nil {
@@ -354,6 +371,9 @@ func (m *Manager) runBackupInternal(ctx context.Context) error {
driveCount := 0
for drivePath, stacks := range driveStacks {
if m.isDebug() {
m.logger.Printf("[DEBUG] runBackupInternal: processing drive %s (%d stacks)", drivePath, len(stacks))
}
result, err := m.backupDrive(ctx, drivePath, stacks, infraPaths)
if err != nil {
anyErr = err
@@ -473,6 +493,13 @@ func (m *Manager) backupDrive(ctx context.Context, drivePath string, stacks []St
// Deduplicate paths
paths = dedup(paths)
if m.isDebug() {
m.logger.Printf("[DEBUG] backupDrive %s: repo=%s, %d include paths:", drivePath, repoPath, len(paths))
for _, p := range paths {
m.logger.Printf("[DEBUG] %s", p)
}
}
tags := []string{"felhom", m.cfg.Customer.ID, filepath.Base(drivePath)}
m.logger.Printf("[INFO] Backing up drive %s (%d apps, %d paths)", drivePath, len(stacks), len(paths))
@@ -549,15 +576,27 @@ func (m *Manager) RunIntegrityCheck(ctx context.Context) error {
return nil
}
if m.isDebug() {
m.logger.Printf("[DEBUG] RunIntegrityCheck: checking %d drives", len(drives))
}
var checkErr error
for _, drive := range drives {
repoPath := PrimaryResticRepoPath(drive)
if !m.restic.RepoExists(repoPath) {
if m.isDebug() {
m.logger.Printf("[DEBUG] RunIntegrityCheck: skipping %s (repo does not exist)", repoPath)
}
continue
}
if m.isDebug() {
m.logger.Printf("[DEBUG] RunIntegrityCheck: checking repo %s", repoPath)
}
if err := m.restic.Check(repoPath); err != nil {
m.logger.Printf("[ERROR] Restic check failed for %s: %v", repoPath, err)
checkErr = err
} else if m.isDebug() {
m.logger.Printf("[DEBUG] RunIntegrityCheck: repo %s OK", repoPath)
}
}
@@ -587,12 +626,28 @@ func (m *Manager) RunFullBackup(ctx context.Context) error {
}
defer m.releaseRunning()
if m.isDebug() {
drives := m.activeDrives()
driveStacks := m.groupStacksByDrive()
totalStacks := 0
for _, s := range driveStacks {
totalStacks += len(s)
}
m.logger.Printf("[DEBUG] RunFullBackup: starting full backup — %d active drives, %d stacks", len(drives), totalStacks)
}
// Step 1: DB dumps
if m.isDebug() {
m.logger.Printf("[DEBUG] RunFullBackup: phase 1 — database dumps")
}
if err := m.runDBDumpsInternal(ctx); err != nil {
m.logger.Printf("[WARN] DB dump had errors, continuing with backup anyway")
}
// Step 2: Restic backup
if m.isDebug() {
m.logger.Printf("[DEBUG] RunFullBackup: phase 2 — restic snapshots")
}
return m.runBackupInternal(ctx)
}
@@ -737,7 +792,7 @@ func (m *Manager) GetStackHDDMounts(name string) []string {
// DumpStackDB runs a database dump for containers belonging to a specific stack.
// Dumps to the stack's home drive: <drive>/backups/primary/<stack>/db-dumps/.
func (m *Manager) DumpStackDB(ctx context.Context, stackName string) error {
dbs, err := DiscoverDatabases(ctx, m.logger)
dbs, err := DiscoverDatabases(ctx, m.logger, m.isDebug())
if err != nil {
return fmt.Errorf("database discovery failed: %w", err)
}
@@ -762,7 +817,7 @@ func (m *Manager) DumpStackDB(ctx context.Context, stackName string) error {
m.logger.Printf("[INFO] Running pre-backup DB dump for %s (%d database(s)) → %s", stackName, len(stackDBs), dumpDir)
for _, db := range stackDBs {
result := DumpOne(ctx, db, dumpDir, m.logger)
result := DumpOne(ctx, db, dumpDir, m.logger, m.isDebug())
if result.Error != nil {
return fmt.Errorf("DB dump failed for %s: %w", result.DB.ContainerName, result.Error)
}
@@ -1019,7 +1074,7 @@ func (m *Manager) RefreshCache(nextDBDump, nextBackup time.Time) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
if dbs, err := DiscoverDatabases(ctx, m.logger); err == nil {
if dbs, err := DiscoverDatabases(ctx, m.logger, m.isDebug()); err == nil {
status.DiscoveredDBs = dbs
}
@@ -1172,6 +1227,11 @@ func (m *Manager) GetFullStatus(nextDBDump, nextBackup time.Time) *FullBackupSta
}
}
// isDebug reports whether the configured logging level is exactly "debug".
// The comparison is case-sensitive; any other value yields false.
func (m *Manager) isDebug() bool {
	const debugLevel = "debug"
	return m.cfg.Logging.Level == debugLevel
}
func dbNames(dbs []DiscoveredDB) string {
var names []string
for _, db := range dbs {