v0.24.0 — Pre-testing observability: debug logging, diagnostic dump, startup self-test
- Add [DEBUG] logging across all modules (backup, storage, sync, selfupdate, monitor, notify, report, assets, setup), gated behind logging.level: "debug"
- Add /api/debug/dump endpoint returning the full controller state as JSON (debug only)
- Add startup self-test validating 9 subsystems (Docker, dirs, storage, hub, restic repos, metrics DB) with a pass/warn/fail summary
- New packages: internal/selftest, internal/util
- Constructor/signature changes: debug bool params, logger params on RunHealthCheck and BuildReport, smart watchdog probe logging

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -190,6 +190,15 @@ func (m *Manager) groupStacksByDrive() map[string][]StackSummary {
|
||||
drive := m.GetAppDrivePath(stack.Name)
|
||||
result[drive] = append(result[drive], stack)
|
||||
}
|
||||
if m.isDebug() {
|
||||
for drive, stacks := range result {
|
||||
names := make([]string, len(stacks))
|
||||
for i, s := range stacks {
|
||||
names[i] = s.Name
|
||||
}
|
||||
m.logger.Printf("[DEBUG] groupStacksByDrive: %s → [%s]", drive, strings.Join(names, ", "))
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
@@ -197,10 +206,18 @@ func (m *Manager) groupStacksByDrive() map[string][]StackSummary {
|
||||
func (m *Manager) activeDrives() []string {
|
||||
groups := m.groupStacksByDrive()
|
||||
var drives []string
|
||||
var disconnected []string
|
||||
for d := range groups {
|
||||
if m.settings != nil && (m.settings.IsDisconnected(d) || m.settings.IsDecommissioned(d)) {
|
||||
disconnected = append(disconnected, d)
|
||||
}
|
||||
drives = append(drives, d)
|
||||
}
|
||||
sort.Strings(drives)
|
||||
if m.isDebug() {
|
||||
m.logger.Printf("[DEBUG] activeDrives: %d total (%s), %d disconnected/decommissioned",
|
||||
len(drives), strings.Join(drives, ", "), len(disconnected))
|
||||
}
|
||||
return drives
|
||||
}
|
||||
|
||||
@@ -218,7 +235,7 @@ func (m *Manager) runDBDumpsInternal(ctx context.Context) error {
|
||||
start := time.Now()
|
||||
m.logger.Printf("[INFO] Starting database dump run")
|
||||
|
||||
dbs, err := DiscoverDatabases(ctx, m.logger)
|
||||
dbs, err := DiscoverDatabases(ctx, m.logger, m.isDebug())
|
||||
if err != nil {
|
||||
m.logger.Printf("[ERROR] Database discovery failed: %v", err)
|
||||
return err
|
||||
@@ -261,7 +278,7 @@ func (m *Manager) runDBDumpsInternal(ctx context.Context) error {
|
||||
|
||||
dumpDir := AppDBDumpPath(drivePath, db.StackName)
|
||||
|
||||
result := DumpOne(ctx, db, dumpDir, m.logger)
|
||||
result := DumpOne(ctx, db, dumpDir, m.logger, m.isDebug())
|
||||
results = append(results, result)
|
||||
|
||||
if result.Error != nil {
|
||||
@@ -354,6 +371,9 @@ func (m *Manager) runBackupInternal(ctx context.Context) error {
|
||||
driveCount := 0
|
||||
|
||||
for drivePath, stacks := range driveStacks {
|
||||
if m.isDebug() {
|
||||
m.logger.Printf("[DEBUG] runBackupInternal: processing drive %s (%d stacks)", drivePath, len(stacks))
|
||||
}
|
||||
result, err := m.backupDrive(ctx, drivePath, stacks, infraPaths)
|
||||
if err != nil {
|
||||
anyErr = err
|
||||
@@ -473,6 +493,13 @@ func (m *Manager) backupDrive(ctx context.Context, drivePath string, stacks []St
|
||||
// Deduplicate paths
|
||||
paths = dedup(paths)
|
||||
|
||||
if m.isDebug() {
|
||||
m.logger.Printf("[DEBUG] backupDrive %s: repo=%s, %d include paths:", drivePath, repoPath, len(paths))
|
||||
for _, p := range paths {
|
||||
m.logger.Printf("[DEBUG] %s", p)
|
||||
}
|
||||
}
|
||||
|
||||
tags := []string{"felhom", m.cfg.Customer.ID, filepath.Base(drivePath)}
|
||||
m.logger.Printf("[INFO] Backing up drive %s (%d apps, %d paths)", drivePath, len(stacks), len(paths))
|
||||
|
||||
@@ -549,15 +576,27 @@ func (m *Manager) RunIntegrityCheck(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
if m.isDebug() {
|
||||
m.logger.Printf("[DEBUG] RunIntegrityCheck: checking %d drives", len(drives))
|
||||
}
|
||||
|
||||
var checkErr error
|
||||
for _, drive := range drives {
|
||||
repoPath := PrimaryResticRepoPath(drive)
|
||||
if !m.restic.RepoExists(repoPath) {
|
||||
if m.isDebug() {
|
||||
m.logger.Printf("[DEBUG] RunIntegrityCheck: skipping %s (repo does not exist)", repoPath)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if m.isDebug() {
|
||||
m.logger.Printf("[DEBUG] RunIntegrityCheck: checking repo %s", repoPath)
|
||||
}
|
||||
if err := m.restic.Check(repoPath); err != nil {
|
||||
m.logger.Printf("[ERROR] Restic check failed for %s: %v", repoPath, err)
|
||||
checkErr = err
|
||||
} else if m.isDebug() {
|
||||
m.logger.Printf("[DEBUG] RunIntegrityCheck: repo %s OK", repoPath)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -587,12 +626,28 @@ func (m *Manager) RunFullBackup(ctx context.Context) error {
|
||||
}
|
||||
defer m.releaseRunning()
|
||||
|
||||
if m.isDebug() {
|
||||
drives := m.activeDrives()
|
||||
driveStacks := m.groupStacksByDrive()
|
||||
totalStacks := 0
|
||||
for _, s := range driveStacks {
|
||||
totalStacks += len(s)
|
||||
}
|
||||
m.logger.Printf("[DEBUG] RunFullBackup: starting full backup — %d active drives, %d stacks", len(drives), totalStacks)
|
||||
}
|
||||
|
||||
// Step 1: DB dumps
|
||||
if m.isDebug() {
|
||||
m.logger.Printf("[DEBUG] RunFullBackup: phase 1 — database dumps")
|
||||
}
|
||||
if err := m.runDBDumpsInternal(ctx); err != nil {
|
||||
m.logger.Printf("[WARN] DB dump had errors, continuing with backup anyway")
|
||||
}
|
||||
|
||||
// Step 2: Restic backup
|
||||
if m.isDebug() {
|
||||
m.logger.Printf("[DEBUG] RunFullBackup: phase 2 — restic snapshots")
|
||||
}
|
||||
return m.runBackupInternal(ctx)
|
||||
}
|
||||
|
||||
@@ -737,7 +792,7 @@ func (m *Manager) GetStackHDDMounts(name string) []string {
|
||||
// DumpStackDB runs a database dump for containers belonging to a specific stack.
|
||||
// Dumps to the stack's home drive: <drive>/backups/primary/<stack>/db-dumps/.
|
||||
func (m *Manager) DumpStackDB(ctx context.Context, stackName string) error {
|
||||
dbs, err := DiscoverDatabases(ctx, m.logger)
|
||||
dbs, err := DiscoverDatabases(ctx, m.logger, m.isDebug())
|
||||
if err != nil {
|
||||
return fmt.Errorf("database discovery failed: %w", err)
|
||||
}
|
||||
@@ -762,7 +817,7 @@ func (m *Manager) DumpStackDB(ctx context.Context, stackName string) error {
|
||||
m.logger.Printf("[INFO] Running pre-backup DB dump for %s (%d database(s)) → %s", stackName, len(stackDBs), dumpDir)
|
||||
|
||||
for _, db := range stackDBs {
|
||||
result := DumpOne(ctx, db, dumpDir, m.logger)
|
||||
result := DumpOne(ctx, db, dumpDir, m.logger, m.isDebug())
|
||||
if result.Error != nil {
|
||||
return fmt.Errorf("DB dump failed for %s: %w", result.DB.ContainerName, result.Error)
|
||||
}
|
||||
@@ -1019,7 +1074,7 @@ func (m *Manager) RefreshCache(nextDBDump, nextBackup time.Time) {
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
if dbs, err := DiscoverDatabases(ctx, m.logger); err == nil {
|
||||
if dbs, err := DiscoverDatabases(ctx, m.logger, m.isDebug()); err == nil {
|
||||
status.DiscoveredDBs = dbs
|
||||
}
|
||||
|
||||
@@ -1172,6 +1227,11 @@ func (m *Manager) GetFullStatus(nextDBDump, nextBackup time.Time) *FullBackupSta
|
||||
}
|
||||
}
|
||||
|
||||
// isDebug returns true if logging level is "debug".
|
||||
func (m *Manager) isDebug() bool {
|
||||
return m.cfg.Logging.Level == "debug"
|
||||
}
|
||||
|
||||
func dbNames(dbs []DiscoveredDB) string {
|
||||
var names []string
|
||||
for _, db := range dbs {
|
||||
|
||||
Reference in New Issue
Block a user