feat: comprehensive INFO/WARN/ERROR logging across all controller modules

Add structured operational logging at INFO, WARN, and ERROR levels to
every controller module. Standardize custom prefixes ([GEO], [SCHED],
[SYNC]) to use [INFO/WARN/ERROR] [module] format. Fix misleveled logs
(WARN->ERROR for data loss scenarios, WARN->INFO for routine operations).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-26 19:58:27 +01:00
parent 95c821deb2
commit 8e61cd7ec4
44 changed files with 326 additions and 44 deletions
+6
View File
@@ -856,6 +856,7 @@ func (m *Manager) perDriveRepoStats() []DriveRepoInfo {
}
stats, err := m.restic.Stats(repoPath)
if err != nil {
m.logger.Printf("[WARN] [backup] perDriveRepoStats: failed to get stats for %s: %v", drive, err)
continue
}
infos = append(infos, DriveRepoInfo{
@@ -866,6 +867,7 @@ func (m *Manager) perDriveRepoStats() []DriveRepoInfo {
LatestSnapshot: stats.LatestSnapshot,
})
}
m.logger.Printf("[INFO] [backup] perDriveRepoStats: collected stats for %d drives", len(infos))
return infos
}
@@ -918,6 +920,7 @@ func (m *Manager) aggregateRepoStats() *RepoStats {
if totalBytes > 0 {
agg.TotalSize = humanizeBytes(totalBytes)
}
m.logger.Printf("[INFO] [backup] Aggregated repo stats: %d snapshots, total size %s", agg.SnapshotCount, agg.TotalSize)
return agg
}
@@ -932,6 +935,7 @@ func (m *Manager) listAllDumpFiles() []DumpFileInfo {
}
}
}
m.logger.Printf("[INFO] [backup] Found %d DB dump files across drives", len(allFiles))
return allFiles
}
@@ -979,7 +983,9 @@ func (m *Manager) saveSnapshotHistory() {
}
if err := os.Rename(tmp, m.snapshotHistoryFile); err != nil {
m.logger.Printf("[WARN] Could not rename snapshot history file: %v", err)
return
}
m.logger.Printf("[INFO] [backup] Saved snapshot history (%d entries)", len(m.snapshotHistory))
}
// loadSnapshotHistoryFromFile reads the persisted snapshot history from disk.
+8
View File
@@ -203,6 +203,7 @@ func (r *CrossDriveRunner) RunAllScheduled(ctx context.Context, schedule string)
var errs []string
var scheduled, skippedDisabled, skippedWrongSchedule int
r.logger.Printf("[INFO] [backup] Cross-drive backup: starting scheduled run for %d configured app(s), schedule=%s", len(configs), schedule)
for stackName, cfg := range configs {
if !cfg.Enabled {
if r.debug {
@@ -240,6 +241,8 @@ func (r *CrossDriveRunner) RunAllScheduled(ctx context.Context, schedule string)
scheduled, skippedDisabled, skippedWrongSchedule, len(errs))
}
r.logger.Printf("[INFO] [backup] Cross-drive backup complete: %d succeeded, %d failed", scheduled-len(errs), len(errs))
if len(errs) > 0 {
return fmt.Errorf("cross-drive backup errors: %s", strings.Join(errs, "; "))
}
@@ -263,6 +266,7 @@ func (r *CrossDriveRunner) RunAllConfigured(ctx context.Context) error {
var errs []string
var ran int
r.logger.Printf("[INFO] [backup] Cross-drive backup: starting all configured app(s), %d total", len(configs))
for stackName, cfg := range configs {
if !cfg.Enabled {
continue
@@ -281,6 +285,7 @@ func (r *CrossDriveRunner) RunAllConfigured(ctx context.Context) error {
if r.debug {
r.logger.Printf("[DEBUG] RunAllConfigured: done — %d ran, %d errors", ran, len(errs))
}
r.logger.Printf("[INFO] [backup] Cross-drive backup complete: %d succeeded, %d failed", ran-len(errs), len(errs))
if len(errs) > 0 {
return fmt.Errorf("cross-drive errors: %s", strings.Join(errs, "; "))
}
@@ -418,6 +423,7 @@ func (r *CrossDriveRunner) runRsyncBackup(ctx context.Context, stackName, destBa
if r.debug {
r.logger.Printf("[DEBUG] runRsyncBackup: rsync failed for %s: %s", srcMount, util.TruncateStr(strings.TrimSpace(string(out)), 500))
}
r.logger.Printf("[ERROR] [backup] Rsync backup for %s failed: %v", stackName, err)
return fmt.Errorf("rsync failed for %s: %v (%s)", srcMount, err, strings.TrimSpace(string(out)))
}
if r.debug {
@@ -452,6 +458,7 @@ func (r *CrossDriveRunner) runRsyncBackup(ctx context.Context, stackName, destBa
}
}
r.logger.Printf("[INFO] [backup] Rsync backup for %s to %s complete", stackName, destDir)
return nil
}
@@ -484,6 +491,7 @@ func (r *CrossDriveRunner) copyStackDBDumps(stackName, destDir string) error {
if copied > 0 {
r.logger.Printf("[DEBUG] Copied %d DB dump file(s) to %s", copied, destDir)
}
r.logger.Printf("[INFO] [backup] Copied %d DB dumps for %s", copied, stackName)
return nil
}
+7
View File
@@ -135,6 +135,7 @@ func DiscoverDatabases(ctx context.Context, logger *log.Logger, debug bool) ([]D
logger.Printf("[DEBUG] DiscoverDatabases: found %d database(s), skipped %d non-DB container(s)", len(dbs), skipped)
}
logger.Printf("[INFO] [backup] Discovered %d databases", len(dbs))
return dbs, nil
}
@@ -143,11 +144,17 @@ func DumpAll(ctx context.Context, dbs []DiscoveredDB, dumpDir string, logger *lo
// Clean up old .tmp files (older than 1 hour)
cleanupTmpFiles(dumpDir, logger)
logger.Printf("[INFO] [backup] Starting DB dump for %d databases", len(dbs))
var results []DumpResult
var failed int
for _, db := range dbs {
result := DumpOne(ctx, db, dumpDir, logger, debug)
results = append(results, result)
if result.Error != nil {
failed++
}
}
logger.Printf("[INFO] [backup] DB dump complete: %d succeeded, %d failed", len(results)-failed, failed)
return results
}
@@ -187,6 +187,9 @@ func pruneLocalHistory(histDir string, maxKeep int, logger *log.Logger) {
logger.Printf("[DEBUG] Local infra history: pruned old version %s", ts)
}
}
if logger != nil && toDelete > 0 {
logger.Printf("[INFO] [backup] Pruning old backup versions: kept %d, removed %d", maxKeep, toDelete)
}
}
// sanitizeTimestamp converts an RFC3339 timestamp to a filename-safe format.
+2
View File
@@ -207,6 +207,7 @@ func (r *ResticManager) Snapshot(repoPath string, paths []string, tags []string)
result.Duration, result.SnapshotID, result.FilesNew, result.FilesChanged, result.DataAdded)
}
r.logger.Printf("[INFO] [backup] Restic snapshot complete for %s", repoPath)
return result, nil
}
@@ -264,6 +265,7 @@ func (r *ResticManager) Check(repoPath string) error {
if r.debug {
r.logger.Printf("[DEBUG] [restic] Check: repo=%s OK, completed in %s", repoPath, time.Since(start))
}
r.logger.Printf("[INFO] [backup] Restic check passed for repo %s", repoPath)
return nil
}
+1 -1
View File
@@ -87,7 +87,7 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error {
m.logger.Printf("[DEBUG] RestoreApp: using repo=%s, %d restore path(s)", repoPath, len(restorePaths))
}
m.logger.Printf("[WARN] RESTORE starting: stack=%s, snapshot=%s, repo=%s, paths=%v, hasHDD=%v",
m.logger.Printf("[INFO] [backup] Starting restore for %s (snapshot=%s, repo=%s, paths=%v, hasHDD=%v)",
stackName, snapshotID, repoPath, restorePaths, hasHDD)
// Stop the app before restore
@@ -156,14 +156,14 @@ func restoreUserData(ctx context.Context, app *RestorableApp, logger *log.Logger
if e.IsDir() {
src = strings.TrimRight(src, "/") + "/"
if err := os.MkdirAll(dst, 0755); err != nil {
logger.Printf("[WARN] Cannot create %s: %v", dst, err)
logger.Printf("[ERROR] [backup] Cannot create %s: %v", dst, err)
continue
}
dst = strings.TrimRight(dst, "/") + "/"
logger.Printf("[DEBUG] [backup] restoreUserData: %s — rsync dir %s → %s", app.Name, src, dst)
cmd := exec.CommandContext(ctx, "rsync", "-a", src, dst)
if out, err := cmd.CombinedOutput(); err != nil {
logger.Printf("[WARN] rsync data %s: %v (%s)", name, err, strings.TrimSpace(string(out)))
logger.Printf("[ERROR] [backup] rsync data %s: %v (%s)", name, err, strings.TrimSpace(string(out)))
} else {
restored++
}
@@ -171,12 +171,12 @@ func restoreUserData(ctx context.Context, app *RestorableApp, logger *log.Logger
// Single file — copy directly
data, err := os.ReadFile(src)
if err != nil {
logger.Printf("[WARN] Cannot read %s: %v", src, err)
logger.Printf("[ERROR] [backup] Cannot read %s: %v", src, err)
continue
}
logger.Printf("[DEBUG] [backup] restoreUserData: %s — copying file %s (%d bytes)", app.Name, name, len(data))
if err := os.WriteFile(dst, data, 0644); err != nil {
logger.Printf("[WARN] Cannot write %s: %v", dst, err)
logger.Printf("[ERROR] [backup] Cannot write %s: %v", dst, err)
} else {
restored++
}
@@ -224,12 +224,12 @@ func restoreDBDumps(app *RestorableApp, logger *log.Logger) error {
dst := filepath.Join(destDir, e.Name())
data, err := os.ReadFile(src)
if err != nil {
logger.Printf("[WARN] Cannot read dump %s: %v", e.Name(), err)
logger.Printf("[ERROR] [backup] Cannot read dump %s: %v", e.Name(), err)
continue
}
logger.Printf("[DEBUG] [backup] restoreDBDumps: %s — copying %s (%d bytes)", app.Name, e.Name(), len(data))
if err := os.WriteFile(dst, data, 0644); err != nil {
logger.Printf("[WARN] Cannot write dump %s: %v", e.Name(), err)
logger.Printf("[ERROR] [backup] Cannot write dump %s: %v", e.Name(), err)
} else {
copied++
}
@@ -239,6 +239,7 @@ func mountRawAndBind(ctx context.Context, device string, dm DiskMount, logger *l
func addDRFstabEntries(dm DiskMount, logger *log.Logger) error {
const fstabPath = "/host-fstab"
logger.Printf("[INFO] [backup] Adding fstab entries for disaster recovery (%s, UUID=%s)", dm.Label, dm.UUID)
logger.Printf("[DEBUG] [backup] addDRFstabEntries: checking fstab for %s (UUID=%s)", dm.Label, dm.UUID)
data, err := os.ReadFile(fstabPath)
@@ -257,20 +258,24 @@ func addDRFstabEntries(dm DiskMount, logger *log.Logger) error {
var additions strings.Builder
additions.WriteString("\n# Restored by felhom-controller DR\n")
entryCount := 0
if dm.RawMount != "" {
// Raw mount entry
additions.WriteString(fmt.Sprintf("UUID=%s\t%s\t%s\t%s\t0 2\n",
dm.UUID, dm.RawMount, dm.FSType, dm.FstabOptions))
entryCount++
}
if dm.BindSubdir != "" && dm.RawMount != "" {
// Bind mount entry
additions.WriteString(fmt.Sprintf("%s/%s\t%s\tnone\tbind,nofail\t0 0\n",
dm.RawMount, dm.BindSubdir, dm.MountPoint))
entryCount++
} else if dm.RawMount == "" {
// Direct mount entry (no bind)
additions.WriteString(fmt.Sprintf("UUID=%s\t%s\t%s\t%s\t0 2\n",
dm.UUID, dm.MountPoint, dm.FSType, dm.FstabOptions))
entryCount++
}
newContent := content + additions.String()
@@ -288,6 +293,7 @@ func addDRFstabEntries(dm DiskMount, logger *log.Logger) error {
}
}
logger.Printf("[INFO] [backup] Added %d fstab entries for %s", entryCount, dm.Label)
return nil
}