feat: comprehensive INFO/WARN/ERROR logging across all controller modules
Add structured operational logging at INFO, WARN, and ERROR levels to every controller module. Standardize custom prefixes ([GEO], [SCHED], [SYNC]) to use [INFO/WARN/ERROR] [module] format. Fix misleveled logs (WARN->ERROR for data loss scenarios, WARN->INFO for routine operations). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -856,6 +856,7 @@ func (m *Manager) perDriveRepoStats() []DriveRepoInfo {
|
||||
}
|
||||
stats, err := m.restic.Stats(repoPath)
|
||||
if err != nil {
|
||||
m.logger.Printf("[WARN] [backup] perDriveRepoStats: failed to get stats for %s: %v", drive, err)
|
||||
continue
|
||||
}
|
||||
infos = append(infos, DriveRepoInfo{
|
||||
@@ -866,6 +867,7 @@ func (m *Manager) perDriveRepoStats() []DriveRepoInfo {
|
||||
LatestSnapshot: stats.LatestSnapshot,
|
||||
})
|
||||
}
|
||||
m.logger.Printf("[INFO] [backup] perDriveRepoStats: collected stats for %d drives", len(infos))
|
||||
return infos
|
||||
}
|
||||
|
||||
@@ -918,6 +920,7 @@ func (m *Manager) aggregateRepoStats() *RepoStats {
|
||||
if totalBytes > 0 {
|
||||
agg.TotalSize = humanizeBytes(totalBytes)
|
||||
}
|
||||
m.logger.Printf("[INFO] [backup] Aggregated repo stats: %d snapshots, total size %s", agg.SnapshotCount, agg.TotalSize)
|
||||
return agg
|
||||
}
|
||||
|
||||
@@ -932,6 +935,7 @@ func (m *Manager) listAllDumpFiles() []DumpFileInfo {
|
||||
}
|
||||
}
|
||||
}
|
||||
m.logger.Printf("[INFO] [backup] Found %d DB dump files across drives", len(allFiles))
|
||||
return allFiles
|
||||
}
|
||||
|
||||
@@ -979,7 +983,9 @@ func (m *Manager) saveSnapshotHistory() {
|
||||
}
|
||||
if err := os.Rename(tmp, m.snapshotHistoryFile); err != nil {
|
||||
m.logger.Printf("[WARN] Could not rename snapshot history file: %v", err)
|
||||
return
|
||||
}
|
||||
m.logger.Printf("[INFO] [backup] Saved snapshot history (%d entries)", len(m.snapshotHistory))
|
||||
}
|
||||
|
||||
// loadSnapshotHistoryFromFile reads the persisted snapshot history from disk.
|
||||
|
||||
@@ -203,6 +203,7 @@ func (r *CrossDriveRunner) RunAllScheduled(ctx context.Context, schedule string)
|
||||
|
||||
var errs []string
|
||||
var scheduled, skippedDisabled, skippedWrongSchedule int
|
||||
r.logger.Printf("[INFO] [backup] Cross-drive backup: starting scheduled run for %d configured app(s), schedule=%s", len(configs), schedule)
|
||||
for stackName, cfg := range configs {
|
||||
if !cfg.Enabled {
|
||||
if r.debug {
|
||||
@@ -240,6 +241,8 @@ func (r *CrossDriveRunner) RunAllScheduled(ctx context.Context, schedule string)
|
||||
scheduled, skippedDisabled, skippedWrongSchedule, len(errs))
|
||||
}
|
||||
|
||||
r.logger.Printf("[INFO] [backup] Cross-drive backup complete: %d succeeded, %d failed", scheduled-len(errs), len(errs))
|
||||
|
||||
if len(errs) > 0 {
|
||||
return fmt.Errorf("cross-drive backup errors: %s", strings.Join(errs, "; "))
|
||||
}
|
||||
@@ -263,6 +266,7 @@ func (r *CrossDriveRunner) RunAllConfigured(ctx context.Context) error {
|
||||
|
||||
var errs []string
|
||||
var ran int
|
||||
r.logger.Printf("[INFO] [backup] Cross-drive backup: starting all configured app(s), %d total", len(configs))
|
||||
for stackName, cfg := range configs {
|
||||
if !cfg.Enabled {
|
||||
continue
|
||||
@@ -281,6 +285,7 @@ func (r *CrossDriveRunner) RunAllConfigured(ctx context.Context) error {
|
||||
if r.debug {
|
||||
r.logger.Printf("[DEBUG] RunAllConfigured: done — %d ran, %d errors", ran, len(errs))
|
||||
}
|
||||
r.logger.Printf("[INFO] [backup] Cross-drive backup complete: %d succeeded, %d failed", ran-len(errs), len(errs))
|
||||
if len(errs) > 0 {
|
||||
return fmt.Errorf("cross-drive errors: %s", strings.Join(errs, "; "))
|
||||
}
|
||||
@@ -418,6 +423,7 @@ func (r *CrossDriveRunner) runRsyncBackup(ctx context.Context, stackName, destBa
|
||||
if r.debug {
|
||||
r.logger.Printf("[DEBUG] runRsyncBackup: rsync failed for %s: %s", srcMount, util.TruncateStr(strings.TrimSpace(string(out)), 500))
|
||||
}
|
||||
r.logger.Printf("[ERROR] [backup] Rsync backup for %s failed: %v", stackName, err)
|
||||
return fmt.Errorf("rsync failed for %s: %v (%s)", srcMount, err, strings.TrimSpace(string(out)))
|
||||
}
|
||||
if r.debug {
|
||||
@@ -452,6 +458,7 @@ func (r *CrossDriveRunner) runRsyncBackup(ctx context.Context, stackName, destBa
|
||||
}
|
||||
}
|
||||
|
||||
r.logger.Printf("[INFO] [backup] Rsync backup for %s to %s complete", stackName, destDir)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -484,6 +491,7 @@ func (r *CrossDriveRunner) copyStackDBDumps(stackName, destDir string) error {
|
||||
if copied > 0 {
|
||||
r.logger.Printf("[DEBUG] Copied %d DB dump file(s) to %s", copied, destDir)
|
||||
}
|
||||
r.logger.Printf("[INFO] [backup] Copied %d DB dumps for %s", copied, stackName)
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -135,6 +135,7 @@ func DiscoverDatabases(ctx context.Context, logger *log.Logger, debug bool) ([]D
|
||||
logger.Printf("[DEBUG] DiscoverDatabases: found %d database(s), skipped %d non-DB container(s)", len(dbs), skipped)
|
||||
}
|
||||
|
||||
logger.Printf("[INFO] [backup] Discovered %d databases", len(dbs))
|
||||
return dbs, nil
|
||||
}
|
||||
|
||||
@@ -143,11 +144,17 @@ func DumpAll(ctx context.Context, dbs []DiscoveredDB, dumpDir string, logger *lo
|
||||
// Clean up old .tmp files (older than 1 hour)
|
||||
cleanupTmpFiles(dumpDir, logger)
|
||||
|
||||
logger.Printf("[INFO] [backup] Starting DB dump for %d databases", len(dbs))
|
||||
var results []DumpResult
|
||||
var failed int
|
||||
for _, db := range dbs {
|
||||
result := DumpOne(ctx, db, dumpDir, logger, debug)
|
||||
results = append(results, result)
|
||||
if result.Error != nil {
|
||||
failed++
|
||||
}
|
||||
}
|
||||
logger.Printf("[INFO] [backup] DB dump complete: %d succeeded, %d failed", len(results)-failed, failed)
|
||||
return results
|
||||
}
|
||||
|
||||
|
||||
@@ -187,6 +187,9 @@ func pruneLocalHistory(histDir string, maxKeep int, logger *log.Logger) {
|
||||
logger.Printf("[DEBUG] Local infra history: pruned old version %s", ts)
|
||||
}
|
||||
}
|
||||
if logger != nil && toDelete > 0 {
|
||||
logger.Printf("[INFO] [backup] Pruning old backup versions: kept %d, removed %d", maxKeep, toDelete)
|
||||
}
|
||||
}
|
||||
|
||||
// sanitizeTimestamp converts an RFC3339 timestamp to a filename-safe format.
|
||||
|
||||
@@ -207,6 +207,7 @@ func (r *ResticManager) Snapshot(repoPath string, paths []string, tags []string)
|
||||
result.Duration, result.SnapshotID, result.FilesNew, result.FilesChanged, result.DataAdded)
|
||||
}
|
||||
|
||||
r.logger.Printf("[INFO] [backup] Restic snapshot complete for %s", repoPath)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
@@ -264,6 +265,7 @@ func (r *ResticManager) Check(repoPath string) error {
|
||||
if r.debug {
|
||||
r.logger.Printf("[DEBUG] [restic] Check: repo=%s OK, completed in %s", repoPath, time.Since(start))
|
||||
}
|
||||
r.logger.Printf("[INFO] [backup] Restic check passed for repo %s", repoPath)
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -87,7 +87,7 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error {
|
||||
m.logger.Printf("[DEBUG] RestoreApp: using repo=%s, %d restore path(s)", repoPath, len(restorePaths))
|
||||
}
|
||||
|
||||
m.logger.Printf("[WARN] RESTORE starting: stack=%s, snapshot=%s, repo=%s, paths=%v, hasHDD=%v",
|
||||
m.logger.Printf("[INFO] [backup] Starting restore for %s (snapshot=%s, repo=%s, paths=%v, hasHDD=%v)",
|
||||
stackName, snapshotID, repoPath, restorePaths, hasHDD)
|
||||
|
||||
// Stop the app before restore
|
||||
|
||||
@@ -156,14 +156,14 @@ func restoreUserData(ctx context.Context, app *RestorableApp, logger *log.Logger
|
||||
if e.IsDir() {
|
||||
src = strings.TrimRight(src, "/") + "/"
|
||||
if err := os.MkdirAll(dst, 0755); err != nil {
|
||||
logger.Printf("[WARN] Cannot create %s: %v", dst, err)
|
||||
logger.Printf("[ERROR] [backup] Cannot create %s: %v", dst, err)
|
||||
continue
|
||||
}
|
||||
dst = strings.TrimRight(dst, "/") + "/"
|
||||
logger.Printf("[DEBUG] [backup] restoreUserData: %s — rsync dir %s → %s", app.Name, src, dst)
|
||||
cmd := exec.CommandContext(ctx, "rsync", "-a", src, dst)
|
||||
if out, err := cmd.CombinedOutput(); err != nil {
|
||||
logger.Printf("[WARN] rsync data %s: %v (%s)", name, err, strings.TrimSpace(string(out)))
|
||||
logger.Printf("[ERROR] [backup] rsync data %s: %v (%s)", name, err, strings.TrimSpace(string(out)))
|
||||
} else {
|
||||
restored++
|
||||
}
|
||||
@@ -171,12 +171,12 @@ func restoreUserData(ctx context.Context, app *RestorableApp, logger *log.Logger
|
||||
// Single file — copy directly
|
||||
data, err := os.ReadFile(src)
|
||||
if err != nil {
|
||||
logger.Printf("[WARN] Cannot read %s: %v", src, err)
|
||||
logger.Printf("[ERROR] [backup] Cannot read %s: %v", src, err)
|
||||
continue
|
||||
}
|
||||
logger.Printf("[DEBUG] [backup] restoreUserData: %s — copying file %s (%d bytes)", app.Name, name, len(data))
|
||||
if err := os.WriteFile(dst, data, 0644); err != nil {
|
||||
logger.Printf("[WARN] Cannot write %s: %v", dst, err)
|
||||
logger.Printf("[ERROR] [backup] Cannot write %s: %v", dst, err)
|
||||
} else {
|
||||
restored++
|
||||
}
|
||||
@@ -224,12 +224,12 @@ func restoreDBDumps(app *RestorableApp, logger *log.Logger) error {
|
||||
dst := filepath.Join(destDir, e.Name())
|
||||
data, err := os.ReadFile(src)
|
||||
if err != nil {
|
||||
logger.Printf("[WARN] Cannot read dump %s: %v", e.Name(), err)
|
||||
logger.Printf("[ERROR] [backup] Cannot read dump %s: %v", e.Name(), err)
|
||||
continue
|
||||
}
|
||||
logger.Printf("[DEBUG] [backup] restoreDBDumps: %s — copying %s (%d bytes)", app.Name, e.Name(), len(data))
|
||||
if err := os.WriteFile(dst, data, 0644); err != nil {
|
||||
logger.Printf("[WARN] Cannot write dump %s: %v", e.Name(), err)
|
||||
logger.Printf("[ERROR] [backup] Cannot write dump %s: %v", e.Name(), err)
|
||||
} else {
|
||||
copied++
|
||||
}
|
||||
|
||||
@@ -239,6 +239,7 @@ func mountRawAndBind(ctx context.Context, device string, dm DiskMount, logger *l
|
||||
func addDRFstabEntries(dm DiskMount, logger *log.Logger) error {
|
||||
const fstabPath = "/host-fstab"
|
||||
|
||||
logger.Printf("[INFO] [backup] Adding fstab entries for disaster recovery (%s, UUID=%s)", dm.Label, dm.UUID)
|
||||
logger.Printf("[DEBUG] [backup] addDRFstabEntries: checking fstab for %s (UUID=%s)", dm.Label, dm.UUID)
|
||||
|
||||
data, err := os.ReadFile(fstabPath)
|
||||
@@ -257,20 +258,24 @@ func addDRFstabEntries(dm DiskMount, logger *log.Logger) error {
|
||||
var additions strings.Builder
|
||||
additions.WriteString("\n# Restored by felhom-controller DR\n")
|
||||
|
||||
entryCount := 0
|
||||
if dm.RawMount != "" {
|
||||
// Raw mount entry
|
||||
additions.WriteString(fmt.Sprintf("UUID=%s\t%s\t%s\t%s\t0 2\n",
|
||||
dm.UUID, dm.RawMount, dm.FSType, dm.FstabOptions))
|
||||
entryCount++
|
||||
}
|
||||
|
||||
if dm.BindSubdir != "" && dm.RawMount != "" {
|
||||
// Bind mount entry
|
||||
additions.WriteString(fmt.Sprintf("%s/%s\t%s\tnone\tbind,nofail\t0 0\n",
|
||||
dm.RawMount, dm.BindSubdir, dm.MountPoint))
|
||||
entryCount++
|
||||
} else if dm.RawMount == "" {
|
||||
// Direct mount entry (no bind)
|
||||
additions.WriteString(fmt.Sprintf("UUID=%s\t%s\t%s\t%s\t0 2\n",
|
||||
dm.UUID, dm.MountPoint, dm.FSType, dm.FstabOptions))
|
||||
entryCount++
|
||||
}
|
||||
|
||||
newContent := content + additions.String()
|
||||
@@ -288,6 +293,7 @@ func addDRFstabEntries(dm DiskMount, logger *log.Logger) error {
|
||||
}
|
||||
}
|
||||
|
||||
logger.Printf("[INFO] [backup] Added %d fstab entries for %s", entryCount, dm.Label)
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user