feat: comprehensive INFO/WARN/ERROR logging across all controller modules

Add structured operational logging at INFO, WARN, and ERROR levels to
every controller module. Standardize custom prefixes ([GEO], [SCHED],
[SYNC]) to use [INFO/WARN/ERROR] [module] format. Fix misleveled logs
(WARN->ERROR for data loss scenarios, WARN->INFO for routine operations).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-26 19:58:27 +01:00
parent 95c821deb2
commit 8e61cd7ec4
44 changed files with 326 additions and 44 deletions
@@ -48,11 +48,17 @@ func RunHealthCheck(cfg *config.Config, cpuCollector *system.CPUCollector, stora
if sysInfo.DiskPercent > 0 {
if sysInfo.DiskPercent >= float64(cfg.Monitoring.Thresholds.DiskCritPercent) {
report.Issues = append(report.Issues, fmt.Sprintf("SSD disk usage critical: %.0f%%", sysInfo.DiskPercent))
if logger != nil {
logger.Printf("[WARN] [monitor] Disk (SSD) threshold breached: %.0f%% (limit: %d%%)", sysInfo.DiskPercent, cfg.Monitoring.Thresholds.DiskCritPercent)
}
if debug {
logger.Printf("[DEBUG] [HEALTH] SSD disk: CRITICAL (%.0f%% >= %d%%)", sysInfo.DiskPercent, cfg.Monitoring.Thresholds.DiskCritPercent)
}
} else if sysInfo.DiskPercent >= float64(cfg.Monitoring.Thresholds.DiskWarnPercent) {
report.Warnings = append(report.Warnings, fmt.Sprintf("SSD disk usage high: %.0f%%", sysInfo.DiskPercent))
if logger != nil {
logger.Printf("[WARN] [monitor] Disk (SSD) threshold breached: %.0f%% (limit: %d%%)", sysInfo.DiskPercent, cfg.Monitoring.Thresholds.DiskWarnPercent)
}
if debug {
logger.Printf("[DEBUG] [HEALTH] SSD disk: WARN (%.0f%% >= %d%%)", sysInfo.DiskPercent, cfg.Monitoring.Thresholds.DiskWarnPercent)
}
@@ -68,8 +74,14 @@ func RunHealthCheck(cfg *config.Config, cpuCollector *system.CPUCollector, stora
if sysInfo.HDDConfigured && sysInfo.HDDPercent > 0 {
if sysInfo.HDDPercent >= float64(cfg.Monitoring.Thresholds.DiskCritPercent) {
report.Issues = append(report.Issues, fmt.Sprintf("HDD disk usage critical: %.0f%%", sysInfo.HDDPercent))
if logger != nil {
logger.Printf("[WARN] [monitor] Disk (HDD) threshold breached: %.0f%% (limit: %d%%)", sysInfo.HDDPercent, cfg.Monitoring.Thresholds.DiskCritPercent)
}
} else if sysInfo.HDDPercent >= float64(cfg.Monitoring.Thresholds.DiskWarnPercent) {
report.Warnings = append(report.Warnings, fmt.Sprintf("HDD disk usage high: %.0f%%", sysInfo.HDDPercent))
if logger != nil {
logger.Printf("[WARN] [monitor] Disk (HDD) threshold breached: %.0f%% (limit: %d%%)", sysInfo.HDDPercent, cfg.Monitoring.Thresholds.DiskWarnPercent)
}
}
}
@@ -77,6 +89,9 @@ func RunHealthCheck(cfg *config.Config, cpuCollector *system.CPUCollector, stora
if sysInfo.MemPercent > 0 {
if sysInfo.MemPercent >= float64(cfg.Monitoring.Thresholds.MemoryWarnPercent) {
report.Warnings = append(report.Warnings, fmt.Sprintf("Memory usage high: %.0f%%", sysInfo.MemPercent))
if logger != nil {
logger.Printf("[WARN] [monitor] Memory threshold breached: %.0f%% (limit: %d%%)", sysInfo.MemPercent, cfg.Monitoring.Thresholds.MemoryWarnPercent)
}
if debug {
logger.Printf("[DEBUG] [HEALTH] Memory: WARN (%.0f%% >= %d%%)", sysInfo.MemPercent, cfg.Monitoring.Thresholds.MemoryWarnPercent)
}
@@ -92,6 +107,9 @@ func RunHealthCheck(cfg *config.Config, cpuCollector *system.CPUCollector, stora
if sysInfo.CPUPercent > 0 {
if sysInfo.CPUPercent >= float64(cfg.Monitoring.Thresholds.CPUWarnPercent) {
report.Warnings = append(report.Warnings, fmt.Sprintf("CPU usage high: %.0f%%", sysInfo.CPUPercent))
if logger != nil {
logger.Printf("[WARN] [monitor] CPU threshold breached: %.0f%% (limit: %d%%)", sysInfo.CPUPercent, cfg.Monitoring.Thresholds.CPUWarnPercent)
}
if debug {
logger.Printf("[DEBUG] [HEALTH] CPU: WARN (%.0f%% >= %d%%)", sysInfo.CPUPercent, cfg.Monitoring.Thresholds.CPUWarnPercent)
}
@@ -107,6 +125,9 @@ func RunHealthCheck(cfg *config.Config, cpuCollector *system.CPUCollector, stora
if sysInfo.TemperatureCelsius > 0 {
if sysInfo.TemperatureCelsius >= float64(cfg.Monitoring.Thresholds.TemperatureWarnCelsius) {
report.Warnings = append(report.Warnings, fmt.Sprintf("Temperature high: %.0f°C (%s)", sysInfo.TemperatureCelsius, sysInfo.TemperatureSource))
if logger != nil {
logger.Printf("[WARN] [monitor] Temperature threshold breached: %.0f°C (limit: %d°C)", sysInfo.TemperatureCelsius, cfg.Monitoring.Thresholds.TemperatureWarnCelsius)
}
if debug {
logger.Printf("[DEBUG] [HEALTH] Temperature: WARN (%.0f°C >= %d°C)", sysInfo.TemperatureCelsius, cfg.Monitoring.Thresholds.TemperatureWarnCelsius)
}
@@ -159,6 +180,10 @@ func RunHealthCheck(cfg *config.Config, cpuCollector *system.CPUCollector, stora
report.Status = "warn"
}
if logger != nil {
logger.Printf("[INFO] [monitor] Health check: status=%s", report.Status)
}
if debug {
logger.Printf("[DEBUG] [HEALTH] Final status: %s (issues=%d, warnings=%d, info=%d)",
report.Status, len(report.Issues), len(report.Warnings), len(report.Info))