v0.24.0 — Pre-testing observability: debug logging, diagnostic dump, startup self-test

- Add [DEBUG] logging across all modules (backup, storage, sync, selfupdate,
  monitor, notify, report, assets, setup) gated behind logging.level: "debug"
- Add /api/debug/dump endpoint returning full controller state JSON (debug only)
- Add startup self-test validating 9 subsystems (Docker, dirs, storage, hub,
  restic repos, metrics DB) with pass/warn/fail summary
- New packages: internal/selftest, internal/util
- Constructor/signature changes: debug bool params, logger params on
  RunHealthCheck and BuildReport, smart watchdog probe logging

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-21 18:32:26 +01:00
parent 6f02536243
commit be7803c0ac
30 changed files with 1281 additions and 67 deletions
+214
View File
@@ -2,6 +2,8 @@ package api
import (
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"io"
@@ -17,12 +19,16 @@ import (
"gitea.dooplex.hu/admin/felhom-controller/internal/backup"
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
"gitea.dooplex.hu/admin/felhom-controller/internal/metrics"
"gitea.dooplex.hu/admin/felhom-controller/internal/monitor"
"gitea.dooplex.hu/admin/felhom-controller/internal/notify"
"gitea.dooplex.hu/admin/felhom-controller/internal/report"
"gitea.dooplex.hu/admin/felhom-controller/internal/scheduler"
"gitea.dooplex.hu/admin/felhom-controller/internal/selfupdate"
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
"gitea.dooplex.hu/admin/felhom-controller/internal/stacks"
catalogsync "gitea.dooplex.hu/admin/felhom-controller/internal/sync"
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
"gitea.dooplex.hu/admin/felhom-controller/internal/web"
)
// Router handles all /api/* requests.
@@ -45,6 +51,13 @@ type Router struct {
// Asset syncer for on-demand Hub asset sync
assetsSyncer *assets.Syncer
// Debug dump dependencies (set via setters)
scheduler *scheduler.Scheduler
hubPusher *report.Pusher
alertMgr *web.AlertManager
version string
startTime time.Time
}
// SetAssetsSyncer sets the Hub asset syncer for on-demand sync triggers.
@@ -52,6 +65,15 @@ func (r *Router) SetAssetsSyncer(as *assets.Syncer) {
r.assetsSyncer = as
}
// SetDebugDumpDeps wires in the optional collaborators that the
// /api/debug/dump endpoint reports on: the job scheduler, the Hub report
// pusher, the web alert manager, the controller version string and the start
// time from which uptime is derived.
func (r *Router) SetDebugDumpDeps(sched *scheduler.Scheduler, pusher *report.Pusher, alertMgr *web.AlertManager, version string, startTime time.Time) {
	r.version, r.startTime = version, startTime
	r.scheduler, r.hubPusher, r.alertMgr = sched, pusher, alertMgr
}
// NewRouter builds a Router from its core dependencies. Every argument is
// stored as-is on the returned Router; no validation or copying is performed.
func NewRouter(cfg *config.Config, configPath string, sett *settings.Settings, stackMgr *stacks.Manager, syncer *catalogsync.Syncer, cpuCollector *system.CPUCollector, backupMgr *backup.Manager, crossDrive *backup.CrossDriveRunner, metricsStore *metrics.MetricsStore, updater *selfupdate.Updater, notif *notify.Notifier, logger *log.Logger) *Router {
	rt := &Router{
		cfg:              cfg,
		configPath:       configPath,
		sett:             sett,
		stackMgr:         stackMgr,
		syncer:           syncer,
		cpuCollector:     cpuCollector,
		backupMgr:        backupMgr,
		crossDriveRunner: crossDrive,
		metricsStore:     metricsStore,
		updater:          updater,
		notifier:         notif,
		logger:           logger,
	}
	return rt
}
@@ -214,6 +236,10 @@ func (r *Router) ServeHTTP(w http.ResponseWriter, req *http.Request) {
case path == "/assets/status" && req.Method == http.MethodGet:
r.assetSyncStatus(w, req)
// GET /api/debug/dump — diagnostic JSON dump (debug mode only)
case path == "/debug/dump" && req.Method == http.MethodGet:
r.debugDump(w, req)
default:
writeJSON(w, http.StatusNotFound, apiResponse{OK: false, Error: "endpoint not found"})
}
@@ -1046,6 +1072,194 @@ func (r *Router) assetSyncStatus(w http.ResponseWriter, _ *http.Request) {
writeJSON(w, http.StatusOK, apiResponse{OK: true, Data: r.assetsSyncer.Status()})
}
// --- Debug dump handler ---

// debugDump serves GET /api/debug/dump. It assembles a diagnostic snapshot of
// the controller (process info, storage paths, deployed stacks, backup state,
// Hub push status, scheduler jobs, a freshly executed health check,
// notification preferences, self-update status and active alerts) and writes
// it as a single JSON object.
//
// The endpoint is only available when logging.level is "debug"; otherwise it
// answers with the same 404 payload as an unknown endpoint. Sections that
// depend on optional collaborators (scheduler, Hub pusher, alert manager) are
// omitted or reduced when the collaborator is nil.
//
// Unlike most handlers, the dump is written as the raw top-level JSON object,
// not wrapped in apiResponse.
func (r *Router) debugDump(w http.ResponseWriter, _ *http.Request) {
	if r.cfg.Logging.Level != "debug" {
		writeJSON(w, http.StatusNotFound, apiResponse{OK: false, Error: "endpoint not found"})
		return
	}

	dump := make(map[string]interface{})

	// Controller info
	// The config hash is best-effort: if the file cannot be read the hash is
	// left empty rather than failing the whole dump.
	configHash := ""
	if data, err := os.ReadFile(r.configPath); err == nil {
		h := sha256.Sum256(data)
		configHash = hex.EncodeToString(h[:])
	}
	// startTime is only populated through SetDebugDumpDeps. time.Since on a
	// zero time saturates to the maximum Duration, which would be reported as
	// a nonsensical ~292-year uptime, so report 0 when it was never set.
	uptimeSeconds := 0
	if !r.startTime.IsZero() {
		uptimeSeconds = int(time.Since(r.startTime).Seconds())
	}
	dump["controller"] = map[string]interface{}{
		"version":        r.version,
		"uptime_seconds": uptimeSeconds,
		"config_hash":    configHash,
		"logging_level":  r.cfg.Logging.Level,
		"pid":            os.Getpid(),
	}

	// Storage
	storagePaths := r.sett.GetStoragePaths()
	storageEntries := make([]map[string]interface{}, 0, len(storagePaths))
	for _, sp := range storagePaths {
		entry := map[string]interface{}{
			"path":           sp.Path,
			"label":          sp.Label,
			"disconnected":   sp.Disconnected,
			"decommissioned": sp.Decommissioned,
		}
		// Disk usage is only queried for paths that are still attached and in use.
		if !sp.Disconnected && !sp.Decommissioned {
			if di := system.GetDiskUsage(sp.Path); di != nil {
				entry["total_gb"] = di.TotalGB
				entry["used_gb"] = di.UsedGB
				entry["used_percent"] = di.UsedPercent
			}
		}
		storageEntries = append(storageEntries, entry)
	}
	dump["storage"] = storageEntries

	// Stacks
	// "deployed" counts stacks, while "running" and "stopped" count the
	// containers belonging to those deployed stacks.
	allStacks := r.stackMgr.GetStacks()
	deployed := 0
	running := 0
	stopped := 0
	stackList := make([]map[string]interface{}, 0)
	for _, s := range allStacks {
		if !s.Deployed {
			continue
		}
		deployed++
		info := map[string]interface{}{
			"name":  s.Name,
			"state": string(s.State),
		}
		if s.Meta.DisplayName != "" {
			info["display_name"] = s.Meta.DisplayName
		}
		containerNames := make([]string, 0, len(s.Containers))
		for _, c := range s.Containers {
			containerNames = append(containerNames, c.Name)
			switch c.State {
			case stacks.StateRunning, stacks.StateStarting, stacks.StateUnhealthy:
				// Starting and unhealthy containers are tallied as running.
				running++
			default:
				stopped++
			}
		}
		info["containers"] = containerNames
		stackList = append(stackList, info)
	}
	dump["stacks"] = map[string]interface{}{
		"deployed": deployed,
		"running":  running,
		"stopped":  stopped,
		"list":     stackList,
	}

	// Backup
	if r.backupMgr != nil {
		backupInfo := map[string]interface{}{
			"enabled": true,
			"running": r.backupMgr.IsRunning(),
		}
		dbDump, backupSt := r.backupMgr.GetStatus()
		if dbDump != nil {
			backupInfo["last_db_dump"] = map[string]interface{}{
				"time":    dbDump.LastRun,
				"success": dbDump.Success,
			}
		}
		if backupSt != nil {
			backupInfo["last_backup"] = map[string]interface{}{
				"time":    backupSt.LastRun,
				"success": backupSt.Success,
			}
			if backupSt.RepoStats != nil {
				backupInfo["repo_size"] = backupSt.RepoStats.TotalSize
				backupInfo["snapshot_count"] = backupSt.RepoStats.SnapshotCount
			}
		}
		dump["backup"] = backupInfo
	} else {
		dump["backup"] = map[string]interface{}{"enabled": false}
	}

	// Hub
	// Static configuration is always reported; push status only when a pusher
	// has been attached.
	hubInfo := map[string]interface{}{
		"url":     r.cfg.Hub.URL,
		"enabled": r.cfg.Hub.Enabled,
	}
	if r.hubPusher != nil {
		s := r.hubPusher.GetStatus()
		hubInfo["last_attempt"] = s.LastAttempt
		hubInfo["last_success"] = s.LastSuccess
		hubInfo["last_error"] = s.LastError
		hubInfo["consecutive_failures"] = s.Consecutive
	}
	dump["hub"] = hubInfo

	// Scheduler
	if r.scheduler != nil {
		jobs := r.scheduler.GetJobs()
		jobList := make([]map[string]interface{}, 0, len(jobs))
		for _, j := range jobs {
			entry := map[string]interface{}{
				"name":    j.Name,
				"running": j.Running,
			}
			// A positive interval takes precedence over a schedule string;
			// jobs with neither get no "type" key.
			if j.Interval > 0 {
				entry["type"] = "every"
				entry["interval"] = j.Interval.String()
			} else if j.Schedule != "" {
				entry["type"] = "daily"
				entry["schedule"] = j.Schedule
			}
			if !j.LastRun.IsZero() {
				entry["last_run"] = j.LastRun
			}
			if j.LastErr != nil {
				entry["last_error"] = j.LastErr.Error()
			}
			jobList = append(jobList, entry)
		}
		dump["scheduler"] = jobList
	}

	// Health (fresh check)
	// Runs a new health check on every request, reusing the storage paths
	// fetched above.
	healthReport := monitor.RunHealthCheck(r.cfg, r.cpuCollector, storagePaths, r.logger)
	dump["health"] = map[string]interface{}{
		"status":   healthReport.Status,
		"issues":   healthReport.Issues,
		"warnings": healthReport.Warnings,
	}

	// Notifications
	prefs := r.sett.GetNotificationPrefs()
	dump["notifications"] = map[string]interface{}{
		"email":          prefs.Email,
		"enabled_events": prefs.EnabledEvents,
		"cooldown_hours": prefs.CooldownHours,
	}

	// Self-update
	if r.updater != nil {
		status := r.updater.GetStatus()
		dump["self_update"] = map[string]interface{}{
			"enabled":    true,
			"auto":       r.cfg.SelfUpdate.AutoUpdate,
			"last_check": status.LastCheck,
		}
	} else {
		dump["self_update"] = map[string]interface{}{"enabled": false}
	}

	// Alerts
	if r.alertMgr != nil {
		dump["alerts"] = r.alertMgr.GetAlerts()
	}

	writeJSON(w, http.StatusOK, dump)
}
func writeJSON(w http.ResponseWriter, status int, v interface{}) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)