v0.24.0 — Pre-testing observability: debug logging, diagnostic dump, startup self-test
- Add [DEBUG] logging across all modules (backup, storage, sync, selfupdate, monitor, notify, report, assets, setup), gated behind logging.level: "debug"
- Add /api/debug/dump endpoint returning the full controller state as JSON (debug mode only)
- Add startup self-test validating 9 subsystems (Docker, dirs, storage, hub, restic repos, metrics DB) with a pass/warn/fail summary
- New packages: internal/selftest, internal/util
- Constructor/signature changes: debug bool params, logger params on RunHealthCheck and BuildReport, smart watchdog probe logging

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2,6 +2,8 @@ package api
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
@@ -17,12 +19,16 @@ import (
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/backup"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/metrics"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/monitor"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/notify"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/report"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/scheduler"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/selfupdate"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/stacks"
|
||||
catalogsync "gitea.dooplex.hu/admin/felhom-controller/internal/sync"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/web"
|
||||
)
|
||||
|
||||
// Router handles all /api/* requests.
|
||||
@@ -45,6 +51,13 @@ type Router struct {
|
||||
|
||||
// Asset syncer for on-demand Hub asset sync
|
||||
assetsSyncer *assets.Syncer
|
||||
|
||||
// Debug dump dependencies (set via setters)
|
||||
scheduler *scheduler.Scheduler
|
||||
hubPusher *report.Pusher
|
||||
alertMgr *web.AlertManager
|
||||
version string
|
||||
startTime time.Time
|
||||
}
|
||||
|
||||
// SetAssetsSyncer sets the Hub asset syncer for on-demand sync triggers.
|
||||
@@ -52,6 +65,15 @@ func (r *Router) SetAssetsSyncer(as *assets.Syncer) {
|
||||
r.assetsSyncer = as
|
||||
}
|
||||
|
||||
// SetDebugDumpDeps sets optional dependencies for the /api/debug/dump endpoint.
|
||||
func (r *Router) SetDebugDumpDeps(sched *scheduler.Scheduler, pusher *report.Pusher, alertMgr *web.AlertManager, version string, startTime time.Time) {
|
||||
r.scheduler = sched
|
||||
r.hubPusher = pusher
|
||||
r.alertMgr = alertMgr
|
||||
r.version = version
|
||||
r.startTime = startTime
|
||||
}
|
||||
|
||||
func NewRouter(cfg *config.Config, configPath string, sett *settings.Settings, stackMgr *stacks.Manager, syncer *catalogsync.Syncer, cpuCollector *system.CPUCollector, backupMgr *backup.Manager, crossDrive *backup.CrossDriveRunner, metricsStore *metrics.MetricsStore, updater *selfupdate.Updater, notif *notify.Notifier, logger *log.Logger) *Router {
|
||||
return &Router{cfg: cfg, configPath: configPath, sett: sett, stackMgr: stackMgr, syncer: syncer, cpuCollector: cpuCollector, backupMgr: backupMgr, crossDriveRunner: crossDrive, metricsStore: metricsStore, updater: updater, notifier: notif, logger: logger}
|
||||
}
|
||||
@@ -214,6 +236,10 @@ func (r *Router) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
||||
case path == "/assets/status" && req.Method == http.MethodGet:
|
||||
r.assetSyncStatus(w, req)
|
||||
|
||||
// GET /api/debug/dump — diagnostic JSON dump (debug mode only)
|
||||
case path == "/debug/dump" && req.Method == http.MethodGet:
|
||||
r.debugDump(w, req)
|
||||
|
||||
default:
|
||||
writeJSON(w, http.StatusNotFound, apiResponse{OK: false, Error: "endpoint not found"})
|
||||
}
|
||||
@@ -1046,6 +1072,194 @@ func (r *Router) assetSyncStatus(w http.ResponseWriter, _ *http.Request) {
|
||||
writeJSON(w, http.StatusOK, apiResponse{OK: true, Data: r.assetsSyncer.Status()})
|
||||
}
|
||||
|
||||
// --- Debug dump handler ---
|
||||
|
||||
func (r *Router) debugDump(w http.ResponseWriter, req *http.Request) {
|
||||
if r.cfg.Logging.Level != "debug" {
|
||||
writeJSON(w, http.StatusNotFound, apiResponse{OK: false, Error: "endpoint not found"})
|
||||
return
|
||||
}
|
||||
|
||||
dump := make(map[string]interface{})
|
||||
|
||||
// Controller info
|
||||
configHash := ""
|
||||
if data, err := os.ReadFile(r.configPath); err == nil {
|
||||
h := sha256.Sum256(data)
|
||||
configHash = hex.EncodeToString(h[:])
|
||||
}
|
||||
dump["controller"] = map[string]interface{}{
|
||||
"version": r.version,
|
||||
"uptime_seconds": int(time.Since(r.startTime).Seconds()),
|
||||
"config_hash": configHash,
|
||||
"logging_level": r.cfg.Logging.Level,
|
||||
"pid": os.Getpid(),
|
||||
}
|
||||
|
||||
// Storage
|
||||
storagePaths := r.sett.GetStoragePaths()
|
||||
storageEntries := make([]map[string]interface{}, 0, len(storagePaths))
|
||||
for _, sp := range storagePaths {
|
||||
entry := map[string]interface{}{
|
||||
"path": sp.Path,
|
||||
"label": sp.Label,
|
||||
"disconnected": sp.Disconnected,
|
||||
"decommissioned": sp.Decommissioned,
|
||||
}
|
||||
if !sp.Disconnected && !sp.Decommissioned {
|
||||
if di := system.GetDiskUsage(sp.Path); di != nil {
|
||||
entry["total_gb"] = di.TotalGB
|
||||
entry["used_gb"] = di.UsedGB
|
||||
entry["used_percent"] = di.UsedPercent
|
||||
}
|
||||
}
|
||||
storageEntries = append(storageEntries, entry)
|
||||
}
|
||||
dump["storage"] = storageEntries
|
||||
|
||||
// Stacks
|
||||
allStacks := r.stackMgr.GetStacks()
|
||||
deployed := 0
|
||||
running := 0
|
||||
stopped := 0
|
||||
stackList := make([]map[string]interface{}, 0)
|
||||
for _, s := range allStacks {
|
||||
if !s.Deployed {
|
||||
continue
|
||||
}
|
||||
deployed++
|
||||
info := map[string]interface{}{
|
||||
"name": s.Name,
|
||||
"state": string(s.State),
|
||||
}
|
||||
if s.Meta.DisplayName != "" {
|
||||
info["display_name"] = s.Meta.DisplayName
|
||||
}
|
||||
containerNames := make([]string, 0, len(s.Containers))
|
||||
for _, c := range s.Containers {
|
||||
containerNames = append(containerNames, c.Name)
|
||||
switch c.State {
|
||||
case stacks.StateRunning, stacks.StateStarting, stacks.StateUnhealthy:
|
||||
running++
|
||||
default:
|
||||
stopped++
|
||||
}
|
||||
}
|
||||
info["containers"] = containerNames
|
||||
stackList = append(stackList, info)
|
||||
}
|
||||
dump["stacks"] = map[string]interface{}{
|
||||
"deployed": deployed,
|
||||
"running": running,
|
||||
"stopped": stopped,
|
||||
"list": stackList,
|
||||
}
|
||||
|
||||
// Backup
|
||||
if r.backupMgr != nil {
|
||||
backupInfo := map[string]interface{}{
|
||||
"enabled": true,
|
||||
"running": r.backupMgr.IsRunning(),
|
||||
}
|
||||
dbDump, backupSt := r.backupMgr.GetStatus()
|
||||
if dbDump != nil {
|
||||
backupInfo["last_db_dump"] = map[string]interface{}{
|
||||
"time": dbDump.LastRun,
|
||||
"success": dbDump.Success,
|
||||
}
|
||||
}
|
||||
if backupSt != nil {
|
||||
backupInfo["last_backup"] = map[string]interface{}{
|
||||
"time": backupSt.LastRun,
|
||||
"success": backupSt.Success,
|
||||
}
|
||||
if backupSt.RepoStats != nil {
|
||||
backupInfo["repo_size"] = backupSt.RepoStats.TotalSize
|
||||
backupInfo["snapshot_count"] = backupSt.RepoStats.SnapshotCount
|
||||
}
|
||||
}
|
||||
dump["backup"] = backupInfo
|
||||
} else {
|
||||
dump["backup"] = map[string]interface{}{"enabled": false}
|
||||
}
|
||||
|
||||
// Hub
|
||||
hubInfo := map[string]interface{}{
|
||||
"url": r.cfg.Hub.URL,
|
||||
"enabled": r.cfg.Hub.Enabled,
|
||||
}
|
||||
if r.hubPusher != nil {
|
||||
s := r.hubPusher.GetStatus()
|
||||
hubInfo["last_attempt"] = s.LastAttempt
|
||||
hubInfo["last_success"] = s.LastSuccess
|
||||
hubInfo["last_error"] = s.LastError
|
||||
hubInfo["consecutive_failures"] = s.Consecutive
|
||||
}
|
||||
dump["hub"] = hubInfo
|
||||
|
||||
// Scheduler
|
||||
if r.scheduler != nil {
|
||||
jobs := r.scheduler.GetJobs()
|
||||
jobList := make([]map[string]interface{}, 0, len(jobs))
|
||||
for _, j := range jobs {
|
||||
entry := map[string]interface{}{
|
||||
"name": j.Name,
|
||||
"running": j.Running,
|
||||
}
|
||||
if j.Interval > 0 {
|
||||
entry["type"] = "every"
|
||||
entry["interval"] = j.Interval.String()
|
||||
} else if j.Schedule != "" {
|
||||
entry["type"] = "daily"
|
||||
entry["schedule"] = j.Schedule
|
||||
}
|
||||
if !j.LastRun.IsZero() {
|
||||
entry["last_run"] = j.LastRun
|
||||
}
|
||||
if j.LastErr != nil {
|
||||
entry["last_error"] = j.LastErr.Error()
|
||||
}
|
||||
jobList = append(jobList, entry)
|
||||
}
|
||||
dump["scheduler"] = jobList
|
||||
}
|
||||
|
||||
// Health (fresh check)
|
||||
healthReport := monitor.RunHealthCheck(r.cfg, r.cpuCollector, storagePaths, r.logger)
|
||||
dump["health"] = map[string]interface{}{
|
||||
"status": healthReport.Status,
|
||||
"issues": healthReport.Issues,
|
||||
"warnings": healthReport.Warnings,
|
||||
}
|
||||
|
||||
// Notifications
|
||||
prefs := r.sett.GetNotificationPrefs()
|
||||
dump["notifications"] = map[string]interface{}{
|
||||
"email": prefs.Email,
|
||||
"enabled_events": prefs.EnabledEvents,
|
||||
"cooldown_hours": prefs.CooldownHours,
|
||||
}
|
||||
|
||||
// Self-update
|
||||
if r.updater != nil {
|
||||
status := r.updater.GetStatus()
|
||||
dump["self_update"] = map[string]interface{}{
|
||||
"enabled": true,
|
||||
"auto": r.cfg.SelfUpdate.AutoUpdate,
|
||||
"last_check": status.LastCheck,
|
||||
}
|
||||
} else {
|
||||
dump["self_update"] = map[string]interface{}{"enabled": false}
|
||||
}
|
||||
|
||||
// Alerts
|
||||
if r.alertMgr != nil {
|
||||
dump["alerts"] = r.alertMgr.GetAlerts()
|
||||
}
|
||||
|
||||
writeJSON(w, http.StatusOK, dump)
|
||||
}
|
||||
|
||||
func writeJSON(w http.ResponseWriter, status int, v interface{}) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(status)
|
||||
|
||||
Reference in New Issue
Block a user