diff --git a/CHANGELOG.md b/CHANGELOG.md index 40f9556..56c55c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,41 @@ ## Changelog +### v0.25.0 — Debug Page: Operator Testing & Diagnostics Dashboard (2026-02-21) + +**Full debug dashboard with 8 sections for testing all controller subsystems in debug mode.** + +Only available when `logging.level: "debug"` — sidebar link, page, and all `/api/debug/*` endpoints return 404 otherwise. + +#### New files +- `internal/web/logbuffer.go` — Ring buffer (1000 entries) implementing `io.Writer` for capturing log output. Parses Go standard log format (with/without `Lshortfile`), extracts level/source/timestamp. Supports filtered retrieval by level and timestamp. +- `internal/web/handler_debug.go` — Debug page handler + 20 API endpoint handlers organized in 8 sections. `DebugCallbacks` struct (6 fields) for wiring main.go closures. +- `internal/web/templates/debug.html` — Full debug dashboard template with 8 collapsible sections, complete JS framework (lazy-load, polling, action buttons, log viewer with filter/auto-refresh). + +#### Debug page sections +1. **Rendszer diagnosztika** — Diagnostic dump (migrated from `api/router.go`) with structured UI rendering: controller info, storage paths, deployed stacks, scheduler jobs, alerts. JSON download button. +2. **Értesítés teszt** — Send test events with configurable type/severity, view event history ring buffer (last 50 events, newest first). +3. **Mentés teszt** — Trigger individual backup phases: full backup, DB dump only, cross-drive only, restic integrity check, infrastructure backup. +4. **Tárhely teszt** — Storage watchdog status table with per-path probe state. Simulate disconnect (stops apps, marks disconnected, skips unmount) and reconnect (cleans locks, clears state). 5s auto-refresh. +5. **Hub & Kapcsolatok** — Hub report push, infra backup push, Hub/Gitea connectivity tests with latency, preference sync. +6. 
**Önfrissítés teszt** — Version check + dry-run (shows current/new image lines, compose writability, backup status). +7. **DR / Telepítő varázsló** — Infra backup status per drive (files, timestamps). "RESET" confirmation + infra backup pre-check before triggering setup mode via marker file. +8. **Naplóviewer** — In-memory log viewer with level filter (DEBUG/INFO/WARN/ERROR), 2s auto-refresh, color-coded entries, clear display. + +#### Module additions +- `notify/notifier.go`: `PushTestEventSync()` (synchronous, returns Hub status), `GetEventHistory()` (ring buffer), `recordHistory()` for debug page. +- `backup/crossdrive.go`: `RunAllConfigured()` — runs all enabled apps ignoring schedule filter. +- `selfupdate/updater.go`: `DryRun()` — checks update availability, compose writability, backup status without performing changes. +- `monitor/watchdog.go`: `SimulateDisconnect()` / `SimulateReconnect()` with `simulatedPaths` map, `GetDebugStatus()` for per-path probe state. Watchdog `Check()` skips simulated paths. +- `setup/setup.go`: `NeedsSetup()` now checks `.needs-setup` marker file. `ClearSetupMarker()` for cleanup. + +#### Routing changes +- **Mux carve-out**: `/api/debug/` routes to web server (same pattern as `/api/storage/`), with auth + CSRF. +- **Removed** `SetDebugDumpDeps()` from `api/router.go` and the `/api/debug/dump` route — dump handler migrated to `handler_debug.go` using Server's existing fields. + +#### Infrastructure +- `setupLogger()` now returns `(*log.Logger, *web.LogBuffer)`. In debug mode, creates `io.MultiWriter(os.Stdout, logBuffer)` so all log output is captured from the start. +- Debug CSS: ~170 lines of styles for sections, result badges, log viewer, confirm input, danger button, spinner. 
+ ### v0.24.0 — Pre-Testing Observability (2026-02-21) **Three features for pre-testing diagnostics: verbose debug logging, diagnostic dump endpoint, and startup self-test.** diff --git a/controller/README.md b/controller/README.md index 67ebc42..899071a 100644 --- a/controller/README.md +++ b/controller/README.md @@ -4,7 +4,7 @@ A single, lightweight Go container that replaces Portainer + scattered systemd scripts with a unified, Hungarian-language web dashboard for managing Docker Compose stacks, backups, storage, monitoring, and notifications on customer hardware. -**Current version: v0.24.0** +**Current version: v0.25.0** --- @@ -23,6 +23,7 @@ A single, lightweight Go container that replaces Portainer + scattered systemd s - [Setup Wizard](#9-first-run-setup-wizard) - [Disaster Recovery](#10-disaster-recovery) - [Asset Sync](#11-asset-sync) + - [Debug Mode](#12-debug-mode) - [Repository Layout](#repository-layout) - [Configuration](#configuration) - [REST API](#rest-api) @@ -993,6 +994,33 @@ The Hub serves three asset types per app: --- +### 12. Debug Mode + +When `logging.level: "debug"` is set in `controller.yaml`, the controller exposes a full diagnostic dashboard at `/debug` with 8 testing sections. All debug endpoints are gated — at `info` level, the sidebar link disappears and all `/api/debug/*` routes return 404. + +#### Debug Page Sections + +| # | Section | Endpoints | Description | +|---|---------|-----------|-------------| +| 1 | Rendszer diagnosztika | `GET /api/debug/dump` | Full state dump: controller info, storage, stacks, scheduler, health, alerts. JSON download. | +| 2 | Értesítés teszt | `POST /api/debug/event/test`, `GET /api/debug/event/history` | Send test events with configurable type/severity, view event history ring buffer. | +| 3 | Mentés teszt | `POST /api/debug/backup/{dbdump,crossdrive,integrity,infra}` | Trigger individual backup phases independently. 
| +| 4 | Tárhely teszt | `POST /api/debug/storage/simulate-{disconnect,reconnect}`, `GET /api/debug/storage/watchdog-status` | Simulate drive disconnect/reconnect without unmounting. Per-path probe state with 5s auto-refresh. | +| 5 | Hub & Kapcsolatok | `POST /api/debug/hub/{push,infra-push,test-connectivity,preferences-sync}`, `POST /api/debug/gitea/test-connectivity` | Test Hub/Gitea connectivity with latency. Push reports and sync preferences. | +| 6 | Önfrissítés teszt | `POST /api/debug/selfupdate/dry-run` | Dry-run update check: current vs new image lines, compose writability, backup state. | +| 7 | DR / Telepítő varázsló | `POST /api/debug/dr/trigger-setup`, `GET /api/debug/dr/infra-status` | Infra backup status per drive. Trigger setup mode via marker file (requires "RESET" + infra backup pre-check). | +| 8 | Naplóviewer | `GET /api/debug/logs?level=&limit=&after=` | In-memory log viewer (last 1000 entries), level filter, 2s auto-refresh, color-coded entries. | + +#### Key Implementation Details + +- **Log buffer** (`internal/web/logbuffer.go`): Ring buffer implementing `io.Writer`, created before all modules via `io.MultiWriter(os.Stdout, logBuffer)`. Parses `[DEBUG]`/`[INFO]`/`[WARN]`/`[ERROR]` tags from standard log format. +- **Storage simulation**: `simulatedPaths` map in watchdog prevents the watchdog from re-probing simulated-disconnected paths. Disconnect runs all real steps except `lazyUnmount` (drive stays physically mounted). +- **DR trigger safety**: Uses marker file (`data/.needs-setup`) instead of modifying controller.yaml. Pre-checks that infra backup exists on at least one drive. +- **Routing**: `/api/debug/` carved out in HTTP mux (same pattern as `/api/storage/`), routed to web server with auth + CSRF. +- **DebugCallbacks**: 6 closures wired from main.go for operations needing modules not on Server struct (hub push, infra backup, connectivity tests). 
+ +--- + ## Repository Layout ``` @@ -1064,11 +1092,13 @@ controller/ │ ├── csrf.go # CsrfProtect middleware, csrfToken/csrfField helpers │ ├── handlers.go # Page handlers (dashboard, stacks, deploy, backups, etc.) │ ├── handler_restore.go # DR: restore page handler + APIs (scan, restore all, skip) +│ ├── handler_debug.go # Debug page handler + 20 debug API endpoints (debug-mode only) +│ ├── logbuffer.go # Ring buffer (io.Writer) for in-memory log capture │ ├── storage_handlers.go # Storage API handlers (scan, format, attach, migrate, cleanup, disconnect/reconnect) │ ├── alerts.go # State-based alert generation │ ├── funcmap.go # Template functions (state colors, Hungarian formatting) │ ├── embed.go # go:embed for templates + Chart.js -│ └── templates/ # 13 HTML files + style.css (Hungarian UI) +│ └── templates/ # 14 HTML files + style.css (Hungarian UI, incl. debug.html) ├── configs/ │ ├── controller.yaml.example # Full config reference │ └── example-felhom-metadata.yml # .felhom.yml format reference diff --git a/controller/cmd/controller/main.go b/controller/cmd/controller/main.go index 4333c50..84e9609 100644 --- a/controller/cmd/controller/main.go +++ b/controller/cmd/controller/main.go @@ -5,6 +5,7 @@ import ( "encoding/json" "flag" "fmt" + "io" "log" "net/http" "os" @@ -64,7 +65,7 @@ func main() { log.Printf("[WARN] Config load failed (%s), using defaults: %v", *configPath, err) } - logger := setupLogger(cfg) + logger, logBuffer := setupLogger(cfg) // --- Setup mode: if no customer ID configured, run setup wizard --- if setup.NeedsSetup(cfg) { @@ -583,8 +584,6 @@ func main() { if assetsSyncer != nil { apiRouter.SetAssetsSyncer(assetsSyncer) } - apiRouter.SetDebugDumpDeps(sched, hubPusher, alertMgr, Version, startTime) - // --- Initialize web server --- webServer := web.NewServer(cfg, stackMgr, cpuCollector, backupMgr, crossDriveRunner, sched, sett, alertMgr, notifier, updater, logger, Version) webServer.SetStorageWatchdog(storageWatchdog) @@ -602,6 +601,53 @@ 
func main() { } }) } + if logBuffer != nil { + webServer.SetLogBuffer(logBuffer) + } + webServer.SetStartTime(startTime) + + // Wire debug callbacks (only in debug mode) + if cfg.Logging.Level == "debug" { + dc := &web.DebugCallbacks{} + if hubPusher != nil { + dc.TriggerHubReportPush = func() error { + r := report.BuildReport(cfg, *configPath, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths(), logger) + return hubPusher.Push(r) + } + dc.TriggerHubInfraPush = func() error { + pushInfraBackup(cfg, sett, stackProv, hubPusher, logger) + return nil + } + } + dc.TriggerLocalInfraWrite = func() error { + writeLocalInfraBackup(cfg, sett, stackProv, logger) + return nil + } + dc.HubConnectivityTest = func() (int, int64, error) { + start := time.Now() + resp, err := (&http.Client{Timeout: 5 * time.Second}).Get(cfg.Hub.URL + "/healthz") // same 5s cap as the Gitea probe + latency := time.Since(start).Milliseconds() + if err != nil { + return 0, latency, err + } + resp.Body.Close() + return resp.StatusCode, latency, nil + } + if cfg.Git.RepoURL != "" { + dc.GiteaConnectivityTest = func() (int, int64, error) { + start := time.Now() + client := &http.Client{Timeout: 5 * time.Second} + resp, err := client.Head(cfg.Git.RepoURL) + latency := time.Since(start).Milliseconds() + if err != nil { + return 0, latency, err + } + resp.Body.Close() + return resp.StatusCode, latency, nil + } + } + webServer.SetDebugCallbacks(dc) + } // --- Initialize drive migrator --- driveMigrator := &storage.DriveMigrator{ @@ -652,6 +698,8 @@ func main() { mux.HandleFunc("/api/health", apiRouter.HealthHandler) // Storage API routes handled by web server (longer prefix takes precedence over /api/) mux.Handle("/api/storage/", webServer.RequireAuth(webServer.CsrfProtect(http.HandlerFunc(webServer.ServeStorageAPI)))) + // Debug API routes handled by web server (debug-mode gating inside handler) + mux.Handle("/api/debug/", webServer.RequireAuth(webServer.CsrfProtect(http.HandlerFunc(webServer.ServeDebugAPI)))) // Self-update API — accepts session 
auth OR hub API key (for external triggering) // CsrfProtect exempts Bearer-token requests automatically. mux.Handle("/api/selfupdate/", selfUpdateAuthMiddleware(cfg, webServer, webServer.CsrfProtect(http.HandlerFunc(apiRouter.ServeHTTP)))) @@ -711,15 +759,13 @@ func selfUpdateAuthMiddleware(cfg *config.Config, webServer *web.Server, next ht }) } -func setupLogger(cfg *config.Config) *log.Logger { - // For now, log to stdout. File logging will be added later. - logger := log.New(os.Stdout, "", log.LstdFlags) - +func setupLogger(cfg *config.Config) (*log.Logger, *web.LogBuffer) { if cfg.Logging.Level == "debug" { - logger.SetFlags(log.LstdFlags | log.Lshortfile) + logBuffer := web.NewLogBuffer(1000) + logger := log.New(io.MultiWriter(os.Stdout, logBuffer), "", log.LstdFlags|log.Lshortfile) + return logger, logBuffer } - - return logger + return log.New(os.Stdout, "", log.LstdFlags), nil } // stackAdapter implements backup.StackDataProvider using stacks.Manager. diff --git a/controller/internal/api/router.go b/controller/internal/api/router.go index 6a07909..6fa15a5 100644 --- a/controller/internal/api/router.go +++ b/controller/internal/api/router.go @@ -2,8 +2,6 @@ package api import ( "context" - "crypto/sha256" - "encoding/hex" "encoding/json" "fmt" "io" @@ -19,16 +17,12 @@ import ( "gitea.dooplex.hu/admin/felhom-controller/internal/backup" "gitea.dooplex.hu/admin/felhom-controller/internal/config" "gitea.dooplex.hu/admin/felhom-controller/internal/metrics" - "gitea.dooplex.hu/admin/felhom-controller/internal/monitor" "gitea.dooplex.hu/admin/felhom-controller/internal/notify" - "gitea.dooplex.hu/admin/felhom-controller/internal/report" - "gitea.dooplex.hu/admin/felhom-controller/internal/scheduler" "gitea.dooplex.hu/admin/felhom-controller/internal/selfupdate" "gitea.dooplex.hu/admin/felhom-controller/internal/settings" "gitea.dooplex.hu/admin/felhom-controller/internal/stacks" catalogsync "gitea.dooplex.hu/admin/felhom-controller/internal/sync" 
"gitea.dooplex.hu/admin/felhom-controller/internal/system" - "gitea.dooplex.hu/admin/felhom-controller/internal/web" ) // Router handles all /api/* requests. @@ -52,12 +46,6 @@ type Router struct { // Asset syncer for on-demand Hub asset sync assetsSyncer *assets.Syncer - // Debug dump dependencies (set via setters) - scheduler *scheduler.Scheduler - hubPusher *report.Pusher - alertMgr *web.AlertManager - version string - startTime time.Time } // SetAssetsSyncer sets the Hub asset syncer for on-demand sync triggers. @@ -65,15 +53,6 @@ func (r *Router) SetAssetsSyncer(as *assets.Syncer) { r.assetsSyncer = as } -// SetDebugDumpDeps sets optional dependencies for the /api/debug/dump endpoint. -func (r *Router) SetDebugDumpDeps(sched *scheduler.Scheduler, pusher *report.Pusher, alertMgr *web.AlertManager, version string, startTime time.Time) { - r.scheduler = sched - r.hubPusher = pusher - r.alertMgr = alertMgr - r.version = version - r.startTime = startTime -} - func NewRouter(cfg *config.Config, configPath string, sett *settings.Settings, stackMgr *stacks.Manager, syncer *catalogsync.Syncer, cpuCollector *system.CPUCollector, backupMgr *backup.Manager, crossDrive *backup.CrossDriveRunner, metricsStore *metrics.MetricsStore, updater *selfupdate.Updater, notif *notify.Notifier, logger *log.Logger) *Router { return &Router{cfg: cfg, configPath: configPath, sett: sett, stackMgr: stackMgr, syncer: syncer, cpuCollector: cpuCollector, backupMgr: backupMgr, crossDriveRunner: crossDrive, metricsStore: metricsStore, updater: updater, notifier: notif, logger: logger} } @@ -236,10 +215,6 @@ func (r *Router) ServeHTTP(w http.ResponseWriter, req *http.Request) { case path == "/assets/status" && req.Method == http.MethodGet: r.assetSyncStatus(w, req) - // GET /api/debug/dump — diagnostic JSON dump (debug mode only) - case path == "/debug/dump" && req.Method == http.MethodGet: - r.debugDump(w, req) - default: writeJSON(w, http.StatusNotFound, apiResponse{OK: false, Error: "endpoint 
not found"}) } @@ -1072,194 +1047,6 @@ func (r *Router) assetSyncStatus(w http.ResponseWriter, _ *http.Request) { writeJSON(w, http.StatusOK, apiResponse{OK: true, Data: r.assetsSyncer.Status()}) } -// --- Debug dump handler --- - -func (r *Router) debugDump(w http.ResponseWriter, req *http.Request) { - if r.cfg.Logging.Level != "debug" { - writeJSON(w, http.StatusNotFound, apiResponse{OK: false, Error: "endpoint not found"}) - return - } - - dump := make(map[string]interface{}) - - // Controller info - configHash := "" - if data, err := os.ReadFile(r.configPath); err == nil { - h := sha256.Sum256(data) - configHash = hex.EncodeToString(h[:]) - } - dump["controller"] = map[string]interface{}{ - "version": r.version, - "uptime_seconds": int(time.Since(r.startTime).Seconds()), - "config_hash": configHash, - "logging_level": r.cfg.Logging.Level, - "pid": os.Getpid(), - } - - // Storage - storagePaths := r.sett.GetStoragePaths() - storageEntries := make([]map[string]interface{}, 0, len(storagePaths)) - for _, sp := range storagePaths { - entry := map[string]interface{}{ - "path": sp.Path, - "label": sp.Label, - "disconnected": sp.Disconnected, - "decommissioned": sp.Decommissioned, - } - if !sp.Disconnected && !sp.Decommissioned { - if di := system.GetDiskUsage(sp.Path); di != nil { - entry["total_gb"] = di.TotalGB - entry["used_gb"] = di.UsedGB - entry["used_percent"] = di.UsedPercent - } - } - storageEntries = append(storageEntries, entry) - } - dump["storage"] = storageEntries - - // Stacks - allStacks := r.stackMgr.GetStacks() - deployed := 0 - running := 0 - stopped := 0 - stackList := make([]map[string]interface{}, 0) - for _, s := range allStacks { - if !s.Deployed { - continue - } - deployed++ - info := map[string]interface{}{ - "name": s.Name, - "state": string(s.State), - } - if s.Meta.DisplayName != "" { - info["display_name"] = s.Meta.DisplayName - } - containerNames := make([]string, 0, len(s.Containers)) - for _, c := range s.Containers { - containerNames 
= append(containerNames, c.Name) - switch c.State { - case stacks.StateRunning, stacks.StateStarting, stacks.StateUnhealthy: - running++ - default: - stopped++ - } - } - info["containers"] = containerNames - stackList = append(stackList, info) - } - dump["stacks"] = map[string]interface{}{ - "deployed": deployed, - "running": running, - "stopped": stopped, - "list": stackList, - } - - // Backup - if r.backupMgr != nil { - backupInfo := map[string]interface{}{ - "enabled": true, - "running": r.backupMgr.IsRunning(), - } - dbDump, backupSt := r.backupMgr.GetStatus() - if dbDump != nil { - backupInfo["last_db_dump"] = map[string]interface{}{ - "time": dbDump.LastRun, - "success": dbDump.Success, - } - } - if backupSt != nil { - backupInfo["last_backup"] = map[string]interface{}{ - "time": backupSt.LastRun, - "success": backupSt.Success, - } - if backupSt.RepoStats != nil { - backupInfo["repo_size"] = backupSt.RepoStats.TotalSize - backupInfo["snapshot_count"] = backupSt.RepoStats.SnapshotCount - } - } - dump["backup"] = backupInfo - } else { - dump["backup"] = map[string]interface{}{"enabled": false} - } - - // Hub - hubInfo := map[string]interface{}{ - "url": r.cfg.Hub.URL, - "enabled": r.cfg.Hub.Enabled, - } - if r.hubPusher != nil { - s := r.hubPusher.GetStatus() - hubInfo["last_attempt"] = s.LastAttempt - hubInfo["last_success"] = s.LastSuccess - hubInfo["last_error"] = s.LastError - hubInfo["consecutive_failures"] = s.Consecutive - } - dump["hub"] = hubInfo - - // Scheduler - if r.scheduler != nil { - jobs := r.scheduler.GetJobs() - jobList := make([]map[string]interface{}, 0, len(jobs)) - for _, j := range jobs { - entry := map[string]interface{}{ - "name": j.Name, - "running": j.Running, - } - if j.Interval > 0 { - entry["type"] = "every" - entry["interval"] = j.Interval.String() - } else if j.Schedule != "" { - entry["type"] = "daily" - entry["schedule"] = j.Schedule - } - if !j.LastRun.IsZero() { - entry["last_run"] = j.LastRun - } - if j.LastErr != nil { - 
entry["last_error"] = j.LastErr.Error() - } - jobList = append(jobList, entry) - } - dump["scheduler"] = jobList - } - - // Health (fresh check) - healthReport := monitor.RunHealthCheck(r.cfg, r.cpuCollector, storagePaths, r.logger) - dump["health"] = map[string]interface{}{ - "status": healthReport.Status, - "issues": healthReport.Issues, - "warnings": healthReport.Warnings, - } - - // Notifications - prefs := r.sett.GetNotificationPrefs() - dump["notifications"] = map[string]interface{}{ - "email": prefs.Email, - "enabled_events": prefs.EnabledEvents, - "cooldown_hours": prefs.CooldownHours, - } - - // Self-update - if r.updater != nil { - status := r.updater.GetStatus() - dump["self_update"] = map[string]interface{}{ - "enabled": true, - "auto": r.cfg.SelfUpdate.AutoUpdate, - "last_check": status.LastCheck, - } - } else { - dump["self_update"] = map[string]interface{}{"enabled": false} - } - - // Alerts - if r.alertMgr != nil { - dump["alerts"] = r.alertMgr.GetAlerts() - } - - writeJSON(w, http.StatusOK, dump) -} - func writeJSON(w http.ResponseWriter, status int, v interface{}) { w.Header().Set("Content-Type", "application/json") w.WriteHeader(status) diff --git a/controller/internal/backup/crossdrive.go b/controller/internal/backup/crossdrive.go index 81873ed..73f6a82 100644 --- a/controller/internal/backup/crossdrive.go +++ b/controller/internal/backup/crossdrive.go @@ -245,6 +245,47 @@ func (r *CrossDriveRunner) RunAllScheduled(ctx context.Context, schedule string) return nil } +// RunAllConfigured runs cross-drive backup for all enabled apps, ignoring schedule. +// Used by the debug page to trigger all backups regardless of their configured schedule. 
+func (r *CrossDriveRunner) RunAllConfigured(ctx context.Context) error { + if r.debug { + r.logger.Printf("[DEBUG] RunAllConfigured: starting for all enabled apps") + } + + r.AutoEnableSmallApps() + r.syncInfraConfig(ctx) + + configs := r.sett.GetAllCrossDriveConfigs() + if len(configs) == 0 { + return nil + } + + var errs []string + var ran int + for stackName, cfg := range configs { + if !cfg.Enabled { + continue + } + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + ran++ + if err := r.RunAppBackup(ctx, stackName); err != nil { + errs = append(errs, fmt.Sprintf("%s: %v", stackName, err)) + } + } + + if r.debug { + r.logger.Printf("[DEBUG] RunAllConfigured: done — %d ran, %d errors", ran, len(errs)) + } + if len(errs) > 0 { + return fmt.Errorf("cross-drive errors: %s", strings.Join(errs, "; ")) + } + return nil +} + // IsRunning returns true if the given app's backup is currently running. func (r *CrossDriveRunner) IsRunning(stackName string) bool { r.mu.Lock() diff --git a/controller/internal/monitor/watchdog.go b/controller/internal/monitor/watchdog.go index 6cd3c18..d7ba8e7 100644 --- a/controller/internal/monitor/watchdog.go +++ b/controller/internal/monitor/watchdog.go @@ -80,6 +80,10 @@ type StorageWatchdog struct { mu sync.Mutex pathState map[string]*pathProbeState + + // Debug simulation state + simulatedMu sync.RWMutex + simulatedPaths map[string]bool } // NewStorageWatchdog creates a new storage watchdog. 
@@ -91,12 +95,13 @@ func NewStorageWatchdog( logger *log.Logger, ) *StorageWatchdog { return &StorageWatchdog{ - settings: sett, - stackProvider: stackProvider, - notifier: notifier, - cfg: cfg, - logger: logger, - pathState: make(map[string]*pathProbeState), + settings: sett, + stackProvider: stackProvider, + notifier: notifier, + cfg: cfg, + logger: logger, + pathState: make(map[string]*pathProbeState), + simulatedPaths: make(map[string]bool), } } @@ -146,6 +151,11 @@ func (w *StorageWatchdog) Check(ctx context.Context) error { continue } + // Skip simulated-disconnected paths (handled by debug UI) + if w.isSimulated(sp.Path) { + continue + } + if sp.Disconnected { w.handleReconnectCheck(ctx, sp) } else { @@ -663,6 +673,196 @@ func (w *StorageWatchdog) RestartStoppedApps(path string) (started, failed []str return started, failed } +// ── Debug simulation methods ───────────────────────────────────────── + +// isSimulated returns true if the path is in simulated-disconnect state. +func (w *StorageWatchdog) isSimulated(path string) bool { + w.simulatedMu.RLock() + defer w.simulatedMu.RUnlock() + return w.simulatedPaths[path] +} + +// SimulateDisconnect simulates a drive disconnection without actually unmounting. +// Runs disconnect steps 1,2,4,5,6,7 (skips step 3: lazyUnmount). +// Returns the list of stopped stacks. 
+func (w *StorageWatchdog) SimulateDisconnect(ctx context.Context, path string) ([]string, error) { + sp := w.findStoragePath(path) + if sp == nil { + return nil, fmt.Errorf("storage path %q not found", path) + } + if sp.Disconnected { + return nil, fmt.Errorf("drive already disconnected") + } + if sp.Decommissioned { + return nil, fmt.Errorf("drive is decommissioned") + } + + label := sp.Label + if label == "" { + label = sp.Path + } + w.logger.Printf("[INFO] [STORAGE] [DEBUG-SIM] Simulating disconnect: %s (%s)", path, label) + + // Mark as simulated so the watchdog skips probing this path + w.simulatedMu.Lock() + w.simulatedPaths[path] = true + w.simulatedMu.Unlock() + + // Step 1: Stop affected stacks + stoppedStacks := w.stopAffectedStacks(path) + + // Step 2: Mark disconnected in settings + if err := w.settings.SetDisconnected(path, true, stoppedStacks); err != nil { + w.logger.Printf("[ERROR] [STORAGE] [DEBUG-SIM] Failed to mark disconnected: %v", err) + } + + // Step 3: SKIPPED (no lazyUnmount — drive stays physically mounted) + + // Step 4: Update in-memory state + state := w.getOrCreateState(path) + state.lastStatus = "disconnected" + state.probeInterval = disconnectedProbeInterval + state.consecutiveFailures = 0 + + // Step 5: Trigger alert refresh + if w.alertRefresh != nil { + w.alertRefresh() + } + + // Step 6: Send notification + w.notifier.NotifyStorageDisconnected(label, stoppedStacks) + + // Step 7: Push hub report + if w.pushHubReport != nil { + go w.pushHubReport() + } + + w.logger.Printf("[INFO] [STORAGE] [DEBUG-SIM] Disconnect simulated: %s — %d stack(s) stopped", path, len(stoppedStacks)) + return stoppedStacks, nil +} + +// SimulateReconnect undoes a simulated disconnection. 
+func (w *StorageWatchdog) SimulateReconnect(ctx context.Context, path string) error { + if !w.isSimulated(path) { + return fmt.Errorf("path %q is not in simulated-disconnect state", path) + } + + sp := w.findStoragePath(path) + if sp == nil { + return fmt.Errorf("storage path %q not found", path) + } + + label := sp.Label + if label == "" { + label = sp.Path + } + w.logger.Printf("[INFO] [STORAGE] [DEBUG-SIM] Simulating reconnect: %s (%s)", path, label) + + // Remove from simulated set + w.simulatedMu.Lock() + delete(w.simulatedPaths, path) + w.simulatedMu.Unlock() + + // Verify drive is actually still mounted (it should be since we never unmounted) + verifyResult := system.ProbeStoragePath(path) + if verifyResult.Status != system.ProbeConnected { + return fmt.Errorf("drive probe failed after simulation clear: %v", verifyResult.Err) + } + + // Clean restic locks + w.cleanResticLocks(ctx, path) + + // Validate stopped stacks + filteredStacks := w.filterStoppedStacks(sp.StoppedStacks) + + // Clear disconnected, preserve stopped stacks for restart UI + if err := w.settings.SetDisconnected(path, false, filteredStacks); err != nil { + w.logger.Printf("[ERROR] [STORAGE] [DEBUG-SIM] Failed to clear disconnected: %v", err) + } + + // Update in-memory state + state := w.getOrCreateState(path) + state.lastStatus = "connected" + state.probeInterval = defaultProbeInterval + state.consecutiveFailures = 0 + + // Trigger alert refresh + if w.alertRefresh != nil { + w.alertRefresh() + } + + // Send notification + w.notifier.NotifyStorageReconnected(label) + if w.pushHubReport != nil { + go w.pushHubReport() + } + + w.logger.Printf("[INFO] [STORAGE] [DEBUG-SIM] Reconnect simulated: %s", path) + return nil +} + +// PathDebugStatus holds per-path probe state for the debug page. 
+type PathDebugStatus struct { + Path string `json:"path"` + Label string `json:"label"` + Status string `json:"status"` + Simulated bool `json:"simulated"` + ProbeOK bool `json:"probe_ok"` + DebounceCount int `json:"debounce_count"` + DebounceMax int `json:"debounce_max"` + LastProbe time.Time `json:"last_probe"` + AvgLatencyMs float64 `json:"avg_latency_ms"` + ProbeCount int `json:"probe_count"` + ProbeOKCount int `json:"probe_ok_count"` +} + +// GetDebugStatus returns per-path probe state for the debug page. +func (w *StorageWatchdog) GetDebugStatus() []PathDebugStatus { + paths := w.settings.GetStoragePaths() + result := make([]PathDebugStatus, 0, len(paths)) + + w.mu.Lock() + defer w.mu.Unlock() + + for _, sp := range paths { + if sp.Decommissioned { + continue + } + ds := PathDebugStatus{ + Path: sp.Path, + Label: sp.Label, + DebounceMax: probeThreshold, + } + if sp.Disconnected { + ds.Status = "disconnected" + } else { + ds.Status = "connected" + } + ds.Simulated = w.isSimulatedLocked(sp.Path) + + if state, ok := w.pathState[sp.Path]; ok { + ds.DebounceCount = state.consecutiveFailures + ds.LastProbe = state.lastProbeTime + ds.ProbeOK = state.lastStatus == "connected" + ds.ProbeCount = state.probeCount + ds.ProbeOKCount = state.probeOKCount + if state.probeCount > 0 { + ds.AvgLatencyMs = float64(state.totalLatency.Milliseconds()) / float64(state.probeCount) + } + } + result = append(result, ds) + } + return result +} + +// isSimulatedLocked reports whether path is in simulated-disconnect state; it takes +// simulatedMu.RLock itself, like isSimulated. GetDebugStatus calls it while holding w.mu. +func (w *StorageWatchdog) isSimulatedLocked(path string) bool { + w.simulatedMu.RLock() + defer w.simulatedMu.RUnlock() + return w.simulatedPaths[path] +} + // findStoragePath returns the storage path entry for a given path, or nil. 
func (w *StorageWatchdog) findStoragePath(path string) *settings.StoragePath { for _, sp := range w.settings.GetStoragePaths() { diff --git a/controller/internal/notify/notifier.go b/controller/internal/notify/notifier.go index 9d068a9..7d6e8ed 100644 --- a/controller/internal/notify/notifier.go +++ b/controller/internal/notify/notifier.go @@ -16,6 +16,16 @@ import ( // Notifier sends structured events to the hub via /api/v1/event. // Non-blocking: fires requests in goroutines, logs errors but doesn't retry aggressively. // Cooldown logic is handled by the Hub — the controller sends all events unconditionally. +// EventHistoryEntry records a sent event for the debug page. +type EventHistoryEntry struct { + Timestamp time.Time `json:"timestamp"` + EventType string `json:"event_type"` + Severity string `json:"severity"` + Message string `json:"message"` + HubStatus int `json:"hub_status"` + HubError string `json:"hub_error,omitempty"` +} + type Notifier struct { hubURL string apiKey string @@ -28,6 +38,12 @@ type Notifier struct { mu sync.Mutex prevHealthStatus string // tracks previous health check status for change detection + + // Event history ring buffer (debug page) + historyMu sync.RWMutex + history [50]EventHistoryEntry + histPos int + histFull bool } // New creates a new Notifier. Returns a no-op notifier if hub is not enabled. @@ -454,6 +470,95 @@ func (n *Notifier) SendTest() error { return nil } +// ── Debug event testing ─────────────────────────────────────────────── + +// PushTestEventSync sends a test event synchronously and returns the Hub HTTP status code. +// Used by the debug page for event testing with configurable type/severity. 
+func (n *Notifier) PushTestEventSync(eventType, severity, message string) (statusCode int, err error) { + if !n.enabled { + return 0, fmt.Errorf("hub nem konfigurált") + } + + payload := eventRequest{ + CustomerID: n.customerID, + EventType: eventType, + Severity: severity, + Message: message, + } + + jsonData, err := json.Marshal(payload) + if err != nil { + return 0, fmt.Errorf("marshal: %w", err) + } + + url := n.hubURL + "/api/v1/event" + req, err := http.NewRequest("POST", url, bytes.NewReader(jsonData)) + if err != nil { + return 0, fmt.Errorf("request: %w", err) + } + req.Header.Set("Authorization", "Bearer "+n.apiKey) + req.Header.Set("Content-Type", "application/json") + + resp, err := n.httpClient.Do(req) + if err != nil { + n.recordHistory(eventType, severity, message, 0, err.Error()) + return 0, fmt.Errorf("send: %w", err) + } + io.Copy(io.Discard, resp.Body) + resp.Body.Close() + + if resp.StatusCode >= 400 { + n.recordHistory(eventType, severity, message, resp.StatusCode, fmt.Sprintf("HTTP %d", resp.StatusCode)) + return resp.StatusCode, fmt.Errorf("hub returned %d", resp.StatusCode) + } + + n.recordHistory(eventType, severity, message, resp.StatusCode, "") + return resp.StatusCode, nil +} + +// GetEventHistory returns the last N event history entries (newest first). +func (n *Notifier) GetEventHistory(limit int) []EventHistoryEntry { + n.historyMu.RLock() + defer n.historyMu.RUnlock() + + total := n.histPos + if n.histFull { + total = len(n.history) + } + if limit <= 0 || limit > total { + limit = total + } + + result := make([]EventHistoryEntry, 0, limit) + for i := 0; i < limit; i++ { + idx := n.histPos - 1 - i + if idx < 0 { + idx += len(n.history) + } + result = append(result, n.history[idx]) + } + return result +} + +// recordHistory appends an entry to the event history ring buffer. 
+func (n *Notifier) recordHistory(eventType, severity, message string, hubStatus int, hubError string) { + n.historyMu.Lock() + defer n.historyMu.Unlock() + n.history[n.histPos] = EventHistoryEntry{ + Timestamp: time.Now(), + EventType: eventType, + Severity: severity, + Message: message, + HubStatus: hubStatus, + HubError: hubError, + } + n.histPos++ + if n.histPos >= len(n.history) { + n.histPos = 0 + n.histFull = true + } +} + // ── Backward compatibility ─────────────────────────────────────────── // notifyRequest is the JSON payload for the legacy /api/v1/notify endpoint. diff --git a/controller/internal/selfupdate/updater.go b/controller/internal/selfupdate/updater.go index fe9b370..f4256a2 100644 --- a/controller/internal/selfupdate/updater.go +++ b/controller/internal/selfupdate/updater.go @@ -234,6 +234,67 @@ func imageName(image string) string { return parts[len(parts)-1] } +// DryRunResult holds the result of a self-update dry run. +type DryRunResult struct { + CurrentVersion string `json:"current_version"` + LatestVersion string `json:"latest_version"` + UpdateAvailable bool `json:"update_available"` + ComposeWritable bool `json:"compose_writable"` + CurrentImageLine string `json:"current_image_line"` + NewImageLine string `json:"new_image_line"` + BackupRunning bool `json:"backup_running"` + Error string `json:"error,omitempty"` +} + +// DryRun checks for updates and reports what would happen without performing any changes. 
+func (u *Updater) DryRun() *DryRunResult { + result := &DryRunResult{ + CurrentVersion: u.currentVer, + } + + // Check for update + check := u.CheckForUpdate() + result.LatestVersion = check.LatestVersion + result.UpdateAvailable = check.UpdateAvailable + if check.Error != "" { + result.Error = check.Error + return result + } + + // Check compose file + data, err := os.ReadFile(u.composePath) + if err != nil { + result.Error = fmt.Sprintf("Compose fájl nem olvasható: %v", err) + return result + } + + // Find current image line + re := regexp.MustCompile(`(image:\s*)gitea\.dooplex\.hu/admin/felhom-controller:\S+`) + match := re.Find(data) + if match != nil { + result.CurrentImageLine = string(match) + } + + // Build new image line + if check.UpdateAvailable { + result.NewImageLine = fmt.Sprintf("image: %s:%s", u.cfg.Image, check.LatestVersion) + } + + // Check writability + f, err := os.OpenFile(u.composePath, os.O_WRONLY, 0) + if err == nil { + f.Close() + result.ComposeWritable = true + } + + // Check backup running + if u.backupRunning != nil { + result.BackupRunning = u.backupRunning() + } + + return result +} + // TriggerUpdate starts the self-update process. Returns error immediately if // preconditions fail. The actual update runs in a goroutine. func (u *Updater) TriggerUpdate(initiatedBy string) error { diff --git a/controller/internal/setup/setup.go b/controller/internal/setup/setup.go index b0012f7..a3ab09e 100644 --- a/controller/internal/setup/setup.go +++ b/controller/internal/setup/setup.go @@ -12,9 +12,21 @@ import ( ) // NeedsSetup checks whether the controller should enter setup mode. -// Setup is needed when no customer ID has been configured (empty string). +// Setup is needed when no customer ID has been configured (empty string) +// or when a debug-triggered setup marker file exists. 
func NeedsSetup(cfg *config.Config) bool { - return cfg.Customer.ID == "" + if cfg.Customer.ID == "" { + return true + } + if _, err := os.Stat(filepath.Join(cfg.Paths.DataDir, ".needs-setup")); err == nil { + return true + } + return false +} + +// ClearSetupMarker removes the debug-triggered setup marker file. +func ClearSetupMarker(dataDir string) { + os.Remove(filepath.Join(dataDir, ".needs-setup")) } // SetupState persists wizard progress to survive browser crashes. diff --git a/controller/internal/web/handler_debug.go b/controller/internal/web/handler_debug.go new file mode 100644 index 0000000..8323603 --- /dev/null +++ b/controller/internal/web/handler_debug.go @@ -0,0 +1,687 @@ +package web + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "net/http" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + "gitea.dooplex.hu/admin/felhom-controller/internal/backup" + "gitea.dooplex.hu/admin/felhom-controller/internal/monitor" + "gitea.dooplex.hu/admin/felhom-controller/internal/stacks" + "gitea.dooplex.hu/admin/felhom-controller/internal/system" +) + +// DebugCallbacks holds functions that need main.go wiring (modules not directly on Server). +type DebugCallbacks struct { + TriggerHubReportPush func() error + TriggerHubInfraPush func() error + TriggerLocalInfraWrite func() error + TriggerSetupMode func() error + HubConnectivityTest func() (statusCode int, latencyMs int64, err error) + GiteaConnectivityTest func() (statusCode int, latencyMs int64, err error) +} + +// debugPageHandler renders the debug dashboard page. +func (s *Server) debugPageHandler(w http.ResponseWriter, r *http.Request) { + data := s.baseData("debug", "Debug") + s.executeTemplate(w, r, "debug", data) +} + +// handleDebugAPI dispatches /api/debug/* routes. 
//
// The sub-path after "/api/debug/" and the HTTP method are matched together,
// so a known sub-path requested with a different method falls through to the
// default branch and gets a 404.
func (s *Server) handleDebugAPI(w http.ResponseWriter, r *http.Request) {
	subpath := strings.TrimPrefix(r.URL.Path, "/api/debug/")

	switch {
	// Section 1: Diagnostic dump
	case subpath == "dump" && r.Method == http.MethodGet:
		s.debugDump(w, r)

	// Section 2: Notification & Event testing
	case subpath == "event/test" && r.Method == http.MethodPost:
		s.debugTestEvent(w, r)
	case subpath == "event/history" && r.Method == http.MethodGet:
		s.debugEventHistory(w, r)

	// Section 3: Backup testing
	case subpath == "backup/dbdump" && r.Method == http.MethodPost:
		s.debugTriggerDBDump(w, r)
	case subpath == "backup/crossdrive" && r.Method == http.MethodPost:
		s.debugTriggerCrossDrive(w, r)
	case subpath == "backup/integrity" && r.Method == http.MethodPost:
		s.debugTriggerIntegrity(w, r)
	case subpath == "backup/infra" && r.Method == http.MethodPost:
		s.debugTriggerInfraBackup(w, r)

	// Section 4: Storage simulation
	case subpath == "storage/simulate-disconnect" && r.Method == http.MethodPost:
		s.debugSimulateDisconnect(w, r)
	case subpath == "storage/simulate-reconnect" && r.Method == http.MethodPost:
		s.debugSimulateReconnect(w, r)
	case subpath == "storage/watchdog-status" && r.Method == http.MethodGet:
		s.debugWatchdogStatus(w, r)

	// Section 5: Hub & connectivity
	case subpath == "hub/push" && r.Method == http.MethodPost:
		s.debugHubPush(w, r)
	case subpath == "hub/infra-push" && r.Method == http.MethodPost:
		s.debugHubInfraPush(w, r)
	case subpath == "hub/test-connectivity" && r.Method == http.MethodPost:
		s.debugHubConnectivity(w, r)
	case subpath == "hub/preferences-sync" && r.Method == http.MethodPost:
		s.debugPreferencesSync(w, r)
	case subpath == "gitea/test-connectivity" && r.Method == http.MethodPost:
		s.debugGiteaConnectivity(w, r)

	// Section 6: Self-update
	case subpath == "selfupdate/dry-run" && r.Method == http.MethodPost:
		s.debugSelfUpdateDryRun(w, r)

	// Section 7: DR / Setup
	case subpath == "dr/trigger-setup" && r.Method == http.MethodPost:
		s.debugTriggerSetupWizard(w, r)
	case subpath == "dr/infra-status" && r.Method == http.MethodGet:
		s.debugInfraBackupStatus(w, r)

	// Section 8: Log viewer
	case subpath == "logs" && r.Method == http.MethodGet:
		s.debugLogBuffer(w, r)

	default:
		http.NotFound(w, r)
	}
}

// writeDebugJSON writes a standard JSON response for debug endpoints.
//
// The envelope always carries "ok". A non-empty message is stored under
// "message" when ok is true and under "error" when ok is false. "data" is
// included only when data is non-nil. The encoder's error is not checked.
func writeDebugJSON(w http.ResponseWriter, status int, ok bool, message string, data interface{}) {
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)
	resp := map[string]interface{}{
		"ok": ok,
	}
	if message != "" {
		if ok {
			resp["message"] = message
		} else {
			resp["error"] = message
		}
	}
	if data != nil {
		resp["data"] = data
	}
	json.NewEncoder(w).Encode(resp)
}

// ── Section 1: Diagnostic dump ──────────────────────────────────────

// debugDump assembles a diagnostic snapshot (controller, storage, stacks,
// backup, hub, scheduler, health, notifications, self-update, alerts) and
// writes it as a bare JSON object — it does not use the writeDebugJSON
// envelope. Sections whose backing component is nil are either reported as
// {"enabled": false} (backup, self-update) or omitted (scheduler, alerts).
func (s *Server) debugDump(w http.ResponseWriter, r *http.Request) {
	dump := make(map[string]interface{})

	// Controller info
	configHash := ""
	configPath := s.cfg.Paths.DataDir // approximate; configPath isn't on Server
	// The config file is looked up as "controller.yaml" in the parent of the
	// data dir; if it cannot be read the hash simply stays empty.
	if data, err := os.ReadFile(filepath.Join(filepath.Dir(configPath), "controller.yaml")); err == nil {
		h := sha256.Sum256(data)
		configHash = hex.EncodeToString(h[:])
	}
	dump["controller"] = map[string]interface{}{
		"version":        s.version,
		"uptime_seconds": int(time.Since(s.startTime).Seconds()),
		"config_hash":    configHash,
		"logging_level":  s.cfg.Logging.Level,
		"pid":            os.Getpid(),
	}

	// Storage
	storagePaths := s.settings.GetStoragePaths()
	storageEntries := make([]map[string]interface{}, 0, len(storagePaths))
	for _, sp := range storagePaths {
		entry := map[string]interface{}{
			"path":           sp.Path,
			"label":          sp.Label,
			"disconnected":   sp.Disconnected,
			"decommissioned": sp.Decommissioned,
		}
		// Disk usage is only queried for paths that are neither disconnected
		// nor decommissioned.
		if !sp.Disconnected && !sp.Decommissioned {
			if di := system.GetDiskUsage(sp.Path); di != nil {
				entry["total_gb"] = di.TotalGB
				entry["used_gb"] = di.UsedGB
				entry["used_percent"] = di.UsedPercent
			}
		}
		storageEntries = append(storageEntries, entry)
	}
	dump["storage"] = storageEntries

	// Stacks
	allStacks := s.stackMgr.GetStacks()
	deployed := 0
	running := 0
	stopped := 0
	stackList := make([]map[string]interface{}, 0)
	for _, st := range allStacks {
		if !st.Deployed {
			continue
		}
		deployed++
		info := map[string]interface{}{
			"name":  st.Name,
			"state": string(st.State),
		}
		if st.Meta.DisplayName != "" {
			info["display_name"] = st.Meta.DisplayName
		}
		containerNames := make([]string, 0, len(st.Containers))
		// Note: "running" and "stopped" are incremented per container, while
		// "deployed" is incremented per stack.
		for _, c := range st.Containers {
			containerNames = append(containerNames, c.Name)
			switch c.State {
			case stacks.StateRunning, stacks.StateStarting, stacks.StateUnhealthy:
				running++
			default:
				stopped++
			}
		}
		info["containers"] = containerNames
		stackList = append(stackList, info)
	}
	dump["stacks"] = map[string]interface{}{
		"deployed": deployed,
		"running":  running,
		"stopped":  stopped,
		"list":     stackList,
	}

	// Backup
	if s.backupMgr != nil {
		backupInfo := map[string]interface{}{
			"enabled": true,
			"running": s.backupMgr.IsRunning(),
		}
		dbDump, backupSt := s.backupMgr.GetStatus()
		if dbDump != nil {
			backupInfo["last_db_dump"] = map[string]interface{}{
				"time":    dbDump.LastRun,
				"success": dbDump.Success,
			}
		}
		if backupSt != nil {
			backupInfo["last_backup"] = map[string]interface{}{
				"time":    backupSt.LastRun,
				"success": backupSt.Success,
			}
			if backupSt.RepoStats != nil {
				backupInfo["repo_size"] = backupSt.RepoStats.TotalSize
				backupInfo["snapshot_count"] = backupSt.RepoStats.SnapshotCount
			}
		}
		dump["backup"] = backupInfo
	} else {
		dump["backup"] = map[string]interface{}{"enabled": false}
	}

	// Hub
	hubInfo := map[string]interface{}{
		"url":     s.cfg.Hub.URL,
		"enabled": s.cfg.Hub.Enabled,
	}
	if s.hubPushStatusFn != nil {
		st := s.hubPushStatusFn()
		hubInfo["last_attempt"] = st.LastAttempt
		hubInfo["last_success"] = st.LastSuccess
		hubInfo["last_error"] = st.LastError
		hubInfo["consecutive_failures"] = st.Consecutive
	}
	dump["hub"] = hubInfo

	// Scheduler
	if s.scheduler != nil {
		jobs := s.scheduler.GetJobs()
		jobList := make([]map[string]interface{}, 0, len(jobs))
		for _, j := range jobs {
			entry := map[string]interface{}{
				"name":    j.Name,
				"running": j.Running,
			}
			// A positive Interval takes precedence over Schedule when both are set.
			if j.Interval > 0 {
				entry["type"] = "every"
				entry["interval"] = j.Interval.String()
			} else if j.Schedule != "" {
				entry["type"] = "daily"
				entry["schedule"] = j.Schedule
			}
			if !j.LastRun.IsZero() {
				entry["last_run"] = j.LastRun
			}
			if j.LastErr != nil {
				entry["last_error"] = j.LastErr.Error()
			}
			jobList = append(jobList, entry)
		}
		dump["scheduler"] = jobList
	}

	// Health
	healthReport := monitor.RunHealthCheck(s.cfg, s.cpuCollector, storagePaths, s.logger)
	dump["health"] = map[string]interface{}{
		"status":   healthReport.Status,
		"issues":   healthReport.Issues,
		"warnings": healthReport.Warnings,
	}

	// Notifications
	prefs := s.settings.GetNotificationPrefs()
	dump["notifications"] = map[string]interface{}{
		"email":          prefs.Email,
		"enabled_events": prefs.EnabledEvents,
		"cooldown_hours": prefs.CooldownHours,
	}

	// Self-update
	if s.updater != nil {
		status := s.updater.GetStatus()
		dump["self_update"] = map[string]interface{}{
			"enabled":    true,
			"auto":       s.cfg.SelfUpdate.AutoUpdate,
			"last_check": status.LastCheck,
		}
	} else {
		dump["self_update"] = map[string]interface{}{"enabled": false}
	}

	// Alerts
	if s.alertManager != nil {
		dump["alerts"] = s.alertManager.GetAlerts()
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(dump)
}

// ── Section 2: Notification & Event testing ─────────────────────────

// debugTestEvent sends a test event through the notifier and reports the Hub
// status code. The JSON body may set "event_type" and "severity"; empty values
// default to "test" and "info". A body that fails to decode is a 400. A send
// failure is reported with HTTP 200 and ok=false, carrying the Hub status in
// "data".
func (s *Server) debugTestEvent(w http.ResponseWriter, r *http.Request) {
	var req struct {
		EventType string `json:"event_type"`
		Severity  string `json:"severity"`
	}
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeDebugJSON(w, http.StatusBadRequest, false, "Érvénytelen kérés", nil)
		return
	}
	if req.EventType == "" {
		req.EventType = "test"
	}
	if req.Severity == "" {
		req.Severity = "info"
	}

	if s.notifier == nil {
		writeDebugJSON(w, http.StatusBadRequest, false, "Notifier nincs konfigurálva", nil)
		return
	}

	statusCode, err := s.notifier.PushTestEventSync(req.EventType, req.Severity,
		fmt.Sprintf("Teszt esemény: %s (%s)", req.EventType, req.Severity))
	if err != nil {
		writeDebugJSON(w, http.StatusOK, false, err.Error(), map[string]interface{}{
			"hub_status": statusCode,
		})
		return
	}
	writeDebugJSON(w, http.StatusOK, true, fmt.Sprintf("Esemény elküldve (HTTP %d)", statusCode),
		map[string]interface{}{"hub_status": statusCode})
}

// debugEventHistory returns up to 20 entries from the notifier's event
// history, or an empty list when no notifier is configured.
func (s *Server) debugEventHistory(w http.ResponseWriter, r *http.Request) {
	if s.notifier == nil {
		writeDebugJSON(w, http.StatusOK, true, "", []interface{}{})
		return
	}
	history := s.notifier.GetEventHistory(20)
	writeDebugJSON(w, http.StatusOK, true, "", history)
}

// ── Section 3: Backup testing ───────────────────────────────────────

// debugTriggerDBDump starts RunDBDumps in a background goroutine and responds
// immediately; a failure is only logged. The goroutine uses
// context.Background(), so it is not tied to the request's lifetime.
func (s *Server) debugTriggerDBDump(w http.ResponseWriter, r *http.Request) {
	if s.backupMgr == nil {
		writeDebugJSON(w, http.StatusBadRequest, false, "Backup manager nincs konfigurálva", nil)
		return
	}
	go func() {
		if err := s.backupMgr.RunDBDumps(context.Background()); err != nil {
			s.logger.Printf("[WARN] Debug DB dump failed: %v", err)
		}
	}()
	writeDebugJSON(w, http.StatusOK, true, "DB dump elindítva", nil)
}

// debugTriggerCrossDrive starts RunAllConfigured on the cross-drive runner in
// a background goroutine and responds immediately; a failure is only logged.
func (s *Server) debugTriggerCrossDrive(w http.ResponseWriter, r *http.Request) {
	if s.crossDriveRunner == nil {
		writeDebugJSON(w, http.StatusBadRequest, false, "Cross-drive runner nincs konfigurálva", nil)
		return
	}
	go func() {
		if err := s.crossDriveRunner.RunAllConfigured(context.Background()); err != nil {
			s.logger.Printf("[WARN] Debug cross-drive failed: %v", err)
		}
	}()
	writeDebugJSON(w, http.StatusOK, true, "Cross-drive mentés elindítva", nil)
}

// debugTriggerIntegrity starts RunIntegrityCheck in a background goroutine and
// responds immediately; a failure is only logged.
func (s *Server) debugTriggerIntegrity(w http.ResponseWriter, r *http.Request) {
	if s.backupMgr == nil {
		writeDebugJSON(w, http.StatusBadRequest, false, "Backup manager nincs konfigurálva", nil)
		return
	}
	go func() {
		if err := s.backupMgr.RunIntegrityCheck(context.Background()); err != nil {
			s.logger.Printf("[WARN] Debug integrity check failed: %v", err)
		}
	}()
	writeDebugJSON(w, http.StatusOK, true, "Integritás ellenőrzés elindítva", nil)
}

// debugTriggerInfraBackup runs the TriggerLocalInfraWrite callback in a
// background goroutine and responds immediately; a failure is only logged.
// Responds 501 when the callback is not wired.
func (s *Server) debugTriggerInfraBackup(w http.ResponseWriter, r *http.Request) {
	if s.debugCallbacks == nil || s.debugCallbacks.TriggerLocalInfraWrite == nil {
		writeDebugJSON(w, http.StatusNotImplemented, false, "Nem bekötött", nil)
		return
	}
	go func() {
		if err := s.debugCallbacks.TriggerLocalInfraWrite(); err != nil {
			s.logger.Printf("[WARN] Debug infra backup failed: %v", err)
		}
	}()
	writeDebugJSON(w, http.StatusOK, true, "Infra mentés elindítva", nil)
}

// ── Section 4: Storage simulation ───────────────────────────────────

// debugSimulateDisconnect asks the storage watchdog to simulate a disconnect
// of the path given in the JSON body ("path", required) and returns the list
// of stacks the watchdog reports as stopped. Runs synchronously with the
// request context; a watchdog error is returned as a 400.
func (s *Server) debugSimulateDisconnect(w http.ResponseWriter, r *http.Request) {
	var req struct {
		Path string `json:"path"`
	}
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil || req.Path == "" {
		writeDebugJSON(w, http.StatusBadRequest, false, "Érvénytelen kérés: path szükséges", nil)
		return
	}
	if s.storageWatchdog == nil {
		writeDebugJSON(w, http.StatusBadRequest, false, "Storage watchdog nincs konfigurálva", nil)
		return
	}
	stopped, err := s.storageWatchdog.SimulateDisconnect(r.Context(), req.Path)
	if err != nil {
		writeDebugJSON(w, http.StatusBadRequest, false, err.Error(), nil)
		return
	}
	writeDebugJSON(w, http.StatusOK, true,
		fmt.Sprintf("Leválasztás szimulálva: %s (%d app leállítva)", req.Path, len(stopped)),
		map[string]interface{}{"stopped_stacks": stopped})
}

// debugSimulateReconnect asks the storage watchdog to undo a simulated
// disconnect for the path given in the JSON body ("path", required).
// Runs synchronously with the request context; a watchdog error is a 400.
func (s *Server) debugSimulateReconnect(w http.ResponseWriter, r *http.Request) {
	var req struct {
		Path string `json:"path"`
	}
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil || req.Path == "" {
		writeDebugJSON(w, http.StatusBadRequest, false, "Érvénytelen kérés: path szükséges", nil)
		return
	}
	if s.storageWatchdog == nil {
		writeDebugJSON(w, http.StatusBadRequest, false, "Storage watchdog nincs konfigurálva", nil)
		return
	}
	if err := s.storageWatchdog.SimulateReconnect(r.Context(), req.Path); err != nil {
		writeDebugJSON(w, http.StatusBadRequest, false, err.Error(), nil)
		return
	}
	writeDebugJSON(w, http.StatusOK, true,
		fmt.Sprintf("Visszacsatlakozás szimulálva: %s", req.Path), nil)
}

// debugWatchdogStatus returns the watchdog's per-path debug status, or an
// empty list when no watchdog is configured.
func (s *Server) debugWatchdogStatus(w http.ResponseWriter, r *http.Request) {
	if s.storageWatchdog == nil {
		writeDebugJSON(w, http.StatusOK, true, "", []interface{}{})
		return
	}
	status := s.storageWatchdog.GetDebugStatus()
	writeDebugJSON(w, http.StatusOK, true, "", status)
}

// ── Section 5: Hub & connectivity ───────────────────────────────────

// debugHubPush runs the TriggerHubReportPush callback synchronously and
// reports how long the call took ("latency_ms"). A callback error is returned
// with HTTP 200 and ok=false. Responds 501 when the callback is not wired.
func (s *Server) debugHubPush(w http.ResponseWriter, r *http.Request) {
	if s.debugCallbacks == nil || s.debugCallbacks.TriggerHubReportPush == nil {
		writeDebugJSON(w, http.StatusNotImplemented, false, "Nem bekötött", nil)
		return
	}
	start := time.Now()
	err := s.debugCallbacks.TriggerHubReportPush()
	latency := time.Since(start).Milliseconds()
	if err != nil {
		writeDebugJSON(w, http.StatusOK, false, err.Error(), map[string]interface{}{"latency_ms": latency})
		return
	}
	writeDebugJSON(w, http.StatusOK, true, "Hub jelentés elküldve",
		map[string]interface{}{"latency_ms": latency})
}

// debugHubInfraPush runs the TriggerHubInfraPush callback synchronously and
// reports how long the call took ("latency_ms"). A callback error is returned
// with HTTP 200 and ok=false. Responds 501 when the callback is not wired.
func (s *Server) debugHubInfraPush(w http.ResponseWriter, r *http.Request) {
	if s.debugCallbacks == nil || s.debugCallbacks.TriggerHubInfraPush == nil {
		writeDebugJSON(w, http.StatusNotImplemented, false, "Nem bekötött", nil)
		return
	}
	start := time.Now()
	err := s.debugCallbacks.TriggerHubInfraPush()
	latency := time.Since(start).Milliseconds()
	if err != nil {
		writeDebugJSON(w, http.StatusOK, false, err.Error(), map[string]interface{}{"latency_ms": latency})
		return
	}
	writeDebugJSON(w, http.StatusOK, true, "Infra backup elküldve a Hubra",
		map[string]interface{}{"latency_ms": latency})
}

// debugHubConnectivity runs the HubConnectivityTest callback and returns the
// status code and latency it reports; both are included in "data" even when
// the callback returns an error. Responds 501 when the callback is not wired.
func (s *Server) debugHubConnectivity(w http.ResponseWriter, r *http.Request) {
	if s.debugCallbacks == nil || s.debugCallbacks.HubConnectivityTest == nil {
		writeDebugJSON(w, http.StatusNotImplemented, false, "Nem bekötött", nil)
		return
	}
	statusCode, latency, err := s.debugCallbacks.HubConnectivityTest()
	data := map[string]interface{}{
		"status_code": statusCode,
		"latency_ms":  latency,
	}
	if err != nil {
		writeDebugJSON(w, http.StatusOK, false, err.Error(), data)
		return
	}
	writeDebugJSON(w, http.StatusOK, true,
		fmt.Sprintf("Hub elérhető (HTTP %d, %dms)", statusCode, latency), data)
}

// debugPreferencesSync passes the stored notification preferences (email,
// enabled events, cooldown hours) to the notifier's SyncPreferences.
// A sync error is returned with HTTP 200 and ok=false.
func (s *Server) debugPreferencesSync(w http.ResponseWriter, r *http.Request) {
	if s.notifier == nil {
		writeDebugJSON(w, http.StatusBadRequest, false, "Notifier nincs konfigurálva", nil)
		return
	}
	prefs := s.settings.GetNotificationPrefs()
	if err := s.notifier.SyncPreferences(prefs.Email, prefs.EnabledEvents, prefs.CooldownHours); err != nil {
		writeDebugJSON(w, http.StatusOK, false, err.Error(), nil)
		return
	}
	writeDebugJSON(w, http.StatusOK, true, "Preferenciák szinkronizálva", nil)
}

// debugGiteaConnectivity runs the GiteaConnectivityTest callback and returns
// the status code and latency it reports; both are included in "data" even
// when the callback returns an error. Responds 501 when the callback is not
// wired.
func (s *Server) debugGiteaConnectivity(w http.ResponseWriter, r *http.Request) {
	if s.debugCallbacks == nil || s.debugCallbacks.GiteaConnectivityTest == nil {
		writeDebugJSON(w, http.StatusNotImplemented, false, "Nem bekötött", nil)
		return
	}
	statusCode, latency, err := s.debugCallbacks.GiteaConnectivityTest()
	data := map[string]interface{}{
		"status_code": statusCode,
		"latency_ms":  latency,
	}
	if err != nil {
		writeDebugJSON(w, http.StatusOK, false, err.Error(), data)
		return
	}
	writeDebugJSON(w, http.StatusOK, true,
		fmt.Sprintf("Gitea elérhető (HTTP %d, %dms)", statusCode, latency), data)
}

// ── Section 6: Self-update ──────────────────────────────────────────

// debugSelfUpdateDryRun returns the updater's DryRun result as "data".
// The envelope is always ok=true here, whatever the result contains.
func (s *Server) debugSelfUpdateDryRun(w http.ResponseWriter, r *http.Request) {
	if s.updater == nil {
		writeDebugJSON(w, http.StatusBadRequest, false, "Self-update nincs konfigurálva", nil)
		return
	}
	result := s.updater.DryRun()
	writeDebugJSON(w, http.StatusOK, true, "", result)
}

// ── Section 7: DR / Setup ───────────────────────────────────────────

// debugTriggerSetupWizard writes the ".needs-setup" marker into the data dir
// and then terminates the process. The JSON body must carry
// {"confirm": "RESET"}, and at least one connected drive must hold an infra
// backup directory; otherwise the request is rejected with a 400 and nothing
// is written.
func (s *Server) debugTriggerSetupWizard(w http.ResponseWriter, r *http.Request) {
	var req struct {
		Confirm string `json:"confirm"`
	}
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeDebugJSON(w, http.StatusBadRequest, false, "Érvénytelen kérés", nil)
		return
	}
	if req.Confirm != "RESET" {
		writeDebugJSON(w, http.StatusBadRequest, false, "Érvénytelen megerősítés — írja be: RESET", nil)
		return
	}

	// Pre-check: verify infra backup exists on at least one drive
	if !s.hasInfraBackupOnDrive() {
		writeDebugJSON(w, http.StatusBadRequest, false,
			"Nincs infra backup egyetlen meghajtón sem! Először készítsen infra backupot.", nil)
		return
	}

	// Write marker file
	markerPath := filepath.Join(s.cfg.Paths.DataDir, ".needs-setup")
	if err := os.WriteFile(markerPath, []byte("debug-triggered\n"), 0644); err != nil {
		writeDebugJSON(w, http.StatusInternalServerError, false,
			fmt.Sprintf("Marker fájl írási hiba: %v", err), nil)
		return
	}

	writeDebugJSON(w, http.StatusOK, true, "Controller újraindítása setup módba...", nil)

	// Exit after response is sent so the container restarts into setup mode
	// (the exit is delayed by 500ms and uses status 0).
	go func() {
		time.Sleep(500 * time.Millisecond)
		os.Exit(0)
	}()
}

// debugInfraBackupStatus lists, for every storage path that is neither
// decommissioned nor disconnected, whether its infra backup directory exists,
// that directory's modification time and the names of its entries. When a Hub
// push status function is set, its last attempt/success/error are added under
// "hub_infra_push".
func (s *Server) debugInfraBackupStatus(w http.ResponseWriter, r *http.Request) {
	storagePaths := s.settings.GetStoragePaths()
	drives := make([]map[string]interface{}, 0, len(storagePaths))

	for _, sp := range storagePaths {
		if sp.Decommissioned || sp.Disconnected {
			continue
		}
		driveInfo := map[string]interface{}{
			"path":       sp.Path,
			"label":      sp.Label,
			"has_backup": false,
		}

		infraDir := backup.InfraBackupDir(sp.Path)
		info, err := os.Stat(infraDir)
		if err == nil && info.IsDir() {
			driveInfo["has_backup"] = true
			driveInfo["last_modified"] = info.ModTime()

			// List files (a ReadDir error is ignored; the list is then
			// whatever entries were returned, possibly none)
			entries, _ := os.ReadDir(infraDir)
			files := make([]string, 0, len(entries))
			for _, e := range entries {
				files = append(files, e.Name())
			}
			driveInfo["files"] = files
		}

		drives = append(drives, driveInfo)
	}

	data := map[string]interface{}{
		"drives": drives,
	}
	if s.hubPushStatusFn != nil {
		st := s.hubPushStatusFn()
		data["hub_infra_push"] = map[string]interface{}{
			"last_attempt": st.LastAttempt,
			"last_success": st.LastSuccess,
			"last_error":   st.LastError,
		}
	}

	writeDebugJSON(w, http.StatusOK, true, "", data)
}

// hasInfraBackupOnDrive checks if any connected storage drive has an infra backup.
+func (s *Server) hasInfraBackupOnDrive() bool { + for _, sp := range s.settings.GetStoragePaths() { + if sp.Decommissioned || sp.Disconnected { + continue + } + infraDir := backup.InfraBackupDir(sp.Path) + if info, err := os.Stat(infraDir); err == nil && info.IsDir() { + return true + } + } + return false +} + +// ── Section 8: Log viewer ─────────────────────────────────────────── + +func (s *Server) debugLogBuffer(w http.ResponseWriter, r *http.Request) { + if s.logBuffer == nil { + writeDebugJSON(w, http.StatusOK, true, "", map[string]interface{}{ + "entries": []interface{}{}, + "total": 0, + }) + return + } + + level := r.URL.Query().Get("level") + if level == "" { + level = "DEBUG" + } + + limit := 200 + if v := r.URL.Query().Get("limit"); v != "" { + if n, err := strconv.Atoi(v); err == nil && n > 0 && n <= 1000 { + limit = n + } + } + + var after time.Time + if v := r.URL.Query().Get("after"); v != "" { + if t, err := time.Parse(time.RFC3339Nano, v); err == nil { + after = t + } + } + + entries, total := s.logBuffer.Entries(level, limit, after) + writeDebugJSON(w, http.StatusOK, true, "", map[string]interface{}{ + "entries": entries, + "total": total, + }) +} diff --git a/controller/internal/web/handlers.go b/controller/internal/web/handlers.go index 80076d2..3b50385 100644 --- a/controller/internal/web/handlers.go +++ b/controller/internal/web/handlers.go @@ -97,6 +97,7 @@ func (s *Server) baseData(page, title string) map[string]interface{} { "Domain": s.cfg.Customer.Domain, "Version": s.version, "AuthEnabled": s.authEnabled(), + "DebugMode": s.isDebug(), } if s.alertManager != nil { data["Alerts"] = s.alertManager.GetAlerts() diff --git a/controller/internal/web/logbuffer.go b/controller/internal/web/logbuffer.go new file mode 100644 index 0000000..7e18a7d --- /dev/null +++ b/controller/internal/web/logbuffer.go @@ -0,0 +1,186 @@ +package web + +import ( + "strings" + "sync" + "time" +) + +// LogEntry represents a single parsed log line. 
+type LogEntry struct { + Timestamp time.Time `json:"timestamp"` + Level string `json:"level"` // "DEBUG", "INFO", "WARN", "ERROR" + Message string `json:"message"` + Source string `json:"source"` // "file.go:123" if Lshortfile enabled +} + +// LogBuffer is a thread-safe ring buffer that captures log output. +// It implements io.Writer so it can be used with log.New(io.MultiWriter(...)). +type LogBuffer struct { + mu sync.RWMutex + entries []LogEntry + size int + pos int + full bool +} + +// NewLogBuffer creates a ring buffer that keeps the last `size` log entries. +func NewLogBuffer(size int) *LogBuffer { + return &LogBuffer{ + entries: make([]LogEntry, size), + size: size, + } +} + +// Write implements io.Writer. It parses Go's standard log output format. +// Handles two formats: +// - With Lshortfile: "2026/02/21 18:33:35 file.go:123: [LEVEL] message" +// - Without: "2026/02/21 18:33:35 [LEVEL] message" +func (lb *LogBuffer) Write(p []byte) (n int, err error) { + line := strings.TrimRight(string(p), "\n\r") + if line == "" { + return len(p), nil + } + + entry := parseLine(line) + + lb.mu.Lock() + lb.entries[lb.pos] = entry + lb.pos = (lb.pos + 1) % lb.size + if lb.pos == 0 && !lb.full { + lb.full = true + } + lb.mu.Unlock() + + return len(p), nil +} + +// Entries returns log entries filtered by minimum level, limited by count, +// and optionally filtered to entries after a given timestamp. +// Returns the matching entries and the total count in the buffer. 
+func (lb *LogBuffer) Entries(minLevel string, limit int, after time.Time) ([]LogEntry, int) { + lb.mu.RLock() + defer lb.mu.RUnlock() + + // Collect all entries in chronological order + total := lb.size + if !lb.full { + total = lb.pos + } + + if limit <= 0 || limit > 1000 { + limit = 200 + } + + levelOrder := levelPriority(minLevel) + + var result []LogEntry + start := 0 + if lb.full { + start = lb.pos + } + + for i := 0; i < total; i++ { + idx := (start + i) % lb.size + e := lb.entries[idx] + + // Filter by level + if levelPriority(e.Level) < levelOrder { + continue + } + // Filter by timestamp + if !after.IsZero() && !e.Timestamp.After(after) { + continue + } + result = append(result, e) + } + + // Apply limit (keep the most recent entries) + if len(result) > limit { + result = result[len(result)-limit:] + } + + return result, total +} + +// parseLine parses a single log line into a LogEntry. +func parseLine(line string) LogEntry { + entry := LogEntry{ + Level: "INFO", + Message: line, + } + + // Try to parse timestamp: "2006/01/02 15:04:05" + if len(line) >= 19 { + if t, err := time.Parse("2006/01/02 15:04:05", line[:19]); err == nil { + entry.Timestamp = t + rest := line[19:] + if len(rest) > 0 && rest[0] == ' ' { + rest = rest[1:] + } + // Check for source file (Lshortfile): "file.go:123: [LEVEL] ..." + if colonIdx := strings.Index(rest, ": "); colonIdx > 0 && colonIdx < 40 { + candidate := rest[:colonIdx] + // Source file pattern: contains ".go:" or ".go" before the colon + if strings.Contains(candidate, ".go:") || strings.HasSuffix(candidate, ".go") { + entry.Source = candidate + rest = rest[colonIdx+2:] + } + } + // Extract level tag: [DEBUG], [INFO], [WARN], [ERROR], [SYNC], [SCHED], etc. + entry.Level, entry.Message = extractLevel(rest) + } + } + + if entry.Timestamp.IsZero() { + entry.Timestamp = time.Now() + } + + return entry +} + +// extractLevel finds and removes a [LEVEL] tag from the beginning of a string. 
+func extractLevel(s string) (string, string) { + s = strings.TrimSpace(s) + if len(s) < 3 || s[0] != '[' { + return "INFO", s + } + end := strings.Index(s, "]") + if end < 0 || end > 20 { + return "INFO", s + } + tag := s[1:end] + msg := strings.TrimSpace(s[end+1:]) + + switch tag { + case "DEBUG": + return "DEBUG", msg + case "INFO": + return "INFO", msg + case "WARN": + return "WARN", msg + case "ERROR": + return "ERROR", msg + case "FATAL": + return "ERROR", msg + default: + // Tags like [SYNC], [SCHED], [STORAGE] etc. — treat as INFO, keep tag in message + return "INFO", s + } +} + +// levelPriority returns numeric priority for log levels. +func levelPriority(level string) int { + switch strings.ToUpper(level) { + case "DEBUG": + return 0 + case "INFO": + return 1 + case "WARN": + return 2 + case "ERROR": + return 3 + default: + return 0 + } +} diff --git a/controller/internal/web/server.go b/controller/internal/web/server.go index 114229e..9f016ed 100644 --- a/controller/internal/web/server.go +++ b/controller/internal/web/server.go @@ -65,6 +65,11 @@ type Server struct { // Asset syncer for Hub-managed assets (optional) assetsSyncer *assets.Syncer + + // Debug mode support + logBuffer *LogBuffer + debugCallbacks *DebugCallbacks + startTime time.Time } func NewServer(cfg *config.Config, stackMgr *stacks.Manager, cpuCollector *system.CPUCollector, backupMgr *backup.Manager, crossDrive *backup.CrossDriveRunner, sched *scheduler.Scheduler, sett *settings.Settings, alertMgr *AlertManager, notif *notify.Notifier, updater *selfupdate.Updater, logger *log.Logger, version string) *Server { @@ -143,6 +148,36 @@ func (s *Server) SetAssetsSyncer(as *assets.Syncer) { s.assetsSyncer = as } +// SetLogBuffer sets the in-memory log ring buffer for the debug log viewer. +func (s *Server) SetLogBuffer(lb *LogBuffer) { + s.logBuffer = lb +} + +// SetDebugCallbacks sets the callbacks for debug endpoints that need main.go wiring. 
+func (s *Server) SetDebugCallbacks(dc *DebugCallbacks) { + s.debugCallbacks = dc +} + +// SetStartTime records the controller start time for uptime calculation. +func (s *Server) SetStartTime(t time.Time) { + s.startTime = t +} + +// isDebug returns true if the controller is running in debug mode. +func (s *Server) isDebug() bool { + return s.cfg.Logging.Level == "debug" +} + +// ServeDebugAPI handles /api/debug/* routes (JSON API for debug operations). +// Called from the mux carve-out; debug mode check is done here. +func (s *Server) ServeDebugAPI(w http.ResponseWriter, r *http.Request) { + if !s.isDebug() { + http.NotFound(w, r) + return + } + s.handleDebugAPI(w, r) +} + // InRestoreMode returns true if the server is in DR restore mode. func (s *Server) InRestoreMode() bool { s.restoreMu.RLock() @@ -239,6 +274,12 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) { case strings.HasPrefix(path, "/apps/"): slug := strings.TrimPrefix(path, "/apps/") s.appDetailHandler(w, r, slug) + case path == "/debug": + if !s.isDebug() { + http.NotFound(w, r) + return + } + s.debugPageHandler(w, r) default: http.NotFound(w, r) } diff --git a/controller/internal/web/templates/debug.html b/controller/internal/web/templates/debug.html new file mode 100644 index 0000000..bb0803d --- /dev/null +++ b/controller/internal/web/templates/debug.html @@ -0,0 +1,620 @@ +{{define "debug"}} +{{template "layout_start" .}} + +