From be7803c0acff3c747943f95720c04dfce05dbad9 Mon Sep 17 00:00:00 2001 From: kisfenyo Date: Sat, 21 Feb 2026 18:32:26 +0100 Subject: [PATCH] =?UTF-8?q?v0.24.0=20=E2=80=94=20Pre-testing=20observabili?= =?UTF-8?q?ty:=20debug=20logging,=20diagnostic=20dump,=20startup=20self-te?= =?UTF-8?q?st?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add [DEBUG] logging across all modules (backup, storage, sync, selfupdate, monitor, notify, report, assets, setup) gated behind logging.level: "debug" - Add /api/debug/dump endpoint returning full controller state JSON (debug only) - Add startup self-test validating 9 subsystems (Docker, dirs, storage, hub, restic repos, metrics DB) with pass/warn/fail summary - New packages: internal/selftest, internal/util - Constructor/signature changes: debug bool params, logger params on RunHealthCheck and BuildReport, smart watchdog probe logging Co-Authored-By: Claude Opus 4.6 --- CHANGELOG.md | 46 ++++ controller/README.md | 15 +- controller/cmd/controller/main.go | 43 +-- controller/internal/api/router.go | 214 +++++++++++++++ controller/internal/assets/syncer.go | 19 +- controller/internal/backup/backup.go | 70 ++++- controller/internal/backup/crossdrive.go | 103 ++++++- controller/internal/backup/dbdump.go | 72 ++++- controller/internal/backup/local_infra.go | 12 +- .../internal/backup/local_infra_test.go | 6 +- controller/internal/backup/restore.go | 36 +++ controller/internal/monitor/healthcheck.go | 60 +++- controller/internal/monitor/watchdog.go | 54 ++++ controller/internal/notify/notifier.go | 22 +- controller/internal/report/builder.go | 23 +- controller/internal/report/pusher.go | 13 +- controller/internal/selftest/selftest.go | 256 ++++++++++++++++++ controller/internal/selfupdate/updater.go | 29 +- controller/internal/setup/handlers.go | 18 ++ controller/internal/setup/scanner.go | 12 +- controller/internal/storage/attach.go | 14 +- controller/internal/storage/attach_linux.go | 17 ++ controller/internal/storage/format.go | 13 +- controller/internal/storage/format_linux.go | 31 +++ controller/internal/storage/migrate.go | 28 +- controller/internal/storage/scan_linux.go | 47 +++- controller/internal/storage/scan_other.go | 7 +- controller/internal/sync/sync.go | 46 ++++ controller/internal/util/strings.go | 12 + controller/internal/web/storage_handlers.go | 10 +- 30 files changed, 1281 insertions(+), 67 deletions(-) create mode 100644 controller/internal/selftest/selftest.go create mode 100644 controller/internal/util/strings.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 0462fd8..40f9556 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,51 @@ ## Changelog +### v0.24.0 — Pre-Testing Observability (2026-02-21) + +**Three features for pre-testing diagnostics: verbose debug logging, diagnostic dump endpoint, and startup self-test.** + +#### Feature 1: Debug logging across all modules + +All `[DEBUG]` log lines are gated behind `logging.level: "debug"` — zero overhead at `info` level. + +- **New** `internal/util/strings.go`: shared `TruncateStr()` for safely truncating command output in logs. +- **Backup** (`backup.go`, `dbdump.go`, `crossdrive.go`, `restore.go`, `local_infra.go`): added `isDebug()` method and per-operation debug logging. DB dump logs container discovery, per-dump command details (passwords masked as `***`), validation results. Cross-drive logs source/dest paths, rsync results, auto-enable decisions. Restore logs step-by-step progress. +- **Storage** (`scan_linux.go`, `format_linux.go`, `attach_linux.go`, `migrate.go`): added `Logger`/`Debug` fields to request structs. Logs raw lsblk output (truncated), per-disk classification, pipeline steps for format/attach, rsync progress for migrate. Updated `*_other.go` stubs. +- **Sync** (`sync.go`): logs masked clone URLs, per-file hash comparison, post-sync hook triggers. +- **Self-update** (`updater.go`): logs registry API calls, tag parsing, version comparison, compose file edits. +- **Monitor** (`watchdog.go`): smart logging — periodic 60-probe summaries (~5 min), immediate log on unexpected failures, reconnect attempt details. (`healthcheck.go`): logs raw check values and per-check results. +- **Notify** (`notifier.go`): logs event push URL/type/response, preference sync details. +- **Report** (`pusher.go`, `builder.go`): logs payload sizes, section summaries, push responses. +- **Assets** (`syncer.go`): logs manifest fetch, per-file hash comparison, download/removal actions. +- **Setup** (`scanner.go`, `handlers.go`): logs drive scan details, hub recovery/config write operations. + +#### Feature 2: Diagnostic dump endpoint (`GET /api/debug/dump`) + +Returns a comprehensive JSON snapshot of all controller state. Only available when `logging.level: "debug"` — returns 404 otherwise. + +- Sections: `controller` (version, uptime, config hash, PID), `storage` (per-path usage), `stacks` (deployed/running/stopped counts + list), `backup` (status, repo stats), `hub` (push status, consecutive failures), `scheduler` (all jobs with last_run/running/errors), `health` (fresh check), `notifications`, `self_update`, `alerts`. +- API router expanded with `SetDebugDumpDeps()` setter for scheduler, hub pusher, alert manager, version, and start time. + +#### Feature 3: Startup self-test + +- **New** `internal/selftest/selftest.go`: runs 9 diagnostic checks on boot with 5s timeout each. +- Checks: Docker socket, stacks directory, data directory (write test), system data path (mount point), storage paths (connected vs disconnected), git catalog (.felhom.yml files), Hub connectivity (/healthz), restic repos, metrics DB. +- Results logged in a clear block: `[PASS]`/`[WARN]`/`[FAIL]` per check, summary at end. +- Self-test summary (pass/warn/fail counts) sent to Hub via `NotifyControllerStarted` details map. +- Never blocks startup — purely diagnostic. + +#### Constructor/signature changes + +- `notify.New()`: added `debug bool` param. `NotifyControllerStarted()`: added `details map[string]interface{}` param. +- `report.NewPusher()`: added `debug bool` param. `BuildReport()`: added `logger *log.Logger` param. +- `monitor.RunHealthCheck()`: added `logger *log.Logger` param (5 call sites in main.go). +- `selfupdate.NewUpdater()`: added `debug bool` param. +- `assets.New()`: added `debug bool` param. +- `backup.NewCrossDriveRunner()`: added `debug bool` param. `WriteLocalInfraBackup()`: added `debug bool` param. +- `backup.DiscoverDatabases()`, `DumpOne()`: added `debug bool` param. +- `storage.ScanDisks()`: added `logger, debug` params. `FormatRequest`, `AttachRequest`, `MigrateRequest`: added `Logger`/`Debug` fields. +- `setup.ScanDrivesForInfraBackups()`: added `debug bool` param. + ### v0.23.0 — CSRF Protection (2026-02-21) **CSRF (Cross-Site Request Forgery) protection on all browser-facing POST endpoints — controller and hub.** diff --git a/controller/README.md b/controller/README.md index 8ec8d63..67ebc42 100644 --- a/controller/README.md +++ b/controller/README.md @@ -4,7 +4,7 @@ A single, lightweight Go container that replaces Portainer + scattered systemd scripts with a unified, Hungarian-language web dashboard for managing Docker Compose stacks, backups, storage, monitoring, and notifications on customer hardware. -**Current version: v0.23.0** +**Current version: v0.24.0** --- @@ -100,7 +100,9 @@ A single, lightweight Go container that replaces Portainer + scattered systemd s | **Notify** | `internal/notify/` | Email notifications via hub relay, preference sync, per-event cooldowns | | **Report** | `internal/report/` | Hub report builder + HTTP pusher (system, stacks, backup, health) | | **Assets** | `internal/assets/` | Hub-managed asset syncer: downloads logos/screenshots with SHA-256 change detection | -| **API** | `internal/api/` | REST JSON endpoints | +| **SelfTest** | `internal/selftest/` | Startup self-test: 9 diagnostic checks (Docker, dirs, storage, hub, restic, metrics) | +| **Util** | `internal/util/` | Shared utilities: `TruncateStr` for debug log output truncation | +| **API** | `internal/api/` | REST JSON endpoints, diagnostic dump (`/api/debug/dump`) | | **Web** | `internal/web/` | Hungarian dashboard, auth, page handlers, template functions, alerts | --- @@ -1265,6 +1267,12 @@ Config endpoints accept session auth OR `Authorization: Bearer ` (s | POST | `/api/assets/sync` | Trigger on-demand asset sync from Hub (async) | | GET | `/api/assets/status` | Asset sync status (last sync, file count, total bytes) | +### Debug (debug mode only) + +| Method | Endpoint | Description | +|--------|----------|-------------| +| GET | `/api/debug/dump` | Full diagnostic JSON dump (controller state, storage, stacks, backup, hub, scheduler, health, alerts). Returns 404 when `logging.level` is not `"debug"`. | + Response format: `{"ok": true/false, "data": ..., "error": "...", "message": "..."}` --- @@ -1355,6 +1363,9 @@ See `docker-compose.yml` for the full volume configuration. - [ ] Docker volume backup (`/var/lib/docker/volumes:ro`) - [ ] Raspberry Pi testing (pi-customer-1) - [x] CSRF protection on POST endpoints (v0.23.0) +- [x] Verbose debug logging across all modules (v0.24.0) +- [x] Diagnostic dump endpoint `/api/debug/dump` (v0.24.0) +- [x] Startup self-test with 9 subsystem checks (v0.24.0) - [ ] Login rate limiting --- diff --git a/controller/cmd/controller/main.go b/controller/cmd/controller/main.go index f84f5ff..4333c50 100644 --- a/controller/cmd/controller/main.go +++ b/controller/cmd/controller/main.go @@ -25,6 +25,7 @@ import ( "gitea.dooplex.hu/admin/felhom-controller/internal/recovery" "gitea.dooplex.hu/admin/felhom-controller/internal/report" "gitea.dooplex.hu/admin/felhom-controller/internal/scheduler" + "gitea.dooplex.hu/admin/felhom-controller/internal/selftest" "gitea.dooplex.hu/admin/felhom-controller/internal/selfupdate" "gitea.dooplex.hu/admin/felhom-controller/internal/settings" "gitea.dooplex.hu/admin/felhom-controller/internal/setup" @@ -52,6 +53,8 @@ func main() { os.Exit(0) } + startTime := time.Now() + // --- Load configuration --- // Use LoadPermissive to tolerate minimal configs (e.g. only domain set by docker-setup.sh). // If even that fails (file missing/unreadable), fall back to defaults. @@ -164,7 +167,7 @@ func main() { } // --- Initialize cross-drive backup runner --- - crossDriveRunner := backup.NewCrossDriveRunner(sett, stackProv, cfg.Paths.SystemDataPath, cfg.Paths.StacksDir, logger) + crossDriveRunner := backup.NewCrossDriveRunner(sett, stackProv, cfg.Paths.SystemDataPath, cfg.Paths.StacksDir, logger, cfg.Logging.Level == "debug") // Wire cross-drive → backup manager for pre-backup DB dumps if backupMgr != nil { @@ -175,13 +178,13 @@ func main() { alertMgr := web.NewAlertManager(logger) // --- Initialize notifier --- - notifier := notify.New(cfg.Hub.URL, cfg.Hub.APIKey, cfg.Customer.ID, sett, logger) + notifier := notify.New(cfg.Hub.URL, cfg.Hub.APIKey, cfg.Customer.ID, sett, logger, cfg.Logging.Level == "debug") // --- Initialize self-updater --- var updater *selfupdate.Updater if cfg.SelfUpdate.Enabled { composePath := filepath.Join(filepath.Dir(cfg.Paths.DataDir), "docker-compose.yml") - updater = selfupdate.NewUpdater(&cfg.SelfUpdate, &cfg.Git, Version, cfg.Paths.DataDir, composePath, logger) + updater = selfupdate.NewUpdater(&cfg.SelfUpdate, &cfg.Git, Version, cfg.Paths.DataDir, composePath, logger, cfg.Logging.Level == "debug") updater.SetBackupRunningCheck(func() bool { return backupMgr != nil && backupMgr.IsRunning() }) @@ -216,7 +219,7 @@ func main() { healthInterval = 5 * time.Minute } sched.Every("system-health", healthInterval, func(ctx context.Context) error { - healthReport := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths()) + healthReport := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths(), logger) body := healthReport.FormatMessage() healthUUID := cfg.Monitoring.PingUUIDs.SystemHealth if healthReport.Status == "fail" { @@ -243,7 +246,7 @@ func main() { // --- Central hub pusher (declared early so backup closure can reference it) --- var hubPusher *report.Pusher if cfg.Hub.URL != "" && cfg.Hub.APIKey != "" { - hubPusher = report.NewPusher(&cfg.Hub, logger) + hubPusher = report.NewPusher(&cfg.Hub, logger, cfg.Logging.Level == "debug") // Wire hub verification: update settings when hub reports customer status hubPusher.OnPushResponse = func(resp *report.PushResponse) { if resp.CustomerBlocked { @@ -347,7 +350,7 @@ func main() { pushInterval = 15 * time.Minute } sched.Every("hub-report", pushInterval, func(ctx context.Context) error { - r := report.BuildReport(cfg, *configPath, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths()) + r := report.BuildReport(cfg, *configPath, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths(), logger) if err := hubPusher.Push(r); err != nil { return err } @@ -399,7 +402,7 @@ func main() { // --- Storage watchdog --- storageWatchdog := monitor.NewStorageWatchdog(sett, &watchdogStackAdapter{mgr: stackMgr}, notifier, cfg, logger) storageWatchdog.SetAlertRefresh(func() { - healthReport := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths()) + healthReport := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths(), logger) updateAvailable := false latestVersion := "" if updater != nil { @@ -413,7 +416,7 @@ func main() { }) if hubPusher != nil { storageWatchdog.SetHubReportPusher(func() { - r := report.BuildReport(cfg, *configPath, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths()) + r := report.BuildReport(cfg, *configPath, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths(), logger) hubPusher.Push(r) }) } @@ -430,7 +433,7 @@ func main() { var assetsSyncer *assets.Syncer if cfg.Hub.Enabled && cfg.Assets.SyncEnabled && cfg.Hub.URL != "" && cfg.Hub.APIKey != "" { assetsDir := filepath.Join(cfg.Paths.DataDir, "assets") - assetsSyncer = assets.New(cfg.Hub.URL, cfg.Hub.APIKey, assetsDir, "/usr/share/felhom/assets", logger) + assetsSyncer = assets.New(cfg.Hub.URL, cfg.Hub.APIKey, assetsDir, "/usr/share/felhom/assets", logger, cfg.Logging.Level == "debug") go func() { time.Sleep(10 * time.Second) if err := assetsSyncer.Sync(ctx); err != nil { @@ -443,6 +446,9 @@ func main() { logger.Printf("[INFO] Asset sync enabled (daily at %s from Hub)", cfg.Assets.SyncSchedule) } + // --- Startup self-test --- + selfTestResult := selftest.Run(cfg, sett, logger) + sched.Start(ctx) defer sched.Stop() @@ -467,14 +473,18 @@ func main() { time.Sleep(5 * time.Second) // Let all subsystems fully initialize // Push controller startup event to Hub - notifier.NotifyControllerStarted(Version) + notifier.NotifyControllerStarted(Version, map[string]interface{}{ + "selftest_pass": selfTestResult.Pass, + "selftest_warn": selfTestResult.Warn, + "selftest_fail": selfTestResult.Fail, + }) // Heartbeat ping pinger.Ping(cfg.Monitoring.PingUUIDs.Heartbeat, "startup") logger.Println("[INFO] Startup heartbeat ping sent") // System health ping - healthReport := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths()) + healthReport := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths(), logger) body := healthReport.FormatMessage() healthUUID := cfg.Monitoring.PingUUIDs.SystemHealth if healthReport.Status == "fail" { @@ -487,7 +497,7 @@ func main() { // Hub report if hubPusher != nil { if cfg.Hub.Enabled { - r := report.BuildReport(cfg, *configPath, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths()) + r := report.BuildReport(cfg, *configPath, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths(), logger) var pushErr error for attempt := 1; attempt <= 3; attempt++ { pushErr = hubPusher.Push(r) @@ -559,7 +569,7 @@ func main() { // Initial alert refresh (so alerts appear immediately, not after first 5min health check) go func() { - report := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths()) + report := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths(), logger) alertMgr.Refresh(report, cfg, backupMgr, false, "") }() @@ -573,6 +583,7 @@ func main() { if assetsSyncer != nil { apiRouter.SetAssetsSyncer(assetsSyncer) } + apiRouter.SetDebugDumpDeps(sched, hubPusher, alertMgr, Version, startTime) // --- Initialize web server --- webServer := web.NewServer(cfg, stackMgr, cpuCollector, backupMgr, crossDriveRunner, sched, sett, alertMgr, notifier, updater, logger, Version) @@ -603,7 +614,7 @@ func main() { driveMigrator.BackupTrigger = backupMgr } driveMigrator.AlertRefresh = func() { - healthReport := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths()) + healthReport := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths(), logger) updateAvailable := false latestVersion := "" if updater != nil { @@ -617,7 +628,7 @@ func main() { } if hubPusher != nil { driveMigrator.PushHubReport = func() { - r := report.BuildReport(cfg, *configPath, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths()) + r := report.BuildReport(cfg, *configPath, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths(), logger) hubPusher.Push(r) } driveMigrator.PushInfraBackup = func() { @@ -1025,7 +1036,7 @@ func writeLocalInfraBackup(cfg *config.Config, sett *settings.Settings, return } - backup.WriteLocalInfraBackup(data, cfg.Customer.ID, Version, ib.Timestamp, drives, logger) + backup.WriteLocalInfraBackup(data, cfg.Customer.ID, Version, ib.Timestamp, drives, logger, cfg.Logging.Level == "debug") } // discoverHDDPaths scans deployed apps' app.yaml for HDD_PATH env values. diff --git a/controller/internal/api/router.go b/controller/internal/api/router.go index 89449b4..6a07909 100644 --- a/controller/internal/api/router.go +++ b/controller/internal/api/router.go @@ -2,6 +2,8 @@ package api import ( "context" + "crypto/sha256" + "encoding/hex" "encoding/json" "fmt" "io" @@ -17,12 +19,16 @@ import ( "gitea.dooplex.hu/admin/felhom-controller/internal/backup" "gitea.dooplex.hu/admin/felhom-controller/internal/config" "gitea.dooplex.hu/admin/felhom-controller/internal/metrics" + "gitea.dooplex.hu/admin/felhom-controller/internal/monitor" "gitea.dooplex.hu/admin/felhom-controller/internal/notify" + "gitea.dooplex.hu/admin/felhom-controller/internal/report" + "gitea.dooplex.hu/admin/felhom-controller/internal/scheduler" "gitea.dooplex.hu/admin/felhom-controller/internal/selfupdate" "gitea.dooplex.hu/admin/felhom-controller/internal/settings" "gitea.dooplex.hu/admin/felhom-controller/internal/stacks" catalogsync "gitea.dooplex.hu/admin/felhom-controller/internal/sync" "gitea.dooplex.hu/admin/felhom-controller/internal/system" + "gitea.dooplex.hu/admin/felhom-controller/internal/web" ) // Router handles all /api/* requests. @@ -45,6 +51,13 @@ type Router struct { // Asset syncer for on-demand Hub asset sync assetsSyncer *assets.Syncer + + // Debug dump dependencies (set via setters) + scheduler *scheduler.Scheduler + hubPusher *report.Pusher + alertMgr *web.AlertManager + version string + startTime time.Time } // SetAssetsSyncer sets the Hub asset syncer for on-demand sync triggers. @@ -52,6 +65,15 @@ func (r *Router) SetAssetsSyncer(as *assets.Syncer) { r.assetsSyncer = as } +// SetDebugDumpDeps sets optional dependencies for the /api/debug/dump endpoint. +func (r *Router) SetDebugDumpDeps(sched *scheduler.Scheduler, pusher *report.Pusher, alertMgr *web.AlertManager, version string, startTime time.Time) { + r.scheduler = sched + r.hubPusher = pusher + r.alertMgr = alertMgr + r.version = version + r.startTime = startTime +} + func NewRouter(cfg *config.Config, configPath string, sett *settings.Settings, stackMgr *stacks.Manager, syncer *catalogsync.Syncer, cpuCollector *system.CPUCollector, backupMgr *backup.Manager, crossDrive *backup.CrossDriveRunner, metricsStore *metrics.MetricsStore, updater *selfupdate.Updater, notif *notify.Notifier, logger *log.Logger) *Router { return &Router{cfg: cfg, configPath: configPath, sett: sett, stackMgr: stackMgr, syncer: syncer, cpuCollector: cpuCollector, backupMgr: backupMgr, crossDriveRunner: crossDrive, metricsStore: metricsStore, updater: updater, notifier: notif, logger: logger} } @@ -214,6 +236,10 @@ func (r *Router) ServeHTTP(w http.ResponseWriter, req *http.Request) { case path == "/assets/status" && req.Method == http.MethodGet: r.assetSyncStatus(w, req) + // GET /api/debug/dump — diagnostic JSON dump (debug mode only) + case path == "/debug/dump" && req.Method == http.MethodGet: + r.debugDump(w, req) + default: writeJSON(w, http.StatusNotFound, apiResponse{OK: false, Error: "endpoint not found"}) } @@ -1046,6 +1072,194 @@ func (r *Router) assetSyncStatus(w http.ResponseWriter, _ *http.Request) { writeJSON(w, http.StatusOK, apiResponse{OK: true, Data: r.assetsSyncer.Status()}) } +// --- Debug dump handler --- + +func (r *Router) debugDump(w http.ResponseWriter, req *http.Request) { + if r.cfg.Logging.Level != "debug" { + writeJSON(w, http.StatusNotFound, apiResponse{OK: false, Error: "endpoint not found"}) + return + } + + dump := make(map[string]interface{}) + + // Controller info + configHash := "" + if data, err := os.ReadFile(r.configPath); err == nil { + h := sha256.Sum256(data) + configHash = hex.EncodeToString(h[:]) + } + dump["controller"] = map[string]interface{}{ + "version": r.version, + "uptime_seconds": int(time.Since(r.startTime).Seconds()), + "config_hash": configHash, + "logging_level": r.cfg.Logging.Level, + "pid": os.Getpid(), + } + + // Storage + storagePaths := r.sett.GetStoragePaths() + storageEntries := make([]map[string]interface{}, 0, len(storagePaths)) + for _, sp := range storagePaths { + entry := map[string]interface{}{ + "path": sp.Path, + "label": sp.Label, + "disconnected": sp.Disconnected, + "decommissioned": sp.Decommissioned, + } + if !sp.Disconnected && !sp.Decommissioned { + if di := system.GetDiskUsage(sp.Path); di != nil { + entry["total_gb"] = di.TotalGB + entry["used_gb"] = di.UsedGB + entry["used_percent"] = di.UsedPercent + } + } + storageEntries = append(storageEntries, entry) + } + dump["storage"] = storageEntries + + // Stacks + allStacks := r.stackMgr.GetStacks() + deployed := 0 + running := 0 + stopped := 0 + stackList := make([]map[string]interface{}, 0) + for _, s := range allStacks { + if !s.Deployed { + continue + } + deployed++ + info := map[string]interface{}{ + "name": s.Name, + "state": string(s.State), + } + if s.Meta.DisplayName != "" { + info["display_name"] = s.Meta.DisplayName + } + containerNames := make([]string, 0, len(s.Containers)) + for _, c := range s.Containers { + containerNames = append(containerNames, c.Name) + switch c.State { + case stacks.StateRunning, stacks.StateStarting, stacks.StateUnhealthy: + running++ + default: + stopped++ + } + } + info["containers"] = containerNames + stackList = append(stackList, info) + } + dump["stacks"] = map[string]interface{}{ + "deployed": deployed, + "running": running, + "stopped": stopped, + "list": stackList, + } + + // Backup + if r.backupMgr != nil { + backupInfo := map[string]interface{}{ + "enabled": true, + "running": r.backupMgr.IsRunning(), + } + dbDump, backupSt := r.backupMgr.GetStatus() + if dbDump != nil { + backupInfo["last_db_dump"] = map[string]interface{}{ + "time": dbDump.LastRun, + "success": dbDump.Success, + } + } + if backupSt != nil { + backupInfo["last_backup"] = map[string]interface{}{ + "time": backupSt.LastRun, + "success": backupSt.Success, + } + if backupSt.RepoStats != nil { + backupInfo["repo_size"] = backupSt.RepoStats.TotalSize + backupInfo["snapshot_count"] = backupSt.RepoStats.SnapshotCount + } + } + dump["backup"] = backupInfo + } else { + dump["backup"] = map[string]interface{}{"enabled": false} + } + + // Hub + hubInfo := map[string]interface{}{ + "url": r.cfg.Hub.URL, + "enabled": r.cfg.Hub.Enabled, + } + if r.hubPusher != nil { + s := r.hubPusher.GetStatus() + hubInfo["last_attempt"] = s.LastAttempt + hubInfo["last_success"] = s.LastSuccess + hubInfo["last_error"] = s.LastError + hubInfo["consecutive_failures"] = s.Consecutive + } + dump["hub"] = hubInfo + + // Scheduler + if r.scheduler != nil { + jobs := r.scheduler.GetJobs() + jobList := make([]map[string]interface{}, 0, len(jobs)) + for _, j := range jobs { + entry := map[string]interface{}{ + "name": j.Name, + "running": j.Running, + } + if j.Interval > 0 { + entry["type"] = "every" + entry["interval"] = j.Interval.String() + } else if j.Schedule != "" { + entry["type"] = "daily" + entry["schedule"] = j.Schedule + } + if !j.LastRun.IsZero() { + entry["last_run"] = j.LastRun + } + if j.LastErr != nil { + entry["last_error"] = j.LastErr.Error() + } + jobList = append(jobList, entry) + } + dump["scheduler"] = jobList + } + + // Health (fresh check) + healthReport := monitor.RunHealthCheck(r.cfg, r.cpuCollector, storagePaths, r.logger) + dump["health"] = map[string]interface{}{ + "status": healthReport.Status, + "issues": healthReport.Issues, + "warnings": healthReport.Warnings, + } + + // Notifications + prefs := r.sett.GetNotificationPrefs() + dump["notifications"] = map[string]interface{}{ + "email": prefs.Email, + "enabled_events": prefs.EnabledEvents, + "cooldown_hours": prefs.CooldownHours, + } + + // Self-update + if r.updater != nil { + status := r.updater.GetStatus() + dump["self_update"] = map[string]interface{}{ + "enabled": true, + "auto": r.cfg.SelfUpdate.AutoUpdate, + "last_check": status.LastCheck, + } + } else { + dump["self_update"] = map[string]interface{}{"enabled": false} + } + + // Alerts + if r.alertMgr != nil { + dump["alerts"] = r.alertMgr.GetAlerts() + } + + writeJSON(w, http.StatusOK, dump) +} + func writeJSON(w http.ResponseWriter, status int, v interface{}) { w.Header().Set("Content-Type", "application/json") w.WriteHeader(status) diff --git a/controller/internal/assets/syncer.go b/controller/internal/assets/syncer.go index f038f38..b5f20b8 100644 --- a/controller/internal/assets/syncer.go +++ b/controller/internal/assets/syncer.go @@ -47,12 +47,13 @@ type Syncer struct { fallbackDir string // /usr/share/felhom/assets — baked-in fallback httpClient *http.Client logger *log.Logger + debug bool mu sync.Mutex status SyncStatus } // New creates a Syncer that downloads assets from the Hub. -func New(hubURL, apiKey, assetsDir, fallbackDir string, logger *log.Logger) *Syncer { +func New(hubURL, apiKey, assetsDir, fallbackDir string, logger *log.Logger, debug bool) *Syncer { return &Syncer{ hubURL: strings.TrimSuffix(hubURL, "/"), apiKey: apiKey, @@ -60,6 +61,7 @@ func New(hubURL, apiKey, assetsDir, fallbackDir string, logger *log.Logger) *Syn fallbackDir: fallbackDir, httpClient: &http.Client{Timeout: 60 * time.Second}, logger: logger, + debug: debug, status: SyncStatus{LastStatus: "never"}, } } @@ -78,11 +80,17 @@ func (s *Syncer) Sync(ctx context.Context) error { } // 1. Fetch Hub manifest + if s.debug { + s.logger.Printf("[DEBUG] Asset sync: fetching manifest from %s/api/v1/assets/manifest", s.hubURL) + } manifest, err := s.fetchManifest(ctx) if err != nil { s.setError(fmt.Errorf("fetch manifest: %w", err)) return err } + if s.debug { + s.logger.Printf("[DEBUG] Asset sync: manifest has %d files", len(manifest.Files)) + } // 2. Build local hash map localHashes, err := s.buildLocalHashes() @@ -90,6 +98,9 @@ func (s *Syncer) Sync(ctx context.Context) error { s.setError(fmt.Errorf("scan local assets: %w", err)) return err } + if s.debug { + s.logger.Printf("[DEBUG] Asset sync: %d local files found", len(localHashes)) + } // 3. Download changed/new files hubFiles := make(map[string]bool, len(manifest.Files)) @@ -105,6 +116,9 @@ func (s *Syncer) Sync(ctx context.Context) error { continue } + if s.debug { + s.logger.Printf("[DEBUG] Asset sync: downloading %s (remote sha256=%s)", entry.Filename, entry.SHA256[:12]+"...") + } if err := s.downloadFile(ctx, entry.Filename); err != nil { s.logger.Printf("[WARN] Failed to download asset %s: %v", entry.Filename, err) continue @@ -117,6 +131,9 @@ func (s *Syncer) Sync(ctx context.Context) error { for name := range localHashes { if !hubFiles[name] { path := filepath.Join(s.assetsDir, name) + if s.debug { + s.logger.Printf("[DEBUG] Asset sync: removing stale file %s", name) + } if err := os.Remove(path); err != nil { s.logger.Printf("[WARN] Failed to remove stale asset %s: %v", name, err) } else { diff --git a/controller/internal/backup/backup.go b/controller/internal/backup/backup.go index 13cf115..739ba82 100644 --- a/controller/internal/backup/backup.go +++ b/controller/internal/backup/backup.go @@ -190,6 +190,15 @@ func (m *Manager) groupStacksByDrive() map[string][]StackSummary { drive := m.GetAppDrivePath(stack.Name) result[drive] = append(result[drive], stack) } + if m.isDebug() { + for drive, stacks := range result { + names := make([]string, len(stacks)) + for i, s := range stacks { + names[i] = s.Name + } + m.logger.Printf("[DEBUG] groupStacksByDrive: %s → [%s]", drive, strings.Join(names, ", ")) + } + } return result } @@ -197,10 +206,18 @@ func (m *Manager) groupStacksByDrive() map[string][]StackSummary { func (m *Manager) activeDrives() []string { groups := m.groupStacksByDrive() var drives []string + var disconnected []string for d := range groups { + if m.settings != nil && (m.settings.IsDisconnected(d) || m.settings.IsDecommissioned(d)) { + disconnected = append(disconnected, d) + } drives = append(drives, d) } sort.Strings(drives) + if m.isDebug() { + m.logger.Printf("[DEBUG] activeDrives: %d total (%s), %d disconnected/decommissioned", + len(drives), strings.Join(drives, ", "), len(disconnected)) + } return drives } @@ -218,7 +235,7 @@ func (m *Manager) runDBDumpsInternal(ctx context.Context) error { start := time.Now() m.logger.Printf("[INFO] Starting database dump run") - dbs, err := DiscoverDatabases(ctx, m.logger) + dbs, err := DiscoverDatabases(ctx, m.logger, m.isDebug()) if err != nil { m.logger.Printf("[ERROR] Database discovery failed: %v", err) return err @@ -261,7 +278,7 @@ func (m *Manager) runDBDumpsInternal(ctx context.Context) error { dumpDir := AppDBDumpPath(drivePath, db.StackName) - result := DumpOne(ctx, db, dumpDir, m.logger) + result := DumpOne(ctx, db, dumpDir, m.logger, m.isDebug()) results = append(results, result) if result.Error != nil { @@ -354,6 +371,9 @@ func (m *Manager) runBackupInternal(ctx context.Context) error { driveCount := 0 for drivePath, stacks := range driveStacks { + if m.isDebug() { + m.logger.Printf("[DEBUG] runBackupInternal: processing drive %s (%d stacks)", drivePath, len(stacks)) + } result, err := m.backupDrive(ctx, drivePath, stacks, infraPaths) if err != nil { anyErr = err @@ -473,6 +493,13 @@ func (m *Manager) backupDrive(ctx context.Context, drivePath string, stacks []St // Deduplicate paths paths = dedup(paths) + if m.isDebug() { + m.logger.Printf("[DEBUG] backupDrive %s: repo=%s, %d include paths:", drivePath, repoPath, len(paths)) + for _, p := range paths { + m.logger.Printf("[DEBUG] %s", p) + } + } + tags := []string{"felhom", m.cfg.Customer.ID, filepath.Base(drivePath)} m.logger.Printf("[INFO] Backing up drive %s (%d apps, %d paths)", drivePath, len(stacks), len(paths)) @@ -549,15 +576,27 @@ func (m *Manager) RunIntegrityCheck(ctx context.Context) error { return nil } + if m.isDebug() { + m.logger.Printf("[DEBUG] RunIntegrityCheck: checking %d drives", len(drives)) + } + var checkErr error for _, drive := range drives { repoPath := PrimaryResticRepoPath(drive) if !m.restic.RepoExists(repoPath) { + if m.isDebug() { + m.logger.Printf("[DEBUG] RunIntegrityCheck: skipping %s (repo does not exist)", repoPath) + } continue } + if m.isDebug() { + m.logger.Printf("[DEBUG] RunIntegrityCheck: checking repo %s", repoPath) + } if err := m.restic.Check(repoPath); err != nil { m.logger.Printf("[ERROR] Restic check failed for %s: %v", repoPath, err) checkErr = err + } else if m.isDebug() { + m.logger.Printf("[DEBUG] RunIntegrityCheck: repo %s OK", repoPath) } } @@ -587,12 +626,28 @@ func (m *Manager) RunFullBackup(ctx context.Context) error { } defer m.releaseRunning() + if m.isDebug() { + drives := m.activeDrives() + driveStacks := m.groupStacksByDrive() + totalStacks := 0 + for _, s := range driveStacks { + totalStacks += len(s) + } + m.logger.Printf("[DEBUG] RunFullBackup: starting full backup — %d active drives, %d stacks", len(drives), totalStacks) + } + // Step 1: DB dumps + if m.isDebug() { + m.logger.Printf("[DEBUG] RunFullBackup: phase 1 — database dumps") + } if err := m.runDBDumpsInternal(ctx); err != nil { m.logger.Printf("[WARN] DB dump had errors, continuing with backup anyway") } // Step 2: Restic backup + if m.isDebug() { + m.logger.Printf("[DEBUG] RunFullBackup: phase 2 — restic snapshots") + } return m.runBackupInternal(ctx) } @@ -737,7 +792,7 @@ func (m *Manager) GetStackHDDMounts(name string) []string { // DumpStackDB runs a database dump for containers belonging to a specific stack. // Dumps to the stack's home drive: /backups/primary//db-dumps/. func (m *Manager) DumpStackDB(ctx context.Context, stackName string) error { - dbs, err := DiscoverDatabases(ctx, m.logger) + dbs, err := DiscoverDatabases(ctx, m.logger, m.isDebug()) if err != nil { return fmt.Errorf("database discovery failed: %w", err) } @@ -762,7 +817,7 @@ func (m *Manager) DumpStackDB(ctx context.Context, stackName string) error { m.logger.Printf("[INFO] Running pre-backup DB dump for %s (%d database(s)) → %s", stackName, len(stackDBs), dumpDir) for _, db := range stackDBs { - result := DumpOne(ctx, db, dumpDir, m.logger) + result := DumpOne(ctx, db, dumpDir, m.logger, m.isDebug()) if result.Error != nil { return fmt.Errorf("DB dump failed for %s: %w", result.DB.ContainerName, result.Error) } @@ -1019,7 +1074,7 @@ func (m *Manager) RefreshCache(nextDBDump, nextBackup time.Time) { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() - if dbs, err := DiscoverDatabases(ctx, m.logger); err == nil { + if dbs, err := DiscoverDatabases(ctx, m.logger, m.isDebug()); err == nil { status.DiscoveredDBs = dbs } @@ -1172,6 +1227,11 @@ func (m *Manager) GetFullStatus(nextDBDump, nextBackup time.Time) *FullBackupSta } } +// isDebug returns true if logging level is "debug". +func (m *Manager) isDebug() bool { + return m.cfg.Logging.Level == "debug" +} + func dbNames(dbs []DiscoveredDB) string { var names []string for _, db := range dbs { diff --git a/controller/internal/backup/crossdrive.go b/controller/internal/backup/crossdrive.go index ecf79b1..81873ed 100644 --- a/controller/internal/backup/crossdrive.go +++ b/controller/internal/backup/crossdrive.go @@ -13,6 +13,7 @@ import ( "gitea.dooplex.hu/admin/felhom-controller/internal/settings" "gitea.dooplex.hu/admin/felhom-controller/internal/system" + "gitea.dooplex.hu/admin/felhom-controller/internal/util" ) // DBDumper can run a database dump for a specific stack. @@ -29,12 +30,13 @@ type CrossDriveRunner struct { stacksDir string // path to stacks dir (for infra backup) controllerYAMLPath string // path to controller.yaml (for infra backup) logger *log.Logger + debug bool mu sync.Mutex running map[string]bool // per-app running state } // NewCrossDriveRunner creates a new CrossDriveRunner. -func NewCrossDriveRunner(sett *settings.Settings, provider StackDataProvider, systemDataPath, stacksDir string, logger *log.Logger) *CrossDriveRunner { +func NewCrossDriveRunner(sett *settings.Settings, provider StackDataProvider, systemDataPath, stacksDir string, logger *log.Logger, debug bool) *CrossDriveRunner { return &CrossDriveRunner{ sett: sett, stackProvider: provider, @@ -42,6 +44,7 @@ func NewCrossDriveRunner(sett *settings.Settings, provider StackDataProvider, sy stacksDir: stacksDir, controllerYAMLPath: "/opt/docker/felhom-controller/controller.yaml", logger: logger, + debug: debug, running: make(map[string]bool), } } @@ -67,6 +70,11 @@ func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) e return fmt.Errorf("cross-drive backup not configured or disabled for %s", stackName) } + if r.debug { + r.logger.Printf("[DEBUG] RunAppBackup: starting for %s, dest=%s, schedule=%s, method=%s", + stackName, cfg.DestinationPath, cfg.Schedule, cfg.Method) + } + // Prevent concurrent runs for the same app r.mu.Lock() if r.running[stackName] { @@ -84,12 +92,18 @@ func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) e // Check if source or destination drive is disconnected srcDrive := r.stackProvider.GetStackHDDPath(stackName) if srcDrive != "" && r.sett.IsDisconnected(srcDrive) { + if r.debug { + r.logger.Printf("[DEBUG] RunAppBackup: source drive disconnected for %s: %s", stackName, srcDrive) + } r.mu.Lock() r.running[stackName] = false r.mu.Unlock() return fmt.Errorf("source drive disconnected: %s", srcDrive) } if r.sett.IsDisconnected(cfg.DestinationPath) { + if r.debug { + r.logger.Printf("[DEBUG] RunAppBackup: destination drive disconnected for %s: %s", stackName, cfg.DestinationPath) + } r.mu.Lock() r.running[stackName] = false r.mu.Unlock() @@ -107,6 +121,9 @@ func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) e // Trigger fresh DB dump for this app before cross-drive backup if r.dbDumper != nil { + if r.debug { + r.logger.Printf("[DEBUG] RunAppBackup: triggering pre-backup DB dump for %s", stackName) + } if err := r.dbDumper.DumpStackDB(ctx, stackName); err != nil { r.logger.Printf("[WARN] Pre-backup DB dump failed for %s: %v — proceeding with user data backup", stackName, err) // Non-fatal: user data backup is still valuable without fresh dump @@ -120,6 +137,9 @@ func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) e // Resolve HDD mounts for this app (may be empty for config-only apps) mounts := r.stackProvider.GetStackHDDMounts(stackName) + if r.debug { + r.logger.Printf("[DEBUG] RunAppBackup: %s has %d HDD mount(s): %v", stackName, len(mounts), mounts) + } // Safety: destination must not overlap with any source for _, m := range mounts { @@ -145,6 +165,9 @@ func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) e destDir := AppSecondaryRsyncPath(cfg.DestinationPath, stackName) if sz, err := dirSizeBytes(destDir); err == nil { sizeHuman = humanizeBytes(sz) + if r.debug { + r.logger.Printf("[DEBUG] RunAppBackup: %s backup size at destination: %s", stackName, sizeHuman) + } } r.logger.Printf("[INFO] Cross-drive backup completed: %s (%s)", stackName, duration.Round(time.Second)) @@ -155,6 +178,10 @@ func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) e // RunAllScheduled runs cross-drive backups for all apps matching the schedule. // Runs sequentially (disk I/O bound). func (r *CrossDriveRunner) RunAllScheduled(ctx context.Context, schedule string) error { + if r.debug { + r.logger.Printf("[DEBUG] RunAllScheduled: starting for schedule=%s", schedule) + } + // Auto-enable Tier 2 for small apps (no HDD mounts) before running backups r.AutoEnableSmallApps() @@ -163,18 +190,39 @@ func (r *CrossDriveRunner) RunAllScheduled(ctx context.Context, schedule string) configs := r.sett.GetAllCrossDriveConfigs() if len(configs) == 0 { + if r.debug { + r.logger.Printf("[DEBUG] RunAllScheduled: no cross-drive configs found") + } return nil } + if r.debug { + r.logger.Printf("[DEBUG] RunAllScheduled: %d total cross-drive config(s) found", len(configs)) + } + var errs []string + var scheduled, skippedDisabled, skippedWrongSchedule int for stackName, cfg := range configs { if !cfg.Enabled { + if r.debug { + r.logger.Printf("[DEBUG] RunAllScheduled: skipping %s — disabled", stackName) + } + skippedDisabled++ continue } if cfg.Schedule != schedule { + if r.debug { + r.logger.Printf("[DEBUG] RunAllScheduled: skipping %s — schedule mismatch (has=%s, want=%s)", stackName, cfg.Schedule, schedule) + } + skippedWrongSchedule++ continue } + if r.debug { + r.logger.Printf("[DEBUG] RunAllScheduled: queuing %s for backup (dest=%s)", stackName, cfg.DestinationPath) + } + scheduled++ + select { case <-ctx.Done(): return ctx.Err() @@ -186,6 +234,11 @@ func (r *CrossDriveRunner) RunAllScheduled(ctx context.Context, schedule string) } } + if r.debug { + r.logger.Printf("[DEBUG] RunAllScheduled: done — %d scheduled, %d disabled, %d wrong schedule, %d errors", + scheduled, skippedDisabled, skippedWrongSchedule, len(errs)) + } + if len(errs) > 0 { return fmt.Errorf("cross-drive backup errors: %s", strings.Join(errs, "; ")) } @@ -216,6 +269,9 @@ func (r *CrossDriveRunner) AnyRunning() bool { // (≥10 GB free, <90% used) to protect OS stability; external drives just need // ≥100 MB. Non-mount-point destinations are allowed with a logged warning. func (r *CrossDriveRunner) ValidateDestination(path string) error { + if r.debug { + r.logger.Printf("[DEBUG] ValidateDestination: checking path=%s", path) + } if path == "" { return fmt.Errorf("destination path is empty") } @@ -226,6 +282,9 @@ func (r *CrossDriveRunner) ValidateDestination(path string) error { return fmt.Errorf("destination %s does not exist", path) } onSystemDrive := !system.IsMountPoint(path) + if r.debug { + r.logger.Printf("[DEBUG] ValidateDestination: path=%s, isMountPoint=%v", path, !onSystemDrive) + } if onSystemDrive { r.logger.Printf("[WARN] Destination %s is not a separate mount point (system drive) — backup will proceed but data is not protected against drive failure", path) } @@ -237,6 +296,10 @@ func (r *CrossDriveRunner) ValidateDestination(path string) error { r.logger.Printf("[WARN] Cannot determine disk usage for %s — proceeding without space verification", path) return nil } + if r.debug { + r.logger.Printf("[DEBUG] ValidateDestination: path=%s, availGB=%.1f, usedPct=%.0f%%, onSystemDrive=%v", + path, di.AvailGB, di.UsedPercent, onSystemDrive) + } if onSystemDrive { // System drive: protect OS stability — require ≥10 GB free and <90% used if di.AvailGB < 10 { @@ -251,6 +314,9 @@ func (r *CrossDriveRunner) ValidateDestination(path string) error { return fmt.Errorf("destination %s has insufficient free space (%.1f GB free)", path, di.AvailGB) } } + if r.debug { + r.logger.Printf("[DEBUG] ValidateDestination: path=%s passed all checks", path) + } return nil } @@ -258,6 +324,9 @@ func (r *CrossDriveRunner) ValidateDestination(path string) error { func (r *CrossDriveRunner) runRsyncBackup(ctx context.Context, stackName, destBase string, mounts []string) error { destDir := AppSecondaryRsyncPath(destBase, stackName) + if r.debug { + r.logger.Printf("[DEBUG] runRsyncBackup: stack=%s, destBase=%s, destDir=%s, %d mount(s)", stackName, destBase, destDir, len(mounts)) + } if err := os.MkdirAll(destDir, 0755); err != nil { return fmt.Errorf("creating rsync dest dir: %w", err) } @@ -296,9 +365,16 @@ func (r *CrossDriveRunner) runRsyncBackup(ctx context.Context, stackName, destBa "--exclude", "backups/*.dump", src, dst) r.logger.Printf("[DEBUG] rsync: %s → %s", src, dst) - if out, err := cmd.CombinedOutput(); err != nil { + out, err := cmd.CombinedOutput() + if err != nil { + if r.debug { + r.logger.Printf("[DEBUG] runRsyncBackup: rsync failed for %s: %s", srcMount, util.TruncateStr(strings.TrimSpace(string(out)), 500)) + } return fmt.Errorf("rsync failed for %s: %v (%s)", srcMount, err, strings.TrimSpace(string(out))) } + if r.debug { + r.logger.Printf("[DEBUG] runRsyncBackup: rsync OK for mount %s → %s", src, dst) + } } // --- Copy DB dumps for this stack from its home drive --- @@ -423,20 +499,35 @@ func (r *CrossDriveRunner) syncInfraConfig(ctx context.Context) { func (r *CrossDriveRunner) AutoEnableSmallApps() { storagePaths := r.sett.GetStoragePaths() if len(storagePaths) < 2 { + if r.debug { + r.logger.Printf("[DEBUG] AutoEnableSmallApps: fewer than 2 storage paths (%d) — skipping", len(storagePaths)) + } return // no secondary drive available } deployed := r.stackProvider.ListDeployedStacks() existingConfigs := r.sett.GetAllCrossDriveConfigs() + if r.debug { + r.logger.Printf("[DEBUG] AutoEnableSmallApps: %d deployed stacks, %d existing configs, %d storage paths", + len(deployed), len(existingConfigs), len(storagePaths)) + } + + var autoEnabled int for _, stack := range deployed { // Skip if already has cross-drive config (user has touched it) if _, exists := existingConfigs[stack.Name]; exists { + if r.debug { + r.logger.Printf("[DEBUG] AutoEnableSmallApps: skipping %s — already has cross-drive config", stack.Name) + } continue } // Skip if app has HDD mounts (large user data — needs manual config) if mounts := r.stackProvider.GetStackHDDMounts(stack.Name); len(mounts) > 0 { + if r.debug { + r.logger.Printf("[DEBUG] AutoEnableSmallApps: skipping %s — has %d HDD mount(s)", stack.Name, len(mounts)) + } continue } @@ -450,6 +541,9 @@ func (r *CrossDriveRunner) AutoEnableSmallApps() { } } if destPath == "" { + if r.debug { + r.logger.Printf("[DEBUG] AutoEnableSmallApps: skipping %s — no suitable destination found", stack.Name) + } continue // no suitable destination found } @@ -464,8 +558,13 @@ func (r *CrossDriveRunner) AutoEnableSmallApps() { r.logger.Printf("[WARN] Auto-enable Tier 2 failed for %s: %v", stack.Name, err) continue } + autoEnabled++ r.logger.Printf("[INFO] Auto-enabled Tier 2 backup for %s → %s (no HDD mounts, daily rsync)", stack.Name, destPath) } + + if r.debug && autoEnabled > 0 { + r.logger.Printf("[DEBUG] AutoEnableSmallApps: auto-enabled %d app(s)", autoEnabled) + } } // --- helpers --- diff --git a/controller/internal/backup/dbdump.go b/controller/internal/backup/dbdump.go index 643c931..f5b4292 100644 --- a/controller/internal/backup/dbdump.go +++ b/controller/internal/backup/dbdump.go @@ -11,6 +11,8 @@ import ( "path/filepath" "strings" "time" + + "gitea.dooplex.hu/admin/felhom-controller/internal/util" ) // DBType represents a database engine type. @@ -61,14 +63,22 @@ type DumpFileInfo struct { } // DiscoverDatabases finds running database containers via docker ps. -func DiscoverDatabases(ctx context.Context, logger *log.Logger) ([]DiscoveredDB, error) { +func DiscoverDatabases(ctx context.Context, logger *log.Logger, debug bool) ([]DiscoveredDB, error) { + if debug { + logger.Printf("[DEBUG] DiscoverDatabases: running docker ps to find database containers") + } cmd := exec.CommandContext(ctx, "docker", "ps", "--format", "{{.ID}}\t{{.Names}}\t{{.Image}}", "--filter", "status=running") out, err := cmd.Output() if err != nil { return nil, fmt.Errorf("docker ps failed: %w", err) } + if debug { + logger.Printf("[DEBUG] DiscoverDatabases: docker ps output: %s", util.TruncateStr(strings.TrimSpace(string(out)), 500)) + } + var dbs []DiscoveredDB + var skipped int for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") { if line == "" { @@ -87,9 +97,17 @@ func DiscoverDatabases(ctx context.Context, logger *log.Logger) ([]DiscoveredDB, } else if strings.Contains(image, "mariadb") || strings.Contains(image, "mysql") { dbType = DBTypeMariaDB } else { + if debug { + logger.Printf("[DEBUG] DiscoverDatabases: skipping container %s (image=%s, not a database)", name, image) + } + skipped++ continue } + if debug { + logger.Printf("[DEBUG] DiscoverDatabases: found %s container: %s (id=%s)", dbType, name, id[:12]) + } + db := DiscoveredDB{ ContainerID: id, ContainerName: name, @@ -100,33 +118,49 @@ func DiscoverDatabases(ctx context.Context, logger *log.Logger) ([]DiscoveredDB, // Get env vars from container if err := populateDBEnv(ctx, &db); err != nil { logger.Printf("[WARN] Could not read env vars for %s: %v", name, err) + if debug { + logger.Printf("[DEBUG] DiscoverDatabases: skipping %s — env read failed", name) + } continue } + if debug { + logger.Printf("[DEBUG] DiscoverDatabases: %s → stack=%s, dbUser=%s, dbName=%s", name, db.StackName, db.DBUser, db.DBName) + } + dbs = append(dbs, db) } + if debug { + logger.Printf("[DEBUG] DiscoverDatabases: found %d database(s), skipped %d non-DB container(s)", len(dbs), skipped) + } + return dbs, nil } // DumpAll dumps all discovered databases. -func DumpAll(ctx context.Context, dbs []DiscoveredDB, dumpDir string, logger *log.Logger) []DumpResult { +func DumpAll(ctx context.Context, dbs []DiscoveredDB, dumpDir string, logger *log.Logger, debug bool) []DumpResult { // Clean up old .tmp files (older than 1 hour) cleanupTmpFiles(dumpDir, logger) var results []DumpResult for _, db := range dbs { - result := DumpOne(ctx, db, dumpDir, logger) + result := DumpOne(ctx, db, dumpDir, logger, debug) results = append(results, result) } return results } // DumpOne dumps a single database. -func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.Logger) DumpResult { +func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.Logger, debug bool) DumpResult { start := time.Now() result := DumpResult{DB: db} + if debug { + logger.Printf("[DEBUG] DumpOne: starting dump for container=%s, stack=%s, dbType=%s, dumpDir=%s", + db.ContainerName, db.StackName, db.DBType, dumpDir) + } + // Ensure dump directory exists if err := os.MkdirAll(dumpDir, 0755); err != nil { result.Error = fmt.Errorf("creating dump dir: %w", err) @@ -148,6 +182,9 @@ func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.L if err != nil || strings.TrimSpace(string(checkOut)) != "true" { result.Error = fmt.Errorf("container %s no longer running", db.ContainerName) result.Duration = time.Since(start) + if debug { + logger.Printf("[DEBUG] DumpOne: container %s is no longer running — skipping", db.ContainerName) + } return result } @@ -158,14 +195,29 @@ func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.L cmd = exec.CommandContext(dumpCtx, "docker", "exec", db.ContainerID, "pg_dump", "-U", db.DBUser, "-d", db.DBName, "--clean", "--if-exists", "--no-owner", "--no-privileges") + if debug { + logger.Printf("[DEBUG] DumpOne: pg_dump command: docker exec %s pg_dump -U %s -d %s --clean --if-exists --no-owner --no-privileges", + db.ContainerID[:12], db.DBUser, db.DBName) + } case DBTypeMariaDB: // Get root password from container env password := getMariaDBPassword(dumpCtx, db.ContainerID) if password == "" { result.Error = fmt.Errorf("could not determine MariaDB root password for %s", db.ContainerName) result.Duration = time.Since(start) + if debug { + logger.Printf("[DEBUG] DumpOne: MariaDB root password not found for %s — skipping", db.ContainerName) + } return result } + cmd = exec.CommandContext(dumpCtx, "docker", "exec", db.ContainerID, + "mariadb-dump", "-u", "root", "-p***", + "--single-transaction", "--routines", "--triggers", db.DBName) + if debug { + logger.Printf("[DEBUG] DumpOne: mariadb-dump command: docker exec %s mariadb-dump -u root -p*** --single-transaction --routines --triggers %s", + db.ContainerID[:12], db.DBName) + } + // Actual command with real password (not logged) cmd = exec.CommandContext(dumpCtx, "docker", "exec", db.ContainerID, "mariadb-dump", "-u", "root", "-p"+password, "--single-transaction", "--routines", "--triggers", db.DBName) @@ -198,6 +250,9 @@ func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.L } result.Error = fmt.Errorf("dump failed: %v — %s", err, errMsg) result.Duration = time.Since(start) + if debug { + logger.Printf("[DEBUG] DumpOne: dump command failed for %s: %v", db.ContainerName, result.Error) + } return result } @@ -207,6 +262,9 @@ func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.L os.Remove(tmpPath) result.Error = fmt.Errorf("dump produced empty file for %s", db.ContainerName) result.Duration = time.Since(start) + if debug { + logger.Printf("[DEBUG] DumpOne: dump produced empty file for %s", db.ContainerName) + } return result } @@ -225,6 +283,12 @@ func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.L // Run validation on the dump file result.Validation = ValidateDump(finalPath, db.DBType) + if debug { + logger.Printf("[DEBUG] DumpOne: completed %s → %s (size=%s, valid=%v, tables=%d, duration=%s)", + db.ContainerName, filename, humanizeBytes(stat.Size()), + result.Validation.Valid, result.Validation.TableCount, result.Duration.Round(time.Millisecond)) + } + logger.Printf("[INFO] DB dump: %s → %s (%s, %s, %d tables)", db.ContainerName, filename, humanizeBytes(stat.Size()), result.Duration.Round(time.Millisecond), result.Validation.TableCount) diff --git a/controller/internal/backup/local_infra.go b/controller/internal/backup/local_infra.go index e75b067..36e7e46 100644 --- a/controller/internal/backup/local_infra.go +++ b/controller/internal/backup/local_infra.go @@ -24,12 +24,16 @@ type InfraMetadata struct { // WriteLocalInfraBackup writes the infra backup to .felhom-infra-backup/ on each drive. // Individual drive failures are logged but not returned — the function is best-effort. -func WriteLocalInfraBackup(backupJSON []byte, customerID, controllerVersion, timestamp string, drives []string, logger *log.Logger) { +func WriteLocalInfraBackup(backupJSON []byte, customerID, controllerVersion, timestamp string, drives []string, logger *log.Logger, debug bool) { if len(drives) == 0 { logger.Printf("[DEBUG] No drives configured for local infra backup") return } + if debug { + logger.Printf("[DEBUG] WriteLocalInfraBackup: payload size=%d bytes, %d target drive(s): %v", len(backupJSON), len(drives), drives) + } + // Compute checksum of backup data hash := sha256.Sum256(backupJSON) checksum := hex.EncodeToString(hash[:]) @@ -51,10 +55,16 @@ func WriteLocalInfraBackup(backupJSON []byte, customerID, controllerVersion, tim written := 0 for _, drive := range drives { dir := InfraBackupDir(drive) + if debug { + logger.Printf("[DEBUG] WriteLocalInfraBackup: writing to drive=%s, dir=%s", drive, dir) + } if err := writeInfraToDir(dir, backupJSON, metaJSON); err != nil { logger.Printf("[WARN] Local infra backup: failed to write to %s: %v", drive, err) continue } + if debug { + logger.Printf("[DEBUG] WriteLocalInfraBackup: write OK to %s", drive) + } written++ } diff --git a/controller/internal/backup/local_infra_test.go b/controller/internal/backup/local_infra_test.go index cb3d68d..d10e375 100644 --- a/controller/internal/backup/local_infra_test.go +++ b/controller/internal/backup/local_infra_test.go @@ -20,7 +20,7 @@ func TestWriteAndReadLocalInfraBackup(t *testing.T) { backupJSON := []byte(`{"customer_id":"test-123","domain":"test.hu","controller_version":"v0.21.0","timestamp":"2026-02-21T10:00:00Z"}`) logger := testLogger(t) - WriteLocalInfraBackup(backupJSON, "test-123", "v0.21.0", "2026-02-21T10:00:00Z", []string{drive}, logger) + WriteLocalInfraBackup(backupJSON, "test-123", "v0.21.0", "2026-02-21T10:00:00Z", []string{drive}, logger, false) // Verify files exist dir := InfraBackupDir(drive) @@ -124,7 +124,7 @@ func TestWriteLocalInfraBackup_MultipleDrives(t *testing.T) { backupJSON := []byte(`{"test":"multi"}`) logger := testLogger(t) - WriteLocalInfraBackup(backupJSON, "multi-test", "v1.0", "2026-01-01T00:00:00Z", drives, logger) + WriteLocalInfraBackup(backupJSON, "multi-test", "v1.0", "2026-01-01T00:00:00Z", drives, logger, false) // All 3 should succeed for _, d := range drives { @@ -142,7 +142,7 @@ func TestWriteLocalInfraBackup_MultipleDrives(t *testing.T) { func TestWriteLocalInfraBackup_NoDrives(t *testing.T) { logger := testLogger(t) // Should not panic - WriteLocalInfraBackup([]byte(`{}`), "test", "v1.0", "2026-01-01T00:00:00Z", nil, logger) + WriteLocalInfraBackup([]byte(`{}`), "test", "v1.0", "2026-01-01T00:00:00Z", nil, logger, false) } func contains(s, substr string) bool { diff --git a/controller/internal/backup/restore.go b/controller/internal/backup/restore.go index 8fd2496..257c827 100644 --- a/controller/internal/backup/restore.go +++ b/controller/internal/backup/restore.go @@ -21,6 +21,10 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error { return fmt.Errorf("invalid snapshot ID: must be 8-64 lowercase hex characters") } + if m.isDebug() { + m.logger.Printf("[DEBUG] RestoreApp: stack=%s, snapshotID=%s", stackName, snapshotID) + } + // Prevent concurrent operations m.mu.Lock() if m.running { @@ -39,6 +43,10 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error { hddMounts := m.stackProvider.GetStackHDDMounts(stackName) hasHDD := len(hddMounts) > 0 + if m.isDebug() { + m.logger.Printf("[DEBUG] RestoreApp: %s has %d HDD mount(s), hasHDD=%v", stackName, len(hddMounts), hasHDD) + } + // Build list of paths to restore from the snapshot var restorePaths []string @@ -47,16 +55,25 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error { if ok { stackDir := filepath.Dir(composePath) restorePaths = append(restorePaths, stackDir) + if m.isDebug() { + m.logger.Printf("[DEBUG] RestoreApp: will restore config dir: %s", stackDir) + } } // Restore DB dump files for this stack (per-drive path) drivePath := m.GetAppDrivePath(stackName) dumpDir := AppDBDumpPath(drivePath, stackName) restorePaths = append(restorePaths, dumpDir) + if m.isDebug() { + m.logger.Printf("[DEBUG] RestoreApp: will restore DB dump dir: %s", dumpDir) + } // Restore HDD data (always included for apps that have it — backup is mandatory) if hasHDD { restorePaths = append(restorePaths, hddMounts...) + if m.isDebug() { + m.logger.Printf("[DEBUG] RestoreApp: will restore HDD data: %v", hddMounts) + } } if len(restorePaths) == 0 { @@ -66,17 +83,30 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error { // Use the app's primary restic repo repoPath := PrimaryResticRepoPath(drivePath) + if m.isDebug() { + m.logger.Printf("[DEBUG] RestoreApp: using repo=%s, %d restore path(s)", repoPath, len(restorePaths)) + } + m.logger.Printf("[WARN] RESTORE starting: stack=%s, snapshot=%s, repo=%s, paths=%v, hasHDD=%v", stackName, snapshotID, repoPath, restorePaths, hasHDD) // Stop the app before restore + if m.isDebug() { + m.logger.Printf("[DEBUG] RestoreApp: step 1/4 — stopping app %s", stackName) + } if err := m.stackProvider.StopStack(stackName); err != nil { m.logger.Printf("[WARN] RESTORE could not stop %s: %v (proceeding anyway)", stackName, err) } // Execute restore via restic + if m.isDebug() { + m.logger.Printf("[DEBUG] RestoreApp: step 2/4 — restoring data from snapshot %s", snapshotID) + } if err := m.restic.RestoreAppData(repoPath, snapshotID, restorePaths); err != nil { m.logger.Printf("[ERROR] RESTORE failed for %s: %v", stackName, err) + if m.isDebug() { + m.logger.Printf("[DEBUG] RestoreApp: step 3/4 — restarting app %s after failure", stackName) + } if startErr := m.stackProvider.StartStack(stackName); startErr != nil { m.logger.Printf("[WARN] RESTORE could not restart %s after failure: %v", stackName, startErr) } @@ -84,6 +114,9 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error { } // Restart the app + if m.isDebug() { + m.logger.Printf("[DEBUG] RestoreApp: step 3/4 — restarting app %s after successful restore", stackName) + } if err := m.stackProvider.StartStack(stackName); err != nil { m.logger.Printf("[WARN] RESTORE could not restart %s after restore: %v", stackName, err) } @@ -92,6 +125,9 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error { if hasHDD { restoreType = "full (config+DB+userdata)" } + if m.isDebug() { + m.logger.Printf("[DEBUG] RestoreApp: step 4/4 — restore completed, type=%s", restoreType) + } m.logger.Printf("[INFO] RESTORE completed: stack=%s, snapshot=%s, type=%s", stackName, snapshotID, restoreType) return nil } diff --git a/controller/internal/monitor/healthcheck.go b/controller/internal/monitor/healthcheck.go index 41430f0..a843eaa 100644 --- a/controller/internal/monitor/healthcheck.go +++ b/controller/internal/monitor/healthcheck.go @@ -2,6 +2,7 @@ package monitor import ( "fmt" + "log" "os" "os/exec" "strings" @@ -22,26 +23,44 @@ type HealthReport struct { } // RunHealthCheck runs system checks and returns a diagnostic report. -func RunHealthCheck(cfg *config.Config, cpuCollector *system.CPUCollector, storagePaths []settings.StoragePath) *HealthReport { +func RunHealthCheck(cfg *config.Config, cpuCollector *system.CPUCollector, storagePaths []settings.StoragePath, logger *log.Logger) *HealthReport { report := &HealthReport{ Status: "ok", Timestamp: time.Now(), } + debug := cfg.Logging.Level == "debug" && logger != nil + hddPath := cfg.Paths.HDDPath if len(storagePaths) > 0 { hddPath = storagePaths[0].Path } sysInfo := system.GetInfo(hddPath, cpuCollector) + if debug { + logger.Printf("[DEBUG] [HEALTH] Raw values: disk=%.1f%%, hdd=%.1f%% (configured=%v), mem=%.1f%% (%dMB/%dMB), cpu=%.1f%%, temp=%.1f°C (%s)", + sysInfo.DiskPercent, sysInfo.HDDPercent, sysInfo.HDDConfigured, + sysInfo.MemPercent, sysInfo.UsedMemMB, sysInfo.TotalMemMB, + sysInfo.CPUPercent, sysInfo.TemperatureCelsius, sysInfo.TemperatureSource) + } + // 1. Disk usage (SSD) if sysInfo.DiskPercent > 0 { if sysInfo.DiskPercent >= float64(cfg.Monitoring.Thresholds.DiskCritPercent) { report.Issues = append(report.Issues, fmt.Sprintf("SSD disk usage critical: %.0f%%", sysInfo.DiskPercent)) + if debug { + logger.Printf("[DEBUG] [HEALTH] SSD disk: CRITICAL (%.0f%% >= %d%%)", sysInfo.DiskPercent, cfg.Monitoring.Thresholds.DiskCritPercent) + } } else if sysInfo.DiskPercent >= float64(cfg.Monitoring.Thresholds.DiskWarnPercent) { report.Warnings = append(report.Warnings, fmt.Sprintf("SSD disk usage high: %.0f%%", sysInfo.DiskPercent)) + if debug { + logger.Printf("[DEBUG] [HEALTH] SSD disk: WARN (%.0f%% >= %d%%)", sysInfo.DiskPercent, cfg.Monitoring.Thresholds.DiskWarnPercent) + } } else { report.Info = append(report.Info, fmt.Sprintf("SSD: %.0f%% used", sysInfo.DiskPercent)) + if debug { + logger.Printf("[DEBUG] [HEALTH] SSD disk: OK (%.0f%%)", sysInfo.DiskPercent) + } } } @@ -58,8 +77,14 @@ func RunHealthCheck(cfg *config.Config, cpuCollector *system.CPUCollector, stora if sysInfo.MemPercent > 0 { if sysInfo.MemPercent >= float64(cfg.Monitoring.Thresholds.MemoryWarnPercent) { report.Warnings = append(report.Warnings, fmt.Sprintf("Memory usage high: %.0f%%", sysInfo.MemPercent)) + if debug { + logger.Printf("[DEBUG] [HEALTH] Memory: WARN (%.0f%% >= %d%%)", sysInfo.MemPercent, cfg.Monitoring.Thresholds.MemoryWarnPercent) + } } else { report.Info = append(report.Info, fmt.Sprintf("Memory: %.0f%% used", sysInfo.MemPercent)) + if debug { + logger.Printf("[DEBUG] [HEALTH] Memory: OK (%.0f%%)", sysInfo.MemPercent) + } } } @@ -67,8 +92,14 @@ func RunHealthCheck(cfg *config.Config, cpuCollector *system.CPUCollector, stora if sysInfo.CPUPercent > 0 { if sysInfo.CPUPercent >= float64(cfg.Monitoring.Thresholds.CPUWarnPercent) { report.Warnings = append(report.Warnings, fmt.Sprintf("CPU usage high: %.0f%%", sysInfo.CPUPercent)) + if debug { + logger.Printf("[DEBUG] [HEALTH] CPU: WARN (%.0f%% >= %d%%)", sysInfo.CPUPercent, cfg.Monitoring.Thresholds.CPUWarnPercent) + } } else { report.Info = append(report.Info, fmt.Sprintf("CPU: %.0f%%", sysInfo.CPUPercent)) + if debug { + logger.Printf("[DEBUG] [HEALTH] CPU: OK (%.0f%%)", sysInfo.CPUPercent) + } } } @@ -76,23 +107,45 @@ func RunHealthCheck(cfg *config.Config, cpuCollector *system.CPUCollector, stora if sysInfo.TemperatureCelsius > 0 { if sysInfo.TemperatureCelsius >= float64(cfg.Monitoring.Thresholds.TemperatureWarnCelsius) { report.Warnings = append(report.Warnings, fmt.Sprintf("Temperature high: %.0f°C (%s)", sysInfo.TemperatureCelsius, sysInfo.TemperatureSource)) + if debug { + logger.Printf("[DEBUG] [HEALTH] Temperature: WARN (%.0f°C >= %d°C)", sysInfo.TemperatureCelsius, cfg.Monitoring.Thresholds.TemperatureWarnCelsius) + } } else { report.Info = append(report.Info, fmt.Sprintf("Temperature: %.0f°C", sysInfo.TemperatureCelsius)) + if debug { + logger.Printf("[DEBUG] [HEALTH] Temperature: OK (%.0f°C)", sysInfo.TemperatureCelsius) + } } } // 5. Docker health if err := checkDocker(); err != nil { report.Issues = append(report.Issues, fmt.Sprintf("Docker: %v", err)) + if debug { + logger.Printf("[DEBUG] [HEALTH] Docker daemon: FAIL (%v)", err) + } } else { report.Info = append(report.Info, "Docker: reachable") + if debug { + logger.Printf("[DEBUG] [HEALTH] Docker daemon: OK") + } } // 6. Protected containers + if debug { + logger.Printf("[DEBUG] [HEALTH] Checking %d protected containers: %v", len(cfg.Stacks.Protected), cfg.Stacks.Protected) + } missingProtected := checkProtectedContainers(cfg.Stacks.Protected) for _, name := range missingProtected { report.Issues = append(report.Issues, fmt.Sprintf("Protected container not running: %s", name)) } + if debug { + if len(missingProtected) > 0 { + logger.Printf("[DEBUG] [HEALTH] Protected containers missing: %v", missingProtected) + } else { + logger.Printf("[DEBUG] [HEALTH] All protected containers running") + } + } // 7. Storage paths storageIssues, storageWarnings := checkStoragePaths(storagePaths) @@ -106,6 +159,11 @@ func RunHealthCheck(cfg *config.Config, cpuCollector *system.CPUCollector, stora report.Status = "warn" } + if debug { + logger.Printf("[DEBUG] [HEALTH] Final status: %s (issues=%d, warnings=%d, info=%d)", + report.Status, len(report.Issues), len(report.Warnings), len(report.Info)) + } + return report } diff --git a/controller/internal/monitor/watchdog.go b/controller/internal/monitor/watchdog.go index 4c22c28..6cd3c18 100644 --- a/controller/internal/monitor/watchdog.go +++ b/controller/internal/monitor/watchdog.go @@ -58,6 +58,11 @@ type pathProbeState struct { lastStatus string // "connected", "disconnected" lastProbeTime time.Time probeInterval time.Duration + // Debug counters for summary logging + probeCount int + probeOKCount int + lastSummaryTime time.Time + totalLatency time.Duration } // StorageWatchdog monitors registered storage paths and reacts to disconnection/reconnection. @@ -95,6 +100,9 @@ func NewStorageWatchdog( } } +// isDebug returns true if the logging level is set to "debug". +func (w *StorageWatchdog) isDebug() bool { return w.cfg.Logging.Level == "debug" } + // SetAlertRefresh sets the callback to trigger alert refresh. func (w *StorageWatchdog) SetAlertRefresh(fn func()) { w.alertRefresh = fn @@ -164,17 +172,45 @@ func (w *StorageWatchdog) getOrCreateState(path string) *pathProbeState { // handleConnectedProbe probes a connected drive and triggers disconnect if needed. func (w *StorageWatchdog) handleConnectedProbe(sp settings.StoragePath, state *pathProbeState) { + probeStart := time.Now() result := system.ProbeStoragePath(sp.Path) + probeLatency := time.Since(probeStart) + + if w.isDebug() { + state.probeCount++ + state.totalLatency += probeLatency + } + if result.Status == system.ProbeConnected { if state.consecutiveFailures > 0 { w.logger.Printf("[DEBUG] [STORAGE] Probe recovered for %s after %d failures", sp.Path, state.consecutiveFailures) } state.consecutiveFailures = 0 state.lastStatus = "connected" + if w.isDebug() { + state.probeOKCount++ + // Every 60 probes (~5 minutes at 5s interval): emit summary + if state.probeCount >= 60 { + avgLatency := state.totalLatency / time.Duration(state.probeCount) + w.logger.Printf("[DEBUG] [STORAGE] Storage watchdog: %s — %d/%d probes OK (last 5m, avg %dms)", + sp.Path, state.probeOKCount, state.probeCount, avgLatency.Milliseconds()) + state.probeCount = 0 + state.probeOKCount = 0 + state.totalLatency = 0 + state.lastSummaryTime = time.Now() + } + } return } state.consecutiveFailures++ + + // Debug: log immediately on unexpected failure (was connected, now failing) + if w.isDebug() && state.lastStatus == "connected" { + w.logger.Printf("[DEBUG] [STORAGE] Storage probe failed for %s (%d/%d before disconnect): %v", + sp.Path, state.consecutiveFailures, probeThreshold, result.Err) + } + w.logger.Printf("[WARN] [STORAGE] Probe failed for %s (%d/%d): %v", sp.Path, state.consecutiveFailures, probeThreshold, result.Err) @@ -239,6 +275,11 @@ func (w *StorageWatchdog) handleReconnectCheck(ctx context.Context, sp settings. return } + if w.isDebug() { + w.logger.Printf("[DEBUG] [STORAGE] Reconnect check for %s: UUID=%s, mountPath=%s, isAttachWizard=%v", + sp.Path, uuid, mountPath, isAttachWizard) + } + // Check if the UUID block device is present uuidPath := filepath.Join(hostDevUUIDPath, uuid) if _, err := os.Stat(uuidPath); err != nil { @@ -251,6 +292,11 @@ func (w *StorageWatchdog) handleReconnectCheck(ctx context.Context, sp settings. } w.logger.Printf("[INFO] [STORAGE] Drive reconnected (UUID found), attempting remount: %s (%s)", sp.Path, label) + if w.isDebug() { + w.logger.Printf("[DEBUG] [STORAGE] UUID %s found at %s, mounting %s (raw=%s, attachWizard=%v)", + uuid, uuidPath, sp.Path, rawPath, isAttachWizard) + } + // Attempt remount if err := w.remount(sp.Path, rawPath, isAttachWizard); err != nil { w.logger.Printf("[ERROR] [STORAGE] Remount failed for %s: %v", sp.Path, err) @@ -261,9 +307,17 @@ func (w *StorageWatchdog) handleReconnectCheck(ctx context.Context, sp settings. verifyResult := system.ProbeStoragePath(sp.Path) if verifyResult.Status != system.ProbeConnected { w.logger.Printf("[ERROR] [STORAGE] Post-remount probe failed for %s: %v", sp.Path, verifyResult.Err) + if w.isDebug() { + w.logger.Printf("[DEBUG] [STORAGE] Post-mount verification failed for %s: status=%v, err=%v", + sp.Path, verifyResult.Status, verifyResult.Err) + } return } + if w.isDebug() { + w.logger.Printf("[DEBUG] [STORAGE] Post-mount verification succeeded for %s", sp.Path) + } + w.logger.Printf("[INFO] [STORAGE] Drive successfully remounted: %s (%s)", sp.Path, label) // Clean stale restic locks diff --git a/controller/internal/notify/notifier.go b/controller/internal/notify/notifier.go index 79aa54e..9d068a9 100644 --- a/controller/internal/notify/notifier.go +++ b/controller/internal/notify/notifier.go @@ -23,6 +23,7 @@ type Notifier struct { httpClient *http.Client logger *log.Logger enabled bool + debug bool settings *settings.Settings mu sync.Mutex @@ -30,7 +31,7 @@ type Notifier struct { } // New creates a new Notifier. Returns a no-op notifier if hub is not enabled. -func New(hubURL, apiKey, customerID string, sett *settings.Settings, logger *log.Logger) *Notifier { +func New(hubURL, apiKey, customerID string, sett *settings.Settings, logger *log.Logger, debug bool) *Notifier { enabled := hubURL != "" && apiKey != "" if enabled { logger.Printf("[INFO] Notifier enabled (hub: %s)", hubURL) @@ -45,6 +46,7 @@ func New(hubURL, apiKey, customerID string, sett *settings.Settings, logger *log httpClient: &http.Client{Timeout: 10 * time.Second}, logger: logger, enabled: enabled, + debug: debug, settings: sett, } } @@ -162,6 +164,9 @@ func (n *Notifier) PushEvent(eventType, severity, message string, details interf go func() { url := n.hubURL + "/api/v1/event" + if n.debug { + n.logger.Printf("[DEBUG] PushEvent: type=%s severity=%s url=%s", eventType, severity, url) + } var lastErr error for attempt := 0; attempt < 3; attempt++ { if attempt > 0 { @@ -185,6 +190,9 @@ func (n *Notifier) PushEvent(eventType, severity, message string, details interf resp.Body.Close() if resp.StatusCode >= 200 && resp.StatusCode < 300 { + if n.debug { + n.logger.Printf("[DEBUG] PushEvent: %s pushed OK (HTTP %d)", eventType, resp.StatusCode) + } n.logger.Printf("[INFO] Event pushed: %s (%s) — %s", eventType, severity, message) return } @@ -284,9 +292,10 @@ func (n *Notifier) NotifyControllerUpdated(fromVer, toVer string, success bool) } // NotifyControllerStarted sends a controller startup event. -func (n *Notifier) NotifyControllerStarted(version string) { +// details may include self-test summary (e.g., {"selftest_pass": 8, "selftest_warn": 1, "selftest_fail": 0}). +func (n *Notifier) NotifyControllerStarted(version string, details map[string]interface{}) { n.PushEvent("controller_started", "info", - fmt.Sprintf("Controller elindult (%s)", version), nil) + fmt.Sprintf("Controller elindult (%s)", version), details) } // NotifyStorageDisconnected sends a drive disconnection event. @@ -375,6 +384,10 @@ func (n *Notifier) SyncPreferences(email string, enabledEvents []string, cooldow } url := n.hubURL + "/api/v1/preferences" + if n.debug { + n.logger.Printf("[DEBUG] SyncPreferences: url=%s email=%s events=%v cooldown=%dh", + url, email, enabledEvents, cooldownHours) + } req, err := http.NewRequest("POST", url, bytes.NewReader(jsonData)) if err != nil { return fmt.Errorf("request: %w", err) @@ -393,6 +406,9 @@ func (n *Notifier) SyncPreferences(email string, enabledEvents []string, cooldow return fmt.Errorf("hub hiba (%d): %s", resp.StatusCode, string(body)) } + if n.debug { + n.logger.Printf("[DEBUG] SyncPreferences: response HTTP %d", resp.StatusCode) + } n.logger.Printf("[INFO] Notification preferences synced to hub: email=%s, events=%v, cooldown=%dh", email, enabledEvents, cooldownHours) return nil } diff --git a/controller/internal/report/builder.go b/controller/internal/report/builder.go index 3e211b3..53f235e 100644 --- a/controller/internal/report/builder.go +++ b/controller/internal/report/builder.go @@ -4,6 +4,7 @@ import ( "crypto/sha256" "encoding/hex" "fmt" + "log" "os" "strconv" "strings" @@ -29,7 +30,13 @@ func BuildReport( metricsStore *metrics.MetricsStore, version string, storagePaths []settings.StoragePath, + logger *log.Logger, ) *Report { + debug := cfg.Logging.Level == "debug" + if debug && logger != nil { + logger.Printf("[DEBUG] BuildReport: starting — version=%s, storagePaths=%d", version, len(storagePaths)) + } + r := &Report{ Version: 1, CustomerID: cfg.Customer.ID, @@ -48,6 +55,9 @@ func BuildReport( if data, err := os.ReadFile(configPath); err == nil { h := sha256.Sum256(data) r.ConfigHash = hex.EncodeToString(h[:]) + if debug && logger != nil { + logger.Printf("[DEBUG] BuildReport: configHash=%s (%d bytes)", r.ConfigHash[:12]+"...", len(data)) + } } } @@ -111,6 +121,12 @@ func BuildReport( }) } + if debug && logger != nil { + logger.Printf("[DEBUG] BuildReport: system info collected — cpu=%.1f%%, mem=%d/%dMB, temp=%.1fC", + sysInfo.CPUPercent, sysInfo.UsedMemMB, sysInfo.TotalMemMB, sysInfo.TemperatureCelsius) + logger.Printf("[DEBUG] BuildReport: storage entries=%d", len(r.Storage)) + } + // Containers r.Containers = buildContainerReport(stackMgr, metricsStore) @@ -118,7 +134,7 @@ func BuildReport( r.Backup = buildBackupReport(cfg, backupMgr) // Health - healthReport := monitor.RunHealthCheck(cfg, cpuCollector, storagePaths) + healthReport := monitor.RunHealthCheck(cfg, cpuCollector, storagePaths, logger) r.Health = HealthReport{ Status: healthReport.Status, Issues: healthReport.Issues, @@ -134,6 +150,11 @@ func BuildReport( // Stacks r.Stacks = buildStacksReport(stackMgr) + if debug && logger != nil { + logger.Printf("[DEBUG] BuildReport: complete — containers=%d, health=%s, deployed=%d, available=%d", + r.Containers.Total, r.Health.Status, len(r.Stacks.Deployed), len(r.Stacks.Available)) + } + return r } diff --git a/controller/internal/report/pusher.go b/controller/internal/report/pusher.go index cc76240..a17d486 100644 --- a/controller/internal/report/pusher.go +++ b/controller/internal/report/pusher.go @@ -35,6 +35,7 @@ type Pusher struct { httpClient *http.Client logger *log.Logger enabled bool + debug bool statusMu sync.RWMutex status PushStatus @@ -45,7 +46,7 @@ type Pusher struct { } // NewPusher creates a new report pusher from hub configuration. -func NewPusher(cfg *config.HubConfig, logger *log.Logger) *Pusher { +func NewPusher(cfg *config.HubConfig, logger *log.Logger, debug bool) *Pusher { return &Pusher{ hubURL: strings.TrimRight(cfg.URL, "/"), apiKey: cfg.APIKey, @@ -54,6 +55,7 @@ func NewPusher(cfg *config.HubConfig, logger *log.Logger) *Pusher { }, logger: logger, enabled: cfg.Enabled, + debug: debug, } } @@ -69,6 +71,9 @@ func (p *Pusher) Push(report *Report) error { } url := p.hubURL + "/api/v1/report" + if p.debug { + p.logger.Printf("[DEBUG] Push: url=%s payload=%d bytes", url, len(data)) + } p.statusMu.Lock() p.status.LastAttempt = time.Now() @@ -143,6 +148,9 @@ func (p *Pusher) PushInfraBackup(data []byte) error { } url := p.hubURL + "/api/v1/infra-backup" + if p.debug { + p.logger.Printf("[DEBUG] PushInfraBackup: url=%s payload=%d bytes", url, len(data)) + } var lastErr error for attempt := 0; attempt < 3; attempt++ { @@ -173,6 +181,9 @@ func (p *Pusher) PushInfraBackup(data []byte) error { return nil } lastErr = fmt.Errorf("HTTP %d", resp.StatusCode) + if p.debug { + p.logger.Printf("[DEBUG] PushInfraBackup: attempt %d failed — HTTP %d", attempt+1, resp.StatusCode) + } } return fmt.Errorf("infra backup push failed after 3 attempts: %w", lastErr) diff --git a/controller/internal/selftest/selftest.go b/controller/internal/selftest/selftest.go new file mode 100644 index 0000000..fe2e33d --- /dev/null +++ b/controller/internal/selftest/selftest.go @@ -0,0 +1,256 @@ +package selftest + +import ( + "context" + "fmt" + "log" + "net/http" + "os" + "os/exec" + "path/filepath" + "time" + + "gitea.dooplex.hu/admin/felhom-controller/internal/backup" + "gitea.dooplex.hu/admin/felhom-controller/internal/config" + "gitea.dooplex.hu/admin/felhom-controller/internal/settings" + "gitea.dooplex.hu/admin/felhom-controller/internal/system" +) + +// CheckResult represents the outcome of a single self-test check. +type CheckResult struct { + Name string `json:"name"` + Status string `json:"status"` // "pass", "warn", "fail" + Message string `json:"message"` +} + +// Summary contains all self-test results. +type Summary struct { + Results []CheckResult `json:"results"` + Pass int `json:"pass"` + Warn int `json:"warn"` + Fail int `json:"fail"` +} + +// Run executes all startup self-test checks and returns a summary. +// It never blocks startup — each check has an individual timeout. +func Run(cfg *config.Config, sett *settings.Settings, logger *log.Logger) *Summary { + s := &Summary{} + + checks := []func() CheckResult{ + func() CheckResult { return checkDockerSocket() }, + func() CheckResult { return checkStacksDir(cfg.Paths.StacksDir) }, + func() CheckResult { return checkDataDir(cfg.Paths.DataDir) }, + func() CheckResult { return checkSystemDataPath(cfg.Paths.SystemDataPath) }, + func() CheckResult { return checkStoragePaths(sett) }, + func() CheckResult { return checkGitCatalog(cfg.Paths.StacksDir) }, + func() CheckResult { return checkHubConnectivity(cfg) }, + func() CheckResult { return checkResticRepos(sett) }, + func() CheckResult { return checkMetricsDB(cfg.Paths.DataDir) }, + } + + logger.Println("[INFO] ========== Startup Self-Test ==========") + for _, check := range checks { + result := runSafe(check) + s.Results = append(s.Results, result) + switch result.Status { + case "pass": + s.Pass++ + case "warn": + s.Warn++ + case "fail": + s.Fail++ + } + tag := "[PASS]" + if result.Status == "warn" { + tag = "[WARN]" + } else if result.Status == "fail" { + tag = "[FAIL]" + } + logger.Printf("[INFO] %s %s: %s", tag, result.Name, result.Message) + } + logger.Println("[INFO] ========================================") + logger.Printf("[INFO] Self-test complete: %d passed, %d warnings, %d failed", s.Pass, s.Warn, s.Fail) + + return s +} + +// runSafe executes a check function, recovering from panics. +func runSafe(fn func() CheckResult) (result CheckResult) { + defer func() { + if r := recover(); r != nil { + result = CheckResult{ + Name: "unknown", + Status: "fail", + Message: fmt.Sprintf("panic: %v", r), + } + } + }() + return fn() +} + +func checkDockerSocket() CheckResult { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + out, err := exec.CommandContext(ctx, "docker", "info", "--format", "{{.ServerVersion}}").Output() + if err != nil { + return CheckResult{Name: "Docker socket", Status: "fail", Message: fmt.Sprintf("docker info failed: %v", err)} + } + return CheckResult{Name: "Docker socket", Status: "pass", Message: fmt.Sprintf("reachable (v%s)", string(out[:len(out)-1]))} +} + +func checkStacksDir(stacksDir string) CheckResult { + info, err := os.Stat(stacksDir) + if err != nil { + return CheckResult{Name: "Stacks directory", Status: "fail", Message: fmt.Sprintf("not accessible: %v", err)} + } + if !info.IsDir() { + return CheckResult{Name: "Stacks directory", Status: "fail", Message: "path exists but is not a directory"} + } + return CheckResult{Name: "Stacks directory", Status: "pass", Message: stacksDir} +} + +func checkDataDir(dataDir string) CheckResult { + info, err := os.Stat(dataDir) + if err != nil { + return CheckResult{Name: "Data directory", Status: "fail", Message: fmt.Sprintf("not accessible: %v", err)} + } + if !info.IsDir() { + return CheckResult{Name: "Data directory", Status: "fail", Message: "path exists but is not a directory"} + } + + // Write test: create and remove a temp file + tmpFile := filepath.Join(dataDir, ".selftest-"+fmt.Sprintf("%d", time.Now().UnixNano())) + if err := os.WriteFile(tmpFile, []byte("test"), 0644); err != nil { + return CheckResult{Name: "Data directory", Status: "warn", Message: fmt.Sprintf("not writable: %v", err)} + } + os.Remove(tmpFile) + + return CheckResult{Name: "Data directory", Status: "pass", Message: fmt.Sprintf("%s (writable)", dataDir)} +} + +func checkSystemDataPath(systemDataPath string) CheckResult { + if systemDataPath == "" { + return CheckResult{Name: "System data path", Status: "warn", Message: "not configured"} + } + info, err := os.Stat(systemDataPath) + if err != nil { + return CheckResult{Name: "System data path", Status: "warn", Message: fmt.Sprintf("not accessible: %v", err)} + } + if !info.IsDir() { + return CheckResult{Name: "System data path", Status: "fail", Message: "path exists but is not a directory"} + } + if !system.IsMountPoint(systemDataPath) { + return CheckResult{Name: "System data path", Status: "warn", Message: fmt.Sprintf("%s exists but is not a mount point", systemDataPath)} + } + return CheckResult{Name: "System data path", Status: "pass", Message: systemDataPath} +} + +func checkStoragePaths(sett *settings.Settings) CheckResult { + paths := sett.GetStoragePaths() + if len(paths) == 0 { + return CheckResult{Name: "Storage paths", Status: "warn", Message: "no storage paths registered"} + } + + connected := 0 + disconnected := 0 + for _, sp := range paths { + if sp.Decommissioned { + continue + } + if sp.Disconnected { + disconnected++ + continue + } + if _, err := os.Stat(sp.Path); err == nil && system.IsMountPoint(sp.Path) { + connected++ + } else { + disconnected++ + } + } + + msg := fmt.Sprintf("%d connected, %d disconnected", connected, disconnected) + if connected == 0 && disconnected > 0 { + return CheckResult{Name: "Storage paths", Status: "warn", Message: msg} + } + return CheckResult{Name: "Storage paths", Status: "pass", Message: msg} +} + +func checkGitCatalog(stacksDir string) CheckResult { + matches, err := filepath.Glob(filepath.Join(stacksDir, "*", ".felhom.yml")) + if err != nil { + return CheckResult{Name: "Git catalog", Status: "warn", Message: fmt.Sprintf("glob error: %v", err)} + } + if len(matches) == 0 { + return CheckResult{Name: "Git catalog", Status: "warn", Message: "no .felhom.yml files found in stacks dir"} + } + return CheckResult{Name: "Git catalog", Status: "pass", Message: fmt.Sprintf("%d app definitions found", len(matches))} +} + +func checkHubConnectivity(cfg *config.Config) CheckResult { + if !cfg.Hub.Enabled || cfg.Hub.URL == "" { + return CheckResult{Name: "Hub connectivity", Status: "pass", Message: "hub disabled, skipped"} + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + url := cfg.Hub.URL + "/healthz" + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return CheckResult{Name: "Hub connectivity", Status: "warn", Message: fmt.Sprintf("request error: %v", err)} + } + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return CheckResult{Name: "Hub connectivity", Status: "warn", Message: fmt.Sprintf("unreachable: %v", err)} + } + resp.Body.Close() + + if resp.StatusCode >= 200 && resp.StatusCode < 300 { + return CheckResult{Name: "Hub connectivity", Status: "pass", Message: fmt.Sprintf("%s reachable (HTTP %d)", cfg.Hub.URL, resp.StatusCode)} + } + return CheckResult{Name: "Hub connectivity", Status: "warn", Message: fmt.Sprintf("HTTP %d from %s", resp.StatusCode, url)} +} + +func checkResticRepos(sett *settings.Settings) CheckResult { + paths := sett.GetStoragePaths() + if len(paths) == 0 { + return CheckResult{Name: "Restic repos", Status: "pass", Message: "no storage paths, skipped"} + } + + found := 0 + missing := 0 + for _, sp := range paths { + if sp.Disconnected || sp.Decommissioned { + continue + } + repoPath := backup.PrimaryResticRepoPath(sp.Path) + if _, err := os.Stat(repoPath); err == nil { + found++ + } else { + missing++ + } + } + + if found == 0 && missing > 0 { + return CheckResult{Name: "Restic repos", Status: "warn", Message: fmt.Sprintf("0 repos found, %d expected", missing)} + } + msg := fmt.Sprintf("%d repos found", found) + if missing > 0 { + msg += fmt.Sprintf(", %d missing", missing) + } + return CheckResult{Name: "Restic repos", Status: "pass", Message: msg} +} + +func checkMetricsDB(dataDir string) CheckResult { + dbPath := filepath.Join(dataDir, "metrics.db") + info, err := os.Stat(dbPath) + if err != nil { + if os.IsNotExist(err) { + return CheckResult{Name: "Metrics DB", Status: "warn", Message: "metrics.db not found (will be created)"} + } + return CheckResult{Name: "Metrics DB", Status: "warn", Message: fmt.Sprintf("not accessible: %v", err)} + } + return CheckResult{Name: "Metrics DB", Status: "pass", Message: fmt.Sprintf("%.1f MB", float64(info.Size())/1024/1024)} +} diff --git a/controller/internal/selfupdate/updater.go b/controller/internal/selfupdate/updater.go index 2948480..fe9b370 100644 --- a/controller/internal/selfupdate/updater.go +++ b/controller/internal/selfupdate/updater.go @@ -40,6 +40,7 @@ type Updater struct { dataDir string composePath string // e.g., "/opt/docker/felhom-controller/docker-compose.yml" logger *log.Logger + debug bool mu sync.Mutex latestVersion string @@ -49,7 +50,7 @@ type Updater struct { } // NewUpdater creates a new Updater instance. -func NewUpdater(cfg *config.SelfUpdateConfig, gitCfg *config.GitConfig, currentVersion, dataDir, composePath string, logger *log.Logger) *Updater { +func NewUpdater(cfg *config.SelfUpdateConfig, gitCfg *config.GitConfig, currentVersion, dataDir, composePath string, logger *log.Logger, debug bool) *Updater { return &Updater{ cfg: cfg, gitCfg: gitCfg, @@ -57,6 +58,7 @@ func NewUpdater(cfg *config.SelfUpdateConfig, gitCfg *config.GitConfig, currentV dataDir: dataDir, composePath: composePath, logger: logger, + debug: debug, } } @@ -131,10 +133,16 @@ func (u *Updater) CheckForUpdate() CheckResult { return result } - if latestVer.Compare(currentVer) > 0 { + cmp := latestVer.Compare(currentVer) + if cmp > 0 { result.UpdateAvailable = true } + if u.debug { + u.logger.Printf("[DEBUG] [SELFUPDATE] Version comparison: current=%s, latest=%s, cmp=%d, updateAvailable=%v", + u.currentVer, latestStr, cmp, result.UpdateAvailable) + } + u.mu.Lock() u.latestVersion = latestStr u.lastCheck = &result @@ -153,6 +161,10 @@ func (u *Updater) queryRegistry() (string, error) { // Gitea registry V2: GET /v2///tags/list url := fmt.Sprintf("https://gitea.dooplex.hu/v2/%s/tags/list", registryImagePath(u.cfg.Image)) + if u.debug { + u.logger.Printf("[DEBUG] [SELFUPDATE] Registry API URL: %s (user: %s)", url, u.gitCfg.Username) + } + req, err := http.NewRequest("GET", url, nil) if err != nil { return "", fmt.Errorf("creating request: %w", err) @@ -181,6 +193,10 @@ func (u *Updater) queryRegistry() (string, error) { return "", fmt.Errorf("decoding response: %w", err) } + if u.debug { + u.logger.Printf("[DEBUG] [SELFUPDATE] Registry returned %d tags: %v", len(tagsResp.Tags), tagsResp.Tags) + } + // Find highest semver tag var highest *Version for _, tag := range tagsResp.Tags { @@ -342,6 +358,15 @@ func (u *Updater) updateComposeFile(newImage string) error { // Replace image line: "image: gitea.dooplex.hu/admin/felhom-controller:..." → new image re := regexp.MustCompile(`(image:\s*)gitea\.dooplex\.hu/admin/felhom-controller:\S+`) + + if u.debug { + // Log old image line for debugging + oldMatch := re.Find(data) + if oldMatch != nil { + u.logger.Printf("[DEBUG] [SELFUPDATE] Compose file edit: %q → %q", string(oldMatch), "image: "+newImage) + } + } + newData := re.ReplaceAll(data, []byte("${1}"+newImage)) if bytes.Equal(data, newData) { diff --git a/controller/internal/setup/handlers.go b/controller/internal/setup/handlers.go index b1507d0..6946987 100644 --- a/controller/internal/setup/handlers.go +++ b/controller/internal/setup/handlers.go @@ -73,6 +73,11 @@ func NewServer(cfg *config.Config, dataDir string, logger *log.Logger, version s return s } +// isDebug returns true if logging level is "debug". +func (s *Server) isDebug() bool { + return s.cfg != nil && s.cfg.Logging.Level == "debug" +} + func (s *Server) loadTemplates() { s.tmpl = template.Must( template.New("").Funcs(template.FuncMap{ @@ -311,6 +316,9 @@ func (s *Server) processHubRestore(w http.ResponseWriter, r *http.Request) { return } + if s.isDebug() { + s.logger.Printf("[DEBUG] Setup: hub restore — pulling recovery from %s for customer %s", hubURL, customerID) + } recovery, err := report.PullRecovery(hubURL, customerID, password) if err != nil { var msg string @@ -328,6 +336,10 @@ func (s *Server) processHubRestore(w http.ResponseWriter, r *http.Request) { return } + if s.isDebug() { + s.logger.Printf("[DEBUG] Setup: hub recovery received — hasInfra=%v, configLen=%d", recovery.HasInfraBackup, len(recovery.ConfigYAML)) + } + // Store recovery data in state for restore execution s.state.SelectedBackup = &SelectedBackup{ Source: "hub", @@ -392,6 +404,9 @@ func (s *Server) processFreshHub(w http.ResponseWriter, r *http.Request) { s.logger.Printf("[INFO] Setup: config downloaded (%d bytes), writing config...", len(configYAML)) // Write config and finish setup + if s.isDebug() { + s.logger.Printf("[DEBUG] Setup: writing fresh config (%d bytes)", len(configYAML)) + } s.state.SetFormField("retrieval_password", password) if err := s.writeFreshConfig(configYAML, password); err != nil { s.logger.Printf("[ERROR] Setup: writeFreshConfig failed: %v", err) @@ -447,6 +462,9 @@ func (s *Server) processManual(w http.ResponseWriter, r *http.Request) { // Generate controller.yaml configYAML := s.generateManualConfig() + if s.isDebug() { + s.logger.Printf("[DEBUG] Setup: generated manual config (%d bytes) for customer %s", len(configYAML), customerID) + } if err := s.writeFreshConfig(configYAML, ""); err != nil { csrf := ensureCSRFToken(w, r) data := map[string]interface{}{ diff --git a/controller/internal/setup/scanner.go b/controller/internal/setup/scanner.go index 7b8ce56..f8f2d3d 100644 --- a/controller/internal/setup/scanner.go +++ b/controller/internal/setup/scanner.go @@ -45,7 +45,7 @@ type lsblkDevice struct { } // ScanDrivesForInfraBackups scans all block devices for .felhom-infra-backup/ directories. -func ScanDrivesForInfraBackups(logger *log.Logger) ([]DriveBackup, error) { +func ScanDrivesForInfraBackups(logger *log.Logger, debug bool) ([]DriveBackup, error) { logger.Printf("[INFO] Setup: scanning drives for infra backups...") // Read currently mounted filesystems @@ -68,6 +68,10 @@ func ScanDrivesForInfraBackups(logger *log.Logger) ([]DriveBackup, error) { return nil, fmt.Errorf("parsing lsblk: %w", err) } + if debug { + logger.Printf("[DEBUG] Setup scan: lsblk returned %d block devices", len(lsblk.Blockdevices)) + } + var results []DriveBackup // Flatten all partitions @@ -83,6 +87,10 @@ func ScanDrivesForInfraBackups(logger *log.Logger) ([]DriveBackup, error) { } } + if debug { + logger.Printf("[DEBUG] Setup scan: found %d partitions to check, %d root devices to skip", len(partitions), len(rootDevices)) + } + for _, part := range partitions { // Skip partitions without filesystem if part.FSType == nil || *part.FSType == "" || *part.FSType == "swap" { @@ -256,7 +264,7 @@ func countValid(results []DriveBackup) int { // runDriveScan runs the scan asynchronously and stores results on the Server. func (s *Server) runDriveScan() { - results, err := ScanDrivesForInfraBackups(s.logger) + results, err := ScanDrivesForInfraBackups(s.logger, s.isDebug()) s.scanMu.Lock() defer s.scanMu.Unlock() diff --git a/controller/internal/storage/attach.go b/controller/internal/storage/attach.go index 9f61325..0acecad 100644 --- a/controller/internal/storage/attach.go +++ b/controller/internal/storage/attach.go @@ -1,13 +1,17 @@ package storage +import "log" + // AttachRequest holds parameters for attaching an existing partition // without formatting — using a bind mount from a subfolder. type AttachRequest struct { - DevicePath string // "/dev/sdb1" — must have an existing filesystem - MountName string // "hdd_1" → bind-mounts at /mnt/hdd_1 - SubPath string // full path on raw mount to bind-mount (e.g., "/mnt/.felhom-raw/hdd_1/felhom_data") - Label string // Display label for the UI - SetDefault bool // Register as default storage path + DevicePath string // "/dev/sdb1" — must have an existing filesystem + MountName string // "hdd_1" → bind-mounts at /mnt/hdd_1 + SubPath string // full path on raw mount to bind-mount (e.g., "/mnt/.felhom-raw/hdd_1/felhom_data") + Label string // Display label for the UI + SetDefault bool // Register as default storage path + Logger *log.Logger // Optional logger for debug output + Debug bool // Enable debug logging } // DirEntry represents a directory entry returned by ListDirectories. diff --git a/controller/internal/storage/attach_linux.go b/controller/internal/storage/attach_linux.go index 96dd0d9..0ce340f 100644 --- a/controller/internal/storage/attach_linux.go +++ b/controller/internal/storage/attach_linux.go @@ -182,8 +182,14 @@ func FinalizeAttach(req AttachRequest, progress chan<- FormatProgress) (string, progress <- FormatProgress{Step: "error", Message: msg, Error: errStr, Percent: 0} return fmt.Errorf("%s: %w", msg, err) } + dbg := func(format string, args ...interface{}) { + if req.Logger != nil && req.Debug { + req.Logger.Printf("[DEBUG] FinalizeAttach: "+format, args...) + } + } mountPath := "/mnt/" + req.MountName + dbg("starting: device=%s mountName=%s subPath=%s", req.DevicePath, req.MountName, req.SubPath) // --- Step 1: Validate --- send("validating", "Paraméterek ellenőrzése...", 5) @@ -244,6 +250,7 @@ func FinalizeAttach(req AttachRequest, progress chan<- FormatProgress) (string, subRel := strings.TrimPrefix(cleanSub, rawMountPath) subRel = strings.TrimPrefix(subRel, "/") + dbg("raw mount path: %s, sub relative: %q", rawMountPath, subRel) send("mounting", "fstab bejegyzések hozzáadása...", 35) // Backup fstab (non-fatal) @@ -251,13 +258,17 @@ func FinalizeAttach(req AttachRequest, progress chan<- FormatProgress) (string, // Fstab entry 1: raw partition mount // Use nofail so a missing disk doesn't block boot + dbg("fstab entry 1: UUID=%s → %s (fstype=%s)", uuid, rawMountPath, fsType) if err := AppendFstabEntry(FstabPath, uuid, rawMountPath, fsType, "defaults,nofail,noatime"); err != nil { + dbg("fstab raw mount entry failed: %v", err) return "", fail("mounting", "fstab bejegyzés hozzáadása sikertelen (raw mount)", err) } // Fstab entry 2: bind mount from subfolder to final path bindSource := cleanSub + dbg("fstab entry 2: bind %s → %s", bindSource, mountPath) if err := appendBindFstabEntry(FstabPath, bindSource, mountPath); err != nil { + dbg("fstab bind entry failed: %v", err) // Roll back the raw mount fstab entry _ = RemoveFstabEntry(FstabPath, uuid) return "", fail("mounting", "fstab bejegyzés hozzáadása sikertelen (bind mount)", err) @@ -272,20 +283,25 @@ func FinalizeAttach(req AttachRequest, progress chan<- FormatProgress) (string, return "", fail("mounting", "Csatlakoztatási mappa nem hozható létre: "+mountPath, err) } + dbg("bind mount: mount --bind %s %s", cleanSub, mountPath) if out, err := exec.Command("mount", "--bind", cleanSub, mountPath).CombinedOutput(); err != nil { + dbg("bind mount failed: %s", string(out)) _ = RemoveFstabEntry(FstabPath, uuid) _ = removeBindFstabEntry(FstabPath, mountPath) return "", fail("mounting", "Bind mount sikertelen: "+string(out), err) } // Verify bind mount + dbg("verifying bind mount with findmnt") verifyOut, verifyErr := exec.Command("findmnt", "-n", "-o", "SOURCE", "--target", mountPath).Output() if verifyErr != nil || strings.TrimSpace(string(verifyOut)) == "" { + dbg("bind mount verification failed: findmnt returned %q err=%v", string(verifyOut), verifyErr) _ = exec.Command("umount", mountPath).Run() _ = RemoveFstabEntry(FstabPath, uuid) _ = removeBindFstabEntry(FstabPath, mountPath) return "", fail("mounting", "A bind mount nem ellenőrizhető", fmt.Errorf("mount point %s not found after bind mount", mountPath)) } + dbg("bind mount verified: source=%q", strings.TrimSpace(string(verifyOut))) send("mounting", "Csatlakoztatva: "+mountPath, 70) @@ -301,6 +317,7 @@ func FinalizeAttach(req AttachRequest, progress chan<- FormatProgress) (string, } } + dbg("attach completed successfully: %s", mountPath) send("done", "Meghajtó sikeresen csatolva: "+mountPath, 100) return mountPath, nil diff --git a/controller/internal/storage/format.go b/controller/internal/storage/format.go index b37b1ec..a323554 100644 --- a/controller/internal/storage/format.go +++ b/controller/internal/storage/format.go @@ -3,16 +3,19 @@ package storage import ( "bufio" "fmt" + "log" "strings" ) // FormatRequest holds parameters for formatting and mounting a disk. type FormatRequest struct { - DevicePath string // "/dev/sdb" or "/dev/sdb1" - MountName string // "hdd_1" → mounts at /mnt/hdd_1 - Label string // Display label for the UI - CreatePartition bool // If true, create a single partition first (wipes disk) - SetDefault bool // Register as default storage path + DevicePath string // "/dev/sdb" or "/dev/sdb1" + MountName string // "hdd_1" → mounts at /mnt/hdd_1 + Label string // Display label for the UI + CreatePartition bool // If true, create a single partition first (wipes disk) + SetDefault bool // Register as default storage path + Logger *log.Logger // Optional logger for debug output + Debug bool // Enable debug logging } // FormatProgress tracks the formatting/mounting progress. diff --git a/controller/internal/storage/format_linux.go b/controller/internal/storage/format_linux.go index c490760..09ee350 100644 --- a/controller/internal/storage/format_linux.go +++ b/controller/internal/storage/format_linux.go @@ -10,6 +10,8 @@ import ( "path/filepath" "strings" "time" + + "gitea.dooplex.hu/admin/felhom-controller/internal/util" ) // FormatAndMount formats a disk/partition and mounts it. @@ -27,8 +29,14 @@ func FormatAndMount(req FormatRequest, progress chan<- FormatProgress) (string, progress <- FormatProgress{Step: "error", Message: msg, Error: errStr, Percent: 0} return fmt.Errorf("%s: %w", msg, err) } + dbg := func(format string, args ...interface{}) { + if req.Logger != nil && req.Debug { + req.Logger.Printf("[DEBUG] FormatAndMount: "+format, args...) + } + } mountPath := "/mnt/" + req.MountName + dbg("starting: device=%s mountName=%s createPartition=%v", req.DevicePath, req.MountName, req.CreatePartition) // --- Step 1: Validate --- send("validating", "Eszköz ellenőrzése...", 5) @@ -78,10 +86,14 @@ func FormatAndMount(req FormatRequest, progress chan<- FormatProgress) (string, if req.CreatePartition { // Wipe existing partition table and filesystem signatures first // H18: Log wipefs errors instead of silently discarding them. + dbg("step wipefs: wipefs -a %s", HostDevicePath(req.DevicePath)) send("partitioning", fmt.Sprintf("wipefs -a %s ...", HostDevicePath(req.DevicePath)), 12) if err := exec.Command("wipefs", "-a", HostDevicePath(req.DevicePath)).Run(); err != nil { // Non-fatal: some systems don't have wipefs; continue anyway + dbg("wipefs failed (non-fatal): %v", err) send("partitioning", fmt.Sprintf("[WARN] wipefs sikertelen %s: %v (folytatás)", req.DevicePath, err), 13) + } else { + dbg("wipefs completed successfully") } time.Sleep(500 * time.Millisecond) @@ -89,12 +101,16 @@ func FormatAndMount(req FormatRequest, progress chan<- FormatProgress) (string, // ",," = start=default, size=default(fill disk), type=default(Linux filesystem GUID). // --force: overwrite even if device appears busy. // --wipe always: wipe filesystem signatures from newly created partitions. + dbg("step sfdisk: sfdisk --force --wipe always %s", HostDevicePath(req.DevicePath)) send("partitioning", fmt.Sprintf("sfdisk --force --wipe always %s ...", HostDevicePath(req.DevicePath)), 15) sfdiskInput := "label: gpt\n,,\n" cmd := exec.Command("sfdisk", "--force", "--wipe", "always", HostDevicePath(req.DevicePath)) cmd.Stdin = strings.NewReader(sfdiskInput) if out, err := cmd.CombinedOutput(); err != nil { + dbg("sfdisk failed: %s", util.TruncateStr(string(out), 500)) return "", fail("partitioning", "Partícionálás sikertelen: "+string(out), err) + } else { + dbg("sfdisk output: %s", util.TruncateStr(string(out), 500)) } _ = exec.Command("partprobe", HostDevicePath(req.DevicePath)).Run() @@ -119,14 +135,17 @@ func FormatAndMount(req FormatRequest, progress chan<- FormatProgress) (string, fsLabel = fsLabel[:16] } + dbg("step mkfs.ext4: mkfs.ext4 -L %s -F %s", fsLabel, HostDevicePath(partDev)) send("formatting", fmt.Sprintf("mkfs.ext4 -L %s -F %s ...", fsLabel, HostDevicePath(partDev)), 30) mkfsCmd := exec.Command("mkfs.ext4", "-L", fsLabel, "-F", HostDevicePath(partDev)) var mkfsOut bytes.Buffer mkfsCmd.Stdout = &mkfsOut mkfsCmd.Stderr = &mkfsOut if err := mkfsCmd.Run(); err != nil { + dbg("mkfs.ext4 failed: %s", util.TruncateStr(mkfsOut.String(), 500)) return "", fail("formatting", "Formázás sikertelen: "+mkfsOut.String(), err) } + dbg("mkfs.ext4 output: %s", util.TruncateStr(mkfsOut.String(), 500)) send("formatting", "Formázás kész", 60) @@ -135,12 +154,15 @@ func FormatAndMount(req FormatRequest, progress chan<- FormatProgress) (string, return "", fail("mounting", "Csatlakoztatási mappa nem hozható létre: "+mountPath, err) } + dbg("step blkid: blkid -s UUID -o value %s", HostDevicePath(partDev)) send("mounting", fmt.Sprintf("UUID lekérése: blkid %s ...", HostDevicePath(partDev)), 65) uuidOut, err := exec.Command("blkid", "-s", "UUID", "-o", "value", HostDevicePath(partDev)).Output() if err != nil { + dbg("blkid UUID failed: %v", err) return "", fail("mounting", "UUID lekérése sikertelen", err) } uuid := strings.TrimSpace(string(uuidOut)) + dbg("blkid returned UUID=%q", uuid) if uuid == "" { return "", fail("mounting", "UUID üres a formázás után", fmt.Errorf("empty UUID")) } @@ -148,28 +170,36 @@ func FormatAndMount(req FormatRequest, progress chan<- FormatProgress) (string, // Backup fstab (non-fatal) _ = BackupFstab(FstabPath) + dbg("step fstab: appending UUID=%s mountPath=%s fstype=ext4", uuid, mountPath) if err := AppendFstabEntry(FstabPath, uuid, mountPath, "ext4", "defaults,nofail,noatime"); err != nil { + dbg("fstab append failed: %v", err) return "", fail("mounting", "fstab bejegyzés hozzáadása sikertelen", err) } + dbg("fstab entry added successfully") // Mount by device path explicitly — container's /etc/fstab != host fstab, // so "mount /mnt/hdd_1" (fstab lookup) won't work. + dbg("step mount: mount -t ext4 -o defaults,noatime %s %s", HostDevicePath(partDev), mountPath) send("mounting", fmt.Sprintf("mount -t ext4 %s %s ...", HostDevicePath(partDev), mountPath), 70) if out, err := exec.Command("mount", "-t", "ext4", "-o", "defaults,noatime", HostDevicePath(partDev), mountPath).CombinedOutput(); err != nil { + dbg("mount failed: %s", util.TruncateStr(string(out), 500)) // H19: Roll back fstab entry to prevent orphaned entry that hangs system on reboot. _ = RemoveFstabEntry(FstabPath, uuid) return "", fail("mounting", "Csatlakoztatás sikertelen: "+string(out), err) } // Verify mount actually worked (don't just trust exit code) + dbg("step verify: findmnt -n -o SOURCE --target %s", mountPath) verifyOut, verifyErr := exec.Command("findmnt", "-n", "-o", "SOURCE", "--target", mountPath).Output() if verifyErr != nil || strings.TrimSpace(string(verifyOut)) == "" { + dbg("mount verification failed: findmnt returned %q err=%v", string(verifyOut), verifyErr) // H19: Also roll back fstab if mount verify fails. _ = RemoveFstabEntry(FstabPath, uuid) return "", fail("mounting", "A csatlakoztatás nem ellenőrizhető: mount sikerült, de a meghajtó nem látható", fmt.Errorf("mount point %s not found after mount", mountPath)) } + dbg("mount verified: findmnt source=%q", strings.TrimSpace(string(verifyOut))) send("mounting", "Csatlakoztatva: "+mountPath, 80) @@ -185,6 +215,7 @@ func FormatAndMount(req FormatRequest, progress chan<- FormatProgress) (string, } } + dbg("format and mount completed successfully: %s", mountPath) send("done", "Meghajtó sikeresen inicializálva: "+mountPath, 100) return mountPath, nil diff --git a/controller/internal/storage/migrate.go b/controller/internal/storage/migrate.go index dd02733..d0d3871 100644 --- a/controller/internal/storage/migrate.go +++ b/controller/internal/storage/migrate.go @@ -18,11 +18,13 @@ import ( // MigrateRequest holds parameters for migrating app data. type MigrateRequest struct { - StackName string // e.g., "immich" - DisplayName string // e.g., "Immich" - CurrentHDDPath string // e.g., "/mnt/hdd_placeholder" - TargetPath string // e.g., "/mnt/hdd_1" - HDDMounts []string // host-side paths to rsync (e.g., ["/mnt/hdd_placeholder/storage/immich"]) + StackName string // e.g., "immich" + DisplayName string // e.g., "Immich" + CurrentHDDPath string // e.g., "/mnt/hdd_placeholder" + TargetPath string // e.g., "/mnt/hdd_1" + HDDMounts []string // host-side paths to rsync (e.g., ["/mnt/hdd_placeholder/storage/immich"]) + Logger *log.Logger // Optional logger for debug output + Debug bool // Enable debug logging } // MigrateProgress tracks migration state. @@ -82,6 +84,14 @@ func MigrateAppData( return fmt.Errorf("%s: %w", msg, err) } + dbg := func(format string, args ...interface{}) { + if req.Logger != nil && req.Debug { + req.Logger.Printf("[DEBUG] MigrateAppData: "+format, args...) + } + } + + dbg("starting migration: stack=%s from=%s to=%s mounts=%d", req.StackName, req.CurrentHDDPath, req.TargetPath, len(req.HDDMounts)) + // --- Step 1: Validate --- if req.CurrentHDDPath == "" { return fail("validating", "A jelenlegi tárhely nem megadott", fmt.Errorf("empty current HDD path")) @@ -107,8 +117,11 @@ func MigrateAppData( } } + dbg("estimated total size: %s (%d bytes)", bytesHuman(totalBytes), totalBytes) + // Check free space on target freeBytes := getFreeBytes(req.TargetPath) + dbg("target free space: %s (%d bytes)", bytesHuman(freeBytes), freeBytes) if freeBytes > 0 && totalBytes > 0 && int64(float64(totalBytes)*1.05) > freeBytes { return fail("validating", fmt.Sprintf( "Nincs elég szabad hely a céltárolón: szükséges ~%s, szabad %s", @@ -126,15 +139,18 @@ func MigrateAppData( send("stopping", "Alkalmazás leállítva", 10, 0, totalBytes) // --- Step 3: rsync --- + dbg("starting rsync phase: %d mount(s) to copy", len(req.HDDMounts)) var bytesCopied int64 for i, srcPath := range req.HDDMounts { // Determine destination path: replace CurrentHDDPath prefix with TargetPath. // H13: Require trailing separator to prevent /mnt/hdd matching /mnt/hdd_backup/data. if srcPath != req.CurrentHDDPath && !strings.HasPrefix(srcPath, req.CurrentHDDPath+"/") { + dbg("skipping mount %s (not under %s)", srcPath, req.CurrentHDDPath) continue } relPath := strings.TrimPrefix(srcPath, req.CurrentHDDPath) dstPath := filepath.Join(req.TargetPath, relPath) + dbg("rsync %d/%d: %s → %s", i+1, len(req.HDDMounts), srcPath, dstPath) // Ensure destination parent exists if err := os.MkdirAll(filepath.Dir(dstPath), 0755); err != nil { @@ -162,6 +178,7 @@ func MigrateAppData( send("updating", "Konfiguráció frissítése...", 75, bytesCopied, totalBytes) // --- Step 4: Update app.yaml HDD_PATH --- + dbg("updating config: HDD_PATH %s → %s for stack %s", req.CurrentHDDPath, req.TargetPath, req.StackName) if err := updateFn(req.StackName, req.TargetPath); err != nil { send("rolling_back", "Konfiguráció frissítése sikertelen, visszaállítás...", 0, bytesCopied, totalBytes) _ = startFn(req.StackName) @@ -178,6 +195,7 @@ func MigrateAppData( return fail("starting", "Alkalmazás indítása sikertelen az új tárolóról", err) } + dbg("migration completed: stack=%s bytesCopied=%d elapsed=%ds", req.StackName, bytesCopied, int(time.Since(start).Seconds())) send("done", fmt.Sprintf("Áthelyezés kész! Az alkalmazás az új tárolóról fut. (Régi adat: %s, idő: %ds)", req.CurrentHDDPath, int(time.Since(start).Seconds())), diff --git a/controller/internal/storage/scan_linux.go b/controller/internal/storage/scan_linux.go index f9abca3..0923b42 100644 --- a/controller/internal/storage/scan_linux.go +++ b/controller/internal/storage/scan_linux.go @@ -5,11 +5,14 @@ package storage import ( "encoding/json" "fmt" + "log" "os" "os/exec" "path/filepath" "strconv" "strings" + + "gitea.dooplex.hu/admin/felhom-controller/internal/util" ) // lsblkOutput matches the top-level JSON from lsblk -J. @@ -193,7 +196,13 @@ func partitionToParentDisk(devPath string) string { // enrichWithBlkid fills in missing FSType, UUID, and Label on partitions using blkid. // Probes each partition individually via /host-dev (Docker overrides /dev with its own // tmpfs, so the host block devices are accessible at /host-dev instead). -func enrichWithBlkid(disks []BlockDevice) { +func enrichWithBlkid(disks []BlockDevice, logger *log.Logger, debug bool) { + dbg := func(format string, args ...interface{}) { + if debug && logger != nil { + logger.Printf("[DEBUG] enrichWithBlkid: "+format, args...) + } + } + for i := range disks { for j := range disks[i].Partitions { p := &disks[i].Partitions[j] @@ -202,16 +211,25 @@ func enrichWithBlkid(disks []BlockDevice) { if p.FSType == "" { if out, err := exec.Command("blkid", "-o", "value", "-s", "TYPE", hostPath).Output(); err == nil { p.FSType = strings.TrimSpace(string(out)) + dbg("blkid TYPE %s → %q", hostPath, p.FSType) + } else { + dbg("blkid TYPE %s failed: %v", hostPath, err) } } if p.UUID == "" { if out, err := exec.Command("blkid", "-o", "value", "-s", "UUID", hostPath).Output(); err == nil { p.UUID = strings.TrimSpace(string(out)) + dbg("blkid UUID %s → %q", hostPath, p.UUID) + } else { + dbg("blkid UUID %s failed: %v", hostPath, err) } } if p.Label == "" { if out, err := exec.Command("blkid", "-o", "value", "-s", "LABEL", hostPath).Output(); err == nil { p.Label = strings.TrimSpace(string(out)) + dbg("blkid LABEL %s → %q", hostPath, p.Label) + } else { + dbg("blkid LABEL %s failed: %v", hostPath, err) } } } @@ -220,7 +238,13 @@ func enrichWithBlkid(disks []BlockDevice) { // ScanDisks detects all block devices and classifies them into // available (not mounted, not system) and system/mounted disks. -func ScanDisks() (*ScanResult, error) { +func ScanDisks(logger *log.Logger, debug bool) (*ScanResult, error) { + dbg := func(format string, args ...interface{}) { + if debug && logger != nil { + logger.Printf("[DEBUG] ScanDisks: "+format, args...) + } + } + out, err := exec.Command( "lsblk", "-J", "-b", "-o", "NAME,PATH,SIZE,TYPE,FSTYPE,MOUNTPOINT,MODEL,RM", @@ -229,11 +253,15 @@ func ScanDisks() (*ScanResult, error) { return nil, fmt.Errorf("lsblk failed: %w", err) } + dbg("raw lsblk JSON: %s", util.TruncateStr(string(out), 500)) + var parsed lsblkOutput if err := json.Unmarshal(out, &parsed); err != nil { return nil, fmt.Errorf("lsblk JSON parse failed: %w", err) } + dbg("lsblk returned %d block devices", len(parsed.BlockDevices)) + // Get system disk names from host fstab (works correctly inside container) systemDiskNames := getSystemDiskNames() @@ -287,16 +315,27 @@ func ScanDisks() (*ScanResult, error) { isSystem := systemDiskNames[dev.Name] bd.Mounted = anyMounted || isSystem + classification := "available" if isSystem || anyMounted { + classification = "system" result.SystemDisks = append(result.SystemDisks, bd) } else { result.AvailableDisks = append(result.AvailableDisks, bd) } + + dbg("disk %s: model=%q size=%s partitions=%d removable=%v classification=%s", + bd.Name, bd.Model, bd.Size, len(bd.Partitions), bd.Removable, classification) + + for _, p := range bd.Partitions { + dbg(" partition %s: fstype=%q mountpoint=%q size=%s", p.Name, p.FSType, p.MountPoint, p.Size) + } } + dbg("classification result: %d available, %d system", len(result.AvailableDisks), len(result.SystemDisks)) + // Enrich FSType, UUID, Label from blkid (lsblk can't probe fstype in container) - enrichWithBlkid(result.AvailableDisks) - enrichWithBlkid(result.SystemDisks) + enrichWithBlkid(result.AvailableDisks, logger, debug) + enrichWithBlkid(result.SystemDisks, logger, debug) return result, nil } diff --git a/controller/internal/storage/scan_other.go b/controller/internal/storage/scan_other.go index 011cdca..634bf62 100644 --- a/controller/internal/storage/scan_other.go +++ b/controller/internal/storage/scan_other.go @@ -2,9 +2,12 @@ package storage -import "fmt" +import ( + "fmt" + "log" +) // ScanDisks is not supported on non-Linux platforms. -func ScanDisks() (*ScanResult, error) { +func ScanDisks(logger *log.Logger, debug bool) (*ScanResult, error) { return nil, fmt.Errorf("storage init is only supported on Linux") } diff --git a/controller/internal/sync/sync.go b/controller/internal/sync/sync.go index a1ba54b..35d54e3 100644 --- a/controller/internal/sync/sync.go +++ b/controller/internal/sync/sync.go @@ -3,12 +3,14 @@ package sync import ( "bytes" "crypto/sha256" + "encoding/hex" "fmt" "io" "log" "os" "os/exec" "path/filepath" + "regexp" "strings" "sync" "time" @@ -60,6 +62,17 @@ func New(cfg *config.Config, logger *log.Logger, rescanFn func() error, postSync } } +// isDebug returns true if the logging level is set to "debug". +func (s *Syncer) isDebug() bool { return s.cfg.Logging.Level == "debug" } + +// maskRepoURL masks credentials in a git URL for safe logging. +// e.g., "https://user:token@host/path" → "https://user:***@host/path" +var reURLCreds = regexp.MustCompile(`(https?://)([^:]+):([^@]+)@`) + +func maskRepoURL(url string) string { + return reURLCreds.ReplaceAllString(url, "${1}${2}:***@") +} + // Start begins the periodic sync loop. Call Stop() to terminate. func (s *Syncer) Start() { if s.cfg.Git.RepoURL == "" { @@ -192,6 +205,9 @@ func (s *Syncer) doSync() SyncResult { // Step 4: Inject missing deploy fields for updated stacks if len(updated) > 0 && s.postSyncHook != nil { + if s.isDebug() { + s.logger.Printf("[DEBUG] [SYNC] Post-sync hook: triggering missing field injection for %d stack(s): %v", len(updated), updated) + } s.postSyncHook(updated) } @@ -230,11 +246,17 @@ func (s *Syncer) gitCloneOrPull() error { args := []string{"clone", "--depth", "1", "--branch", s.cfg.Git.Branch} repoURL := s.buildRepoURL() args = append(args, repoURL, s.cacheDir) + if s.isDebug() { + s.logger.Printf("[DEBUG] [SYNC] git clone URL: %s, branch: %s, cacheDir: %s", maskRepoURL(repoURL), s.cfg.Git.Branch, s.cacheDir) + } return s.runGit(args...) } // Pull s.logger.Printf("[SYNC] Pulling latest from %s (branch: %s)", s.cfg.Git.RepoURL, s.cfg.Git.Branch) + if s.isDebug() { + s.logger.Printf("[DEBUG] [SYNC] git fetch --depth 1 origin %s in %s", s.cfg.Git.Branch, s.cacheDir) + } if err := s.runGitInDir(s.cacheDir, "fetch", "--depth", "1", "origin", s.cfg.Git.Branch); err != nil { return fmt.Errorf("git fetch: %w", err) } @@ -290,6 +312,9 @@ func (s *Syncer) copyTemplates() (newApps []string, updated []string, err error) dst := filepath.Join(dstDir, filename) if _, err := os.Stat(src); os.IsNotExist(err) { + if s.isDebug() { + s.logger.Printf("[DEBUG] [SYNC] %s/%s: source not found, skipping", appName, filename) + } continue } @@ -301,6 +326,11 @@ func (s *Syncer) copyTemplates() (newApps []string, updated []string, err error) if changed { anyChanged = true s.logger.Printf("[SYNC] Updated %s/%s", appName, filename) + if s.isDebug() { + s.logFileHashes(appName, filename, src, dst) + } + } else if s.isDebug() { + s.logger.Printf("[DEBUG] [SYNC] %s/%s: hash match, skipped", appName, filename) } } @@ -314,6 +344,22 @@ func (s *Syncer) copyTemplates() (newApps []string, updated []string, err error) return newApps, updated, nil } +// logFileHashes logs the source and destination file hashes for debugging. +func (s *Syncer) logFileHashes(appName, filename, src, dst string) { + srcData, err := os.ReadFile(src) + if err != nil { + return + } + srcHash := sha256.Sum256(srcData) + dstData, err := os.ReadFile(dst) + if err != nil { + s.logger.Printf("[DEBUG] [SYNC] %s/%s: src=%s, dst=new file", appName, filename, hex.EncodeToString(srcHash[:8])) + return + } + dstHash := sha256.Sum256(dstData) + s.logger.Printf("[DEBUG] [SYNC] %s/%s: src=%s, dst=%s (changed)", appName, filename, hex.EncodeToString(srcHash[:8]), hex.EncodeToString(dstHash[:8])) +} + // copyIfChanged copies src to dst only if the content differs. // Returns true if the file was actually written. func copyIfChanged(src, dst string) (bool, error) { diff --git a/controller/internal/util/strings.go b/controller/internal/util/strings.go new file mode 100644 index 0000000..e0e5383 --- /dev/null +++ b/controller/internal/util/strings.go @@ -0,0 +1,12 @@ +package util + +import "strings" + +// TruncateStr truncates a string to maxLen characters, appending "..." if truncated. +func TruncateStr(s string, maxLen int) string { + s = strings.TrimSpace(s) + if len(s) <= maxLen { + return s + } + return s[:maxLen] + "..." +} diff --git a/controller/internal/web/storage_handlers.go b/controller/internal/web/storage_handlers.go index 8844efc..3fee8ae 100644 --- a/controller/internal/web/storage_handlers.go +++ b/controller/internal/web/storage_handlers.go @@ -197,7 +197,7 @@ func (s *Server) storageAPIHandler(w http.ResponseWriter, r *http.Request) { // storageScanAPIHandler handles POST /api/storage/scan. func (s *Server) storageScanAPIHandler(w http.ResponseWriter, r *http.Request) { - result, err := storage.ScanDisks() + result, err := storage.ScanDisks(s.logger, s.cfg.Logging.Level == "debug") if err != nil { s.logger.Printf("[ERROR] storageScan: %v", err) jsonError(w, "Meghajtók keresése sikertelen: "+err.Error(), http.StatusInternalServerError) @@ -249,12 +249,14 @@ func (s *Server) storageInitAPIHandler(w http.ResponseWriter, r *http.Request) { Label: req.Label, CreatePartition: req.CreatePartition, SetDefault: req.SetDefault, + Logger: s.logger, + Debug: s.cfg.Logging.Level == "debug", } // Smart partition: if disk has exactly 1 partition with no filesystem, // skip destructive repartitioning and format the existing partition directly. if fmtReq.CreatePartition { - if scanResult, scanErr := storage.ScanDisks(); scanErr == nil { + if scanResult, scanErr := storage.ScanDisks(s.logger, s.cfg.Logging.Level == "debug"); scanErr == nil { for _, disk := range scanResult.AvailableDisks { if disk.Path == req.DevicePath && len(disk.Partitions) == 1 && disk.Partitions[0].FSType == "" { s.logger.Printf("[INFO] Disk %s has 1 empty partition (%s) — skipping repartition", @@ -493,6 +495,8 @@ func (s *Server) storageMigrateAPIHandler(w http.ResponseWriter, r *http.Request CurrentHDDPath: currentHDDPath, TargetPath: req.TargetPath, HDDMounts: mounts, + Logger: s.logger, + Debug: s.cfg.Logging.Level == "debug", } stopFn := func(name string) error { @@ -1037,6 +1041,8 @@ func (s *Server) storageAttachAPIHandler(w http.ResponseWriter, r *http.Request) SubPath: req.SubPath, Label: req.Label, SetDefault: req.SetDefault, + Logger: s.logger, + Debug: s.cfg.Logging.Level == "debug", } go func() {