From e7c27364bf69f9c01d9c750ab2c5ade5d759c5e2 Mon Sep 17 00:00:00 2001 From: kisfenyo Date: Tue, 17 Feb 2026 10:10:35 +0100 Subject: [PATCH] =?UTF-8?q?Phase=20C=20=E2=80=94=20Storage=20Initializatio?= =?UTF-8?q?n,=20Data=20Migration=20&=20Startup=20Fixes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TASK.md | 1036 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 637 insertions(+), 399 deletions(-) diff --git a/TASK.md b/TASK.md index 731de42..df9e7e6 100644 --- a/TASK.md +++ b/TASK.md @@ -1,467 +1,705 @@ -# TASK: Phase B — Storage Management UI Polish & Health Severity Fix +# TASK: Phase C — Storage Initialization, Data Migration & Startup Fixes -**Version target:** controller 0.10.0 +**Version target:** controller 0.11.0 **Repo:** `deploy-felhom-compose` (controller) ## Overview -Phase A (v0.9.0) delivered the storage paths foundation: registry in settings.json, auto-discovery, per-app HDD_PATH resolution, settings UI with CRUD, deploy dropdown, and health monitoring. All functional — but health check now FAILS on demo-felhom because `/mnt/hdd_placeholder` is (correctly) detected as not a real mount point. +Three features in this phase: -**Immediate fix:** Health severity reclassification — non-mount-point is a **warning**, not an **issue** that causes FAIL. The FAIL status should be reserved for genuinely broken things (services down, disk critically full, backup failing), not informational findings. - -Phase B then polishes the UI and fills gaps: - -1. **Health severity fix** — mount-point check: warning not issue -2. **Success flash messages** — storage operations only show errors, never success -3. **Edit labels** — can't rename a storage path after adding it -4. **App names per storage path** — settings page shows count, not which apps -5. **Per-app storage info on stacks page** — no visibility into which storage each app uses -6. 
**Deploy dropdown enhancements** — show free space, disk usage warning -7. **Filesystem & disk info** — show ext4/btrfs, device, model on settings page -8. **Backup page: storage path context** — show which storage path each app is on +1. **Startup ping + hub report** — Controller should announce itself immediately on start, not wait 5–15 minutes for the first scheduler tick +2. **Storage initialization** — Detect unformatted/unmounted disks, format (ext4), mount, and register as storage path — all from the web UI +3. **Data migration** — Per-app "Mozgatás" button to move app data between storage paths with rsync + progress --- -## 0. Health Severity Fix (URGENT — do first) +## 0. Startup Ping & Hub Report (Quick Fix) ### 0.1 Problem -`checkStoragePaths()` in `healthcheck.go` currently classifies non-mount-point as an **issue**: +After controller starts (e.g., after `docker restart felhom-controller` or system reboot), the first heartbeat fires after 5 minutes, first system_health after 5 minutes, and first hub report after 15 minutes. During this gap, Healthchecks shows stale "Last Ping: X minutes ago" and hub has no fresh data. + +### 0.2 Fix: Fire initial pings + report immediately after scheduler starts + +Add to `main.go` after `sched.Start(ctx)`, inside a goroutine (non-blocking): ```go -// CURRENT (line ~6751): -if !system.IsMountPoint(sp.Path) { - issues = append(issues, fmt.Sprintf("Storage path %s is NOT a mount point — data writes to SSD!", sp.Path)) -} -``` +// Fire startup pings + hub report immediately (don't wait for first scheduler tick) +go func() { + time.Sleep(5 * time.Second) // Let all subsystems fully initialize -Issues → `status = "fail"` → Healthchecks shows FAIL → Healthchecks triggers alert → hub shows "STATUS: FAIL". This cascades into a false alarm for any setup where the storage path is intentionally on SSD (demo environments, test environments, customers who haven't connected an external drive yet). 
+ // Heartbeat ping + pinger.Ping(cfg.Monitoring.PingUUIDs.Heartbeat, "startup") + logger.Println("[INFO] Startup heartbeat ping sent") -### 0.2 Fix: Warning + Hungarian message + // System health ping + healthReport := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths()) + body := healthReport.FormatMessage() + healthUUID := cfg.Monitoring.PingUUIDs.SystemHealth + if healthReport.Status == "fail" { + pinger.Fail(healthUUID, body) + } else { + pinger.Ping(healthUUID, body) + } + logger.Printf("[INFO] Startup health ping sent (status: %s)", healthReport.Status) -```go -// FIXED: -if !system.IsMountPoint(sp.Path) { - warnings = append(warnings, fmt.Sprintf( - "Storage path %s is not a separate mount point — data is stored on the system drive", - sp.Path)) -} -``` - -Health status becomes `"warn"` instead of `"fail"`. The warning still appears on: -- Controller monitoring page (red banner → yellow banner) -- Hub customer detail page (Issues → Warnings section) -- Healthchecks ping body (status: WARN instead of FAIL) - -### 0.3 When should non-mount-point be an ISSUE? - -In the future (Phase C or later), consider an "acknowledged" flag per storage path: -- When adding a path that's not a mount point, show a confirmation dialog: "Ez az útvonal a rendszermeghajtón van. Biztosan folytatja?" -- If acknowledged, the health check uses warning level -- If a previously-mount-point path STOPS being a mount point (drive disconnected), that IS an issue — it means something changed unexpectedly - -For now, the simple severity downgrade to warning is sufficient. The informational value is preserved, without false alarms. - -### 0.4 Also: Hungarian messages in health check - -Currently health messages are in English: -- "Storage path /mnt/hdd_placeholder is NOT a mount point — data writes to SSD!" -- "Storage path not accessible: ..." -- "Storage ... nearly full: ..." - -These appear on the customer-facing monitoring page. 
Change to Hungarian: - -```go -// Path not accessible -warnings = append(warnings, fmt.Sprintf("Adattároló nem elérhető: %s", sp.Path)) - -// Not a mount point -warnings = append(warnings, fmt.Sprintf( - "Az adattároló (%s) nem külön meghajtón van — az adatok a rendszermeghajtóra íródnak", sp.Path)) - -// Disk usage critical (≥95%) — this stays as issue -issues = append(issues, fmt.Sprintf("Adattároló majdnem megtelt: %s (%.0f%%)", sp.Path, di.UsedPercent)) - -// Disk usage high (≥90%) — warning -warnings = append(warnings, fmt.Sprintf("Adattároló használat magas: %s (%.0f%%)", sp.Path, di.UsedPercent)) -``` - -Note: Hub and Healthchecks receive the raw text. Hub is operator-facing (English would also be fine there), but since the same messages show on the customer controller, Hungarian is better for consistency. - -### 0.5 Monitoring page banner color - -Currently the monitoring page shows issues as red banners. Warnings should be yellow/amber: - -```html -{{range .Warnings}} -
- ⚠️ {{.}} -
-{{end}} -``` - -Check if the monitoring template already differentiates issue vs warning banners. If not, add CSS class: - -```css -.monitoring-banner-warn { - background: rgba(255, 193, 7, 0.15); - border-left: 4px solid var(--yellow); - color: var(--yellow); -} -``` - ---- - -## 1. Success Flash Messages for Storage Operations - -### 1.1 Problem - -All storage handlers (`/settings/storage/add`, `/remove`, `/default`, `/schedulable`) only set `StorageError` on failure. On success they redirect without feedback. - -### 1.2 Fix: Query param flash (consistent with backup page) - -Use the existing backup page pattern: redirect with query params `?storage_msg=success&storage_detail=...` - -In settings handler, parse: -```go -if msg := r.URL.Query().Get("storage_msg"); msg == "success" { - data["StorageSuccess"] = r.URL.Query().Get("storage_detail") -} -``` - -Success messages: -- **Add:** "Adattároló sikeresen hozzáadva: /mnt/hdd_1" -- **Remove:** "Adattároló eltávolítva: /mnt/hdd_1" -- **Set default:** "Alapértelmezett adattároló beállítva: /mnt/hdd_1" -- **Toggle schedulable:** "Adattároló állapot módosítva: /mnt/hdd_1" - -### 1.3 Template - -Add to settings.html (after `StorageError`): -```html -{{if .StorageSuccess}}
{{.StorageSuccess}}
{{end}} -``` - ---- - -## 2. Edit Storage Path Labels - -### 2.1 UI: Inline edit - -Add edit button next to label. JS toggles between display and inline form: - -```html -
- {{.Label}} - -
-``` - -JS `editStorageLabel()` replaces content with: -```html -
- - - - -
-``` - -### 2.2 Route & handler - -| Method | Path | Auth? | -|--------|------|-------| -| POST | `/settings/storage/label` | Yes | - -Handler: parse `storage_path` + `storage_label`, validate (non-empty, max 50 chars), call `settings.SetStorageLabel()`, redirect with success flash. - -### 2.3 Settings method - -```go -func (s *Settings) SetStorageLabel(path, label string) error { - s.mu.Lock() - defer s.mu.Unlock() - for i := range s.StoragePaths { - if s.StoragePaths[i].Path == path { - s.StoragePaths[i].Label = label - return s.save() + // Hub report + if cfg.Hub.Enabled && cfg.Hub.URL != "" { + pusher := report.NewPusher(&cfg.Hub, logger) + r := report.BuildReport(cfg, stackMgr, backupMgr, cpuCollector, metricsStore, Version, sett.GetStoragePaths()) + if err := pusher.Push(r); err != nil { + logger.Printf("[WARN] Startup hub report failed: %v", err) + } else { + logger.Println("[INFO] Startup hub report sent") } } - return fmt.Errorf("storage path %q not found", path) +}() +``` + +**Note:** The existing `go func()` that runs `alertMgr.Refresh()` at startup already does a health check but doesn't ping Healthchecks or hub. Either merge the startup pings into that goroutine or add this one alongside it. The 5-second delay gives Docker, metrics, and backup subsystems time to initialize. + +### 0.3 Reuse existing pusher instance + +The hub-report scheduler task already creates a `pusher` — consider creating it once at init and reusing it in both places: + +```go +var pusher *report.Pusher +if cfg.Hub.Enabled && cfg.Hub.URL != "" { + pusher = report.NewPusher(&cfg.Hub, logger) + sched.Every("hub-report", pushInterval, func(ctx context.Context) error { + r := report.BuildReport(...) + return pusher.Push(r) + }) +} +// ... then in startup goroutine: +if pusher != nil { + // use same pusher } ``` --- -## 3. App Names Per Storage Path (Settings Page) +## 1. 
Storage Initialization Feature -### 3.1 Current: "3 alkalmazás használja" — no names +### 1.1 Concept -### 3.2 Enhancement: Expandable list with names + sizes +The controller UI provides a guided wizard to: +1. **Scan** for block devices that are not mounted (like `sdb` in the demo system) +2. **Show** disk info: size, model, existing partitions/filesystems +3. **Format** with ext4 after explicit confirmation with safety warnings +4. **Mount** at a user-specified path under `/mnt/` +5. **Register** automatically as a storage path in settings.json + +This replaces the need to SSH in and run `hdd-setup.sh` manually. The existing `scripts/hdd-setup.sh` in the repo provides the proven logic — the controller wraps this as a web-based flow. + +### 1.2 Architecture: Go package, not bash wrapper + +Implement in Go directly (not shelling out to `hdd-setup.sh`) for: +- Structured error handling and progress feedback +- Testability +- No dependency on bash script being present in the container + +Use `exec.Command` for: `lsblk`, `blkid`, `mkfs.ext4`, `mount`, and fstab editing. + +### 1.3 New package: `internal/storage/` -Extend `StoragePathView`: ```go -type StorageAppDetail struct { - Name string // Display name (e.g., "Immich") - Stack string // Stack name (for link) - SizeHuman string // Data size on this path +package storage + +// BlockDevice represents a detected physical disk. +type BlockDevice struct { + Name string // "sdb" + Path string // "/dev/sdb" + Size string // "931.5G" + SizeBytes int64 + Model string // "WD Elements 25A2" + Serial string // "WX..." (if available) + Type string // "disk" + Removable bool // true for USB + Partitions []Partition + Mounted bool // any partition mounted } -type StoragePathView struct { - // ... existing fields ... - AppDetails []StorageAppDetail // NEW +// Partition represents a partition on a block device. 
+type Partition struct { + Name string // "sdb1" + Path string // "/dev/sdb1" + Size string // "931.5G" + FSType string // "ext4", "" (no filesystem) + Label string // filesystem label + UUID string + MountPoint string // "" if not mounted +} + +// ScanResult from disk detection. +type ScanResult struct { + AvailableDisks []BlockDevice // Unmounted disks/partitions + SystemDisks []BlockDevice // Mounted/system disks (for display only, not selectable) +} + +// FormatRequest parameters for formatting a disk. +type FormatRequest struct { + DevicePath string // "/dev/sdb" (whole disk) or "/dev/sdb1" (partition) + MountName string // "hdd_1" → will mount at /mnt/hdd_1 + Label string // Display label for storage path registry + CreatePartition bool // If true, wipe and create single partition first +} + +// FormatProgress tracks the formatting/mounting progress. +type FormatProgress struct { + Step string // "partitioning", "formatting", "mounting", "fstab", "registering", "done", "error" + Message string // Human-readable status + Error string // Error message if Step == "error" + Percent int // 0-100 } ``` -Template: +### 1.4 Core functions + +```go +// ScanDisks detects all block devices and classifies them. +func ScanDisks() (*ScanResult, error) { + // Run: lsblk -J -o NAME,SIZE,TYPE,FSTYPE,MOUNTPOINT,MODEL,SERIAL,RM,PKNAME -b + // Parse JSON output + // Classify: system disk (has / or /boot mount) vs available + // A disk is "available" if NO partition is mounted AND it's not the system disk +} + +// FormatAndMount formats a partition and mounts it. +// This is a long-running operation — use a channel for progress updates. 
+func FormatAndMount(req FormatRequest, progress chan<- FormatProgress) error { + // Step 1: Validate + // - Device exists + // - Not already mounted + // - Not system disk (compare with / device) + // - Mount name valid (alphanumeric + underscore, no spaces) + // - Mount path doesn't already exist OR is empty dir + + // Step 2: Partition (if CreatePartition) + // - sfdisk: create single Linux partition filling whole disk + // - Wait for kernel to re-read partition table (partprobe) + // - Update device path to new partition (e.g., /dev/sdb1) + + // Step 3: Format + // - mkfs.ext4 -L