From 0b23834e3ce7c54ef88f24445dd91778264825e8 Mon Sep 17 00:00:00 2001 From: kisfenyo Date: Tue, 17 Feb 2026 10:41:05 +0100 Subject: [PATCH] =?UTF-8?q?BUGFIX:=20Storage=20Scan=20=E2=80=94=20System?= =?UTF-8?q?=20Disk=20Detection=20&=20FSType=20in=20Container?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TASK.md | 879 ++++++++++++++------------------------------------------ 1 file changed, 213 insertions(+), 666 deletions(-) diff --git a/TASK.md b/TASK.md index df9e7e6..df0d702 100644 --- a/TASK.md +++ b/TASK.md @@ -1,705 +1,252 @@ -# TASK: Phase C — Storage Initialization, Data Migration & Startup Fixes +# BUGFIX: Storage Scan — System Disk Detection & FSType in Container -**Version target:** controller 0.11.0 -**Repo:** `deploy-felhom-compose` (controller) +**Affects:** v0.11.0, `internal/storage/scan_linux.go` +**Root cause:** Controller runs in a Docker container. Even with `--privileged`, `lsblk` reports mount points from the container's mount namespace (not host), and often can't probe filesystem types due to missing udev/blkid cache. -## Overview +## Bug 1: System disk (sda) shows as available -Three features in this phase: +### Current broken logic +```go +if part.MountPoint == "/" || part.MountPoint == "/boot" || part.MountPoint == "/boot/efi" { + isSystem = true +} +``` +Inside the container, sda2 (host's `/`) shows mounted at `/opt/docker/felhom-controller/data` (bind mount), not `/`. So `isSystem` stays false → sda appears in AvailableDisks. -1. **Startup ping + hub report** — Controller should announce itself immediately on start, not wait 5–15 minutes for the first scheduler tick -2. **Storage initialization** — Detect unformatted/unmounted disks, format (ext4), mount, and register as storage path — all from the web UI -3. **Data migration** — Per-app "Mozgatás" button to move app data between storage paths with rsync + progress +### Fix: Parse host's fstab + blkid to detect system disk ---- - -## 0. 
Startup Ping & Hub Report (Quick Fix) - -### 0.1 Problem - -After controller starts (e.g., after `docker restart felhom-controller` or system reboot), the first heartbeat fires after 5 minutes, first system_health after 5 minutes, and first hub report after 15 minutes. During this gap, Healthchecks shows stale "Last Ping: X minutes ago" and hub has no fresh data. - -### 0.2 Fix: Fire initial pings + report immediately after scheduler starts - -Add to `main.go` after `sched.Start(ctx)`, inside a goroutine (non-blocking): +The host's fstab is mounted at `/host-fstab` inside the container. Parse it to find which devices/UUIDs are used for `/`, `/boot`, `/boot/efi`, and `swap`. Then resolve UUIDs to device paths via `blkid`, and mark their parent disks as system disks. ```go -// Fire startup pings + hub report immediately (don't wait for first scheduler tick) -go func() { - time.Sleep(5 * time.Second) // Let all subsystems fully initialize - - // Heartbeat ping - pinger.Ping(cfg.Monitoring.PingUUIDs.Heartbeat, "startup") - logger.Println("[INFO] Startup heartbeat ping sent") - - // System health ping - healthReport := monitor.RunHealthCheck(cfg, cpuCollector, sett.GetStoragePaths()) - body := healthReport.FormatMessage() - healthUUID := cfg.Monitoring.PingUUIDs.SystemHealth - if healthReport.Status == "fail" { - pinger.Fail(healthUUID, body) - } else { - pinger.Ping(healthUUID, body) +// getSystemDiskNames returns the set of parent disk names (e.g., "sda") +// that contain system partitions (/, /boot, /boot/efi, swap). 
func getSystemDiskNames() map[string]bool {
	systemDisks := map[string]bool{}

	// The host's fstab is expected to be bind-mounted at /host-fstab inside
	// the container; fall back to /etc/fstab when that mount is absent
	// (not containerized, or a different mount layout).
	fstabPath := "/host-fstab"
	if _, err := os.Stat(fstabPath); err != nil {
		fstabPath = "/etc/fstab"
	}

	data, err := os.ReadFile(fstabPath)
	if err != nil {
		// Best effort: without an fstab no disk can be flagged. Note that an
		// empty result means "unknown", NOT "no system disk" — callers that
		// offer destructive operations should not treat it as a guarantee.
		return systemDisks
	}

	for _, source := range systemFstabSources(string(data)) {
		devPath := resolveFstabSource(source)
		if devPath == "" {
			continue // unresolvable entry; skip rather than guess
		}
		if diskName := partitionToParentDisk(devPath); diskName != "" {
			systemDisks[diskName] = true
		}
	}

	return systemDisks
}

// systemFstabSources returns the first field (device spec) of every fstab
// entry that is mounted at /, /boot or /boot/efi, or whose filesystem type is
// swap. Blank lines, comment lines and entries with fewer than three fields
// are ignored.
func systemFstabSources(data string) []string {
	var sources []string
	for _, line := range strings.Split(data, "\n") {
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		fields := strings.Fields(line)
		if len(fields) < 3 {
			continue
		}
		mountPoint, fsType := fields[1], fields[2]
		switch mountPoint {
		case "/", "/boot", "/boot/efi":
			// system mount point
		default:
			if fsType != "swap" {
				continue
			}
		}
		sources = append(sources, fields[0])
	}
	return sources
}

// resolveFstabSource turns an fstab device spec into a device path.
// "/dev/..." specs are returned unchanged; UUID=, LABEL=, PARTUUID= and
// PARTLABEL= specs are looked up with blkid. It returns "" when the spec is
// of an unsupported form or blkid cannot resolve it.
func resolveFstabSource(source string) string {
	if strings.HasPrefix(source, "/dev/") {
		return source
	}

	parts := strings.SplitN(source, "=", 2)
	if len(parts) != 2 {
		return ""
	}
	// fstab allows the value to be quoted, e.g. UUID="abcd-1234".
	tag, value := parts[0], strings.Trim(parts[1], `"'`)
	if value == "" {
		return ""
	}

	var args []string
	switch tag {
	case "UUID":
		args = []string{"-U", value}
	case "LABEL":
		args = []string{"-L", value}
	case "PARTUUID", "PARTLABEL":
		args = []string{"-o", "device", "-t", tag + "=" + value}
	default:
		return ""
	}

	out, err := exec.Command("blkid", args...).Output()
	if err != nil {
		// Deliberately best effort: blkid exits non-zero when nothing matches.
		return ""
	}
	// A tag search may print several devices; the first one is enough to
	// identify the parent disk. Empty output yields "".
	devices := strings.Fields(string(out))
	if len(devices) == 0 {
		return ""
	}
	return devices[0]
}

// partitionToParentDisk extracts the parent disk name from a partition device
// path: "/dev/sda2" → "sda", "/dev/nvme0n1p2" → "nvme0n1",
// "/dev/mmcblk0p1" → "mmcblk0".
//
// Device families whose disk names themselves end in a digit (nvme, mmcblk,
// loop, nbd, md) separate the partition number with "p"; for those, a name
// without a "<digit>p<digits>" suffix is treated as a whole disk and returned
// unchanged. "dm-N" names are returned unchanged as well. All other names
// simply have their trailing digits removed. An empty path yields "".
func partitionToParentDisk(devPath string) string {
	devPath = strings.TrimSpace(devPath)
	if devPath == "" {
		return ""
	}

	name := filepath.Base(devPath) // e.g. "sda2"
	stem := strings.TrimRight(name, "0123456789")

	for _, prefix := range []string{"nvme", "mmcblk", "loop", "nbd", "md", "dm-"} {
		if !strings.HasPrefix(name, prefix) {
			continue
		}
		// Partition form is "<disk ending in digit>p<number>". Requiring a
		// digit before the 'p' keeps "loop0" from being mistaken for a
		// partition of "loo".
		if stem != name && len(stem) >= 2 && stem[len(stem)-1] == 'p' {
			if c := stem[len(stem)-2]; c >= '0' && c <= '9' {
				return stem[:len(stem)-1]
			}
		}
		return name
	}

	// Standard: sda2 → sda, sdb1 → sdb, vda1 → vda
	return stem
}
**Format** with ext4 after explicit confirmation with safety warnings -4. **Mount** at a user-specified path under `/mnt/` -5. **Register** automatically as a storage path in settings.json - -This replaces the need to SSH in and run `hdd-setup.sh` manually. The existing `scripts/hdd-setup.sh` in the repo provides the proven logic — the controller wraps this as a web-based flow. - -### 1.2 Architecture: Go package, not bash wrapper - -Implement in Go directly (not shelling out to `hdd-setup.sh`) for: -- Structured error handling and progress feedback -- Testability -- No dependency on bash script being present in the container - -Use `exec.Command` for: `lsblk`, `blkid`, `mkfs.ext4`, `mount`, and fstab editing. - -### 1.3 New package: `internal/storage/` +Then in `ScanDisks()`, replace the mount-point-based detection: ```go -package storage - -// BlockDevice represents a detected physical disk. -type BlockDevice struct { - Name string // "sdb" - Path string // "/dev/sdb" - Size string // "931.5G" - SizeBytes int64 - Model string // "WD Elements 25A2" - Serial string // "WX..." (if available) - Type string // "disk" - Removable bool // true for USB - Partitions []Partition - Mounted bool // any partition mounted -} - -// Partition represents a partition on a block device. -type Partition struct { - Name string // "sdb1" - Path string // "/dev/sdb1" - Size string // "931.5G" - FSType string // "ext4", "" (no filesystem) - Label string // filesystem label - UUID string - MountPoint string // "" if not mounted -} - -// ScanResult from disk detection. -type ScanResult struct { - AvailableDisks []BlockDevice // Unmounted disks/partitions - SystemDisks []BlockDevice // Mounted/system disks (for display only, not selectable) -} - -// FormatRequest parameters for formatting a disk. 
-type FormatRequest struct { - DevicePath string // "/dev/sdb" (whole disk) or "/dev/sdb1" (partition) - MountName string // "hdd_1" → will mount at /mnt/hdd_1 - Label string // Display label for storage path registry - CreatePartition bool // If true, wipe and create single partition first -} - -// FormatProgress tracks the formatting/mounting progress. -type FormatProgress struct { - Step string // "partitioning", "formatting", "mounting", "fstab", "registering", "done", "error" - Message string // Human-readable status - Error string // Error message if Step == "error" - Percent int // 0-100 -} -``` - -### 1.4 Core functions - -```go -// ScanDisks detects all block devices and classifies them. func ScanDisks() (*ScanResult, error) { - // Run: lsblk -J -o NAME,SIZE,TYPE,FSTYPE,MOUNTPOINT,MODEL,SERIAL,RM,PKNAME -b - // Parse JSON output - // Classify: system disk (has / or /boot mount) vs available - // A disk is "available" if NO partition is mounted AND it's not the system disk -} - -// FormatAndMount formats a partition and mounts it. -// This is a long-running operation — use a channel for progress updates. -func FormatAndMount(req FormatRequest, progress chan<- FormatProgress) error { - // Step 1: Validate - // - Device exists - // - Not already mounted - // - Not system disk (compare with / device) - // - Mount name valid (alphanumeric + underscore, no spaces) - // - Mount path doesn't already exist OR is empty dir - - // Step 2: Partition (if CreatePartition) - // - sfdisk: create single Linux partition filling whole disk - // - Wait for kernel to re-read partition table (partprobe) - // - Update device path to new partition (e.g., /dev/sdb1) - - // Step 3: Format - // - mkfs.ext4 -L