feat: storage watchdog — USB disconnect detection, auto-stop, safe eject, auto-reconnect (v0.17.0)
New storage watchdog monitors registered storage paths every 5s. On disconnect (3 consecutive probe failures), auto-stops affected apps, lazy-unmounts stale VFS entries, fires alerts/notifications/hub report. On reconnect (UUID detected), auto-remounts via fstab, cleans stale restic locks, offers app restart. Safe disconnect UI for USB drives: confirmation dialog, stop apps, sync, unmount. Disconnected state visible across all pages (dashboard, settings, backups, monitoring) with hatched red bars and badges. Backup guards skip disconnected drives. 22 files changed (1 new: monitor/watchdog.go), ~1500 lines added. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -172,6 +172,12 @@ func checkProtectedContainers(protected []string) []string {
|
||||
|
||||
func checkStoragePaths(paths []settings.StoragePath) (issues, warnings []string) {
|
||||
for _, sp := range paths {
|
||||
// Skip disconnected paths — handled by the storage watchdog
|
||||
if sp.Disconnected {
|
||||
warnings = append(warnings, fmt.Sprintf("Meghajtó leválasztva: %s (%s)", sp.Label, sp.Path))
|
||||
continue
|
||||
}
|
||||
|
||||
// Path accessible?
|
||||
if _, err := os.Stat(sp.Path); err != nil {
|
||||
warnings = append(warnings, fmt.Sprintf("Adattároló nem elérhető: %s", sp.Path))
|
||||
|
||||
@@ -0,0 +1,612 @@
|
||||
package monitor
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/notify"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
|
||||
)
|
||||
|
||||
const (
	// hostFstabPath is the location inside the container at which the
	// host's fstab is mounted.
	hostFstabPath = "/host-fstab"

	// hostDevUUIDPath is the location through which the host's
	// /dev/disk/by-uuid directory can be reached.
	hostDevUUIDPath = "/host-dev/disk/by-uuid"

	// primaryResticSubpath is the primary restic repository location,
	// relative to the root of a drive.
	primaryResticSubpath = "backups/primary/restic"

	// probeThreshold is how many probes have to fail in a row before a
	// drive is declared disconnected.
	probeThreshold = 3

	// defaultProbeInterval is the probe cadence used for drives that are
	// currently connected.
	defaultProbeInterval = 5 * time.Second

	// disconnectedProbeInterval is the slower cadence used for drives that
	// are disconnected; those checks look for the drive's UUID reappearing
	// rather than performing I/O on the mount.
	disconnectedProbeInterval = 30 * time.Second
)
|
||||
|
||||
// WatchdogStackInfo is the minimal description of a deployed stack that the
// watchdog works with.
type WatchdogStackInfo struct {
	// Name identifies the stack; the watchdog passes it back to the
	// WatchdogStackProvider methods.
	Name string
}
|
||||
|
||||
// WatchdogStackProvider provides stack operations needed by the watchdog.
|
||||
// Defined here to avoid circular imports with the backup package.
|
||||
type WatchdogStackProvider interface {
|
||||
ListDeployedStacks() []WatchdogStackInfo
|
||||
GetStackHDDPath(name string) string
|
||||
StopStack(name string) error
|
||||
StartStack(name string) error
|
||||
}
|
||||
|
||||
// pathProbeState is the in-memory bookkeeping the watchdog keeps for one
// storage path between probe cycles.
type pathProbeState struct {
	// lastStatus is either "connected" or "disconnected".
	lastStatus string
	// consecutiveFailures counts failed probes since the last success; it
	// is reset to zero on every state change.
	consecutiveFailures int

	// lastProbeTime is when the path was last examined, and probeInterval
	// is the minimum time that must pass before it is examined again.
	lastProbeTime time.Time
	probeInterval time.Duration
}
|
||||
|
||||
// StorageWatchdog monitors registered storage paths and reacts to disconnection/reconnection.
|
||||
type StorageWatchdog struct {
|
||||
settings *settings.Settings
|
||||
stackProvider WatchdogStackProvider
|
||||
notifier *notify.Notifier
|
||||
cfg *config.Config
|
||||
logger *log.Logger
|
||||
|
||||
// Callbacks to break import cycles — set via SetXxx methods after construction
|
||||
alertRefresh func()
|
||||
pushHubReport func()
|
||||
unlockRepo func(ctx context.Context, repoPath string) error
|
||||
|
||||
mu sync.Mutex
|
||||
pathState map[string]*pathProbeState
|
||||
}
|
||||
|
||||
// NewStorageWatchdog creates a new storage watchdog.
|
||||
func NewStorageWatchdog(
|
||||
sett *settings.Settings,
|
||||
stackProvider WatchdogStackProvider,
|
||||
notifier *notify.Notifier,
|
||||
cfg *config.Config,
|
||||
logger *log.Logger,
|
||||
) *StorageWatchdog {
|
||||
return &StorageWatchdog{
|
||||
settings: sett,
|
||||
stackProvider: stackProvider,
|
||||
notifier: notifier,
|
||||
cfg: cfg,
|
||||
logger: logger,
|
||||
pathState: make(map[string]*pathProbeState),
|
||||
}
|
||||
}
|
||||
|
||||
// SetAlertRefresh sets the callback to trigger alert refresh.
|
||||
func (w *StorageWatchdog) SetAlertRefresh(fn func()) {
|
||||
w.alertRefresh = fn
|
||||
}
|
||||
|
||||
// SetHubReportPusher sets the callback to push an immediate hub report.
|
||||
func (w *StorageWatchdog) SetHubReportPusher(fn func()) {
|
||||
w.pushHubReport = fn
|
||||
}
|
||||
|
||||
// SetRepoUnlocker sets the callback to unlock a restic repo on reconnect.
|
||||
func (w *StorageWatchdog) SetRepoUnlocker(fn func(ctx context.Context, repoPath string) error) {
|
||||
w.unlockRepo = fn
|
||||
}
|
||||
|
||||
// Check probes all registered storage paths and reacts to state changes.
|
||||
// Called by the scheduler every 5 seconds.
|
||||
func (w *StorageWatchdog) Check(ctx context.Context) error {
|
||||
paths := w.settings.GetStoragePaths()
|
||||
if len(paths) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, sp := range paths {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
state := w.getOrCreateState(sp.Path)
|
||||
|
||||
// Rate-limit per-path probes
|
||||
if time.Since(state.lastProbeTime) < state.probeInterval {
|
||||
continue
|
||||
}
|
||||
state.lastProbeTime = time.Now()
|
||||
|
||||
if sp.Disconnected {
|
||||
w.handleReconnectCheck(ctx, sp)
|
||||
} else {
|
||||
w.handleConnectedProbe(sp, state)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// getOrCreateState returns the in-memory probe state for a path, creating if needed.
|
||||
func (w *StorageWatchdog) getOrCreateState(path string) *pathProbeState {
|
||||
w.mu.Lock()
|
||||
defer w.mu.Unlock()
|
||||
if s, ok := w.pathState[path]; ok {
|
||||
return s
|
||||
}
|
||||
s := &pathProbeState{
|
||||
lastStatus: "connected",
|
||||
probeInterval: defaultProbeInterval,
|
||||
}
|
||||
w.pathState[path] = s
|
||||
return s
|
||||
}
|
||||
|
||||
// handleConnectedProbe probes a connected drive and triggers disconnect if needed.
|
||||
func (w *StorageWatchdog) handleConnectedProbe(sp settings.StoragePath, state *pathProbeState) {
|
||||
result := system.ProbeStoragePath(sp.Path)
|
||||
if result.Status == system.ProbeConnected {
|
||||
if state.consecutiveFailures > 0 {
|
||||
w.logger.Printf("[DEBUG] [STORAGE] Probe recovered for %s after %d failures", sp.Path, state.consecutiveFailures)
|
||||
}
|
||||
state.consecutiveFailures = 0
|
||||
state.lastStatus = "connected"
|
||||
return
|
||||
}
|
||||
|
||||
state.consecutiveFailures++
|
||||
w.logger.Printf("[WARN] [STORAGE] Probe failed for %s (%d/%d): %v",
|
||||
sp.Path, state.consecutiveFailures, probeThreshold, result.Err)
|
||||
|
||||
if state.consecutiveFailures >= probeThreshold {
|
||||
w.handleDisconnect(sp, state, result)
|
||||
}
|
||||
}
|
||||
|
||||
// handleDisconnect reacts to a confirmed drive disconnection.
|
||||
func (w *StorageWatchdog) handleDisconnect(sp settings.StoragePath, state *pathProbeState, probe system.ProbeResult) {
|
||||
label := sp.Label
|
||||
if label == "" {
|
||||
label = sp.Path
|
||||
}
|
||||
w.logger.Printf("[ERROR] [STORAGE] Drive disconnected: %s (%s)", sp.Path, label)
|
||||
|
||||
// 1. Find and stop affected stacks
|
||||
stoppedStacks := w.stopAffectedStacks(sp.Path)
|
||||
|
||||
// 2. Mark disconnected in settings (persists to settings.json)
|
||||
if err := w.settings.SetDisconnected(sp.Path, true, stoppedStacks); err != nil {
|
||||
w.logger.Printf("[ERROR] [STORAGE] Failed to mark disconnected: %v", err)
|
||||
}
|
||||
|
||||
// 3. Lazy unmount stale mount (if probe timed out — mount is likely hanging)
|
||||
if probe.Status == system.ProbeTimeout {
|
||||
w.lazyUnmount(sp.Path)
|
||||
}
|
||||
|
||||
// 4. Update in-memory state
|
||||
state.lastStatus = "disconnected"
|
||||
state.probeInterval = disconnectedProbeInterval
|
||||
state.consecutiveFailures = 0
|
||||
|
||||
// 5. Trigger alert refresh
|
||||
if w.alertRefresh != nil {
|
||||
w.alertRefresh()
|
||||
}
|
||||
|
||||
// 6. Send notification
|
||||
w.notifier.NotifyStorageDisconnected(label, stoppedStacks)
|
||||
|
||||
// 7. Push immediate hub report
|
||||
if w.pushHubReport != nil {
|
||||
go w.pushHubReport()
|
||||
}
|
||||
}
|
||||
|
||||
// handleReconnectCheck checks if a disconnected drive has been reconnected.
|
||||
func (w *StorageWatchdog) handleReconnectCheck(ctx context.Context, sp settings.StoragePath) {
|
||||
// Find the UUID for this path from fstab
|
||||
// For attach-wizard drives, the UUID is on the raw mount, not the bind mount
|
||||
mountPath := sp.Path
|
||||
rawPath, isAttachWizard := system.HasFelhomRawMount(hostFstabPath, sp.Path)
|
||||
if isAttachWizard {
|
||||
mountPath = rawPath
|
||||
}
|
||||
|
||||
uuid := system.ParseFstabUUID(hostFstabPath, mountPath)
|
||||
if uuid == "" {
|
||||
// No UUID in fstab — can't detect reconnection automatically
|
||||
return
|
||||
}
|
||||
|
||||
// Check if the UUID block device is present
|
||||
uuidPath := filepath.Join(hostDevUUIDPath, uuid)
|
||||
if _, err := os.Stat(uuidPath); err != nil {
|
||||
return // Drive not reconnected yet
|
||||
}
|
||||
|
||||
label := sp.Label
|
||||
if label == "" {
|
||||
label = sp.Path
|
||||
}
|
||||
w.logger.Printf("[INFO] [STORAGE] Drive reconnected (UUID found), attempting remount: %s (%s)", sp.Path, label)
|
||||
|
||||
// Attempt remount
|
||||
if err := w.remount(sp.Path, rawPath, isAttachWizard); err != nil {
|
||||
w.logger.Printf("[ERROR] [STORAGE] Remount failed for %s: %v", sp.Path, err)
|
||||
return // Try again next cycle
|
||||
}
|
||||
|
||||
// Verify with a probe
|
||||
verifyResult := system.ProbeStoragePath(sp.Path)
|
||||
if verifyResult.Status != system.ProbeConnected {
|
||||
w.logger.Printf("[ERROR] [STORAGE] Post-remount probe failed for %s: %v", sp.Path, verifyResult.Err)
|
||||
return
|
||||
}
|
||||
|
||||
w.logger.Printf("[INFO] [STORAGE] Drive successfully remounted: %s (%s)", sp.Path, label)
|
||||
|
||||
// Clean stale restic locks
|
||||
w.cleanResticLocks(ctx, sp.Path)
|
||||
|
||||
// Validate stopped stacks — filter to only actually stopped ones
|
||||
filteredStacks := w.filterStoppedStacks(sp.StoppedStacks)
|
||||
|
||||
// Clear disconnected but preserve StoppedStacks for the restart UI
|
||||
if err := w.settings.SetDisconnected(sp.Path, false, filteredStacks); err != nil {
|
||||
w.logger.Printf("[ERROR] [STORAGE] Failed to clear disconnected: %v", err)
|
||||
}
|
||||
|
||||
// Update in-memory state
|
||||
state := w.getOrCreateState(sp.Path)
|
||||
state.lastStatus = "connected"
|
||||
state.probeInterval = defaultProbeInterval
|
||||
state.consecutiveFailures = 0
|
||||
|
||||
// Trigger alert refresh
|
||||
if w.alertRefresh != nil {
|
||||
w.alertRefresh()
|
||||
}
|
||||
|
||||
// Send notification
|
||||
w.notifier.NotifyStorageReconnected(label)
|
||||
|
||||
// Push immediate hub report
|
||||
if w.pushHubReport != nil {
|
||||
go w.pushHubReport()
|
||||
}
|
||||
}
|
||||
|
||||
// stopAffectedStacks stops all deployed stacks whose HDD_PATH matches the disconnected drive.
|
||||
func (w *StorageWatchdog) stopAffectedStacks(drivePath string) []string {
|
||||
if w.stackProvider == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
var stopped []string
|
||||
cleanDrive := filepath.Clean(drivePath)
|
||||
|
||||
for _, stack := range w.stackProvider.ListDeployedStacks() {
|
||||
hddPath := w.stackProvider.GetStackHDDPath(stack.Name)
|
||||
if hddPath == "" {
|
||||
continue
|
||||
}
|
||||
cleanHDD := filepath.Clean(hddPath)
|
||||
if cleanHDD != cleanDrive && !strings.HasPrefix(cleanHDD, cleanDrive+"/") {
|
||||
continue
|
||||
}
|
||||
|
||||
// Don't stop protected stacks
|
||||
if w.cfg.IsProtectedStack(stack.Name) {
|
||||
w.logger.Printf("[WARN] [STORAGE] Skipping protected stack: %s", stack.Name)
|
||||
continue
|
||||
}
|
||||
|
||||
w.logger.Printf("[INFO] [STORAGE] Stopping stack %s (drive disconnected: %s)", stack.Name, drivePath)
|
||||
if err := w.stackProvider.StopStack(stack.Name); err != nil {
|
||||
w.logger.Printf("[ERROR] [STORAGE] Failed to stop stack %s: %v", stack.Name, err)
|
||||
continue // Don't add to stopped list if stop failed
|
||||
}
|
||||
stopped = append(stopped, stack.Name)
|
||||
}
|
||||
|
||||
if len(stopped) > 0 {
|
||||
w.logger.Printf("[INFO] [STORAGE] Stopped %d stack(s) due to drive disconnect: %v", len(stopped), stopped)
|
||||
}
|
||||
return stopped
|
||||
}
|
||||
|
||||
// lazyUnmount performs a lazy unmount of a path and its raw mount (if attach-wizard).
|
||||
func (w *StorageWatchdog) lazyUnmount(path string) {
|
||||
// For attach-wizard, unmount bind first, then raw
|
||||
rawPath, isAttachWizard := system.HasFelhomRawMount(hostFstabPath, path)
|
||||
|
||||
// Unmount the bind/main path
|
||||
cmd := exec.Command("umount", "-l", path)
|
||||
if out, err := cmd.CombinedOutput(); err != nil {
|
||||
w.logger.Printf("[WARN] [STORAGE] umount -l %s: %v (%s)", path, err, strings.TrimSpace(string(out)))
|
||||
} else {
|
||||
w.logger.Printf("[INFO] [STORAGE] Lazy unmounted: %s", path)
|
||||
}
|
||||
|
||||
// Then unmount the raw path if it's an attach-wizard drive
|
||||
if isAttachWizard && rawPath != "" {
|
||||
cmd = exec.Command("umount", "-l", rawPath)
|
||||
if out, err := cmd.CombinedOutput(); err != nil {
|
||||
w.logger.Printf("[WARN] [STORAGE] umount -l %s: %v (%s)", rawPath, err, strings.TrimSpace(string(out)))
|
||||
} else {
|
||||
w.logger.Printf("[INFO] [STORAGE] Lazy unmounted raw: %s", rawPath)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// remount attempts to remount a storage path using fstab entries.
|
||||
func (w *StorageWatchdog) remount(path, rawPath string, isAttachWizard bool) error {
|
||||
// Clean any stale mount entries first
|
||||
exec.Command("umount", "-l", path).Run()
|
||||
if isAttachWizard && rawPath != "" {
|
||||
exec.Command("umount", "-l", rawPath).Run()
|
||||
}
|
||||
|
||||
if isAttachWizard && rawPath != "" {
|
||||
// Mount raw first, then bind
|
||||
cmd := exec.Command("mount", "-T", hostFstabPath, rawPath)
|
||||
if out, err := cmd.CombinedOutput(); err != nil {
|
||||
return fmt.Errorf("mount raw %s: %v (%s)", rawPath, err, strings.TrimSpace(string(out)))
|
||||
}
|
||||
w.logger.Printf("[INFO] [STORAGE] Mounted raw: %s", rawPath)
|
||||
|
||||
cmd = exec.Command("mount", "-T", hostFstabPath, path)
|
||||
if out, err := cmd.CombinedOutput(); err != nil {
|
||||
return fmt.Errorf("mount bind %s: %v (%s)", path, err, strings.TrimSpace(string(out)))
|
||||
}
|
||||
w.logger.Printf("[INFO] [STORAGE] Mounted bind: %s", path)
|
||||
} else {
|
||||
cmd := exec.Command("mount", "-T", hostFstabPath, path)
|
||||
if out, err := cmd.CombinedOutput(); err != nil {
|
||||
return fmt.Errorf("mount %s: %v (%s)", path, err, strings.TrimSpace(string(out)))
|
||||
}
|
||||
w.logger.Printf("[INFO] [STORAGE] Mounted: %s", path)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// cleanResticLocks runs restic unlock on the primary repo for a drive path.
|
||||
func (w *StorageWatchdog) cleanResticLocks(ctx context.Context, drivePath string) {
|
||||
repoPath := filepath.Join(drivePath, primaryResticSubpath)
|
||||
locksDir := filepath.Join(repoPath, "locks")
|
||||
entries, err := os.ReadDir(locksDir)
|
||||
if err != nil || len(entries) == 0 {
|
||||
return // No locks dir or no lock files
|
||||
}
|
||||
|
||||
w.logger.Printf("[INFO] [STORAGE] Found %d restic lock file(s) in %s, running unlock", len(entries), repoPath)
|
||||
|
||||
if w.unlockRepo != nil {
|
||||
if err := w.unlockRepo(ctx, repoPath); err != nil {
|
||||
w.logger.Printf("[WARN] [STORAGE] Restic unlock failed for %s: %v", repoPath, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// filterStoppedStacks validates that stacks in the list still exist as deployed stacks.
|
||||
func (w *StorageWatchdog) filterStoppedStacks(stackNames []string) []string {
|
||||
if w.stackProvider == nil || len(stackNames) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
deployed := make(map[string]bool)
|
||||
for _, s := range w.stackProvider.ListDeployedStacks() {
|
||||
deployed[s.Name] = true
|
||||
}
|
||||
|
||||
var result []string
|
||||
for _, name := range stackNames {
|
||||
if deployed[name] {
|
||||
result = append(result, name)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// SafeDisconnect performs a safe disconnect of a storage path.
|
||||
// Stops affected apps, syncs filesystem, and unmounts the drive.
|
||||
func (w *StorageWatchdog) SafeDisconnect(ctx context.Context, path string) (stoppedStacks []string, err error) {
|
||||
sp := w.findStoragePath(path)
|
||||
if sp == nil {
|
||||
return nil, fmt.Errorf("storage path %q not found", path)
|
||||
}
|
||||
if sp.Disconnected {
|
||||
return nil, fmt.Errorf("drive already disconnected")
|
||||
}
|
||||
|
||||
label := sp.Label
|
||||
if label == "" {
|
||||
label = sp.Path
|
||||
}
|
||||
w.logger.Printf("[INFO] [STORAGE] Safe disconnect requested: %s (%s)", path, label)
|
||||
|
||||
// 1. Stop affected stacks
|
||||
stoppedStacks = w.stopAffectedStacks(path)
|
||||
|
||||
// 2. Sync filesystem
|
||||
exec.Command("sync").Run()
|
||||
|
||||
// 3. Unmount
|
||||
rawPath, isAttachWizard := system.HasFelhomRawMount(hostFstabPath, path)
|
||||
|
||||
// Unmount bind/main
|
||||
cmd := exec.Command("umount", path)
|
||||
if out, umountErr := cmd.CombinedOutput(); umountErr != nil {
|
||||
// Try lazy unmount as fallback
|
||||
w.logger.Printf("[WARN] [STORAGE] umount %s failed, trying lazy: %v", path, umountErr)
|
||||
cmd = exec.Command("umount", "-l", path)
|
||||
if out, umountErr = cmd.CombinedOutput(); umountErr != nil {
|
||||
return stoppedStacks, fmt.Errorf("umount %s failed: %v (%s)", path, umountErr, strings.TrimSpace(string(out)))
|
||||
}
|
||||
}
|
||||
|
||||
// Unmount raw if attach-wizard
|
||||
if isAttachWizard && rawPath != "" {
|
||||
cmd = exec.Command("umount", rawPath)
|
||||
if out, umountErr := cmd.CombinedOutput(); umountErr != nil {
|
||||
cmd = exec.Command("umount", "-l", rawPath)
|
||||
if out, umountErr = cmd.CombinedOutput(); umountErr != nil {
|
||||
w.logger.Printf("[WARN] [STORAGE] umount raw %s failed: %v (%s)", rawPath, umountErr, strings.TrimSpace(string(out)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Mark disconnected
|
||||
if setErr := w.settings.SetDisconnected(path, true, stoppedStacks); setErr != nil {
|
||||
w.logger.Printf("[ERROR] [STORAGE] Failed to mark disconnected: %v", setErr)
|
||||
}
|
||||
|
||||
// 5. Update in-memory state
|
||||
state := w.getOrCreateState(path)
|
||||
state.lastStatus = "disconnected"
|
||||
state.probeInterval = disconnectedProbeInterval
|
||||
state.consecutiveFailures = 0
|
||||
|
||||
// 6. Trigger alert refresh
|
||||
if w.alertRefresh != nil {
|
||||
w.alertRefresh()
|
||||
}
|
||||
|
||||
// 7. Notify and push hub report
|
||||
w.notifier.Notify("storage_safe_disconnect", "info",
|
||||
fmt.Sprintf("Meghajtó biztonságosan leválasztva: %s", label), "")
|
||||
if w.pushHubReport != nil {
|
||||
go w.pushHubReport()
|
||||
}
|
||||
|
||||
w.logger.Printf("[INFO] [STORAGE] Safe disconnect completed: %s — drive can be removed", path)
|
||||
return stoppedStacks, nil
|
||||
}
|
||||
|
||||
// Reconnect attempts to remount a disconnected storage path.
|
||||
func (w *StorageWatchdog) Reconnect(ctx context.Context, path string) (stoppedStacks []string, err error) {
|
||||
sp := w.findStoragePath(path)
|
||||
if sp == nil {
|
||||
return nil, fmt.Errorf("storage path %q not found", path)
|
||||
}
|
||||
if !sp.Disconnected {
|
||||
return nil, fmt.Errorf("drive is not disconnected")
|
||||
}
|
||||
|
||||
label := sp.Label
|
||||
if label == "" {
|
||||
label = sp.Path
|
||||
}
|
||||
|
||||
// Check UUID availability
|
||||
mountPath := sp.Path
|
||||
rawPath, isAttachWizard := system.HasFelhomRawMount(hostFstabPath, sp.Path)
|
||||
if isAttachWizard {
|
||||
mountPath = rawPath
|
||||
}
|
||||
uuid := system.ParseFstabUUID(hostFstabPath, mountPath)
|
||||
if uuid != "" {
|
||||
uuidPath := filepath.Join(hostDevUUIDPath, uuid)
|
||||
if _, statErr := os.Stat(uuidPath); statErr != nil {
|
||||
return nil, fmt.Errorf("drive not detected (UUID %s not found) — ensure the drive is physically connected", uuid)
|
||||
}
|
||||
}
|
||||
|
||||
// Attempt remount
|
||||
if mountErr := w.remount(path, rawPath, isAttachWizard); mountErr != nil {
|
||||
return nil, fmt.Errorf("mount failed: %w", mountErr)
|
||||
}
|
||||
|
||||
// Verify
|
||||
verifyResult := system.ProbeStoragePath(path)
|
||||
if verifyResult.Status != system.ProbeConnected {
|
||||
return nil, fmt.Errorf("mount appeared to succeed but probe failed: %v", verifyResult.Err)
|
||||
}
|
||||
|
||||
// Clean restic locks
|
||||
w.cleanResticLocks(ctx, path)
|
||||
|
||||
// Validate stopped stacks
|
||||
filteredStacks := w.filterStoppedStacks(sp.StoppedStacks)
|
||||
|
||||
// Clear disconnected, preserve stopped stacks for restart UI
|
||||
if setErr := w.settings.SetDisconnected(path, false, filteredStacks); setErr != nil {
|
||||
w.logger.Printf("[ERROR] [STORAGE] Failed to clear disconnected: %v", setErr)
|
||||
}
|
||||
|
||||
// Update in-memory state
|
||||
state := w.getOrCreateState(path)
|
||||
state.lastStatus = "connected"
|
||||
state.probeInterval = defaultProbeInterval
|
||||
state.consecutiveFailures = 0
|
||||
|
||||
// Trigger alert refresh
|
||||
if w.alertRefresh != nil {
|
||||
w.alertRefresh()
|
||||
}
|
||||
|
||||
// Notify
|
||||
w.notifier.NotifyStorageReconnected(label)
|
||||
if w.pushHubReport != nil {
|
||||
go w.pushHubReport()
|
||||
}
|
||||
|
||||
w.logger.Printf("[INFO] [STORAGE] Reconnect completed: %s", path)
|
||||
return filteredStacks, nil
|
||||
}
|
||||
|
||||
// RestartStoppedApps restarts apps that were auto-stopped due to a drive disconnect.
|
||||
func (w *StorageWatchdog) RestartStoppedApps(path string) (started, failed []string) {
|
||||
sp := w.findStoragePath(path)
|
||||
if sp == nil || sp.Disconnected {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
stacks := w.settings.GetStoppedStacks(path)
|
||||
if len(stacks) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
for _, name := range stacks {
|
||||
w.logger.Printf("[INFO] [STORAGE] Starting stack %s (drive reconnected: %s)", name, path)
|
||||
if err := w.stackProvider.StartStack(name); err != nil {
|
||||
w.logger.Printf("[ERROR] [STORAGE] Failed to start stack %s: %v", name, err)
|
||||
failed = append(failed, name)
|
||||
} else {
|
||||
started = append(started, name)
|
||||
}
|
||||
}
|
||||
|
||||
// Clear stopped stacks list
|
||||
if err := w.settings.ClearStoppedStacks(path); err != nil {
|
||||
w.logger.Printf("[ERROR] [STORAGE] Failed to clear stopped stacks: %v", err)
|
||||
}
|
||||
|
||||
return started, failed
|
||||
}
|
||||
|
||||
// findStoragePath returns the storage path entry for a given path, or nil.
|
||||
func (w *StorageWatchdog) findStoragePath(path string) *settings.StoragePath {
|
||||
for _, sp := range w.settings.GetStoragePaths() {
|
||||
if sp.Path == path {
|
||||
return &sp
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user