v0.25.0 — Debug page: operator testing & diagnostics dashboard

Debug-mode-only dashboard (/debug) with 8 collapsible sections:
system diagnostics, notification testing, backup triggers, storage
simulation, hub & connectivity, self-update dry-run, DR/setup wizard,
and in-memory log viewer. Migrates debug dump from API router to web
server. Adds ring buffer log capture, storage disconnect simulation,
event history tracking, and cross-drive/self-update test methods.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-21 20:18:57 +01:00
parent be7803c0ac
commit 7f48786312
16 changed files with 2283 additions and 233 deletions
+206 -6
View File
@@ -80,6 +80,10 @@ type StorageWatchdog struct {
mu sync.Mutex
pathState map[string]*pathProbeState
// Debug simulation state
simulatedMu sync.RWMutex
simulatedPaths map[string]bool
}
// NewStorageWatchdog creates a new storage watchdog.
@@ -91,12 +95,13 @@ func NewStorageWatchdog(
logger *log.Logger,
) *StorageWatchdog {
return &StorageWatchdog{
settings: sett,
stackProvider: stackProvider,
notifier: notifier,
cfg: cfg,
logger: logger,
pathState: make(map[string]*pathProbeState),
settings: sett,
stackProvider: stackProvider,
notifier: notifier,
cfg: cfg,
logger: logger,
pathState: make(map[string]*pathProbeState),
simulatedPaths: make(map[string]bool),
}
}
@@ -146,6 +151,11 @@ func (w *StorageWatchdog) Check(ctx context.Context) error {
continue
}
// Skip simulated-disconnected paths (handled by debug UI)
if w.isSimulated(sp.Path) {
continue
}
if sp.Disconnected {
w.handleReconnectCheck(ctx, sp)
} else {
@@ -663,6 +673,196 @@ func (w *StorageWatchdog) RestartStoppedApps(path string) (started, failed []str
return started, failed
}
// ── Debug simulation methods ─────────────────────────────────────────
// isSimulated reports whether path is currently flagged as being in
// simulated-disconnect state. The lookup runs under a read lock on
// simulatedMu.
func (w *StorageWatchdog) isSimulated(path string) bool {
	w.simulatedMu.RLock()
	flagged := w.simulatedPaths[path]
	w.simulatedMu.RUnlock()
	return flagged
}
// SimulateDisconnect simulates a drive disconnection without actually unmounting.
// Runs disconnect steps 1,2,4,5,6,7 (skips step 3: lazyUnmount).
//
// It returns the list of stopped stacks. An error is returned when path is not
// a known storage path, when the drive is already disconnected or
// decommissioned, or when path is already in simulated-disconnect state.
// A failure to persist the disconnected flag is only logged; the remaining
// steps still run and the call reports success.
//
// ctx is currently unused.
func (w *StorageWatchdog) SimulateDisconnect(ctx context.Context, path string) ([]string, error) {
	sp := w.findStoragePath(path)
	if sp == nil {
		return nil, fmt.Errorf("storage path %q not found", path)
	}
	if sp.Disconnected {
		return nil, fmt.Errorf("drive already disconnected")
	}
	if sp.Decommissioned {
		return nil, fmt.Errorf("drive is decommissioned")
	}

	// Claim the simulated flag with a test-and-set under a single write lock.
	// Two concurrent requests for the same path can both pass the
	// sp.Disconnected check above; without this guard both would run the
	// whole flow, and the second one would overwrite the persisted
	// stopped-stack list with whatever stopAffectedStacks returns for it.
	// Setting the flag also makes the watchdog skip probing this path.
	w.simulatedMu.Lock()
	if w.simulatedPaths == nil {
		// Tolerate a watchdog that was not built via NewStorageWatchdog.
		w.simulatedPaths = make(map[string]bool)
	}
	if w.simulatedPaths[path] {
		w.simulatedMu.Unlock()
		return nil, fmt.Errorf("path %q is already in simulated-disconnect state", path)
	}
	w.simulatedPaths[path] = true
	w.simulatedMu.Unlock()

	label := sp.Label
	if label == "" {
		label = sp.Path
	}
	w.logger.Printf("[INFO] [STORAGE] [DEBUG-SIM] Simulating disconnect: %s (%s)", path, label)

	// Step 1: Stop affected stacks
	stoppedStacks := w.stopAffectedStacks(path)

	// Step 2: Mark disconnected in settings (best effort: log and continue)
	if err := w.settings.SetDisconnected(path, true, stoppedStacks); err != nil {
		w.logger.Printf("[ERROR] [STORAGE] [DEBUG-SIM] Failed to mark disconnected: %v", err)
	}

	// Step 3: SKIPPED (no lazyUnmount — drive stays physically mounted)

	// Step 4: Update in-memory state
	// NOTE(review): GetDebugStatus reads these fields under w.mu; confirm
	// whether getOrCreateState provides the synchronization needed here.
	state := w.getOrCreateState(path)
	state.lastStatus = "disconnected"
	state.probeInterval = disconnectedProbeInterval
	state.consecutiveFailures = 0

	// Step 5: Trigger alert refresh
	if w.alertRefresh != nil {
		w.alertRefresh()
	}

	// Step 6: Send notification
	w.notifier.NotifyStorageDisconnected(label, stoppedStacks)

	// Step 7: Push hub report (asynchronously, so the caller is not blocked)
	if w.pushHubReport != nil {
		go w.pushHubReport()
	}

	w.logger.Printf("[INFO] [STORAGE] [DEBUG-SIM] Disconnect simulated: %s — %d stack(s) stopped", path, len(stoppedStacks))
	return stoppedStacks, nil
}
// SimulateReconnect undoes a simulated disconnection for path.
//
// It clears the simulated flag, probes the path to confirm the drive is still
// reachable, cleans restic locks, clears the disconnected flag in settings
// (keeping the filtered stopped-stack list), resets the in-memory probe state,
// refreshes alerts, sends a reconnect notification and pushes a hub report.
//
// An error is returned when path is not in simulated-disconnect state, when it
// is not a known storage path, or when the probe fails. In the probe-failure
// case the simulated flag has already been cleared and the settings entry is
// left untouched, so Check stops skipping the path. A failure to persist the
// cleared disconnected flag is only logged.
func (w *StorageWatchdog) SimulateReconnect(ctx context.Context, path string) error {
	if !w.isSimulated(path) {
		return fmt.Errorf("path %q is not in simulated-disconnect state", path)
	}

	sp := w.findStoragePath(path)
	if sp == nil {
		return fmt.Errorf("storage path %q not found", path)
	}

	// Remove from the simulated set with a check-and-delete under one write
	// lock. The read-locked check above is only a fast path: two concurrent
	// requests can both pass it, and without this guard both would run the
	// full reconnect flow (duplicate lock cleanup, settings write and
	// notification). Only the caller that actually removes the flag proceeds.
	w.simulatedMu.Lock()
	stillSimulated := w.simulatedPaths[path]
	delete(w.simulatedPaths, path)
	w.simulatedMu.Unlock()
	if !stillSimulated {
		return fmt.Errorf("path %q is not in simulated-disconnect state", path)
	}

	label := sp.Label
	if label == "" {
		label = sp.Path
	}
	w.logger.Printf("[INFO] [STORAGE] [DEBUG-SIM] Simulating reconnect: %s (%s)", path, label)

	// Verify drive is actually still mounted (it should be since we never unmounted)
	verifyResult := system.ProbeStoragePath(path)
	if verifyResult.Status != system.ProbeConnected {
		return fmt.Errorf("drive probe failed after simulation clear: %v", verifyResult.Err)
	}

	// Clean restic locks
	w.cleanResticLocks(ctx, path)

	// Validate stopped stacks
	filteredStacks := w.filterStoppedStacks(sp.StoppedStacks)

	// Clear disconnected, preserve stopped stacks for restart UI
	if err := w.settings.SetDisconnected(path, false, filteredStacks); err != nil {
		w.logger.Printf("[ERROR] [STORAGE] [DEBUG-SIM] Failed to clear disconnected: %v", err)
	}

	// Update in-memory state
	// NOTE(review): GetDebugStatus reads these fields under w.mu; confirm
	// whether getOrCreateState provides the synchronization needed here.
	state := w.getOrCreateState(path)
	state.lastStatus = "connected"
	state.probeInterval = defaultProbeInterval
	state.consecutiveFailures = 0

	// Trigger alert refresh
	if w.alertRefresh != nil {
		w.alertRefresh()
	}

	// Send notification
	w.notifier.NotifyStorageReconnected(label)

	// Push hub report (asynchronously, so the caller is not blocked)
	if w.pushHubReport != nil {
		go w.pushHubReport()
	}

	w.logger.Printf("[INFO] [STORAGE] [DEBUG-SIM] Reconnect simulated: %s", path)
	return nil
}
// PathDebugStatus is the per-path snapshot shown on the debug page. One value
// is produced for every storage path that is not decommissioned; it is
// serialized to JSON using the tags below.
type PathDebugStatus struct {
	// Identity of the storage path, copied from its settings entry.
	Path  string `json:"path"`
	Label string `json:"label"`

	// Status is "disconnected" when the settings entry is flagged as
	// disconnected, otherwise "connected".
	Status string `json:"status"`
	// Simulated is true while the path is in simulated-disconnect state.
	Simulated bool `json:"simulated"`
	// ProbeOK is true when the last recorded probe status was "connected".
	ProbeOK bool `json:"probe_ok"`

	// DebounceCount is the current number of consecutive probe failures and
	// DebounceMax is the probe threshold it is compared against.
	DebounceCount int `json:"debounce_count"`
	DebounceMax   int `json:"debounce_max"`

	// LastProbe is the time of the most recent probe; it keeps the zero
	// time when no probe state exists for the path.
	LastProbe time.Time `json:"last_probe"`
	// AvgLatencyMs is the accumulated probe latency divided by ProbeCount,
	// expressed in milliseconds.
	AvgLatencyMs float64 `json:"avg_latency_ms"`

	// ProbeCount and ProbeOKCount are the probe counters recorded for the
	// path.
	ProbeCount   int `json:"probe_count"`
	ProbeOKCount int `json:"probe_ok_count"`
}
// GetDebugStatus returns per-path probe state for the debug page.
//
// Decommissioned paths are omitted. For every other storage path the result
// carries the connected/disconnected status from settings, the simulation
// flag, and, when probe state has been recorded for the path, the probe
// counters, last probe time and average probe latency. w.mu is held while
// the in-memory probe state is read.
func (w *StorageWatchdog) GetDebugStatus() []PathDebugStatus {
	paths := w.settings.GetStoragePaths()
	result := make([]PathDebugStatus, 0, len(paths))

	w.mu.Lock()
	defer w.mu.Unlock()

	for _, sp := range paths {
		if sp.Decommissioned {
			continue
		}

		ds := PathDebugStatus{
			Path:        sp.Path,
			Label:       sp.Label,
			DebounceMax: probeThreshold,
		}
		if sp.Disconnected {
			ds.Status = "disconnected"
		} else {
			ds.Status = "connected"
		}
		ds.Simulated = w.isSimulatedLocked(sp.Path)

		if state, ok := w.pathState[sp.Path]; ok {
			ds.DebounceCount = state.consecutiveFailures
			ds.LastProbe = state.lastProbeTime
			ds.ProbeOK = state.lastStatus == "connected"
			ds.ProbeCount = state.probeCount
			ds.ProbeOKCount = state.probeOKCount
			if state.probeCount > 0 {
				// Convert to fractional milliseconds before dividing.
				// Duration.Milliseconds() truncates to a whole number, which
				// drops sub-millisecond latency from the average.
				totalMs := float64(state.totalLatency) / float64(time.Millisecond)
				ds.AvgLatencyMs = totalMs / float64(state.probeCount)
			}
		}

		result = append(result, ds)
	}
	return result
}
// isSimulatedLocked reports whether path is in simulated-disconnect state.
//
// Despite the "Locked" suffix, this helper does acquire simulatedMu (as a
// read lock, through isSimulated). GetDebugStatus calls it while holding
// w.mu; simulatedMu is a separate mutex from w.mu.
func (w *StorageWatchdog) isSimulatedLocked(path string) bool {
	return w.isSimulated(path)
}
// findStoragePath returns the storage path entry for a given path, or nil.
func (w *StorageWatchdog) findStoragePath(path string) *settings.StoragePath {
for _, sp := range w.settings.GetStoragePaths() {