v0.25.0 — Debug page: operator testing & diagnostics dashboard
Debug-mode-only dashboard (/debug) with 8 collapsible sections: system diagnostics, notification testing, backup triggers, storage simulation, hub & connectivity, self-update dry-run, DR/setup wizard, and in-memory log viewer. Migrates debug dump from API router to web server. Adds ring buffer log capture, storage disconnect simulation, event history tracking, and cross-drive/self-update test methods. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -80,6 +80,10 @@ type StorageWatchdog struct {
|
||||
|
||||
mu sync.Mutex
|
||||
pathState map[string]*pathProbeState
|
||||
|
||||
// Debug simulation state
|
||||
simulatedMu sync.RWMutex
|
||||
simulatedPaths map[string]bool
|
||||
}
|
||||
|
||||
// NewStorageWatchdog creates a new storage watchdog.
|
||||
@@ -91,12 +95,13 @@ func NewStorageWatchdog(
|
||||
logger *log.Logger,
|
||||
) *StorageWatchdog {
|
||||
return &StorageWatchdog{
|
||||
settings: sett,
|
||||
stackProvider: stackProvider,
|
||||
notifier: notifier,
|
||||
cfg: cfg,
|
||||
logger: logger,
|
||||
pathState: make(map[string]*pathProbeState),
|
||||
settings: sett,
|
||||
stackProvider: stackProvider,
|
||||
notifier: notifier,
|
||||
cfg: cfg,
|
||||
logger: logger,
|
||||
pathState: make(map[string]*pathProbeState),
|
||||
simulatedPaths: make(map[string]bool),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -146,6 +151,11 @@ func (w *StorageWatchdog) Check(ctx context.Context) error {
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip simulated-disconnected paths (handled by debug UI)
|
||||
if w.isSimulated(sp.Path) {
|
||||
continue
|
||||
}
|
||||
|
||||
if sp.Disconnected {
|
||||
w.handleReconnectCheck(ctx, sp)
|
||||
} else {
|
||||
@@ -663,6 +673,196 @@ func (w *StorageWatchdog) RestartStoppedApps(path string) (started, failed []str
|
||||
return started, failed
|
||||
}
|
||||
|
||||
// ── Debug simulation methods ─────────────────────────────────────────
|
||||
|
||||
// isSimulated returns true if the path is in simulated-disconnect state.
|
||||
func (w *StorageWatchdog) isSimulated(path string) bool {
|
||||
w.simulatedMu.RLock()
|
||||
defer w.simulatedMu.RUnlock()
|
||||
return w.simulatedPaths[path]
|
||||
}
|
||||
|
||||
// SimulateDisconnect simulates a drive disconnection without actually unmounting.
|
||||
// Runs disconnect steps 1,2,4,5,6,7 (skips step 3: lazyUnmount).
|
||||
// Returns the list of stopped stacks.
|
||||
func (w *StorageWatchdog) SimulateDisconnect(ctx context.Context, path string) ([]string, error) {
|
||||
sp := w.findStoragePath(path)
|
||||
if sp == nil {
|
||||
return nil, fmt.Errorf("storage path %q not found", path)
|
||||
}
|
||||
if sp.Disconnected {
|
||||
return nil, fmt.Errorf("drive already disconnected")
|
||||
}
|
||||
if sp.Decommissioned {
|
||||
return nil, fmt.Errorf("drive is decommissioned")
|
||||
}
|
||||
|
||||
label := sp.Label
|
||||
if label == "" {
|
||||
label = sp.Path
|
||||
}
|
||||
w.logger.Printf("[INFO] [STORAGE] [DEBUG-SIM] Simulating disconnect: %s (%s)", path, label)
|
||||
|
||||
// Mark as simulated so the watchdog skips probing this path
|
||||
w.simulatedMu.Lock()
|
||||
w.simulatedPaths[path] = true
|
||||
w.simulatedMu.Unlock()
|
||||
|
||||
// Step 1: Stop affected stacks
|
||||
stoppedStacks := w.stopAffectedStacks(path)
|
||||
|
||||
// Step 2: Mark disconnected in settings
|
||||
if err := w.settings.SetDisconnected(path, true, stoppedStacks); err != nil {
|
||||
w.logger.Printf("[ERROR] [STORAGE] [DEBUG-SIM] Failed to mark disconnected: %v", err)
|
||||
}
|
||||
|
||||
// Step 3: SKIPPED (no lazyUnmount — drive stays physically mounted)
|
||||
|
||||
// Step 4: Update in-memory state
|
||||
state := w.getOrCreateState(path)
|
||||
state.lastStatus = "disconnected"
|
||||
state.probeInterval = disconnectedProbeInterval
|
||||
state.consecutiveFailures = 0
|
||||
|
||||
// Step 5: Trigger alert refresh
|
||||
if w.alertRefresh != nil {
|
||||
w.alertRefresh()
|
||||
}
|
||||
|
||||
// Step 6: Send notification
|
||||
w.notifier.NotifyStorageDisconnected(label, stoppedStacks)
|
||||
|
||||
// Step 7: Push hub report
|
||||
if w.pushHubReport != nil {
|
||||
go w.pushHubReport()
|
||||
}
|
||||
|
||||
w.logger.Printf("[INFO] [STORAGE] [DEBUG-SIM] Disconnect simulated: %s — %d stack(s) stopped", path, len(stoppedStacks))
|
||||
return stoppedStacks, nil
|
||||
}
|
||||
|
||||
// SimulateReconnect undoes a simulated disconnection.
|
||||
func (w *StorageWatchdog) SimulateReconnect(ctx context.Context, path string) error {
|
||||
if !w.isSimulated(path) {
|
||||
return fmt.Errorf("path %q is not in simulated-disconnect state", path)
|
||||
}
|
||||
|
||||
sp := w.findStoragePath(path)
|
||||
if sp == nil {
|
||||
return fmt.Errorf("storage path %q not found", path)
|
||||
}
|
||||
|
||||
label := sp.Label
|
||||
if label == "" {
|
||||
label = sp.Path
|
||||
}
|
||||
w.logger.Printf("[INFO] [STORAGE] [DEBUG-SIM] Simulating reconnect: %s (%s)", path, label)
|
||||
|
||||
// Remove from simulated set
|
||||
w.simulatedMu.Lock()
|
||||
delete(w.simulatedPaths, path)
|
||||
w.simulatedMu.Unlock()
|
||||
|
||||
// Verify drive is actually still mounted (it should be since we never unmounted)
|
||||
verifyResult := system.ProbeStoragePath(path)
|
||||
if verifyResult.Status != system.ProbeConnected {
|
||||
return fmt.Errorf("drive probe failed after simulation clear: %v", verifyResult.Err)
|
||||
}
|
||||
|
||||
// Clean restic locks
|
||||
w.cleanResticLocks(ctx, path)
|
||||
|
||||
// Validate stopped stacks
|
||||
filteredStacks := w.filterStoppedStacks(sp.StoppedStacks)
|
||||
|
||||
// Clear disconnected, preserve stopped stacks for restart UI
|
||||
if err := w.settings.SetDisconnected(path, false, filteredStacks); err != nil {
|
||||
w.logger.Printf("[ERROR] [STORAGE] [DEBUG-SIM] Failed to clear disconnected: %v", err)
|
||||
}
|
||||
|
||||
// Update in-memory state
|
||||
state := w.getOrCreateState(path)
|
||||
state.lastStatus = "connected"
|
||||
state.probeInterval = defaultProbeInterval
|
||||
state.consecutiveFailures = 0
|
||||
|
||||
// Trigger alert refresh
|
||||
if w.alertRefresh != nil {
|
||||
w.alertRefresh()
|
||||
}
|
||||
|
||||
// Send notification
|
||||
w.notifier.NotifyStorageReconnected(label)
|
||||
if w.pushHubReport != nil {
|
||||
go w.pushHubReport()
|
||||
}
|
||||
|
||||
w.logger.Printf("[INFO] [STORAGE] [DEBUG-SIM] Reconnect simulated: %s", path)
|
||||
return nil
|
||||
}
|
||||
|
||||
// PathDebugStatus is the per-path snapshot that GetDebugStatus returns for the
// debug page. Field order and JSON names are part of the serialized output.
type PathDebugStatus struct {
	// Identity of the storage path.
	Path  string `json:"path"`
	Label string `json:"label"`

	// Status is "disconnected" or "connected", taken from the stored settings
	// entry; Simulated is true while the path is in the simulated-disconnect set.
	Status    string `json:"status"`
	Simulated bool   `json:"simulated"`

	// ProbeOK is true when the last recorded probe status was "connected".
	ProbeOK bool `json:"probe_ok"`

	// DebounceCount is the current number of consecutive probe failures;
	// DebounceMax is the probe threshold it is compared against.
	DebounceCount int `json:"debounce_count"`
	DebounceMax   int `json:"debounce_max"`

	// LastProbe is the time of the most recent probe (zero if none recorded).
	LastProbe time.Time `json:"last_probe"`

	// AvgLatencyMs is total probe latency in milliseconds divided by
	// ProbeCount; it stays 0 when no probes have been counted.
	AvgLatencyMs float64 `json:"avg_latency_ms"`

	// Probe counters: total probes and how many of them succeeded.
	ProbeCount   int `json:"probe_count"`
	ProbeOKCount int `json:"probe_ok_count"`
}
|
||||
|
||||
// GetDebugStatus returns per-path probe state for the debug page.
|
||||
func (w *StorageWatchdog) GetDebugStatus() []PathDebugStatus {
|
||||
paths := w.settings.GetStoragePaths()
|
||||
result := make([]PathDebugStatus, 0, len(paths))
|
||||
|
||||
w.mu.Lock()
|
||||
defer w.mu.Unlock()
|
||||
|
||||
for _, sp := range paths {
|
||||
if sp.Decommissioned {
|
||||
continue
|
||||
}
|
||||
ds := PathDebugStatus{
|
||||
Path: sp.Path,
|
||||
Label: sp.Label,
|
||||
DebounceMax: probeThreshold,
|
||||
}
|
||||
if sp.Disconnected {
|
||||
ds.Status = "disconnected"
|
||||
} else {
|
||||
ds.Status = "connected"
|
||||
}
|
||||
ds.Simulated = w.isSimulatedLocked(sp.Path)
|
||||
|
||||
if state, ok := w.pathState[sp.Path]; ok {
|
||||
ds.DebounceCount = state.consecutiveFailures
|
||||
ds.LastProbe = state.lastProbeTime
|
||||
ds.ProbeOK = state.lastStatus == "connected"
|
||||
ds.ProbeCount = state.probeCount
|
||||
ds.ProbeOKCount = state.probeOKCount
|
||||
if state.probeCount > 0 {
|
||||
ds.AvgLatencyMs = float64(state.totalLatency.Milliseconds()) / float64(state.probeCount)
|
||||
}
|
||||
}
|
||||
result = append(result, ds)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// isSimulatedLocked checks simulation state without acquiring simulatedMu
|
||||
// (caller must hold w.mu or be ok with a racy read for debug display).
|
||||
func (w *StorageWatchdog) isSimulatedLocked(path string) bool {
|
||||
w.simulatedMu.RLock()
|
||||
defer w.simulatedMu.RUnlock()
|
||||
return w.simulatedPaths[path]
|
||||
}
|
||||
|
||||
// findStoragePath returns the storage path entry for a given path, or nil.
|
||||
func (w *StorageWatchdog) findStoragePath(path string) *settings.StoragePath {
|
||||
for _, sp := range w.settings.GetStoragePaths() {
|
||||
|
||||
Reference in New Issue
Block a user