health-probes: clear stale results on start/restart, fast 10s probing until healthy

- Clear HealthProbe on StartStack/RestartStack so stale unhealthy state isn't re-applied by RefreshStatus
- Use 10s probe interval for unhealthy/new stacks (nil HealthProbe probes immediately on next tick), switch to normal 5m interval once healthy
- Scheduler frequency 1m → 10s to support fast probing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 14:59:25 +01:00
parent db83db383c
commit 2e9634e50f
3 changed files with 29 additions and 4 deletions
@@ -576,6 +576,15 @@ func (m *Manager) StartStack(name string) error {

 	m.logger.Printf("[INFO] Stack %s started successfully (took %.1fs)", name, time.Since(start).Seconds())
 	m.logPostStartStatus(name, dir, env)
+
+	// Clear stale health probe so refreshStatus won't re-apply an old unhealthy override.
+	// The next health-probes tick (≤10s) will run a fresh probe.
+	m.mu.Lock()
+	if s, ok := m.stacks[name]; ok {
+		s.HealthProbe = nil
+	}
+	m.mu.Unlock()
+
 	return m.RefreshStatus()
 }

@@ -624,6 +633,14 @@ func (m *Manager) RestartStack(name string) error {

 	m.logger.Printf("[INFO] Stack %s restarted successfully (took %.1fs)", name, time.Since(start).Seconds())
 	m.logPostStartStatus(name, dir, env)
+
+	// Clear stale health probe so refreshStatus won't re-apply an old unhealthy override.
+	m.mu.Lock()
+	if s, ok := m.stacks[name]; ok {
+		s.HealthProbe = nil
+	}
+	m.mu.Unlock()
+
 	return m.RefreshStatus()
 }