feat: controller-side HTTP/TCP health probes
Add network-level health probing from the controller to deployed apps. The controller probes containers over the shared Docker network and overrides the stack state to "unhealthy" if the service isn't responding. There are three probe types: http (any response = alive), api (validates status code and body content), and tcp (port reachability). Probes are configured per app via the healthcheck: section in .felhom.yml. The probe loop runs every minute; the per-app interval defaults to 5 minutes. This replaces Docker-level healthchecks for distroless images (e.g. Vikunja) that lack shell utilities, and complements existing Docker healthchecks for other apps. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -42,20 +42,38 @@ type ContainerInfo struct {
|
||||
Status string `json:"status"` // e.g. "Up 3 hours (healthy)"
|
||||
}
|
||||
|
||||
// HealthProbeResult holds the latest controller-side health probe result.
//
// It is attached to a stack through the Stack.HealthProbe field. During a
// status refresh, a result with Healthy == false overrides a stack whose
// Docker-derived state is StateRunning to StateUnhealthy.
|
||||
type HealthProbeResult struct {
|
||||
Healthy bool `json:"healthy"` // overall verdict; presumably false when any entry in Details failed — TODO confirm against the prober
|
||||
LastCheck time.Time `json:"last_check"` // presumably the time the probe last ran — verify against the prober
|
||||
Details []HealthCheckDetail `json:"details"` // results of the individual health check items
|
||||
}
|
||||
|
||||
// HealthCheckDetail holds the result of a single health check item.
//
// Type is one of the three probe kinds described by the commit that
// introduced this struct: "http" (any response counts as alive), "api"
// (validates status code and body content) and "tcp" (port reachability).
// Status and Error carry the omitempty tag, so they are left out of the JSON
// encoding when zero/empty.
|
||||
type HealthCheckDetail struct {
|
||||
Type string `json:"type"` // probe kind: "http", "api", "tcp"
|
||||
Target string `json:"target"` // what was probed, e.g. ":3456/api/v1/info"
|
||||
Healthy bool `json:"healthy"` // whether this single check passed
|
||||
Status int `json:"status,omitempty"` // HTTP status code (for http/api); omitted from JSON when 0
|
||||
Latency string `json:"latency"` // preformatted duration string, e.g. "45ms"
|
||||
Error string `json:"error,omitempty"` // error message if unhealthy; omitted from JSON when empty
|
||||
}
|
||||
|
||||
// Stack represents a docker compose stack on disk.
//
// NOTE(review): this span is a rendered diff, not a single definition. The
// field list appears twice: first the pre-change fields, then the post-change
// fields, which add HealthProbe before LastUpdated.
|
||||
type Stack struct {
|
||||
Name string `json:"name"`
|
||||
Meta Metadata `json:"meta"`
|
||||
ComposePath string `json:"compose_path"`
|
||||
State ContainerState `json:"state"`
|
||||
Deployed bool `json:"deployed"` // Has app.yaml with deployed=true
|
||||
Protected bool `json:"protected"`
|
||||
Orphaned bool `json:"orphaned"` // Deployed but no catalog template
|
||||
Containers []ContainerInfo `json:"containers"`
|
||||
AppConfig *AppConfig `json:"app_config,omitempty"`
|
||||
Deploying bool `json:"deploying"` // compose up in progress
|
||||
DeployError string `json:"deploy_error,omitempty"` // last async deploy error
|
||||
LastUpdated time.Time `json:"last_updated"`
|
||||
Name string `json:"name"`
|
||||
Meta Metadata `json:"meta"`
|
||||
ComposePath string `json:"compose_path"`
|
||||
State ContainerState `json:"state"` // aggregated from the containers; forced to StateUnhealthy when it is StateRunning but the latest HealthProbe failed
|
||||
Deployed bool `json:"deployed"` // Has app.yaml with deployed=true
|
||||
Protected bool `json:"protected"`
|
||||
Orphaned bool `json:"orphaned"` // Deployed but no catalog template
|
||||
Containers []ContainerInfo `json:"containers"`
|
||||
AppConfig *AppConfig `json:"app_config,omitempty"`
|
||||
Deploying bool `json:"deploying"` // compose up in progress
|
||||
DeployError string `json:"deploy_error,omitempty"` // last async deploy error
|
||||
HealthProbe *HealthProbeResult `json:"health_probe,omitempty"` // latest controller-side probe result; omitted from JSON when nil
|
||||
LastUpdated time.Time `json:"last_updated"` // set to time.Now() on each status refresh
|
||||
}
|
||||
|
||||
// Manager handles all docker compose stack operations.
|
||||
@@ -315,6 +333,13 @@ func (m *Manager) refreshStatusLocked() error {
|
||||
stack.Containers = containers
|
||||
stack.State = aggregateState(containers)
|
||||
}
|
||||
|
||||
// Re-apply controller-side health probe results: if the last probe
|
||||
// failed and Docker thinks the container is running, override to unhealthy.
|
||||
if stack.State == StateRunning && stack.HealthProbe != nil && !stack.HealthProbe.Healthy {
|
||||
stack.State = StateUnhealthy
|
||||
}
|
||||
|
||||
stack.LastUpdated = time.Now()
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user