feat: controller-side HTTP/TCP health probes
Add network-level health probing from the controller to deployed apps. The controller probes containers over the shared Docker network and overrides stack state to "unhealthy" if the service isn't responding. Three probe types: http (any response = alive), api (validates status code and body content), tcp (port reachability). Configured per-app via healthcheck: section in .felhom.yml. Runs every minute, per-app interval defaults to 5 minutes. This replaces Docker-level healthchecks for distroless images (e.g. Vikunja) that lack shell utilities, and complements existing Docker healthchecks for other apps. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,16 @@
|
|||||||
## Changelog
|
## Changelog
|
||||||
|
|
||||||
|
### v0.29.3 — Controller-side Health Probes (2026-02-25)
|
||||||
|
|
||||||
|
#### Added
|
||||||
|
- **HTTP/TCP health probes** (`internal/stacks/healthprobe.go`) — The controller now probes deployed apps directly over the Docker network to verify services are actually responding, not just that containers are running. Runs every minute, configurable per-app interval (default 5 min).
|
||||||
|
- **Three probe types**: `http` (any response = alive), `api` (validates status code and response body), `tcp` (port reachability). Multiple checks per app supported.
|
||||||
|
- **`.felhom.yml` healthcheck config** (`internal/stacks/metadata.go`) — New `healthcheck:` section with `interval`, `checks[]` (type, port, path, method, expect). Parsed from app catalog metadata.
|
||||||
|
- **State override** (`internal/stacks/manager.go`) — If a running container's health probe fails, the stack state is overridden to "unhealthy". Clears automatically when probe passes again.
|
||||||
|
|
||||||
|
#### Fixed
|
||||||
|
- **Vikunja healthcheck** — Removed Docker-level healthcheck (distroless image has no wget/curl). Controller-side API probe to `:3456/api/v1/info` replaces it.
|
||||||
|
|
||||||
### v0.29.2 — Dynamic Logo & Favicon (2026-02-25)
|
### v0.29.2 — Dynamic Logo & Favicon (2026-02-25)
|
||||||
|
|
||||||
#### Changed
|
#### Changed
|
||||||
|
|||||||
+12
-1
@@ -212,11 +212,22 @@ When app templates are updated (e.g., a new `APP_KEY` secret is added to `.felho
|
|||||||
| Running + healthy | Green | "Fut" | All containers running and healthy |
|
| Running + healthy | Green | "Fut" | All containers running and healthy |
|
||||||
| Running + starting | Orange | "Indulas..." | Healthcheck not yet passed |
|
| Running + starting | Orange | "Indulas..." | Healthcheck not yet passed |
|
||||||
| Deploying | Orange | "Telepítés..." | Compose up in progress (image pull, container creation) |
|
| Deploying | Orange | "Telepítés..." | Compose up in progress (image pull, container creation) |
|
||||||
| Running + unhealthy | Yellow | "Nem egeszseges" | Healthcheck failing |
|
| Running + unhealthy | Yellow | "Nem egeszseges" | Docker or controller-side healthcheck failing |
|
||||||
| Stopped/exited | Red | "Leallitva" | All containers stopped |
|
| Stopped/exited | Red | "Leallitva" | All containers stopped |
|
||||||
| Restarting | Yellow | "Ujrainditas..." | Restart loop |
|
| Restarting | Yellow | "Ujrainditas..." | Restart loop |
|
||||||
| Not deployed | Gray | "Nincs telepitve" | Compose file exists, not deployed |
|
| Not deployed | Gray | "Nincs telepitve" | Compose file exists, not deployed |
|
||||||
|
|
||||||
|
#### Controller-side Health Probes (`internal/stacks/healthprobe.go`)
|
||||||
|
|
||||||
|
For apps that declare a `healthcheck:` section in `.felhom.yml`, the controller probes the container directly over the Docker network (both are on `traefik-public`). This complements Docker-level healthchecks and is the **only** health mechanism for distroless/scratch images that lack shell utilities.
|
||||||
|
|
||||||
|
Three probe types are supported:
|
||||||
|
- **`http`** — Any HTTP response (even 3xx/4xx/5xx; redirects are not followed) = service is alive. Only a transport-level failure (e.g. connection refused or timeout) = unhealthy.
|
||||||
|
- **`api`** — HTTP request with response validation (expected status code, body content). Fails if expectations aren't met.
|
||||||
|
- **`tcp`** — Simple port reachability check via `net.DialTimeout` (5-second timeout).
|
||||||
|
|
||||||
|
Multiple checks per app are supported (all must pass). The probe scheduler runs every minute; per-app intervals default to 5 minutes and are configurable via `healthcheck.interval` in `.felhom.yml`. Probe results are stored in `Stack.HealthProbe` and exposed via the API. Failed probes override the stack state to `StateUnhealthy`; the override clears automatically when the next probe passes.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### 2. Backup System
|
### 2. Backup System
|
||||||
|
|||||||
@@ -220,6 +220,9 @@ func main() {
|
|||||||
sched.Every("stack-scan", 2*time.Minute, func(ctx context.Context) error {
|
sched.Every("stack-scan", 2*time.Minute, func(ctx context.Context) error {
|
||||||
return stackMgr.ScanStacks()
|
return stackMgr.ScanStacks()
|
||||||
})
|
})
|
||||||
|
sched.Every("health-probes", 1*time.Minute, func(ctx context.Context) error {
|
||||||
|
return stackMgr.RunHealthProbes()
|
||||||
|
})
|
||||||
|
|
||||||
// Heartbeat — lightweight "I'm alive" signal
|
// Heartbeat — lightweight "I'm alive" signal
|
||||||
sched.Every("heartbeat", 5*time.Minute, func(ctx context.Context) error {
|
sched.Every("heartbeat", 5*time.Minute, func(ctx context.Context) error {
|
||||||
|
|||||||
@@ -0,0 +1,323 @@
|
|||||||
|
package stacks
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// probeTarget holds the info needed to probe a single stack.
// It is filled in while the manager lock is held so that the probes
// themselves can run afterwards without holding the lock.
|
||||||
|
type probeTarget struct {
|
||||||
|
stackName string // key of the stack in the manager's stack map; used to store the result
|
||||||
|
containerName string // container picked by findProbeContainer; used as the probe host name
|
||||||
|
checks []HealthCheckItem // checks taken from the stack's healthcheck metadata
|
||||||
|
}
|
||||||
|
|
||||||
|
// RunHealthProbes runs controller-side health probes for all running stacks
|
||||||
|
// that have healthcheck configuration and whose interval has elapsed.
|
||||||
|
// Called by the scheduler every minute.
|
||||||
|
func (m *Manager) RunHealthProbes() error {
|
||||||
|
// Phase 1: collect targets (under lock)
|
||||||
|
m.mu.RLock()
|
||||||
|
var targets []probeTarget
|
||||||
|
for name, stack := range m.stacks {
|
||||||
|
if stack.State != StateRunning && stack.State != StateUnhealthy {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
hc := stack.Meta.HealthCheck
|
||||||
|
if hc == nil || len(hc.Checks) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if interval has elapsed since last probe
|
||||||
|
interval := parseInterval(hc.Interval)
|
||||||
|
if stack.HealthProbe != nil && time.Since(stack.HealthProbe.LastCheck) < interval {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the main container to probe (matching stack name)
|
||||||
|
containerName := findProbeContainer(name, stack.Containers)
|
||||||
|
if containerName == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
targets = append(targets, probeTarget{
|
||||||
|
stackName: name,
|
||||||
|
containerName: containerName,
|
||||||
|
checks: hc.Checks,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
m.mu.RUnlock()
|
||||||
|
|
||||||
|
if len(targets) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Phase 2: run all probes concurrently (no lock held)
|
||||||
|
type probeResult struct {
|
||||||
|
stackName string
|
||||||
|
result *HealthProbeResult
|
||||||
|
}
|
||||||
|
|
||||||
|
results := make([]probeResult, len(targets))
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
|
||||||
|
for i, t := range targets {
|
||||||
|
wg.Add(1)
|
||||||
|
go func(idx int, t probeTarget) {
|
||||||
|
defer wg.Done()
|
||||||
|
result := m.runChecks(t)
|
||||||
|
results[idx] = probeResult{stackName: t.stackName, result: result}
|
||||||
|
}(i, t)
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
|
// Phase 3: apply results and log (under lock)
|
||||||
|
m.mu.Lock()
|
||||||
|
okCount, failCount := 0, 0
|
||||||
|
for _, pr := range results {
|
||||||
|
stack, ok := m.stacks[pr.stackName]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
stack.HealthProbe = pr.result
|
||||||
|
|
||||||
|
if pr.result.Healthy {
|
||||||
|
okCount++
|
||||||
|
// If Docker says running and probe is healthy, ensure state is running
|
||||||
|
// (clears a previous unhealthy override)
|
||||||
|
if stack.State == StateUnhealthy {
|
||||||
|
stack.State = StateRunning
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
failCount++
|
||||||
|
if stack.State == StateRunning {
|
||||||
|
stack.State = StateUnhealthy
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m.mu.Unlock()
|
||||||
|
|
||||||
|
// Summary log
|
||||||
|
if failCount > 0 {
|
||||||
|
m.logger.Printf("[INFO] Health probes: %d ok, %d unhealthy (of %d probed)", okCount, failCount, len(targets))
|
||||||
|
} else if m.isDebug() {
|
||||||
|
m.logger.Printf("[DEBUG] Health probes: %d ok (of %d probed)", okCount, len(targets))
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// runChecks executes all health check items for a single stack target.
|
||||||
|
func (m *Manager) runChecks(t probeTarget) *HealthProbeResult {
|
||||||
|
result := &HealthProbeResult{
|
||||||
|
LastCheck: time.Now(),
|
||||||
|
Healthy: true,
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, check := range t.checks {
|
||||||
|
detail := m.runSingleCheck(t.containerName, check)
|
||||||
|
result.Details = append(result.Details, detail)
|
||||||
|
|
||||||
|
if detail.Healthy {
|
||||||
|
if m.isDebug() {
|
||||||
|
if detail.Status > 0 {
|
||||||
|
m.logger.Printf("[DEBUG] Health probe %s: %s %s :%d%s → %d (%s)",
|
||||||
|
t.stackName, strings.ToUpper(check.Type), methodOrEmpty(check), check.Port, check.Path, detail.Status, detail.Latency)
|
||||||
|
} else {
|
||||||
|
m.logger.Printf("[DEBUG] Health probe %s: TCP :%d → ok (%s)",
|
||||||
|
t.stackName, check.Port, detail.Latency)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
result.Healthy = false
|
||||||
|
m.logger.Printf("[WARN] Health probe %s: %s %s :%d%s → %s",
|
||||||
|
t.stackName, strings.ToUpper(check.Type), methodOrEmpty(check), check.Port, check.Path, detail.Error)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// runSingleCheck executes one health check item and returns the result.
|
||||||
|
func (m *Manager) runSingleCheck(containerName string, check HealthCheckItem) HealthCheckDetail {
|
||||||
|
target := fmt.Sprintf(":%d%s", check.Port, check.Path)
|
||||||
|
|
||||||
|
switch check.Type {
|
||||||
|
case "tcp":
|
||||||
|
return m.probeTCP(containerName, check.Port, target)
|
||||||
|
case "http", "api":
|
||||||
|
return m.probeHTTP(containerName, check, target)
|
||||||
|
default:
|
||||||
|
return HealthCheckDetail{
|
||||||
|
Type: check.Type,
|
||||||
|
Target: target,
|
||||||
|
Healthy: false,
|
||||||
|
Error: fmt.Sprintf("unknown check type: %s", check.Type),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// probeTCP tests if a TCP port is reachable on the container.
|
||||||
|
func (m *Manager) probeTCP(containerName string, port int, target string) HealthCheckDetail {
|
||||||
|
start := time.Now()
|
||||||
|
addr := fmt.Sprintf("%s:%d", containerName, port)
|
||||||
|
conn, err := net.DialTimeout("tcp", addr, 5*time.Second)
|
||||||
|
latency := time.Since(start)
|
||||||
|
|
||||||
|
detail := HealthCheckDetail{
|
||||||
|
Type: "tcp",
|
||||||
|
Target: target,
|
||||||
|
Latency: formatLatency(latency),
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
detail.Healthy = false
|
||||||
|
detail.Error = err.Error()
|
||||||
|
} else {
|
||||||
|
conn.Close()
|
||||||
|
detail.Healthy = true
|
||||||
|
}
|
||||||
|
return detail
|
||||||
|
}
|
||||||
|
|
||||||
|
// probeHTTP makes an HTTP request to the container and evaluates the result.
|
||||||
|
// For "http" type: any response = healthy. For "api" type: validates expect rules.
|
||||||
|
func (m *Manager) probeHTTP(containerName string, check HealthCheckItem, target string) HealthCheckDetail {
|
||||||
|
url := fmt.Sprintf("http://%s:%d%s", containerName, check.Port, check.Path)
|
||||||
|
method := check.Method
|
||||||
|
if method == "" {
|
||||||
|
method = "GET"
|
||||||
|
}
|
||||||
|
|
||||||
|
start := time.Now()
|
||||||
|
|
||||||
|
client := &http.Client{
|
||||||
|
Timeout: 5 * time.Second,
|
||||||
|
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
||||||
|
return http.ErrUseLastResponse
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
req, err := http.NewRequest(method, url, nil)
|
||||||
|
if err != nil {
|
||||||
|
return HealthCheckDetail{
|
||||||
|
Type: check.Type,
|
||||||
|
Target: target,
|
||||||
|
Healthy: false,
|
||||||
|
Error: fmt.Sprintf("bad request: %v", err),
|
||||||
|
Latency: "0ms",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := client.Do(req)
|
||||||
|
latency := time.Since(start)
|
||||||
|
|
||||||
|
detail := HealthCheckDetail{
|
||||||
|
Type: check.Type,
|
||||||
|
Target: target,
|
||||||
|
Latency: formatLatency(latency),
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
detail.Healthy = false
|
||||||
|
detail.Error = err.Error()
|
||||||
|
return detail
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
detail.Status = resp.StatusCode
|
||||||
|
|
||||||
|
// For "http" type, any response means the service is alive
|
||||||
|
if check.Type == "http" {
|
||||||
|
detail.Healthy = true
|
||||||
|
return detail
|
||||||
|
}
|
||||||
|
|
||||||
|
// For "api" type, validate expectations
|
||||||
|
if check.Expect == nil {
|
||||||
|
// No expectations = just check for a response (same as http)
|
||||||
|
detail.Healthy = true
|
||||||
|
return detail
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check expected status code
|
||||||
|
if check.Expect.Status > 0 && resp.StatusCode != check.Expect.Status {
|
||||||
|
detail.Healthy = false
|
||||||
|
detail.Error = fmt.Sprintf("expected status %d, got %d", check.Expect.Status, resp.StatusCode)
|
||||||
|
return detail
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check expected body content
|
||||||
|
if check.Expect.BodyContains != "" {
|
||||||
|
body, err := io.ReadAll(io.LimitReader(resp.Body, 8192)) // read up to 8KB
|
||||||
|
if err != nil {
|
||||||
|
detail.Healthy = false
|
||||||
|
detail.Error = fmt.Sprintf("reading body: %v", err)
|
||||||
|
return detail
|
||||||
|
}
|
||||||
|
if !strings.Contains(string(body), check.Expect.BodyContains) {
|
||||||
|
detail.Healthy = false
|
||||||
|
detail.Error = fmt.Sprintf("body missing expected string %q", check.Expect.BodyContains)
|
||||||
|
return detail
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
detail.Healthy = true
|
||||||
|
return detail
|
||||||
|
}
|
||||||
|
|
||||||
|
// findProbeContainer returns the container name to probe for a stack.
|
||||||
|
// Prefers exact match with stack name, then prefix match (stack-service-N).
|
||||||
|
func findProbeContainer(stackName string, containers []ContainerInfo) string {
|
||||||
|
for _, c := range containers {
|
||||||
|
if c.Name == stackName && (c.State == StateRunning || c.State == StateUnhealthy) {
|
||||||
|
return c.Name
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Fallback: first running container with matching prefix
|
||||||
|
for _, c := range containers {
|
||||||
|
if strings.HasPrefix(c.Name, stackName) && (c.State == StateRunning || c.State == StateUnhealthy) {
|
||||||
|
return c.Name
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseInterval parses a duration string like "5m", "30s" or "1h".
//
// It returns the default of 5 minutes when s is empty, cannot be parsed by
// time.ParseDuration, or describes a zero or negative duration. A
// non-positive interval is rejected because it would make the interval check
// pass on every scheduler run.
func parseInterval(s string) time.Duration {
	const defaultInterval = 5 * time.Minute

	d, err := time.ParseDuration(s) // "" is reported as an error as well
	if err != nil || d <= 0 {
		return defaultInterval
	}
	return d
}
|
||||||
|
|
||||||
|
// formatLatency renders a duration as a short latency label: whole
// microseconds below one millisecond, whole milliseconds below one second,
// and seconds with one decimal place otherwise.
func formatLatency(d time.Duration) string {
	switch {
	case d < time.Millisecond:
		return fmt.Sprintf("%dµs", d.Microseconds())
	case d < time.Second:
		return fmt.Sprintf("%dms", d.Milliseconds())
	default:
		return fmt.Sprintf("%.1fs", d.Seconds())
	}
}
|
||||||
|
|
||||||
|
// methodOrEmpty returns the method string for logging, or empty for non-api checks.
|
||||||
|
func methodOrEmpty(check HealthCheckItem) string {
|
||||||
|
if check.Type == "api" && check.Method != "" {
|
||||||
|
return check.Method
|
||||||
|
}
|
||||||
|
if check.Type == "api" {
|
||||||
|
return "GET"
|
||||||
|
}
|
||||||
|
return "GET"
|
||||||
|
}
|
||||||
@@ -42,6 +42,23 @@ type ContainerInfo struct {
|
|||||||
Status string `json:"status"` // e.g. "Up 3 hours (healthy)"
|
Status string `json:"status"` // e.g. "Up 3 hours (healthy)"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// HealthProbeResult holds the latest controller-side health probe result
// of one stack, i.e. the combined outcome of all its configured checks.
|
||||||
|
type HealthProbeResult struct {
|
||||||
|
Healthy bool `json:"healthy"` // true only if every check of the run passed
|
||||||
|
LastCheck time.Time `json:"last_check"` // when the probe run started; compared against the interval to decide when the next run is due
|
||||||
|
Details []HealthCheckDetail `json:"details"` // one entry per executed check, in configuration order
|
||||||
|
}
|
||||||
|
|
||||||
|
// HealthCheckDetail holds the result of a single health check item.
|
||||||
|
type HealthCheckDetail struct {
|
||||||
|
Type string `json:"type"` // check type as configured: "http", "api", "tcp"
|
||||||
|
Target string `json:"target"` // ":<port><path>" label, e.g. ":3456/api/v1/info"
|
||||||
|
Healthy bool `json:"healthy"` // whether this individual check passed
|
||||||
|
Status int `json:"status,omitempty"` // HTTP status code (for http/api); left at 0 for tcp checks and when no response was received
|
||||||
|
Latency string `json:"latency"` // formatted duration of the dial or request, e.g. "45ms"
|
||||||
|
Error string `json:"error,omitempty"` // error message if unhealthy
|
||||||
|
}
|
||||||
|
|
||||||
// Stack represents a docker compose stack on disk.
|
// Stack represents a docker compose stack on disk.
|
||||||
type Stack struct {
|
type Stack struct {
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
@@ -55,6 +72,7 @@ type Stack struct {
|
|||||||
AppConfig *AppConfig `json:"app_config,omitempty"`
|
AppConfig *AppConfig `json:"app_config,omitempty"`
|
||||||
Deploying bool `json:"deploying"` // compose up in progress
|
Deploying bool `json:"deploying"` // compose up in progress
|
||||||
DeployError string `json:"deploy_error,omitempty"` // last async deploy error
|
DeployError string `json:"deploy_error,omitempty"` // last async deploy error
|
||||||
|
HealthProbe *HealthProbeResult `json:"health_probe,omitempty"` // controller-side probe result
|
||||||
LastUpdated time.Time `json:"last_updated"`
|
LastUpdated time.Time `json:"last_updated"`
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -315,6 +333,13 @@ func (m *Manager) refreshStatusLocked() error {
|
|||||||
stack.Containers = containers
|
stack.Containers = containers
|
||||||
stack.State = aggregateState(containers)
|
stack.State = aggregateState(containers)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Re-apply controller-side health probe results: if the last probe
|
||||||
|
// failed and Docker thinks the container is running, override to unhealthy.
|
||||||
|
if stack.State == StateRunning && stack.HealthProbe != nil && !stack.HealthProbe.Healthy {
|
||||||
|
stack.State = StateUnhealthy
|
||||||
|
}
|
||||||
|
|
||||||
stack.LastUpdated = time.Now()
|
stack.LastUpdated = time.Now()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ type Metadata struct {
|
|||||||
DeployFields []DeployField `yaml:"deploy_fields" json:"deploy_fields"`
|
DeployFields []DeployField `yaml:"deploy_fields" json:"deploy_fields"`
|
||||||
AppInfo AppInfo `yaml:"app_info" json:"app_info"`
|
AppInfo AppInfo `yaml:"app_info" json:"app_info"`
|
||||||
OptionalConfig []OptionalConfigGroup `yaml:"optional_config" json:"optional_config"`
|
OptionalConfig []OptionalConfigGroup `yaml:"optional_config" json:"optional_config"`
|
||||||
|
HealthCheck *HealthCheckConfig `yaml:"healthcheck,omitempty" json:"healthcheck,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// AppInfo holds detailed app information for the info page.
|
// AppInfo holds detailed app information for the info page.
|
||||||
@@ -77,6 +78,29 @@ type SelectOption struct {
|
|||||||
Label string `yaml:"label" json:"label"`
|
Label string `yaml:"label" json:"label"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// HealthCheckConfig defines controller-side health probe configuration.
|
||||||
|
// When configured, the controller periodically probes the app's container
|
||||||
|
// and overrides the stack state to "unhealthy" if the service is not responding.
|
||||||
|
type HealthCheckConfig struct {
|
||||||
|
Interval string `yaml:"interval" json:"interval"` // Go duration string, e.g. "5m", "30s"; default "5m" (also used when the value cannot be parsed)
|
||||||
|
Checks []HealthCheckItem `yaml:"checks" json:"checks"` // probes to run; all of them must pass for the stack to count as healthy
|
||||||
|
}
|
||||||
|
|
||||||
|
// HealthCheckItem defines a single health check probe.
|
||||||
|
type HealthCheckItem struct {
|
||||||
|
Type string `yaml:"type" json:"type"` // "http", "api", "tcp"; any other value makes the check fail as unknown
|
||||||
|
Port int `yaml:"port" json:"port"` // port on the probed container to connect to
|
||||||
|
Path string `yaml:"path" json:"path"` // for http/api; default "/"
|
||||||
|
Method string `yaml:"method" json:"method"` // HTTP method; defaulted to "GET" for api checks, and the probe falls back to GET when empty
|
||||||
|
Expect *HealthCheckExpect `yaml:"expect,omitempty" json:"expect,omitempty"` // for api; nil means any response counts as healthy
|
||||||
|
}
|
||||||
|
|
||||||
|
// HealthCheckExpect defines expected response content for "api" type checks.
// Both fields are optional; a field left at its zero value is not checked.
|
||||||
|
type HealthCheckExpect struct {
|
||||||
|
Status int `yaml:"status" json:"status"` // expected HTTP status code; 0 disables the status check
|
||||||
|
BodyContains string `yaml:"body_contains" json:"body_contains"` // string that must appear in response body (only the first 8 KB are searched); empty disables the check
|
||||||
|
}
|
||||||
|
|
||||||
// LoadMetadata reads .felhom.yml from a stack directory.
|
// LoadMetadata reads .felhom.yml from a stack directory.
|
||||||
// Returns default metadata if the file doesn't exist.
|
// Returns default metadata if the file doesn't exist.
|
||||||
func LoadMetadata(stackDir string) Metadata {
|
func LoadMetadata(stackDir string) Metadata {
|
||||||
@@ -113,6 +137,21 @@ func LoadMetadata(stackDir string) Metadata {
|
|||||||
meta.Category = "tools"
|
meta.Category = "tools"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Default healthcheck fields
|
||||||
|
if meta.HealthCheck != nil {
|
||||||
|
if meta.HealthCheck.Interval == "" {
|
||||||
|
meta.HealthCheck.Interval = "5m"
|
||||||
|
}
|
||||||
|
for i := range meta.HealthCheck.Checks {
|
||||||
|
if meta.HealthCheck.Checks[i].Path == "" && (meta.HealthCheck.Checks[i].Type == "http" || meta.HealthCheck.Checks[i].Type == "api") {
|
||||||
|
meta.HealthCheck.Checks[i].Path = "/"
|
||||||
|
}
|
||||||
|
if meta.HealthCheck.Checks[i].Method == "" && meta.HealthCheck.Checks[i].Type == "api" {
|
||||||
|
meta.HealthCheck.Checks[i].Method = "GET"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// DOMAIN and SUBDOMAIN fields are always auto-filled/required — mark implicitly
|
// DOMAIN and SUBDOMAIN fields are always auto-filled/required — mark implicitly
|
||||||
for i := range meta.DeployFields {
|
for i := range meta.DeployFields {
|
||||||
if meta.DeployFields[i].Type == "domain" || meta.DeployFields[i].Type == "subdomain" {
|
if meta.DeployFields[i].Type == "domain" || meta.DeployFields[i].Type == "subdomain" {
|
||||||
|
|||||||
Reference in New Issue
Block a user