4c5d430b1a
Add network-level health probing from the controller to deployed apps. The controller probes containers over the shared Docker network and overrides stack state to "unhealthy" if the service isn't responding. Three probe types: http (any response = alive), api (validates status code and body content), tcp (port reachability). Configured per-app via healthcheck: section in .felhom.yml. Runs every minute, per-app interval defaults to 5 minutes. This replaces Docker-level healthchecks for distroless images (e.g. Vikunja) that lack shell utilities, and complements existing Docker healthchecks for other apps. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
893 lines
25 KiB
Go
893 lines
25 KiB
Go
package stacks
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
|
|
"gitea.dooplex.hu/admin/felhom-controller/internal/crypto"
|
|
)
|
|
|
|
// ContainerState is the controller's normalized state label for a single
// container or for a whole stack. Values are plain strings and are emitted
// unchanged wherever a ContainerState field is serialized.
type ContainerState string

// Known ContainerState values.
const (
	StateRunning     ContainerState = "running"
	StateStarting    ContainerState = "starting"  // running, health check still starting
	StateUnhealthy   ContainerState = "unhealthy" // running, but reported unhealthy
	StateStopped     ContainerState = "stopped"
	StateRestarting  ContainerState = "restarting"
	StateExited      ContainerState = "exited"
	StatePaused      ContainerState = "paused"
	StateUnknown     ContainerState = "unknown"
	StateNotDeployed ContainerState = "not_deployed"
	StateDeploying   ContainerState = "deploying" // compose up in progress (image pull, etc.)
	StateOrphaned    ContainerState = "orphaned"
)
|
|
|
|
// ContainerInfo holds status info about a single container within a stack.
|
|
type ContainerInfo struct {
|
|
Name string `json:"name"`
|
|
Image string `json:"image"`
|
|
State ContainerState `json:"state"`
|
|
Status string `json:"status"` // e.g. "Up 3 hours (healthy)"
|
|
}
|
|
|
|
// HealthProbeResult holds the latest controller-side health probe result.
|
|
type HealthProbeResult struct {
|
|
Healthy bool `json:"healthy"`
|
|
LastCheck time.Time `json:"last_check"`
|
|
Details []HealthCheckDetail `json:"details"`
|
|
}
|
|
|
|
// HealthCheckDetail is the outcome of one individual health check item.
type HealthCheckDetail struct {
	// Type is the probe kind: "http", "api" or "tcp".
	Type string `json:"type"`
	// Target identifies what was probed, e.g. ":3456/api/v1/info".
	Target string `json:"target"`
	// Healthy reports whether this single check passed.
	Healthy bool `json:"healthy"`
	// Status is the HTTP status code (http/api probes); omitted when zero.
	Status int `json:"status,omitempty"`
	// Latency is the formatted probe duration, e.g. "45ms".
	Latency string `json:"latency"`
	// Error carries the failure message when the check is unhealthy.
	Error string `json:"error,omitempty"`
}
|
|
|
|
// Stack represents a docker compose stack on disk.
|
|
type Stack struct {
|
|
Name string `json:"name"`
|
|
Meta Metadata `json:"meta"`
|
|
ComposePath string `json:"compose_path"`
|
|
State ContainerState `json:"state"`
|
|
Deployed bool `json:"deployed"` // Has app.yaml with deployed=true
|
|
Protected bool `json:"protected"`
|
|
Orphaned bool `json:"orphaned"` // Deployed but no catalog template
|
|
Containers []ContainerInfo `json:"containers"`
|
|
AppConfig *AppConfig `json:"app_config,omitempty"`
|
|
Deploying bool `json:"deploying"` // compose up in progress
|
|
DeployError string `json:"deploy_error,omitempty"` // last async deploy error
|
|
HealthProbe *HealthProbeResult `json:"health_probe,omitempty"` // controller-side probe result
|
|
LastUpdated time.Time `json:"last_updated"`
|
|
}
|
|
|
|
// Manager handles all docker compose stack operations.
|
|
type Manager struct {
|
|
cfg *config.Config
|
|
logger *log.Logger
|
|
composeCmd string
|
|
stacks map[string]*Stack
|
|
mu sync.RWMutex
|
|
encKey []byte // AES-256 key for encrypting sensitive values in app.yaml
|
|
}
|
|
|
|
// NewManager creates a new stack manager.
|
|
func NewManager(cfg *config.Config, logger *log.Logger) (*Manager, error) {
|
|
composeCmd := cfg.Stacks.ComposeCommand
|
|
if composeCmd == "" {
|
|
composeCmd = detectComposeCommand()
|
|
}
|
|
if composeCmd == "" {
|
|
return nil, fmt.Errorf("docker compose not found (tried 'docker compose' and 'docker-compose')")
|
|
}
|
|
|
|
logger.Printf("[INFO] Using compose command: %s", composeCmd)
|
|
|
|
if err := os.MkdirAll(cfg.Paths.StacksDir, 0755); err != nil {
|
|
return nil, fmt.Errorf("creating stacks directory %s: %w", cfg.Paths.StacksDir, err)
|
|
}
|
|
|
|
return &Manager{
|
|
cfg: cfg,
|
|
logger: logger,
|
|
composeCmd: composeCmd,
|
|
stacks: make(map[string]*Stack),
|
|
}, nil
|
|
}
|
|
|
|
// SetEncryptionKey sets the AES-256 key used to encrypt/decrypt sensitive values in app.yaml.
|
|
func (m *Manager) SetEncryptionKey(key []byte) {
|
|
m.encKey = key
|
|
}
|
|
|
|
// MigrateEncryption re-saves app.yaml for deployed stacks that still have
|
|
// plaintext values in sensitive fields. Called once on startup.
|
|
func (m *Manager) MigrateEncryption() {
|
|
if m.encKey == nil {
|
|
return
|
|
}
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
migrated := 0
|
|
for _, s := range m.stacks {
|
|
if !s.Deployed {
|
|
continue
|
|
}
|
|
stackDir := filepath.Dir(s.ComposePath)
|
|
appCfg := LoadAppConfig(stackDir)
|
|
if appCfg == nil {
|
|
continue
|
|
}
|
|
meta := LoadMetadata(stackDir)
|
|
sensitive := SensitiveEnvVars(&meta)
|
|
if len(sensitive) == 0 {
|
|
continue
|
|
}
|
|
needsMigration := false
|
|
for _, envVar := range sensitive {
|
|
if v, ok := appCfg.Env[envVar]; ok && v != "" && !crypto.IsEncrypted(v) {
|
|
needsMigration = true
|
|
break
|
|
}
|
|
}
|
|
if needsMigration {
|
|
if err := SaveAppConfig(stackDir, appCfg, m.encKey, sensitive); err != nil {
|
|
m.logger.Printf("[WARN] Encryption migration failed for %s: %v", s.Name, err)
|
|
} else {
|
|
migrated++
|
|
}
|
|
}
|
|
}
|
|
if migrated > 0 {
|
|
m.logger.Printf("[INFO] Encrypted sensitive values in %d app.yaml file(s)", migrated)
|
|
}
|
|
}
|
|
|
|
// toTitleCase upper-cases the first character of every whitespace-separated
// word in s and joins the words with single spaces. Leading, trailing and
// repeated whitespace is collapsed as a consequence; the remainder of each
// word is left untouched.
func toTitleCase(s string) string {
	words := strings.Fields(s)
	for i, w := range words {
		// Locate the end of the first rune. Ranging over a string yields the
		// byte offset of each rune start, so the first non-zero offset is the
		// boundary. Slicing at a fixed byte (w[:1]) would split multi-byte
		// runes such as "é" and corrupt the word.
		end := len(w)
		for off := range w {
			if off > 0 {
				end = off
				break
			}
		}
		words[i] = strings.ToUpper(w[:end]) + w[end:]
	}
	return strings.Join(words, " ")
}
|
|
|
|
// detectComposeCommand probes for a usable compose command. It returns
// "docker compose" when `docker compose version` runs successfully,
// otherwise "docker-compose" when that binary is found on PATH, and ""
// when neither is available.
func detectComposeCommand() string {
	pluginErr := exec.Command("docker", "compose", "version").Run()
	if pluginErr == nil {
		return "docker compose"
	}

	_, lookErr := exec.LookPath("docker-compose")
	if lookErr == nil {
		return "docker-compose"
	}

	return ""
}
|
|
|
|
// DeployedStackNames returns the names of all deployed stacks.
|
|
func (m *Manager) DeployedStackNames() []string {
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
var names []string
|
|
for name, stack := range m.stacks {
|
|
if stack.Deployed {
|
|
names = append(names, name)
|
|
}
|
|
}
|
|
return names
|
|
}
|
|
|
|
// ScanStacks discovers all compose stacks in the stacks directory.
|
|
func (m *Manager) ScanStacks() error {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
entries, err := os.ReadDir(m.cfg.Paths.StacksDir)
|
|
if err != nil {
|
|
return fmt.Errorf("reading stacks directory: %w", err)
|
|
}
|
|
|
|
found := make(map[string]bool)
|
|
|
|
for _, entry := range entries {
|
|
if !entry.IsDir() {
|
|
continue
|
|
}
|
|
|
|
name := entry.Name()
|
|
stackDir := filepath.Join(m.cfg.Paths.StacksDir, name)
|
|
composePath := filepath.Join(stackDir, "docker-compose.yml")
|
|
|
|
if _, err := os.Stat(composePath); os.IsNotExist(err) {
|
|
composePath = filepath.Join(stackDir, "docker-compose.yaml")
|
|
if _, err := os.Stat(composePath); os.IsNotExist(err) {
|
|
continue
|
|
}
|
|
}
|
|
|
|
found[name] = true
|
|
|
|
meta := LoadMetadata(stackDir)
|
|
appCfg := LoadAppConfig(stackDir)
|
|
deployed := appCfg != nil && appCfg.Deployed
|
|
|
|
if existing, ok := m.stacks[name]; ok {
|
|
existing.ComposePath = composePath
|
|
existing.Meta = meta
|
|
existing.Protected = m.cfg.IsProtectedStack(name)
|
|
existing.Deployed = deployed
|
|
existing.AppConfig = appCfg
|
|
} else {
|
|
m.stacks[name] = &Stack{
|
|
Name: name,
|
|
Meta: meta,
|
|
ComposePath: composePath,
|
|
State: StateNotDeployed,
|
|
Deployed: deployed,
|
|
Protected: m.cfg.IsProtectedStack(name),
|
|
AppConfig: appCfg,
|
|
}
|
|
}
|
|
}
|
|
|
|
// Remove stacks no longer on disk
|
|
for name := range m.stacks {
|
|
if !found[name] {
|
|
delete(m.stacks, name)
|
|
}
|
|
}
|
|
|
|
// Detect orphaned stacks (deployed but no longer in catalog)
|
|
catalogTemplates := m.getCatalogTemplateSlugs()
|
|
if catalogTemplates != nil {
|
|
orphanCount := 0
|
|
for _, stack := range m.stacks {
|
|
if stack.Protected || !stack.Deployed {
|
|
stack.Orphaned = false
|
|
continue
|
|
}
|
|
stack.Orphaned = !catalogTemplates[stack.Name]
|
|
if stack.Orphaned {
|
|
orphanCount++
|
|
}
|
|
}
|
|
if orphanCount > 0 {
|
|
m.logger.Printf("[INFO] Detected %d orphaned stack(s)", orphanCount)
|
|
}
|
|
}
|
|
|
|
deployedCount := 0
|
|
for _, s := range m.stacks {
|
|
if s.Deployed {
|
|
deployedCount++
|
|
}
|
|
}
|
|
m.logger.Printf("[INFO] Scanned stacks: %d found (%d deployed, %d available)",
|
|
len(m.stacks), deployedCount, len(m.stacks)-deployedCount)
|
|
return m.refreshStatusLocked()
|
|
}
|
|
|
|
// RefreshStatus updates container status for all known stacks.
|
|
func (m *Manager) RefreshStatus() error {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
return m.refreshStatusLocked()
|
|
}
|
|
|
|
func (m *Manager) refreshStatusLocked() error {
|
|
output, err := m.execCommand("docker", "ps", "-a",
|
|
"--format", "{{.Names}}\t{{.Image}}\t{{.State}}\t{{.Status}}\t{{.Label \"com.docker.compose.project\"}}",
|
|
"--no-trunc")
|
|
if err != nil {
|
|
return fmt.Errorf("docker ps: %w", err)
|
|
}
|
|
|
|
projectContainers := make(map[string][]ContainerInfo)
|
|
|
|
for _, line := range strings.Split(strings.TrimSpace(output), "\n") {
|
|
if line == "" {
|
|
continue
|
|
}
|
|
parts := strings.SplitN(line, "\t", 5)
|
|
if len(parts) < 5 || parts[4] == "" {
|
|
continue
|
|
}
|
|
|
|
ci := ContainerInfo{
|
|
Name: parts[0],
|
|
Image: parts[1],
|
|
State: resolveContainerState(parts[2], parts[3]),
|
|
Status: parts[3],
|
|
}
|
|
projectContainers[parts[4]] = append(projectContainers[parts[4]], ci)
|
|
}
|
|
|
|
for name, stack := range m.stacks {
|
|
containers, exists := projectContainers[name]
|
|
if !exists {
|
|
stack.Containers = nil
|
|
if stack.Deploying {
|
|
stack.State = StateDeploying
|
|
} else if stack.Deployed {
|
|
stack.State = StateStopped
|
|
} else {
|
|
stack.State = StateNotDeployed
|
|
}
|
|
} else {
|
|
stack.Containers = containers
|
|
stack.State = aggregateState(containers)
|
|
}
|
|
|
|
// Re-apply controller-side health probe results: if the last probe
|
|
// failed and Docker thinks the container is running, override to unhealthy.
|
|
if stack.State == StateRunning && stack.HealthProbe != nil && !stack.HealthProbe.Healthy {
|
|
stack.State = StateUnhealthy
|
|
}
|
|
|
|
stack.LastUpdated = time.Now()
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// resolveContainerState determines the effective state by combining Docker's
|
|
// State field (running/exited/etc.) with the Status field that contains health info.
|
|
//
|
|
// Docker State: "running", "exited", "restarting", "paused", "created", "dead", "removing"
|
|
// Docker Status: "Up 3 hours (healthy)", "Up 9 seconds (health: starting)", "Up 2 min (unhealthy)"
|
|
func resolveContainerState(dockerState, dockerStatus string) ContainerState {
|
|
state := strings.ToLower(strings.TrimSpace(dockerState))
|
|
status := strings.ToLower(dockerStatus)
|
|
|
|
switch state {
|
|
case "running":
|
|
// Check health sub-status for containers with healthchecks
|
|
if strings.Contains(status, "(health: starting)") {
|
|
return StateStarting
|
|
}
|
|
if strings.Contains(status, "(unhealthy)") {
|
|
return StateUnhealthy
|
|
}
|
|
// "(healthy)" or no healthcheck = running
|
|
return StateRunning
|
|
|
|
case "exited":
|
|
return StateExited
|
|
case "restarting":
|
|
return StateRestarting
|
|
case "paused":
|
|
return StatePaused
|
|
case "created", "dead", "removing":
|
|
return StateStopped
|
|
default:
|
|
return StateUnknown
|
|
}
|
|
}
|
|
|
|
// aggregateState determines the overall stack state from its containers.
|
|
// Priority: unhealthy/starting > restarting > all-running > stopped
|
|
func aggregateState(containers []ContainerInfo) ContainerState {
|
|
if len(containers) == 0 {
|
|
return StateNotDeployed
|
|
}
|
|
|
|
running := 0
|
|
starting := 0
|
|
unhealthy := 0
|
|
restarting := 0
|
|
stopped := 0
|
|
|
|
for _, c := range containers {
|
|
switch c.State {
|
|
case StateRunning:
|
|
running++
|
|
case StateStarting:
|
|
starting++
|
|
case StateUnhealthy:
|
|
unhealthy++
|
|
case StateRestarting:
|
|
restarting++
|
|
case StateStopped, StateExited:
|
|
stopped++
|
|
}
|
|
}
|
|
|
|
total := len(containers)
|
|
|
|
// Any unhealthy → whole stack is unhealthy
|
|
if unhealthy > 0 {
|
|
return StateUnhealthy
|
|
}
|
|
// Any still starting → stack is starting
|
|
if starting > 0 {
|
|
return StateStarting
|
|
}
|
|
// Any restarting → stack is restarting
|
|
if restarting > 0 {
|
|
return StateRestarting
|
|
}
|
|
// All running (and healthy) → stack is running
|
|
if running == total {
|
|
return StateRunning
|
|
}
|
|
// All stopped → stack is stopped
|
|
if stopped == total {
|
|
return StateStopped
|
|
}
|
|
// Mix (some running, some stopped) — report as running (partial)
|
|
if running > 0 {
|
|
return StateRunning
|
|
}
|
|
|
|
return StateStopped
|
|
}
|
|
|
|
// --- Stack accessors ---
|
|
|
|
func (m *Manager) GetStacks() []Stack {
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
result := make([]Stack, 0, len(m.stacks))
|
|
for _, s := range m.stacks {
|
|
result = append(result, *s)
|
|
}
|
|
|
|
// Sort alphabetically by display name for consistent UI ordering
|
|
sort.Slice(result, func(i, j int) bool {
|
|
return result[i].Meta.DisplayName < result[j].Meta.DisplayName
|
|
})
|
|
|
|
return result
|
|
}
|
|
|
|
func (m *Manager) GetStack(name string) (*Stack, bool) {
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
s, ok := m.stacks[name]
|
|
if !ok {
|
|
return nil, false
|
|
}
|
|
copy := *s
|
|
return ©, true
|
|
}
|
|
|
|
// --- Stack operations ---
|
|
// StartStack, StopStack, etc. now load app.yaml env for deployed stacks.
|
|
|
|
func (m *Manager) StartStack(name string) error {
|
|
stack, ok := m.GetStack(name)
|
|
if !ok {
|
|
return fmt.Errorf("stack %q not found", name)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Starting stack: %s", name)
|
|
start := time.Now()
|
|
|
|
dir := filepath.Dir(stack.ComposePath)
|
|
env := m.stackEnv(dir)
|
|
|
|
if _, err := m.composeExecCustomEnv(dir, env, "up", "-d"); err != nil {
|
|
m.logger.Printf("[ERROR] Stack %s start failed after %.1fs: %v", name, time.Since(start).Seconds(), err)
|
|
return fmt.Errorf("starting stack %s: %w", name, err)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Stack %s started successfully (took %.1fs)", name, time.Since(start).Seconds())
|
|
m.logPostStartStatus(name, dir, env)
|
|
return m.RefreshStatus()
|
|
}
|
|
|
|
func (m *Manager) StopStack(name string) error {
|
|
if m.cfg.IsProtectedStack(name) {
|
|
return fmt.Errorf("stack %q is protected and cannot be stopped", name)
|
|
}
|
|
|
|
stack, ok := m.GetStack(name)
|
|
if !ok {
|
|
return fmt.Errorf("stack %q not found", name)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Stopping stack: %s", name)
|
|
start := time.Now()
|
|
dir := filepath.Dir(stack.ComposePath)
|
|
|
|
if _, err := m.composeExec(dir, "down"); err != nil {
|
|
m.logger.Printf("[ERROR] Stack %s stop failed after %.1fs: %v", name, time.Since(start).Seconds(), err)
|
|
return fmt.Errorf("stopping stack %s: %w", name, err)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Stack %s stopped successfully (took %.1fs)", name, time.Since(start).Seconds())
|
|
return m.RefreshStatus()
|
|
}
|
|
|
|
func (m *Manager) RestartStack(name string) error {
|
|
stack, ok := m.GetStack(name)
|
|
if !ok {
|
|
return fmt.Errorf("stack %q not found", name)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Restarting stack: %s", name)
|
|
start := time.Now()
|
|
dir := filepath.Dir(stack.ComposePath)
|
|
env := m.stackEnv(dir)
|
|
|
|
// Use "up -d" instead of bare "restart" so that env vars from app.yaml
|
|
// are injected and any template changes (new images, healthchecks) are
|
|
// picked up. Plain "docker compose restart" only sends SIGTERM+start
|
|
// to existing containers without re-reading the compose file or env.
|
|
if _, err := m.composeExecCustomEnv(dir, env, "up", "-d"); err != nil {
|
|
m.logger.Printf("[ERROR] Stack %s restart failed after %.1fs: %v", name, time.Since(start).Seconds(), err)
|
|
return fmt.Errorf("restarting stack %s: %w", name, err)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Stack %s restarted successfully (took %.1fs)", name, time.Since(start).Seconds())
|
|
m.logPostStartStatus(name, dir, env)
|
|
return m.RefreshStatus()
|
|
}
|
|
|
|
func (m *Manager) UpdateStack(name string) error {
|
|
stack, ok := m.GetStack(name)
|
|
if !ok {
|
|
return fmt.Errorf("stack %q not found", name)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Updating stack: %s", name)
|
|
start := time.Now()
|
|
dir := filepath.Dir(stack.ComposePath)
|
|
env := m.stackEnv(dir)
|
|
|
|
if m.isDebug() {
|
|
m.checkLocalImages(name, dir)
|
|
}
|
|
|
|
if _, err := m.composeExecCustomEnv(dir, env, "pull"); err != nil {
|
|
m.logger.Printf("[ERROR] Stack %s update (pull) failed after %.1fs: %v", name, time.Since(start).Seconds(), err)
|
|
return fmt.Errorf("pulling images for %s: %w", name, err)
|
|
}
|
|
|
|
if _, err := m.composeExecCustomEnv(dir, env, "up", "-d", "--remove-orphans"); err != nil {
|
|
m.logger.Printf("[ERROR] Stack %s update (up) failed after %.1fs: %v", name, time.Since(start).Seconds(), err)
|
|
return fmt.Errorf("recreating %s: %w", name, err)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Stack %s updated successfully (took %.1fs)", name, time.Since(start).Seconds())
|
|
m.logPostStartStatus(name, dir, env)
|
|
return m.RefreshStatus()
|
|
}
|
|
|
|
func (m *Manager) GetLogs(name string, lines int) (string, error) {
|
|
stack, ok := m.GetStack(name)
|
|
if !ok {
|
|
return "", fmt.Errorf("stack %q not found", name)
|
|
}
|
|
|
|
if lines <= 0 {
|
|
lines = 100
|
|
}
|
|
if lines > 1000 {
|
|
lines = 1000
|
|
}
|
|
|
|
m.logger.Printf("[DEBUG] Fetching logs for %s (tail %d)", name, lines)
|
|
|
|
dir := filepath.Dir(stack.ComposePath)
|
|
output, err := m.composeExec(dir, "logs", "--tail", fmt.Sprintf("%d", lines), "--no-color")
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] Failed to fetch logs for %s: %v", name, err)
|
|
return "", fmt.Errorf("getting logs for %s: %w", name, err)
|
|
}
|
|
|
|
if len(output) == 0 {
|
|
m.logger.Printf("[DEBUG] Logs result for %s: 0 bytes returned (empty)", name)
|
|
} else {
|
|
m.logger.Printf("[DEBUG] Logs result for %s: %d bytes returned", name, len(output))
|
|
}
|
|
return output, nil
|
|
}
|
|
|
|
// --- Env and compose helpers ---
|
|
|
|
// stackEnv builds the full OS env slice for a stack, merging app.yaml values.
|
|
func (m *Manager) stackEnv(stackDir string) []string {
|
|
env := os.Environ()
|
|
|
|
// Always inject DOMAIN
|
|
env = append(env, fmt.Sprintf("DOMAIN=%s", m.cfg.Customer.Domain))
|
|
|
|
// Load app.yaml if it exists — merge its env vars (decrypted for docker-compose)
|
|
appCfg := LoadAppConfigDecrypted(stackDir, m.encKey)
|
|
if appCfg != nil {
|
|
for k, v := range appCfg.Env {
|
|
env = append(env, fmt.Sprintf("%s=%s", k, v))
|
|
}
|
|
}
|
|
|
|
return env
|
|
}
|
|
|
|
func (m *Manager) composeExec(dir string, args ...string) (string, error) {
|
|
return m.composeExecCustomEnv(dir, nil, args...)
|
|
}
|
|
|
|
func (m *Manager) composeExecCustomEnv(dir string, env []string, args ...string) (string, error) {
|
|
var cmd *exec.Cmd
|
|
|
|
if m.composeCmd == "docker compose" {
|
|
fullArgs := append([]string{"compose"}, args...)
|
|
cmd = exec.Command("docker", fullArgs...)
|
|
} else {
|
|
cmd = exec.Command("docker-compose", args...)
|
|
}
|
|
|
|
cmd.Dir = dir
|
|
|
|
if env != nil {
|
|
cmd.Env = env
|
|
} else {
|
|
env = m.stackEnv(dir)
|
|
cmd.Env = env
|
|
}
|
|
|
|
// Log env var keys at debug level
|
|
if m.isDebug() {
|
|
var appKeys []string
|
|
sysCount := 0
|
|
for _, e := range env {
|
|
parts := strings.SplitN(e, "=", 2)
|
|
if len(parts) == 2 {
|
|
key := parts[0]
|
|
// Only log non-system env vars (skip PATH, HOME, etc.)
|
|
if strings.ToUpper(key) == key && !strings.HasPrefix(key, "_") {
|
|
appKeys = append(appKeys, key)
|
|
} else {
|
|
sysCount++
|
|
}
|
|
}
|
|
}
|
|
if len(appKeys) > 0 {
|
|
m.logger.Printf("[DEBUG] Env vars for compose: [%s] (%d app + %d system)",
|
|
strings.Join(appKeys, ", "), len(appKeys), sysCount)
|
|
}
|
|
}
|
|
|
|
var stdout, stderr bytes.Buffer
|
|
cmd.Stdout = &stdout
|
|
cmd.Stderr = &stderr
|
|
|
|
m.logger.Printf("[DEBUG] Running: %s %s (in %s)", m.composeCmd, strings.Join(args, " "), dir)
|
|
|
|
start := time.Now()
|
|
if err := cmd.Run(); err != nil {
|
|
elapsed := time.Since(start)
|
|
exitCode := -1
|
|
if exitErr, ok := err.(*exec.ExitError); ok {
|
|
exitCode = exitErr.ExitCode()
|
|
}
|
|
m.logger.Printf("[ERROR] Command failed: %s %s (in %s) — exit code %d (took %.1fs)",
|
|
m.composeCmd, strings.Join(args, " "), dir, exitCode, elapsed.Seconds())
|
|
if stdoutStr := truncateStr(stdout.String(), 500); stdoutStr != "" {
|
|
m.logger.Printf("[ERROR] stdout: %s", stdoutStr)
|
|
}
|
|
if stderrStr := truncateStr(stderr.String(), 500); stderrStr != "" {
|
|
m.logger.Printf("[ERROR] stderr: %s", stderrStr)
|
|
}
|
|
return stdout.String(), fmt.Errorf("exit code %d\nstderr: %s", exitCode, truncateStr(stderr.String(), 500))
|
|
}
|
|
|
|
m.logger.Printf("[DEBUG] Command completed: %s %s (took %.1fs)", m.composeCmd, strings.Join(args, " "), time.Since(start).Seconds())
|
|
return stdout.String(), nil
|
|
}
|
|
|
|
func (m *Manager) execCommand(name string, args ...string) (string, error) {
|
|
cmd := exec.Command(name, args...)
|
|
|
|
var stdout, stderr bytes.Buffer
|
|
cmd.Stdout = &stdout
|
|
cmd.Stderr = &stderr
|
|
|
|
if err := cmd.Run(); err != nil {
|
|
return "", fmt.Errorf("exec %s %s: %w\nstderr: %s", name, strings.Join(args, " "), err, stderr.String())
|
|
}
|
|
|
|
return stdout.String(), nil
|
|
}
|
|
|
|
// isDebug returns true if logging level is "debug".
|
|
func (m *Manager) isDebug() bool {
|
|
return m.cfg.Logging.Level == "debug"
|
|
}
|
|
|
|
// truncateStr trims surrounding whitespace from s and limits the result to
// at most maxLen bytes of content, appending "..." when anything was cut.
//
// The cut never lands inside a multi-byte UTF-8 sequence: it backs off to
// the nearest rune start at or before maxLen, so the output stays valid
// UTF-8 whenever the input is. A zero or negative maxLen yields just "..."
// for non-empty input.
func truncateStr(s string, maxLen int) string {
	s = strings.TrimSpace(s)
	if len(s) <= maxLen {
		return s
	}

	// Ranging over a string yields the byte offset of every rune start; keep
	// the last one that does not exceed maxLen.
	cut := 0
	for off := range s {
		if off > maxLen {
			break
		}
		cut = off
	}
	return s[:cut] + "..."
}
|
|
|
|
// logPostStartStatus queries container states after a start/deploy operation
|
|
// and logs them. This runs asynchronously to avoid blocking the HTTP response.
|
|
func (m *Manager) logPostStartStatus(name, stackDir string, env []string) {
|
|
envCopy := make([]string, len(env))
|
|
copy(envCopy, env)
|
|
go func() {
|
|
time.Sleep(3 * time.Second)
|
|
|
|
output, err := m.composeExecCustomEnv(stackDir, envCopy, "ps", "-a", "--format", "table {{.Name}}\t{{.Image}}\t{{.State}}\t{{.Status}}")
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] Post-start status check failed for %s: %v", name, err)
|
|
return
|
|
}
|
|
|
|
lines := strings.Split(strings.TrimSpace(output), "\n")
|
|
if len(lines) <= 1 {
|
|
m.logger.Printf("[WARN] Post-start status for %s: no containers found", name)
|
|
return
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Stack %s post-start status:", name)
|
|
// Skip header line
|
|
for _, line := range lines[1:] {
|
|
m.logger.Printf("[INFO] %s", line)
|
|
}
|
|
}()
|
|
}
|
|
|
|
// checkLocalImages parses docker-compose.yml for image: lines and checks which
|
|
// are available locally. Informational only — logs results but never fails.
|
|
func (m *Manager) checkLocalImages(name, stackDir string) {
|
|
composePath := filepath.Join(stackDir, "docker-compose.yml")
|
|
data, err := os.ReadFile(composePath)
|
|
if err != nil {
|
|
composePath = filepath.Join(stackDir, "docker-compose.yaml")
|
|
data, err = os.ReadFile(composePath)
|
|
if err != nil {
|
|
m.logger.Printf("[DEBUG] Could not read compose file for image check: %v", err)
|
|
return
|
|
}
|
|
}
|
|
|
|
var images []string
|
|
for _, line := range strings.Split(string(data), "\n") {
|
|
trimmed := strings.TrimSpace(line)
|
|
if strings.HasPrefix(trimmed, "image:") {
|
|
img := strings.TrimSpace(strings.TrimPrefix(trimmed, "image:"))
|
|
img = strings.Trim(img, "\"'")
|
|
if img != "" && !strings.Contains(img, "${") {
|
|
images = append(images, img)
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(images) == 0 {
|
|
m.logger.Printf("[DEBUG] No static image references found in %s compose file", name)
|
|
return
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Deploying stack %s — checking %d images...", name, len(images))
|
|
for _, img := range images {
|
|
cmd := exec.Command("docker", "image", "inspect", img)
|
|
if err := cmd.Run(); err != nil {
|
|
m.logger.Printf("[DEBUG] %s — not found locally, will pull", img)
|
|
} else {
|
|
m.logger.Printf("[DEBUG] %s — found locally", img)
|
|
}
|
|
}
|
|
}
|
|
|
|
// --- Memory helpers ---
|
|
|
|
// ParseMemoryMB converts a memory size such as "500M", "1G", "1.5G",
// "1024MB", "2gb" or "768" into whole megabytes (truncated toward zero).
//
// Recognized unit suffixes are G/GB (×1024) and M/MB; a bare number is taken
// as megabytes. Matching is case-insensitive and whitespace around the value
// or between number and unit is ignored. The function returns 0 for empty or
// unparseable input, and also for negative, NaN, infinite or absurdly large
// values, so the result is always a well-defined non-negative int.
func ParseMemoryMB(s string) int {
	// Upper bound (1 PiB expressed in MB). It keeps the float→int conversion
	// within int range even on 32-bit platforms.
	const maxMB = 1 << 30

	text := strings.ToUpper(strings.TrimSpace(s))
	if text == "" {
		return 0
	}

	units := []struct {
		suffix string
		mb     float64
	}{
		{"GB", 1024},
		{"G", 1024},
		{"MB", 1},
		{"M", 1},
	}

	factor := 1.0 // bare number: already megabytes
	for _, u := range units {
		if strings.HasSuffix(text, u.suffix) {
			text = strings.TrimSpace(strings.TrimSuffix(text, u.suffix))
			factor = u.mb
			break
		}
	}

	val, err := strconv.ParseFloat(text, 64)
	if err != nil {
		return 0
	}

	mb := val * factor
	// Written as a positive range check so that NaN (which fails every
	// comparison) is rejected together with negatives and overflow.
	if !(mb >= 0 && mb <= maxMB) {
		return 0
	}
	return int(mb)
}
|
|
|
|
// CommittedMemory returns the sum of mem_request and mem_limit across all
|
|
// deployed stacks that are currently running (or starting/unhealthy/restarting).
|
|
// Stopped and exited apps are excluded since they do not consume memory.
|
|
func (m *Manager) CommittedMemory() (requestMB int, limitMB int) {
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
for _, s := range m.stacks {
|
|
if !s.Deployed {
|
|
continue
|
|
}
|
|
if s.State == StateStopped || s.State == StateExited {
|
|
continue
|
|
}
|
|
requestMB += ParseMemoryMB(s.Meta.Resources.MemRequest)
|
|
limitMB += ParseMemoryMB(s.Meta.Resources.MemLimit)
|
|
}
|
|
return
|
|
}
|
|
|
|
// StackMemoryMB returns the mem_request for a specific stack.
|
|
func (m *Manager) StackMemoryMB(name string) int {
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
if s, ok := m.stacks[name]; ok {
|
|
return ParseMemoryMB(s.Meta.Resources.MemRequest)
|
|
}
|
|
return 0
|
|
}
|
|
|
|
// getCatalogTemplateSlugs reads the synced catalog cache and returns a set of
|
|
// template slugs (directory names) that have a docker-compose.yml.
|
|
func (m *Manager) getCatalogTemplateSlugs() map[string]bool {
|
|
cacheDir := filepath.Join(m.cfg.Paths.DataDir, "catalog-cache", "templates")
|
|
entries, err := os.ReadDir(cacheDir)
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] Cannot read catalog cache for orphan detection: %v", err)
|
|
return nil
|
|
}
|
|
slugs := make(map[string]bool, len(entries))
|
|
for _, e := range entries {
|
|
if e.IsDir() {
|
|
composePath := filepath.Join(cacheDir, e.Name(), "docker-compose.yml")
|
|
if _, err := os.Stat(composePath); err == nil {
|
|
slugs[e.Name()] = true
|
|
}
|
|
}
|
|
}
|
|
return slugs
|
|
} |