Files
deploy-felhom-compose/controller/internal/stacks/manager.go
T
admin 2e9634e50f health-probes: clear stale results on start/restart, fast 10s probing until healthy
- Clear HealthProbe on StartStack/RestartStack so stale unhealthy state
  isn't re-applied by RefreshStatus
- Use 10s probe interval for unhealthy/new stacks (nil HealthProbe probes
  immediately on next tick), switch to normal 5m interval once healthy
- Scheduler frequency 1m → 10s to support fast probing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 14:59:25 +01:00

995 lines
28 KiB
Go

package stacks
import (
"bytes"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
"sort"
"strconv"
"strings"
"sync"
"time"
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
"gitea.dooplex.hu/admin/felhom-controller/internal/crypto"
)
// ContainerState represents the current state of a container.
// The same type is also used for the aggregated state of a whole stack
// (Stack.State), which is why it includes values such as "not_deployed" and
// "deploying" that Docker itself never reports.
type ContainerState string
// Known states. The string values are what gets serialized to JSON.
const (
StateRunning ContainerState = "running"
StateStarting ContainerState = "starting" // running but health: starting
StateUnhealthy ContainerState = "unhealthy" // running but health: unhealthy
StateStopped ContainerState = "stopped"
StateRestarting ContainerState = "restarting"
StateExited ContainerState = "exited"
StatePaused ContainerState = "paused"
StateUnknown ContainerState = "unknown"
StateNotDeployed ContainerState = "not_deployed"
StateDeploying ContainerState = "deploying" // compose up in progress (image pull, etc.)
StateOrphaned ContainerState = "orphaned"
)
// ContainerInfo holds status info about a single container within a stack.
// Instances are built from one row of `docker ps -a` output when the manager
// refreshes stack status.
type ContainerInfo struct {
// Name is the container name as printed by docker ps.
Name string `json:"name"`
// Image is the image reference the container was created from.
Image string `json:"image"`
// State is the effective state derived from Docker's State and Status columns.
State ContainerState `json:"state"`
Status string `json:"status"` // e.g. "Up 3 hours (healthy)"
}
// HealthProbeResult holds the latest controller-side health probe result.
// When Healthy is false and Docker reports the stack as running, the status
// refresh overrides the stack state to "unhealthy".
type HealthProbeResult struct {
// Healthy is the overall verdict of the probe.
Healthy bool `json:"healthy"`
// LastCheck is presumably the time the probe ran — it is set outside this
// file section; confirm against the probe scheduler.
LastCheck time.Time `json:"last_check"`
// Details lists the outcome of each individual check.
Details []HealthCheckDetail `json:"details"`
}
// HealthCheckDetail holds the result of a single health check item.
// Status and Error are omitted from JSON when empty/zero.
type HealthCheckDetail struct {
Type string `json:"type"` // "http", "api", "tcp"
Target string `json:"target"` // e.g. ":3456/api/v1/info"
Healthy bool `json:"healthy"`
Status int `json:"status,omitempty"` // HTTP status code (for http/api)
Latency string `json:"latency"` // e.g. "45ms"
Error string `json:"error,omitempty"` // error message if unhealthy
}
// Stack represents a docker compose stack on disk.
// The manager keeps one *Stack per directory found in the stacks directory;
// accessors hand out deep copies so callers never share these pointers.
type Stack struct {
// Name is the stack's directory name; it is also the key in Manager.stacks
// and is matched against the compose project label when reading docker ps.
Name string `json:"name"`
// Meta is the metadata loaded from the stack directory.
Meta Metadata `json:"meta"`
// ComposePath is the full path of docker-compose.yml (or .yaml).
ComposePath string `json:"compose_path"`
// State is the aggregated state of the stack's containers.
State ContainerState `json:"state"`
Deployed bool `json:"deployed"` // Has app.yaml with deployed=true
// Protected mirrors cfg.IsProtectedStack(Name).
Protected bool `json:"protected"`
Orphaned bool `json:"orphaned"` // Deployed but no catalog template
// Containers is nil when no container carries this stack's project label.
Containers []ContainerInfo `json:"containers"`
// AppConfig is the parsed app.yaml, or nil when none was loaded.
AppConfig *AppConfig `json:"app_config,omitempty"`
Deploying bool `json:"deploying"` // compose up in progress
DeployError string `json:"deploy_error,omitempty"` // last async deploy error
HealthProbe *HealthProbeResult `json:"health_probe,omitempty"` // controller-side probe result
// LastUpdated is set on every status refresh.
LastUpdated time.Time `json:"last_updated"`
}
// Manager handles all docker compose stack operations.
// Create it with NewManager; the stacks map is populated by ScanStacks.
type Manager struct {
cfg *config.Config
logger *log.Logger
// composeCmd is the compose invocation chosen at construction time,
// e.g. "docker compose" or "docker-compose".
composeCmd string
// stacks maps stack name -> live stack record. Guarded by mu.
stacks map[string]*Stack
// mu guards stacks and is also taken when writing encKey.
// NOTE(review): stackEnv reads encKey without holding mu — confirm that the
// key is only set before concurrent use begins.
mu sync.RWMutex
encKey []byte // AES-256 key for encrypting sensitive values in app.yaml
}
// NewManager builds a stack Manager from cfg.
//
// The compose command is taken from cfg.Stacks.ComposeCommand; when that is
// empty it is auto-detected, and an error is returned if neither variant is
// available. The stacks directory is created if it does not exist yet.
// The returned Manager has an empty stack table until ScanStacks is called.
func NewManager(cfg *config.Config, logger *log.Logger) (*Manager, error) {
	cmdName := cfg.Stacks.ComposeCommand
	if cmdName == "" {
		if cmdName = detectComposeCommand(); cmdName == "" {
			return nil, fmt.Errorf("docker compose not found (tried 'docker compose' and 'docker-compose')")
		}
	}
	logger.Printf("[INFO] Using compose command: %s", cmdName)

	stacksDir := cfg.Paths.StacksDir
	if mkErr := os.MkdirAll(stacksDir, 0755); mkErr != nil {
		return nil, fmt.Errorf("creating stacks directory %s: %w", stacksDir, mkErr)
	}

	mgr := &Manager{
		cfg:        cfg,
		logger:     logger,
		composeCmd: cmdName,
		stacks:     make(map[string]*Stack),
	}
	return mgr, nil
}
// SetEncryptionKey installs the AES-256 key used to encrypt and decrypt
// sensitive values stored in app.yaml. The write happens under the manager's
// write lock.
func (m *Manager) SetEncryptionKey(key []byte) {
	m.mu.Lock()
	m.encKey = key
	m.mu.Unlock()
}
// MigrateEncryption rewrites app.yaml for every deployed stack that still
// carries a non-empty plaintext value in one of its sensitive env vars, so the
// value ends up encrypted on disk. It is a no-op when no encryption key has
// been set. Failures are logged per stack and do not stop the migration.
// Intended to be called once on startup, after stacks have been scanned.
func (m *Manager) MigrateEncryption() {
	m.mu.Lock()
	defer m.mu.Unlock()

	if m.encKey == nil {
		return
	}

	var converted int
	for _, stack := range m.stacks {
		if !stack.Deployed {
			continue
		}

		dir := filepath.Dir(stack.ComposePath)
		cfg := LoadAppConfig(dir)
		if cfg == nil {
			continue
		}

		meta := LoadMetadata(dir)
		secretVars := SensitiveEnvVars(&meta)
		if len(secretVars) == 0 {
			continue
		}

		// A single plaintext sensitive value is enough to require a re-save.
		hasPlaintext := false
		for _, key := range secretVars {
			val, present := cfg.Env[key]
			if present && val != "" && !crypto.IsEncrypted(val) {
				hasPlaintext = true
				break
			}
		}
		if !hasPlaintext {
			continue
		}

		if err := SaveAppConfig(dir, cfg, m.encKey, secretVars); err != nil {
			m.logger.Printf("[WARN] Encryption migration failed for %s: %v", stack.Name, err)
			continue
		}
		converted++
	}

	if converted > 0 {
		m.logger.Printf("[INFO] Encrypted sensitive values in %d app.yaml file(s)", converted)
	}
}
// toTitleCase capitalizes the first letter of each whitespace-separated word
// and joins the words with single spaces (leading, trailing and repeated
// whitespace is collapsed by strings.Fields).
//
// The first letter is taken as a whole UTF-8 encoded rune rather than a single
// byte, so words starting with a non-ASCII letter ("élet" -> "Élet") are
// upper-cased correctly instead of being corrupted.
func toTitleCase(s string) string {
	words := strings.Fields(s)
	for i, w := range words {
		// Find the byte length of the first rune: ranging over a string
		// yields the start offset of every rune, so the first offset > 0 is
		// where the second rune begins.
		firstLen := len(w)
		for off := range w {
			if off > 0 {
				firstLen = off
				break
			}
		}
		words[i] = strings.ToUpper(w[:firstLen]) + w[firstLen:]
	}
	return strings.Join(words, " ")
}
// detectComposeCommand probes the host for a usable compose invocation.
// It prefers the CLI plugin ("docker compose", verified by running
// `docker compose version`) and falls back to a standalone "docker-compose"
// binary found on PATH. It returns "" when neither is available.
func detectComposeCommand() string {
	pluginProbe := exec.Command("docker", "compose", "version")
	if pluginProbe.Run() == nil {
		return "docker compose"
	}

	_, lookErr := exec.LookPath("docker-compose")
	if lookErr != nil {
		return ""
	}
	return "docker-compose"
}
// DeployedStackNames lists the names of all stacks currently marked as
// deployed. The order is unspecified (map iteration order); the result is nil
// when nothing is deployed.
func (m *Manager) DeployedStackNames() []string {
	m.mu.RLock()
	defer m.mu.RUnlock()

	var deployed []string
	for key := range m.stacks {
		if !m.stacks[key].Deployed {
			continue
		}
		deployed = append(deployed, key)
	}
	return deployed
}
// ScanStacks synchronizes the in-memory stack table with the stacks directory.
//
// Every sub-directory containing a docker-compose.yml (or .yaml) is treated as
// a stack: new ones are added, known ones get their metadata refreshed, and
// entries whose directory disappeared are dropped. Afterwards orphaned stacks
// are flagged (when the catalog cache is readable) and container status is
// refreshed. The whole scan runs under the manager's write lock.
func (m *Manager) ScanStacks() error {
	m.mu.Lock()
	defer m.mu.Unlock()

	root := m.cfg.Paths.StacksDir
	dirEntries, err := os.ReadDir(root)
	if err != nil {
		return fmt.Errorf("reading stacks directory: %w", err)
	}

	onDisk := make(map[string]bool)
	for _, de := range dirEntries {
		if !de.IsDir() {
			continue
		}
		name := de.Name()
		dir := filepath.Join(root, name)

		// Prefer the .yml spelling, fall back to .yaml; skip directories
		// that have neither.
		composeFile := filepath.Join(dir, "docker-compose.yml")
		if _, statErr := os.Stat(composeFile); os.IsNotExist(statErr) {
			composeFile = filepath.Join(dir, "docker-compose.yaml")
			if _, statErr := os.Stat(composeFile); os.IsNotExist(statErr) {
				continue
			}
		}
		onDisk[name] = true

		meta := LoadMetadata(dir)
		appCfg := LoadAppConfig(dir)
		isDeployed := appCfg != nil && appCfg.Deployed
		protected := m.cfg.IsProtectedStack(name)

		known, seen := m.stacks[name]
		if !seen {
			m.stacks[name] = &Stack{
				Name:        name,
				Meta:        meta,
				ComposePath: composeFile,
				State:       StateNotDeployed,
				Deployed:    isDeployed,
				Protected:   protected,
				AppConfig:   appCfg,
			}
			continue
		}

		known.ComposePath = composeFile
		known.Meta = meta
		known.Protected = protected
		// Don't overwrite Deployed/AppConfig while an async deploy is in
		// progress — the goroutine manages these fields (H3 fix).
		if !known.Deploying {
			known.Deployed = isDeployed
			known.AppConfig = appCfg
		}
	}

	// Forget stacks whose directory is gone.
	for name := range m.stacks {
		if !onDisk[name] {
			delete(m.stacks, name)
		}
	}

	// Flag orphans: deployed, unprotected stacks with no catalog template.
	// Skipped entirely when the catalog cache could not be read.
	if catalog := m.getCatalogTemplateSlugs(); catalog != nil {
		orphans := 0
		for _, st := range m.stacks {
			st.Orphaned = !st.Protected && st.Deployed && !catalog[st.Name]
			if st.Orphaned {
				orphans++
			}
		}
		if orphans > 0 {
			m.logger.Printf("[INFO] Detected %d orphaned stack(s)", orphans)
		}
	}

	total, deployedTotal := len(m.stacks), 0
	for _, st := range m.stacks {
		if st.Deployed {
			deployedTotal++
		}
	}
	m.logger.Printf("[INFO] Scanned stacks: %d found (%d deployed, %d available)",
		total, deployedTotal, total-deployedTotal)

	return m.refreshStatusLocked()
}
// RefreshStatus updates container status for all known stacks.
// It takes the manager's write lock for the duration of the refresh and
// returns an error when the underlying `docker ps` call fails.
func (m *Manager) RefreshStatus() error {
m.mu.Lock()
defer m.mu.Unlock()
return m.refreshStatusLocked()
}
// refreshStatusLocked re-reads container state from `docker ps -a` and updates
// Containers, State and LastUpdated on every known stack. Containers are
// matched to stacks through the com.docker.compose.project label.
// The caller must hold m.mu for writing.
func (m *Manager) refreshStatusLocked() error {
	const psFormat = "{{.Names}}\t{{.Image}}\t{{.State}}\t{{.Status}}\t{{.Label \"com.docker.compose.project\"}}"

	raw, err := m.execCommand("docker", "ps", "-a", "--format", psFormat, "--no-trunc")
	if err != nil {
		return fmt.Errorf("docker ps: %w", err)
	}

	// Group containers by compose project; rows without a project label
	// (containers not started through compose) are ignored.
	byProject := make(map[string][]ContainerInfo)
	for _, row := range strings.Split(strings.TrimSpace(raw), "\n") {
		if row == "" {
			continue
		}
		cols := strings.SplitN(row, "\t", 5)
		if len(cols) < 5 || cols[4] == "" {
			continue
		}
		project := cols[4]
		byProject[project] = append(byProject[project], ContainerInfo{
			Name:   cols[0],
			Image:  cols[1],
			State:  resolveContainerState(cols[2], cols[3]),
			Status: cols[3],
		})
	}

	for name, st := range m.stacks {
		found, ok := byProject[name]
		switch {
		case ok:
			st.Containers = found
			st.State = aggregateState(found)
		case st.Deploying:
			st.Containers = nil
			st.State = StateDeploying
		case st.Deployed:
			st.Containers = nil
			st.State = StateStopped
		default:
			st.Containers = nil
			st.State = StateNotDeployed
		}

		// Re-apply controller-side health probe results: a failed last probe
		// downgrades a stack Docker considers running to unhealthy.
		probe := st.HealthProbe
		if st.State == StateRunning && probe != nil && !probe.Healthy {
			st.State = StateUnhealthy
		}
		st.LastUpdated = time.Now()
	}
	return nil
}
// resolveContainerState maps one `docker ps` row to a ContainerState.
//
// dockerState is Docker's State column ("running", "exited", "restarting",
// "paused", "created", "dead", "removing"); dockerStatus is the human-readable
// Status column, which carries healthcheck info for running containers, e.g.
// "Up 3 hours (healthy)", "Up 9 seconds (health: starting)",
// "Up 2 min (unhealthy)". Matching is case-insensitive; unrecognized states
// yield StateUnknown.
func resolveContainerState(dockerState, dockerStatus string) ContainerState {
	normalized := strings.ToLower(strings.TrimSpace(dockerState))

	if normalized == "running" {
		// Only running containers have a health sub-status worth inspecting.
		health := strings.ToLower(dockerStatus)
		switch {
		case strings.Contains(health, "(health: starting)"):
			return StateStarting
		case strings.Contains(health, "(unhealthy)"):
			return StateUnhealthy
		}
		// "(healthy)" or no healthcheck at all.
		return StateRunning
	}

	switch normalized {
	case "exited":
		return StateExited
	case "restarting":
		return StateRestarting
	case "paused":
		return StatePaused
	case "created", "dead", "removing":
		return StateStopped
	}
	return StateUnknown
}
// aggregateState folds the states of a stack's containers into one stack
// state. Precedence, highest first: any unhealthy, any starting, any
// restarting, all running, all stopped/exited, at least one running
// (partial), otherwise stopped. An empty container list means not deployed.
func aggregateState(containers []ContainerInfo) ContainerState {
	total := len(containers)
	if total == 0 {
		return StateNotDeployed
	}

	tally := make(map[ContainerState]int)
	for i := range containers {
		tally[containers[i].State]++
	}
	running := tally[StateRunning]
	// Exited containers count as stopped for aggregation purposes.
	stopped := tally[StateStopped] + tally[StateExited]

	switch {
	case tally[StateUnhealthy] > 0:
		return StateUnhealthy
	case tally[StateStarting] > 0:
		return StateStarting
	case tally[StateRestarting] > 0:
		return StateRestarting
	case running == total:
		return StateRunning
	case stopped == total:
		return StateStopped
	case running > 0:
		// Mixed stack (some running, some not) is reported as running.
		return StateRunning
	default:
		return StateStopped
	}
}
// --- Stack accessors ---

// GetStacks returns deep copies of all known stacks, sorted alphabetically by
// display name for consistent UI ordering.
//
// Stacks that share a display name (for example several stacks with no
// display name at all) are ordered by their unique stack name. Without that
// tie-break the order of such stacks would change between calls, because map
// iteration is random and sort.Slice is not stable.
func (m *Manager) GetStacks() []Stack {
	m.mu.RLock()
	defer m.mu.RUnlock()

	result := make([]Stack, 0, len(m.stacks))
	for _, s := range m.stacks {
		result = append(result, deepCopyStack(s))
	}

	sort.Slice(result, func(i, j int) bool {
		a, b := &result[i], &result[j]
		if a.Meta.DisplayName != b.Meta.DisplayName {
			return a.Meta.DisplayName < b.Meta.DisplayName
		}
		return a.Name < b.Name
	})
	return result
}
// GetStack looks up a stack by name and returns a deep copy of it, so the
// caller can read the result without holding the manager's lock. The boolean
// is false (and the pointer nil) when no such stack is known.
func (m *Manager) GetStack(name string) (*Stack, bool) {
	m.mu.RLock()
	defer m.mu.RUnlock()

	if live, found := m.stacks[name]; found {
		snapshot := deepCopyStack(live)
		return &snapshot, true
	}
	return nil, false
}
// deepCopyStack returns a copy of s that shares no slices, maps or pointers
// with the original for the fields handled below: Containers, AppConfig
// (Env, LockedFields), HealthProbe (Details), Meta.DeployFields (Options),
// Meta.OptionalConfig (Fields) and Meta.HealthCheck (Checks, Expect).
// Nil slices/maps/pointers stay nil; non-nil empty ones stay non-nil.
func deepCopyStack(s *Stack) Stack {
	out := *s

	if src := s.Containers; src != nil {
		out.Containers = make([]ContainerInfo, len(src))
		copy(out.Containers, src)
	}

	if src := s.AppConfig; src != nil {
		dup := *src
		if src.Env != nil {
			dup.Env = make(map[string]string, len(src.Env))
			for key, val := range src.Env {
				dup.Env[key] = val
			}
		}
		if src.LockedFields != nil {
			dup.LockedFields = make([]string, len(src.LockedFields))
			copy(dup.LockedFields, src.LockedFields)
		}
		out.AppConfig = &dup
	}

	if src := s.HealthProbe; src != nil {
		dup := *src
		if src.Details != nil {
			dup.Details = make([]HealthCheckDetail, len(src.Details))
			copy(dup.Details, src.Details)
		}
		out.HealthProbe = &dup
	}

	// Deploy fields carry a nested Options slice that needs its own copy.
	if src := s.Meta.DeployFields; src != nil {
		fields := make([]DeployField, len(src))
		copy(fields, src)
		for i := range src {
			if opts := src[i].Options; opts != nil {
				fields[i].Options = make([]SelectOption, len(opts))
				copy(fields[i].Options, opts)
			}
		}
		out.Meta.DeployFields = fields
	}

	// Optional config groups carry a nested Fields slice.
	if src := s.Meta.OptionalConfig; src != nil {
		groups := make([]OptionalConfigGroup, len(src))
		copy(groups, src)
		for i := range src {
			if inner := src[i].Fields; inner != nil {
				groups[i].Fields = make([]OptionalConfigField, len(inner))
				copy(groups[i].Fields, inner)
			}
		}
		out.Meta.OptionalConfig = groups
	}

	// Health check definition: copy the struct, its Checks and each Expect.
	if src := s.Meta.HealthCheck; src != nil {
		dup := *src
		if src.Checks != nil {
			dup.Checks = make([]HealthCheckItem, len(src.Checks))
			copy(dup.Checks, src.Checks)
			for i := range src.Checks {
				if exp := src.Checks[i].Expect; exp != nil {
					expDup := *exp
					dup.Checks[i].Expect = &expDup
				}
			}
		}
		out.Meta.HealthCheck = &dup
	}

	return out
}
// --- Stack operations ---

// StartStack brings the named stack up with `compose up -d`, passing the
// environment built by stackEnv (process env, DOMAIN and app.yaml values).
// On success the previous health probe result is discarded and container
// status is refreshed. It returns an error when the stack is unknown, the
// compose command fails, or the status refresh fails.
func (m *Manager) StartStack(name string) error {
	stack, found := m.GetStack(name)
	if !found {
		return fmt.Errorf("stack %q not found", name)
	}

	m.logger.Printf("[INFO] Starting stack: %s", name)
	began := time.Now()
	elapsed := func() float64 { return time.Since(began).Seconds() }

	dir := filepath.Dir(stack.ComposePath)
	env := m.stackEnv(dir)
	_, upErr := m.composeExecCustomEnv(dir, env, "up", "-d")
	if upErr != nil {
		m.logger.Printf("[ERROR] Stack %s start failed after %.1fs: %v", name, elapsed(), upErr)
		return fmt.Errorf("starting stack %s: %w", name, upErr)
	}
	m.logger.Printf("[INFO] Stack %s started successfully (took %.1fs)", name, elapsed())
	m.logPostStartStatus(name, dir, env)

	// Drop the stale probe result so the status refresh below cannot re-apply
	// an old unhealthy override; the next health-probes tick (≤10s) will run
	// a fresh probe.
	func() {
		m.mu.Lock()
		defer m.mu.Unlock()
		if live, ok := m.stacks[name]; ok {
			live.HealthProbe = nil
		}
	}()

	return m.RefreshStatus()
}
// StopStack tears the named stack down with `compose down` and refreshes
// container status afterwards. Protected stacks are refused before anything
// else is checked. It returns an error when the stack is protected or unknown,
// when the compose command fails, or when the status refresh fails.
func (m *Manager) StopStack(name string) error {
	if m.cfg.IsProtectedStack(name) {
		return fmt.Errorf("stack %q is protected and cannot be stopped", name)
	}

	stack, found := m.GetStack(name)
	if !found {
		return fmt.Errorf("stack %q not found", name)
	}

	m.logger.Printf("[INFO] Stopping stack: %s", name)
	began := time.Now()

	_, downErr := m.composeExec(filepath.Dir(stack.ComposePath), "down")
	if downErr != nil {
		m.logger.Printf("[ERROR] Stack %s stop failed after %.1fs: %v", name, time.Since(began).Seconds(), downErr)
		return fmt.Errorf("stopping stack %s: %w", name, downErr)
	}

	m.logger.Printf("[INFO] Stack %s stopped successfully (took %.1fs)", name, time.Since(began).Seconds())
	return m.RefreshStatus()
}
// RestartStack re-applies the named stack with `compose up -d`, discards the
// previous health probe result and refreshes container status. It returns an
// error when the stack is unknown, the compose command fails, or the status
// refresh fails.
func (m *Manager) RestartStack(name string) error {
	stack, found := m.GetStack(name)
	if !found {
		return fmt.Errorf("stack %q not found", name)
	}

	m.logger.Printf("[INFO] Restarting stack: %s", name)
	began := time.Now()
	elapsed := func() float64 { return time.Since(began).Seconds() }

	dir := filepath.Dir(stack.ComposePath)
	env := m.stackEnv(dir)

	// "up -d" is used instead of a bare "restart" so that env vars from
	// app.yaml are injected and template changes (new images, healthchecks)
	// are picked up. Plain "docker compose restart" only sends SIGTERM+start
	// to existing containers without re-reading the compose file or env.
	_, upErr := m.composeExecCustomEnv(dir, env, "up", "-d")
	if upErr != nil {
		m.logger.Printf("[ERROR] Stack %s restart failed after %.1fs: %v", name, elapsed(), upErr)
		return fmt.Errorf("restarting stack %s: %w", name, upErr)
	}
	m.logger.Printf("[INFO] Stack %s restarted successfully (took %.1fs)", name, elapsed())
	m.logPostStartStatus(name, dir, env)

	// Drop the stale probe result so the status refresh below cannot re-apply
	// an old unhealthy override.
	func() {
		m.mu.Lock()
		defer m.mu.Unlock()
		if live, ok := m.stacks[name]; ok {
			live.HealthProbe = nil
		}
	}()

	return m.RefreshStatus()
}
// UpdateStack pulls the images referenced by the named stack and re-applies
// it with `compose up -d --remove-orphans`.
//
// In debug mode the locally available images are logged first. After a
// successful update the previous health probe result is discarded — exactly as
// StartStack and RestartStack do — because the update re-applies the stack and
// a stale unhealthy result would otherwise be re-applied by the status
// refresh. It returns an error when the stack is unknown, when pull or up
// fails, or when the status refresh fails.
func (m *Manager) UpdateStack(name string) error {
	stack, ok := m.GetStack(name)
	if !ok {
		return fmt.Errorf("stack %q not found", name)
	}
	m.logger.Printf("[INFO] Updating stack: %s", name)
	start := time.Now()
	dir := filepath.Dir(stack.ComposePath)
	env := m.stackEnv(dir)
	if m.isDebug() {
		m.checkLocalImages(name, dir)
	}
	if _, err := m.composeExecCustomEnv(dir, env, "pull"); err != nil {
		m.logger.Printf("[ERROR] Stack %s update (pull) failed after %.1fs: %v", name, time.Since(start).Seconds(), err)
		return fmt.Errorf("pulling images for %s: %w", name, err)
	}
	if _, err := m.composeExecCustomEnv(dir, env, "up", "-d", "--remove-orphans"); err != nil {
		m.logger.Printf("[ERROR] Stack %s update (up) failed after %.1fs: %v", name, time.Since(start).Seconds(), err)
		return fmt.Errorf("recreating %s: %w", name, err)
	}
	m.logger.Printf("[INFO] Stack %s updated successfully (took %.1fs)", name, time.Since(start).Seconds())
	m.logPostStartStatus(name, dir, env)

	// Clear the stale health probe so the refresh below won't re-apply an old
	// unhealthy override to the re-applied stack.
	m.mu.Lock()
	if s, ok := m.stacks[name]; ok {
		s.HealthProbe = nil
	}
	m.mu.Unlock()

	return m.RefreshStatus()
}
// GetLogs returns the last `lines` log lines of the named stack as produced by
// `compose logs --tail N --no-color`. A non-positive line count defaults to
// 100 and anything above 1000 is capped at 1000. It returns an error when the
// stack is unknown or the compose command fails.
func (m *Manager) GetLogs(name string, lines int) (string, error) {
	stack, found := m.GetStack(name)
	if !found {
		return "", fmt.Errorf("stack %q not found", name)
	}

	switch {
	case lines <= 0:
		lines = 100
	case lines > 1000:
		lines = 1000
	}
	m.logger.Printf("[DEBUG] Fetching logs for %s (tail %d)", name, lines)

	composeDir := filepath.Dir(stack.ComposePath)
	logText, err := m.composeExec(composeDir, "logs", "--tail", strconv.Itoa(lines), "--no-color")
	if err != nil {
		m.logger.Printf("[WARN] Failed to fetch logs for %s: %v", name, err)
		return "", fmt.Errorf("getting logs for %s: %w", name, err)
	}

	if size := len(logText); size == 0 {
		m.logger.Printf("[DEBUG] Logs result for %s: 0 bytes returned (empty)", name)
	} else {
		m.logger.Printf("[DEBUG] Logs result for %s: %d bytes returned", name, size)
	}
	return logText, nil
}
// --- Env and compose helpers ---

// stackEnv assembles the environment for compose commands run in stackDir:
// the controller's own process environment, followed by DOMAIN from the
// customer config, followed by the env vars from the stack's app.yaml
// (loaded in decrypted form) when that file can be loaded.
func (m *Manager) stackEnv(stackDir string) []string {
	merged := os.Environ()
	// DOMAIN is always injected.
	merged = append(merged, fmt.Sprintf("DOMAIN=%s", m.cfg.Customer.Domain))

	appCfg := LoadAppConfigDecrypted(stackDir, m.encKey)
	if appCfg == nil {
		return merged
	}
	for key, val := range appCfg.Env {
		merged = append(merged, fmt.Sprintf("%s=%s", key, val))
	}
	return merged
}
// composeExec runs a compose sub-command in dir with the default stack
// environment. Passing a nil env makes composeExecCustomEnv build the
// environment itself via stackEnv(dir). Returns the command's stdout.
func (m *Manager) composeExec(dir string, args ...string) (string, error) {
return m.composeExecCustomEnv(dir, nil, args...)
}
// composeExecCustomEnv runs the configured compose command with args inside
// dir and returns its stdout.
//
// env is used as the process environment; when it is nil the environment is
// built with stackEnv(dir). The configured command is split on whitespace into
// a binary and leading arguments, so "docker compose", "docker-compose" and
// custom values (for example an absolute path to docker-compose) are all
// executed as configured instead of silently falling back to "docker-compose".
//
// On failure stdout collected so far is still returned. The error is
// "exit code N\nstderr: …" when the command ran and exited non-zero, and wraps
// the underlying cause when the command could not be run at all (for example
// binary not found), so the reason is not lost behind "exit code -1".
func (m *Manager) composeExecCustomEnv(dir string, env []string, args ...string) (string, error) {
	cmdParts := strings.Fields(m.composeCmd)
	if len(cmdParts) == 0 {
		// Defensive default for a Manager without a compose command.
		cmdParts = []string{"docker-compose"}
	}
	fullArgs := make([]string, 0, len(cmdParts)-1+len(args))
	fullArgs = append(fullArgs, cmdParts[1:]...)
	fullArgs = append(fullArgs, args...)
	cmd := exec.Command(cmdParts[0], fullArgs...)
	cmd.Dir = dir

	if env == nil {
		env = m.stackEnv(dir)
	}
	cmd.Env = env

	// At debug level, log env var names (never values). Names that are
	// entirely upper-case and do not start with "_" are listed; all others
	// are only counted. Note that this also lists upper-case process
	// variables such as PATH or HOME.
	if m.isDebug() {
		var appKeys []string
		sysCount := 0
		for _, e := range env {
			parts := strings.SplitN(e, "=", 2)
			if len(parts) != 2 {
				continue
			}
			key := parts[0]
			if strings.ToUpper(key) == key && !strings.HasPrefix(key, "_") {
				appKeys = append(appKeys, key)
			} else {
				sysCount++
			}
		}
		if len(appKeys) > 0 {
			m.logger.Printf("[DEBUG] Env vars for compose: [%s] (%d app + %d system)",
				strings.Join(appKeys, ", "), len(appKeys), sysCount)
		}
	}

	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	argLine := strings.Join(args, " ")
	m.logger.Printf("[DEBUG] Running: %s %s (in %s)", m.composeCmd, argLine, dir)
	start := time.Now()
	if err := cmd.Run(); err != nil {
		elapsed := time.Since(start)
		exitCode := -1
		exitErr, exited := err.(*exec.ExitError)
		if exited {
			exitCode = exitErr.ExitCode()
		}
		m.logger.Printf("[ERROR] Command failed: %s %s (in %s) — exit code %d (took %.1fs)",
			m.composeCmd, argLine, dir, exitCode, elapsed.Seconds())
		if stdoutStr := truncateStr(stdout.String(), 500); stdoutStr != "" {
			m.logger.Printf("[ERROR] stdout: %s", stdoutStr)
		}
		if stderrStr := truncateStr(stderr.String(), 500); stderrStr != "" {
			m.logger.Printf("[ERROR] stderr: %s", stderrStr)
		}
		if !exited {
			// The process never produced an exit status; surface the cause.
			return stdout.String(), fmt.Errorf("running %s: %w", m.composeCmd, err)
		}
		return stdout.String(), fmt.Errorf("exit code %d\nstderr: %s", exitCode, truncateStr(stderr.String(), 500))
	}
	m.logger.Printf("[DEBUG] Command completed: %s %s (took %.1fs)", m.composeCmd, argLine, time.Since(start).Seconds())
	return stdout.String(), nil
}
// execCommand runs an arbitrary command (inheriting the controller's working
// directory and environment) and returns its stdout. On failure it returns an
// empty string and an error that wraps the run error and includes the command
// line and captured stderr.
func (m *Manager) execCommand(name string, args ...string) (string, error) {
	var outBuf, errBuf bytes.Buffer

	proc := exec.Command(name, args...)
	proc.Stdout, proc.Stderr = &outBuf, &errBuf

	runErr := proc.Run()
	if runErr == nil {
		return outBuf.String(), nil
	}
	return "", fmt.Errorf("exec %s %s: %w\nstderr: %s", name, strings.Join(args, " "), runErr, errBuf.String())
}
// isDebug returns true if logging level is "debug".
// The comparison is exact and case-sensitive, so "DEBUG" or "Debug" in the
// config does not enable debug output.
func (m *Manager) isDebug() bool {
return m.cfg.Logging.Level == "debug"
}
// truncateStr trims surrounding whitespace from s and, if the result is longer
// than maxLen bytes, cuts it to at most maxLen bytes and appends "...".
//
// The cut never lands inside a multi-byte UTF-8 sequence: it is moved back to
// the start of the rune that would otherwise be split, so the output stays
// valid UTF-8 (it may then be slightly shorter than maxLen). A negative maxLen
// is treated as 0 instead of panicking.
func truncateStr(s string, maxLen int) string {
	s = strings.TrimSpace(s)
	if maxLen < 0 {
		maxLen = 0
	}
	if len(s) <= maxLen {
		return s
	}
	cut := maxLen
	// UTF-8 continuation bytes have the bit pattern 10xxxxxx; step back until
	// the cut sits on a rune boundary.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + "..."
}
// logPostStartStatus logs the container table of a stack shortly after a
// start/deploy operation. The work happens in a background goroutine that
// waits three seconds before running `compose ps -a`, so the caller is not
// blocked. The goroutine works on its own copy of env.
func (m *Manager) logPostStartStatus(name, stackDir string, env []string) {
	snapshot := make([]string, len(env))
	copy(snapshot, env)

	report := func() {
		time.Sleep(3 * time.Second)

		table, err := m.composeExecCustomEnv(stackDir, snapshot, "ps", "-a", "--format", "table {{.Name}}\t{{.Image}}\t{{.State}}\t{{.Status}}")
		if err != nil {
			m.logger.Printf("[WARN] Post-start status check failed for %s: %v", name, err)
			return
		}

		rows := strings.Split(strings.TrimSpace(table), "\n")
		// The first row is the table header; nothing after it means no
		// containers were reported.
		if len(rows) <= 1 {
			m.logger.Printf("[WARN] Post-start status for %s: no containers found", name)
			return
		}

		m.logger.Printf("[INFO] Stack %s post-start status:", name)
		for idx := 1; idx < len(rows); idx++ {
			m.logger.Printf("[INFO] %s", rows[idx])
		}
	}
	go report()
}
// checkLocalImages scans the stack's compose file line by line for `image:`
// entries and logs, for each static reference, whether the image is already
// present locally (via `docker image inspect`). References containing "${"
// are skipped. Purely informational: it logs results and never fails.
func (m *Manager) checkLocalImages(name, stackDir string) {
	var (
		content []byte
		readErr error
	)
	// Try the .yml spelling first, then .yaml.
	for _, fileName := range []string{"docker-compose.yml", "docker-compose.yaml"} {
		content, readErr = os.ReadFile(filepath.Join(stackDir, fileName))
		if readErr == nil {
			break
		}
	}
	if readErr != nil {
		m.logger.Printf("[DEBUG] Could not read compose file for image check: %v", readErr)
		return
	}

	var refs []string
	for _, raw := range strings.Split(string(content), "\n") {
		entry := strings.TrimSpace(raw)
		if !strings.HasPrefix(entry, "image:") {
			continue
		}
		ref := strings.TrimSpace(strings.TrimPrefix(entry, "image:"))
		ref = strings.Trim(ref, "\"'")
		if ref == "" || strings.Contains(ref, "${") {
			continue
		}
		refs = append(refs, ref)
	}

	if len(refs) == 0 {
		m.logger.Printf("[DEBUG] No static image references found in %s compose file", name)
		return
	}

	m.logger.Printf("[INFO] Deploying stack %s — checking %d images...", name, len(refs))
	for _, ref := range refs {
		inspectErr := exec.Command("docker", "image", "inspect", ref).Run()
		if inspectErr == nil {
			m.logger.Printf("[DEBUG] %s — found locally", ref)
		} else {
			m.logger.Printf("[DEBUG] %s — not found locally, will pull", ref)
		}
	}
}
// --- Memory helpers ---

// ParseMemoryMB parses a memory string like "500M", "1G", "1.5G", "1024M",
// "2GB" or "768" into whole megabytes (fractions are truncated).
//
// Recognized unit suffixes are G/GB (×1024) and M/MB, matched
// case-insensitively; a plain number is taken as megabytes. Whitespace around
// the value and between the number and its unit is ignored ("1 GB").
//
// It returns 0 for empty or unparseable input, and also for values that cannot
// be a memory size: negative numbers, NaN, infinities and values too large to
// be represented as an int on every platform.
func ParseMemoryMB(s string) int {
	s = strings.TrimSpace(s)
	if s == "" {
		return 0
	}

	// Two-letter suffixes are listed before their one-letter prefix-free
	// variants so that "GB" is stripped as a whole.
	units := []struct {
		suffix    string
		mbPerUnit float64
	}{
		{"GB", 1024},
		{"G", 1024},
		{"MB", 1},
		{"M", 1},
	}

	number, factor := s, 1.0
	upper := strings.ToUpper(s)
	for _, u := range units {
		if strings.HasSuffix(upper, u.suffix) {
			number = strings.TrimSpace(strings.TrimSuffix(upper, u.suffix))
			factor = u.mbPerUnit
			break
		}
	}

	val, err := strconv.ParseFloat(number, 64)
	if err != nil {
		return 0
	}

	mb := val * factor
	// Largest value accepted; fits in int even where int is 32 bits wide.
	const maxMB = 1<<31 - 1
	// Written as a positive range check so that NaN (for which every
	// comparison is false) is rejected too.
	if !(mb >= 0 && mb <= maxMB) {
		return 0
	}
	return int(mb)
}
// CommittedMemory totals the mem_request and mem_limit values (in MB) of every
// deployed stack whose state is neither stopped nor exited; those two states
// are left out since such apps do not consume memory. Unparseable or missing
// values count as 0.
func (m *Manager) CommittedMemory() (requestMB int, limitMB int) {
	m.mu.RLock()
	defer m.mu.RUnlock()

	var reqTotal, limTotal int
	for _, st := range m.stacks {
		idle := st.State == StateStopped || st.State == StateExited
		if !st.Deployed || idle {
			continue
		}
		res := st.Meta.Resources
		reqTotal += ParseMemoryMB(res.MemRequest)
		limTotal += ParseMemoryMB(res.MemLimit)
	}
	return reqTotal, limTotal
}
// StackMemoryMB reports the mem_request of the named stack in megabytes.
// It returns 0 when the stack is unknown or the value cannot be parsed.
func (m *Manager) StackMemoryMB(name string) int {
	m.mu.RLock()
	defer m.mu.RUnlock()

	st, known := m.stacks[name]
	if !known {
		return 0
	}
	return ParseMemoryMB(st.Meta.Resources.MemRequest)
}
// getCatalogTemplateSlugs lists the templates present in the synced catalog
// cache (<DataDir>/catalog-cache/templates). The result is a set keyed by
// template directory name, containing only directories that hold a
// docker-compose.yml. It returns nil — after logging a warning — when the
// cache directory cannot be read, which callers use to skip orphan detection.
func (m *Manager) getCatalogTemplateSlugs() map[string]bool {
	templatesDir := filepath.Join(m.cfg.Paths.DataDir, "catalog-cache", "templates")

	listing, err := os.ReadDir(templatesDir)
	if err != nil {
		m.logger.Printf("[WARN] Cannot read catalog cache for orphan detection: %v", err)
		return nil
	}

	known := make(map[string]bool, len(listing))
	for _, item := range listing {
		if !item.IsDir() {
			continue
		}
		slug := item.Name()
		if _, statErr := os.Stat(filepath.Join(templatesDir, slug, "docker-compose.yml")); statErr != nil {
			continue
		}
		known[slug] = true
	}
	return known
}