Files
deploy-felhom-compose/controller/internal/stacks/manager.go
T
admin 95c821deb2 feat: comprehensive debug logging across all controller modules
Add detailed [DEBUG] logging to every controller module when
logging.level is set to "debug". Each module with stateful debug
uses SetDebug(bool) wired from main.go. Covers stacks, backup,
cloudflare, integrations, system, monitor, settings, scheduler,
web handlers, storage, metrics, API, selfupdate, and assets.

Also includes the app export/import (.fab bundles) feature from
v0.32.0 and its debug page integration.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 18:14:43 +01:00

1070 lines
30 KiB
Go

package stacks
import (
"bytes"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
"sort"
"strconv"
"strings"
"sync"
"time"
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
"gitea.dooplex.hu/admin/felhom-controller/internal/crypto"
)
// ContainerState is the normalized state string used in this package for both
// individual containers (ContainerInfo.State) and whole stacks (Stack.State).
type ContainerState string
// Known ContainerState values.
const (
StateRunning ContainerState = "running"
StateStarting ContainerState = "starting" // running but health: starting
StateUnhealthy ContainerState = "unhealthy" // running but health: unhealthy
StateStopped ContainerState = "stopped" // also used by resolveContainerState for Docker's "created", "dead" and "removing"
StateRestarting ContainerState = "restarting"
StateExited ContainerState = "exited"
StatePaused ContainerState = "paused"
StateUnknown ContainerState = "unknown" // Docker state string not recognized by resolveContainerState
StateNotDeployed ContainerState = "not_deployed" // no containers and the stack is not marked deployed
StateDeploying ContainerState = "deploying" // compose up in progress (image pull, etc.)
StateOrphaned ContainerState = "orphaned" // NOTE(review): not assigned in the visible part of this file; Stack.Orphaned carries the flag — confirm usage
)
// ContainerInfo holds status info about a single container within a stack.
// refreshStatusLocked fills it from one line of `docker ps -a` output.
type ContainerInfo struct {
Name string `json:"name"` // container name ({{.Names}} column)
Image string `json:"image"` // image reference ({{.Image}} column)
State ContainerState `json:"state"` // Docker state combined with health info from Status (see resolveContainerState)
Status string `json:"status"` // e.g. "Up 3 hours (healthy)"
}
// HealthProbeResult holds the latest controller-side health probe result.
// refreshStatusLocked downgrades a running stack to unhealthy when Healthy is
// false; the probe itself is produced outside the visible part of this file.
type HealthProbeResult struct {
Healthy bool `json:"healthy"` // overall verdict of the probe
LastCheck time.Time `json:"last_check"` // presumably when the probe last ran — set outside this view, confirm
Details []HealthCheckDetail `json:"details"` // per-check results
}
// HealthCheckDetail holds the result of a single health check item inside a
// HealthProbeResult.
type HealthCheckDetail struct {
Type string `json:"type"` // "http", "api", "tcp"
Target string `json:"target"` // e.g. ":3456/api/v1/info"
Healthy bool `json:"healthy"` // whether this individual check passed
Status int `json:"status,omitempty"` // HTTP status code (for http/api)
Latency string `json:"latency"` // e.g. "45ms"
Error string `json:"error,omitempty"` // error message if unhealthy
}
// Stack represents a docker compose stack on disk together with its last
// known runtime status. Instances stored in Manager.stacks are shared and
// guarded by Manager.mu; GetStack/GetStacks hand out deep copies.
type Stack struct {
Name string `json:"name"` // stack directory name; also matched against the compose project label in docker ps
Meta Metadata `json:"meta"` // loaded via LoadMetadata from the stack directory
ComposePath string `json:"compose_path"` // path to docker-compose.yml or docker-compose.yaml
State ContainerState `json:"state"` // aggregated state, recomputed by refreshStatusLocked
Deployed bool `json:"deployed"` // Has app.yaml with deployed=true
Protected bool `json:"protected"` // result of cfg.IsProtectedStack(name); StopStack refuses protected stacks
Orphaned bool `json:"orphaned"` // Deployed but no catalog template
Containers []ContainerInfo `json:"containers"` // containers found for this project, nil when none
AppConfig *AppConfig `json:"app_config,omitempty"` // loaded via LoadAppConfig; nil when absent
Deploying bool `json:"deploying"` // compose up in progress
DeployError string `json:"deploy_error,omitempty"` // last async deploy error
HealthProbe *HealthProbeResult `json:"health_probe,omitempty"` // controller-side probe result
LastUpdated time.Time `json:"last_updated"` // time of the last status refresh
}
// Manager handles all docker compose stack operations: discovering stacks on
// disk, tracking their container status and running compose commands.
type Manager struct {
cfg *config.Config // controller configuration (paths, customer domain, logging level, ...)
logger *log.Logger // destination for all log lines emitted by this package
composeCmd string // "docker compose" or "docker-compose" (configured or auto-detected in NewManager)
stacks map[string]*Stack // known stacks keyed by stack (directory) name
mu sync.RWMutex // guards stacks; SetEncryptionKey also holds it while writing encKey
encKey []byte // AES-256 key for encrypting sensitive values in app.yaml
}
// NewManager builds a Manager for the stacks directory named in cfg.
//
// The compose command is taken from cfg.Stacks.ComposeCommand; when that is
// empty it is auto-detected. An error is returned when no compose command can
// be determined or when the stacks directory cannot be created.
func NewManager(cfg *config.Config, logger *log.Logger) (*Manager, error) {
	cmdName := cfg.Stacks.ComposeCommand
	if cmdName == "" {
		if cmdName = detectComposeCommand(); cmdName == "" {
			return nil, fmt.Errorf("docker compose not found (tried 'docker compose' and 'docker-compose')")
		}
	}
	logger.Printf("[INFO] Using compose command: %s", cmdName)

	stacksDir := cfg.Paths.StacksDir
	if err := os.MkdirAll(stacksDir, 0755); err != nil {
		return nil, fmt.Errorf("creating stacks directory %s: %w", stacksDir, err)
	}

	mgr := &Manager{
		cfg:        cfg,
		logger:     logger,
		composeCmd: cmdName,
		stacks:     make(map[string]*Stack),
	}
	return mgr, nil
}
// SetEncryptionKey installs the AES-256 key used to encrypt and decrypt
// sensitive values in app.yaml. The slice is stored as-is (not copied).
func (m *Manager) SetEncryptionKey(key []byte) {
	m.mu.Lock()
	m.encKey = key
	m.mu.Unlock()
}
// GetStacksBaseDir reports the configured directory under which every stack
// has its own sub-directory.
func (m *Manager) GetStacksBaseDir() string {
	baseDir := m.cfg.Paths.StacksDir
	return baseDir
}
// MigrateEncryption rewrites app.yaml for every deployed stack that still
// stores a non-empty, unencrypted value in one of its sensitive env vars.
// It is a no-op when no encryption key has been set. Failures are logged per
// stack and do not stop the remaining stacks from being processed.
// Called once on startup.
func (m *Manager) MigrateEncryption() {
	m.mu.Lock()
	defer m.mu.Unlock()

	if m.encKey == nil {
		if m.isDebug() {
			m.logger.Printf("[DEBUG] [stacks] MigrateEncryption: no encryption key set, skipping")
		}
		return
	}

	if m.isDebug() {
		candidates := 0
		for _, st := range m.stacks {
			if st.Deployed {
				candidates++
			}
		}
		m.logger.Printf("[DEBUG] [stacks] MigrateEncryption: checking %d deployed stacks for plaintext sensitive values", candidates)
	}

	var migrated int
	for _, st := range m.stacks {
		if !st.Deployed {
			continue
		}

		dir := filepath.Dir(st.ComposePath)
		appCfg := LoadAppConfig(dir)
		if appCfg == nil {
			continue
		}

		meta := LoadMetadata(dir)
		sensitive := SensitiveEnvVars(&meta)
		if len(sensitive) == 0 {
			continue
		}
		if m.isDebug() {
			m.logger.Printf("[DEBUG] [stacks] MigrateEncryption: checking stack %q (%d sensitive fields)", st.Name, len(sensitive))
		}

		// A single plaintext value is enough to require a re-save.
		plaintextFound := false
		for _, key := range sensitive {
			val, present := appCfg.Env[key]
			if present && val != "" && !crypto.IsEncrypted(val) {
				plaintextFound = true
				break
			}
		}
		if !plaintextFound {
			continue
		}

		if m.isDebug() {
			m.logger.Printf("[DEBUG] [stacks] MigrateEncryption: stack %q needs migration — re-saving with encryption", st.Name)
		}
		err := SaveAppConfig(dir, appCfg, m.encKey, sensitive)
		if err != nil {
			m.logger.Printf("[WARN] Encryption migration failed for %s: %v", st.Name, err)
			continue
		}
		migrated++
	}

	if migrated > 0 {
		m.logger.Printf("[INFO] Encrypted sensitive values in %d app.yaml file(s)", migrated)
	}
}
// toTitleCase upper-cases the first character of every whitespace-separated
// word in s and joins the words with single spaces. Leading, trailing and
// repeated whitespace is collapsed (strings.Fields semantics).
//
// The first character is handled as a whole UTF-8 rune, so words starting
// with a multi-byte letter (e.g. "élet") are capitalized correctly instead of
// having their first byte mangled.
func toTitleCase(s string) string {
	words := strings.Fields(s)
	for i, w := range words {
		// Ranging over a string yields the byte offset of each rune; the
		// first non-zero offset is therefore the byte width of the first rune.
		head := len(w)
		for off := range w {
			if off > 0 {
				head = off
				break
			}
		}
		words[i] = strings.ToUpper(w[:head]) + w[head:]
	}
	return strings.Join(words, " ")
}
// detectComposeCommand probes for a usable compose front-end. It prefers the
// `docker compose` plugin (checked by running `docker compose version`) and
// falls back to a standalone `docker-compose` binary on PATH. It returns the
// command string, or "" when neither is available.
func detectComposeCommand() string {
	pluginErr := exec.Command("docker", "compose", "version").Run()
	if pluginErr == nil {
		return "docker compose"
	}

	_, lookupErr := exec.LookPath("docker-compose")
	if lookupErr != nil {
		return ""
	}
	return "docker-compose"
}
// DeployedStackNames lists the names of every stack currently marked as
// deployed. The order is unspecified (map iteration) and the result is nil
// when no stack is deployed.
func (m *Manager) DeployedStackNames() []string {
	m.mu.RLock()
	defer m.mu.RUnlock()

	var deployed []string
	for stackName := range m.stacks {
		if !m.stacks[stackName].Deployed {
			continue
		}
		deployed = append(deployed, stackName)
	}
	return deployed
}
// ScanStacks rebuilds the in-memory stack table from the stacks directory.
//
// Every sub-directory containing docker-compose.yml (or, failing that,
// docker-compose.yaml) is treated as a stack. Known stacks are updated in
// place, new ones are added, and stacks whose directory disappeared are
// dropped. When the catalog cache is readable, deployed non-protected stacks
// without a catalog template are flagged as orphaned. Finally the container
// status of all stacks is refreshed; that refresh's error is returned.
func (m *Manager) ScanStacks() error {
	m.mu.Lock()
	defer m.mu.Unlock()

	root := m.cfg.Paths.StacksDir
	entries, err := os.ReadDir(root)
	if err != nil {
		return fmt.Errorf("reading stacks directory: %w", err)
	}

	seen := make(map[string]bool)
	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}
		name := entry.Name()
		dir := filepath.Join(root, name)

		// Prefer the .yml spelling, fall back to .yaml; skip directories
		// that have neither.
		composeFile := filepath.Join(dir, "docker-compose.yml")
		if _, statErr := os.Stat(composeFile); os.IsNotExist(statErr) {
			composeFile = filepath.Join(dir, "docker-compose.yaml")
			if _, statErr = os.Stat(composeFile); os.IsNotExist(statErr) {
				continue
			}
		}
		seen[name] = true

		meta := LoadMetadata(dir)
		appCfg := LoadAppConfig(dir)
		isDeployed := appCfg != nil && appCfg.Deployed
		if m.isDebug() {
			m.logger.Printf("[DEBUG] [stacks] ScanStacks: found stack %q deployed=%v composePath=%s", name, isDeployed, composeFile)
		}

		known, exists := m.stacks[name]
		if !exists {
			m.stacks[name] = &Stack{
				Name:        name,
				Meta:        meta,
				ComposePath: composeFile,
				State:       StateNotDeployed,
				Deployed:    isDeployed,
				Protected:   m.cfg.IsProtectedStack(name),
				AppConfig:   appCfg,
			}
			continue
		}

		known.ComposePath = composeFile
		known.Meta = meta
		known.Protected = m.cfg.IsProtectedStack(name)
		// While an async deploy is running, its goroutine owns
		// Deployed/AppConfig, so leave them untouched (H3 fix).
		if !known.Deploying {
			known.Deployed = isDeployed
			known.AppConfig = appCfg
		}
	}

	// Forget stacks whose directory (or compose file) is gone.
	for name := range m.stacks {
		if seen[name] {
			continue
		}
		delete(m.stacks, name)
	}

	// Orphan detection: deployed stacks that no longer have a catalog template.
	templates := m.getCatalogTemplateSlugs()
	if m.isDebug() {
		switch {
		case templates != nil:
			m.logger.Printf("[DEBUG] [stacks] ScanStacks: catalog has %d template slugs for orphan detection", len(templates))
		default:
			m.logger.Printf("[DEBUG] [stacks] ScanStacks: catalog templates unavailable, skipping orphan detection")
		}
	}
	if templates != nil {
		orphans := 0
		for _, st := range m.stacks {
			st.Orphaned = !st.Protected && st.Deployed && !templates[st.Name]
			if !st.Orphaned {
				continue
			}
			orphans++
			if m.isDebug() {
				m.logger.Printf("[DEBUG] [stacks] ScanStacks: stack %q is orphaned (deployed but not in catalog)", st.Name)
			}
		}
		if orphans > 0 {
			m.logger.Printf("[INFO] Detected %d orphaned stack(s)", orphans)
		}
	}

	total, deployedTotal := len(m.stacks), 0
	for _, st := range m.stacks {
		if st.Deployed {
			deployedTotal++
		}
	}
	m.logger.Printf("[INFO] Scanned stacks: %d found (%d deployed, %d available)",
		total, deployedTotal, total-deployedTotal)

	return m.refreshStatusLocked()
}
// RefreshStatus re-reads container status from Docker for all known stacks.
// It takes the write lock and delegates to refreshStatusLocked.
func (m *Manager) RefreshStatus() error {
	m.mu.Lock()
	defer m.mu.Unlock()

	err := m.refreshStatusLocked()
	return err
}
// refreshStatusLocked queries `docker ps -a`, groups containers by their
// compose project label and updates Containers, State and LastUpdated on
// every known stack. The caller must hold m.mu for writing.
//
// Containers without a compose project label are ignored. A stack that
// Docker reports as running is downgraded to unhealthy when the last
// controller-side health probe failed.
func (m *Manager) refreshStatusLocked() error {
	const psFormat = "{{.Names}}\t{{.Image}}\t{{.State}}\t{{.Status}}\t{{.Label \"com.docker.compose.project\"}}"

	raw, err := m.execCommand("docker", "ps", "-a", "--format", psFormat, "--no-trunc")
	if err != nil {
		return fmt.Errorf("docker ps: %w", err)
	}

	byProject := make(map[string][]ContainerInfo)
	parsed := 0
	for _, row := range strings.Split(strings.TrimSpace(raw), "\n") {
		if row == "" {
			continue
		}
		cols := strings.SplitN(row, "\t", 5)
		if len(cols) < 5 {
			continue
		}
		project := cols[4]
		if project == "" {
			continue
		}
		byProject[project] = append(byProject[project], ContainerInfo{
			Name:   cols[0],
			Image:  cols[1],
			State:  resolveContainerState(cols[2], cols[3]),
			Status: cols[3],
		})
		parsed++
	}
	if m.isDebug() {
		m.logger.Printf("[DEBUG] [stacks] refreshStatusLocked: docker ps returned %d containers across %d projects", parsed, len(byProject))
	}

	for name, st := range m.stacks {
		if found, ok := byProject[name]; ok {
			st.Containers = found
			st.State = aggregateState(found)
		} else {
			// No containers at all: the state depends on what the
			// controller knows about the stack.
			st.Containers = nil
			switch {
			case st.Deploying:
				st.State = StateDeploying
			case st.Deployed:
				st.State = StateStopped
			default:
				st.State = StateNotDeployed
			}
		}

		// A failed controller-side probe overrides Docker's "running".
		probe := st.HealthProbe
		if st.State == StateRunning && probe != nil && !probe.Healthy {
			st.State = StateUnhealthy
		}

		if m.isDebug() {
			m.logger.Printf("[DEBUG] [stacks] refreshStatusLocked: stack %q → state=%s containers=%d", name, st.State, len(st.Containers))
		}
		st.LastUpdated = time.Now()
	}
	return nil
}
// resolveContainerState maps Docker's State and Status columns to a single
// ContainerState.
//
// dockerState is one of Docker's lifecycle words ("running", "exited",
// "restarting", "paused", "created", "dead", "removing"); it is trimmed and
// compared case-insensitively. dockerStatus is the human-readable text such
// as "Up 3 hours (healthy)", "Up 9 seconds (health: starting)" or
// "Up 2 min (unhealthy)" and is only consulted for running containers.
// Unrecognized states yield StateUnknown.
func resolveContainerState(dockerState, dockerStatus string) ContainerState {
	normalized := strings.ToLower(strings.TrimSpace(dockerState))

	if normalized != "running" {
		direct := map[string]ContainerState{
			"exited":     StateExited,
			"restarting": StateRestarting,
			"paused":     StatePaused,
			"created":    StateStopped,
			"dead":       StateStopped,
			"removing":   StateStopped,
		}
		if mapped, ok := direct[normalized]; ok {
			return mapped
		}
		return StateUnknown
	}

	// Running: refine using the health marker in the status text, if any.
	health := strings.ToLower(dockerStatus)
	switch {
	case strings.Contains(health, "(health: starting)"):
		return StateStarting
	case strings.Contains(health, "(unhealthy)"):
		return StateUnhealthy
	}
	// "(healthy)" or no healthcheck at all.
	return StateRunning
}
// aggregateState folds the states of a stack's containers into one stack
// state. An empty list yields StateNotDeployed. Otherwise the first matching
// rule wins:
//
//	any unhealthy        → unhealthy
//	any starting         → starting
//	any restarting       → restarting
//	all running          → running
//	all stopped/exited   → stopped
//	at least one running → running (partially up)
//	anything else        → stopped
func aggregateState(containers []ContainerInfo) ContainerState {
	total := len(containers)
	if total == 0 {
		return StateNotDeployed
	}

	tally := make(map[ContainerState]int, total)
	for i := range containers {
		tally[containers[i].State]++
	}
	running := tally[StateRunning]
	stopped := tally[StateStopped] + tally[StateExited]

	switch {
	case tally[StateUnhealthy] > 0:
		return StateUnhealthy
	case tally[StateStarting] > 0:
		return StateStarting
	case tally[StateRestarting] > 0:
		return StateRestarting
	case running == total:
		return StateRunning
	case stopped == total:
		return StateStopped
	case running > 0:
		// Mixed: some containers are up, report the stack as running.
		return StateRunning
	default:
		return StateStopped
	}
}
// --- Stack accessors ---
// GetStacks returns deep copies of all known stacks, sorted by display name
// so the UI gets a consistent ordering.
//
// Stacks with equal (for example empty) display names are ordered by their
// unique stack name. Without this tie-break their relative order would
// change from call to call, because map iteration is randomized and
// sort.Slice is not stable.
func (m *Manager) GetStacks() []Stack {
	m.mu.RLock()
	defer m.mu.RUnlock()

	result := make([]Stack, 0, len(m.stacks))
	for _, s := range m.stacks {
		result = append(result, deepCopyStack(s))
	}

	sort.Slice(result, func(i, j int) bool {
		a, b := &result[i], &result[j]
		if a.Meta.DisplayName != b.Meta.DisplayName {
			return a.Meta.DisplayName < b.Meta.DisplayName
		}
		return a.Name < b.Name
	})
	return result
}
// GetStack looks up a stack by name and returns a deep copy of it, so the
// caller may read or modify the result without holding the manager lock.
// The boolean is false (and the pointer nil) when the stack is unknown.
func (m *Manager) GetStack(name string) (*Stack, bool) {
	m.mu.RLock()
	defer m.mu.RUnlock()

	if live, found := m.stacks[name]; found {
		snapshot := deepCopyStack(live)
		return &snapshot, true
	}
	return nil, false
}
// deepCopyStack returns a copy of *s that shares no slices, maps or pointers
// with the original for the fields handled below: Containers, AppConfig
// (Env, LockedFields), HealthProbe (Details) and, inside Meta, DeployFields
// (with Options), OptionalConfig (with Fields), Integrations and HealthCheck
// (with Checks and each check's Expect). Nil fields stay nil; non-nil empty
// slices stay non-nil and empty.
func deepCopyStack(s *Stack) Stack {
	dup := *s

	if src := s.Containers; src != nil {
		dup.Containers = make([]ContainerInfo, len(src))
		copy(dup.Containers, src)
	}

	if src := s.AppConfig; src != nil {
		ac := *src
		if src.Env != nil {
			ac.Env = make(map[string]string, len(src.Env))
			for key, val := range src.Env {
				ac.Env[key] = val
			}
		}
		if src.LockedFields != nil {
			ac.LockedFields = make([]string, len(src.LockedFields))
			copy(ac.LockedFields, src.LockedFields)
		}
		dup.AppConfig = &ac
	}

	if src := s.HealthProbe; src != nil {
		hp := *src
		if src.Details != nil {
			hp.Details = make([]HealthCheckDetail, len(src.Details))
			copy(hp.Details, src.Details)
		}
		dup.HealthProbe = &hp
	}

	// Meta.DeployFields, including each field's Options slice.
	if src := s.Meta.DeployFields; src != nil {
		fields := make([]DeployField, len(src))
		copy(fields, src)
		for i := range src {
			if opts := src[i].Options; opts != nil {
				fields[i].Options = make([]SelectOption, len(opts))
				copy(fields[i].Options, opts)
			}
		}
		dup.Meta.DeployFields = fields
	}

	// Meta.OptionalConfig, including each group's Fields slice.
	if src := s.Meta.OptionalConfig; src != nil {
		groups := make([]OptionalConfigGroup, len(src))
		copy(groups, src)
		for i := range src {
			if inner := src[i].Fields; inner != nil {
				groups[i].Fields = make([]OptionalConfigField, len(inner))
				copy(groups[i].Fields, inner)
			}
		}
		dup.Meta.OptionalConfig = groups
	}

	if src := s.Meta.Integrations; src != nil {
		dup.Meta.Integrations = make([]IntegrationDef, len(src))
		copy(dup.Meta.Integrations, src)
	}

	// Meta.HealthCheck, including its Checks and every check's Expect pointer.
	if src := s.Meta.HealthCheck; src != nil {
		hc := *src
		if src.Checks != nil {
			hc.Checks = make([]HealthCheckItem, len(src.Checks))
			copy(hc.Checks, src.Checks)
			for i := range src.Checks {
				if exp := src.Checks[i].Expect; exp != nil {
					expCopy := *exp
					hc.Checks[i].Expect = &expCopy
				}
			}
		}
		dup.Meta.HealthCheck = &hc
	}

	return dup
}
// --- Stack operations ---
// StartStack, StopStack, etc. now load app.yaml env for deployed stacks.
// StartStack brings the named stack up with `compose up -d`, passing the
// environment built by stackEnv. On success it schedules a post-start status
// log, drops the cached health probe and refreshes container status; the
// refresh error (if any) is returned. It fails when the stack is unknown or
// the compose command fails.
func (m *Manager) StartStack(name string) error {
	st, found := m.GetStack(name)
	if !found {
		return fmt.Errorf("stack %q not found", name)
	}
	if m.isDebug() {
		m.logger.Printf("[DEBUG] [stacks] StartStack %s: current state=%s deployed=%v", name, st.State, st.Deployed)
	}
	m.logger.Printf("[INFO] Starting stack: %s", name)

	began := time.Now()
	stackDir := filepath.Dir(st.ComposePath)
	composeEnv := m.stackEnv(stackDir)
	if m.isDebug() {
		m.logger.Printf("[DEBUG] [stacks] StartStack %s: prepared %d env vars for compose", name, len(composeEnv))
	}

	_, err := m.composeExecCustomEnv(stackDir, composeEnv, "up", "-d")
	if err != nil {
		m.logger.Printf("[ERROR] Stack %s start failed after %.1fs: %v", name, time.Since(began).Seconds(), err)
		return fmt.Errorf("starting stack %s: %w", name, err)
	}
	m.logger.Printf("[INFO] Stack %s started successfully (took %.1fs)", name, time.Since(began).Seconds())
	m.logPostStartStatus(name, stackDir, composeEnv)

	// Forget the previous probe result so the refresh below does not
	// re-apply an outdated "unhealthy" override; the next health-probes
	// tick (≤10s) produces a fresh one.
	m.mu.Lock()
	if live, ok := m.stacks[name]; ok {
		live.HealthProbe = nil
	}
	m.mu.Unlock()

	return m.RefreshStatus()
}
// StopStack tears the named stack down with `compose down` and then
// refreshes container status. Protected stacks are rejected before any
// lookup; unknown stacks and compose failures are reported as errors.
func (m *Manager) StopStack(name string) error {
	if m.cfg.IsProtectedStack(name) {
		return fmt.Errorf("stack %q is protected and cannot be stopped", name)
	}

	st, found := m.GetStack(name)
	if !found {
		return fmt.Errorf("stack %q not found", name)
	}
	if m.isDebug() {
		m.logger.Printf("[DEBUG] [stacks] StopStack %s: current state=%s deployed=%v containers=%d", name, st.State, st.Deployed, len(st.Containers))
	}
	m.logger.Printf("[INFO] Stopping stack: %s", name)

	began := time.Now()
	stackDir := filepath.Dir(st.ComposePath)
	_, err := m.composeExec(stackDir, "down")
	if err != nil {
		m.logger.Printf("[ERROR] Stack %s stop failed after %.1fs: %v", name, time.Since(began).Seconds(), err)
		return fmt.Errorf("stopping stack %s: %w", name, err)
	}
	m.logger.Printf("[INFO] Stack %s stopped successfully (took %.1fs)", name, time.Since(began).Seconds())

	return m.RefreshStatus()
}
// RestartStack re-applies the named stack with `compose up -d`, then
// schedules a post-start status log, drops the cached health probe and
// refreshes container status. It fails when the stack is unknown or the
// compose command fails.
func (m *Manager) RestartStack(name string) error {
	st, found := m.GetStack(name)
	if !found {
		return fmt.Errorf("stack %q not found", name)
	}
	if m.isDebug() {
		m.logger.Printf("[DEBUG] [stacks] RestartStack %s: current state=%s deployed=%v containers=%d", name, st.State, st.Deployed, len(st.Containers))
	}
	m.logger.Printf("[INFO] Restarting stack: %s", name)

	began := time.Now()
	stackDir := filepath.Dir(st.ComposePath)
	composeEnv := m.stackEnv(stackDir)

	// "up -d" is used on purpose rather than "restart": it injects the env
	// vars from app.yaml and picks up template changes (new images,
	// healthchecks), whereas a plain "docker compose restart" only stops and
	// starts the existing containers without re-reading the compose file or env.
	_, err := m.composeExecCustomEnv(stackDir, composeEnv, "up", "-d")
	if err != nil {
		m.logger.Printf("[ERROR] Stack %s restart failed after %.1fs: %v", name, time.Since(began).Seconds(), err)
		return fmt.Errorf("restarting stack %s: %w", name, err)
	}
	m.logger.Printf("[INFO] Stack %s restarted successfully (took %.1fs)", name, time.Since(began).Seconds())
	m.logPostStartStatus(name, stackDir, composeEnv)

	// Forget the previous probe result so the refresh below does not
	// re-apply an outdated "unhealthy" override.
	m.mu.Lock()
	if live, ok := m.stacks[name]; ok {
		live.HealthProbe = nil
	}
	m.mu.Unlock()

	return m.RefreshStatus()
}
// UpdateStack pulls the images of the named stack and recreates its
// containers with `compose up -d --remove-orphans`, using the environment
// built by stackEnv. In debug mode it first logs which statically referenced
// images are already present locally.
//
// On success it schedules a post-start status log, drops the cached health
// probe and refreshes container status; the refresh error (if any) is
// returned. It fails when the stack is unknown or either compose command
// fails.
func (m *Manager) UpdateStack(name string) error {
	stack, ok := m.GetStack(name)
	if !ok {
		return fmt.Errorf("stack %q not found", name)
	}
	m.logger.Printf("[INFO] Updating stack: %s", name)
	start := time.Now()
	dir := filepath.Dir(stack.ComposePath)
	env := m.stackEnv(dir)

	if m.isDebug() {
		m.checkLocalImages(name, dir)
	}

	if _, err := m.composeExecCustomEnv(dir, env, "pull"); err != nil {
		m.logger.Printf("[ERROR] Stack %s update (pull) failed after %.1fs: %v", name, time.Since(start).Seconds(), err)
		return fmt.Errorf("pulling images for %s: %w", name, err)
	}
	if _, err := m.composeExecCustomEnv(dir, env, "up", "-d", "--remove-orphans"); err != nil {
		m.logger.Printf("[ERROR] Stack %s update (up) failed after %.1fs: %v", name, time.Since(start).Seconds(), err)
		return fmt.Errorf("recreating %s: %w", name, err)
	}
	m.logger.Printf("[INFO] Stack %s updated successfully (took %.1fs)", name, time.Since(start).Seconds())
	m.logPostStartStatus(name, dir, env)

	// Same reasoning as in StartStack/RestartStack: the containers may have
	// just been recreated, so a probe result taken before the update must not
	// be re-applied as an "unhealthy" override by the refresh below.
	m.mu.Lock()
	if s, ok := m.stacks[name]; ok {
		s.HealthProbe = nil
	}
	m.mu.Unlock()

	return m.RefreshStatus()
}
// GetLogs returns the combined `compose logs` output of the named stack
// without colour codes. lines selects how many trailing lines to fetch per
// the --tail flag: values <= 0 default to 100 and values above 1000 are
// capped at 1000. It fails when the stack is unknown or the compose command
// fails.
func (m *Manager) GetLogs(name string, lines int) (string, error) {
	st, found := m.GetStack(name)
	if !found {
		return "", fmt.Errorf("stack %q not found", name)
	}

	switch {
	case lines <= 0:
		lines = 100
	case lines > 1000:
		lines = 1000
	}
	m.logger.Printf("[DEBUG] Fetching logs for %s (tail %d)", name, lines)

	stackDir := filepath.Dir(st.ComposePath)
	logText, err := m.composeExec(stackDir, "logs", "--tail", strconv.Itoa(lines), "--no-color")
	if err != nil {
		m.logger.Printf("[WARN] Failed to fetch logs for %s: %v", name, err)
		return "", fmt.Errorf("getting logs for %s: %w", name, err)
	}

	if size := len(logText); size == 0 {
		m.logger.Printf("[DEBUG] Logs result for %s: 0 bytes returned (empty)", name)
	} else {
		m.logger.Printf("[DEBUG] Logs result for %s: %d bytes returned", name, size)
	}
	return logText, nil
}
// --- Env and compose helpers ---
// stackEnv assembles the environment passed to compose for the stack in
// stackDir: the controller's own process environment, then DOMAIN from the
// customer config, then every env var from the stack's app.yaml (loaded in
// decrypted form). A stack without app.yaml only gets the first two parts.
func (m *Manager) stackEnv(stackDir string) []string {
	merged := append(os.Environ(), fmt.Sprintf("DOMAIN=%s", m.cfg.Customer.Domain))

	appCfg := LoadAppConfigDecrypted(stackDir, m.encKey)
	if appCfg == nil {
		return merged
	}
	for key, value := range appCfg.Env {
		merged = append(merged, fmt.Sprintf("%s=%s", key, value))
	}
	return merged
}
// composeExec runs a compose sub-command in dir with the default stack
// environment; it is composeExecCustomEnv with no explicit env.
func (m *Manager) composeExec(dir string, args ...string) (string, error) {
	var defaultEnv []string // nil tells the callee to build the env via stackEnv
	return m.composeExecCustomEnv(dir, defaultEnv, args...)
}
// composeExecCustomEnv runs a compose sub-command (args) with dir as working
// directory and returns its stdout.
//
// env is used as the complete process environment; when it is nil the
// environment is built with stackEnv(dir). The binary is `docker compose`
// when m.composeCmd is exactly "docker compose" and `docker-compose`
// otherwise.
//
// On failure the command line, exit code and (truncated) stdout/stderr are
// logged and an error of the form "exit code N\nstderr: ..." is returned
// along with whatever stdout was captured. When the command could not be run
// at all (for example the binary is missing or dir does not exist) there is
// no exit status; the exit code is reported as -1 and the underlying cause
// is both logged and wrapped into the returned error.
func (m *Manager) composeExecCustomEnv(dir string, env []string, args ...string) (string, error) {
	var cmd *exec.Cmd
	if m.composeCmd == "docker compose" {
		fullArgs := append([]string{"compose"}, args...)
		cmd = exec.Command("docker", fullArgs...)
	} else {
		cmd = exec.Command("docker-compose", args...)
	}
	cmd.Dir = dir
	if env == nil {
		env = m.stackEnv(dir)
	}
	cmd.Env = env

	// At debug level, log env var names only (never values). Keys that are
	// entirely upper-case and do not start with "_" are listed by name; all
	// other keys are only counted.
	if m.isDebug() {
		var appKeys []string
		sysCount := 0
		for _, e := range env {
			parts := strings.SplitN(e, "=", 2)
			if len(parts) != 2 {
				continue
			}
			key := parts[0]
			if strings.ToUpper(key) == key && !strings.HasPrefix(key, "_") {
				appKeys = append(appKeys, key)
			} else {
				sysCount++
			}
		}
		if len(appKeys) > 0 {
			m.logger.Printf("[DEBUG] Env vars for compose: [%s] (%d app + %d system)",
				strings.Join(appKeys, ", "), len(appKeys), sysCount)
		}
	}

	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	m.logger.Printf("[DEBUG] Running: %s %s (in %s)", m.composeCmd, strings.Join(args, " "), dir)
	start := time.Now()

	if err := cmd.Run(); err != nil {
		elapsed := time.Since(start)
		exitCode := -1
		exitErr, exited := err.(*exec.ExitError)
		if exited {
			exitCode = exitErr.ExitCode()
		}
		m.logger.Printf("[ERROR] Command failed: %s %s (in %s) — exit code %d (took %.1fs)",
			m.composeCmd, strings.Join(args, " "), dir, exitCode, elapsed.Seconds())
		if !exited {
			// No exit status means the process never ran to completion;
			// the error itself is the only diagnostic available.
			m.logger.Printf("[ERROR] cause: %v", err)
		}
		if stdoutStr := truncateStr(stdout.String(), 500); stdoutStr != "" {
			m.logger.Printf("[ERROR] stdout: %s", stdoutStr)
		}
		stderrStr := truncateStr(stderr.String(), 500)
		if stderrStr != "" {
			m.logger.Printf("[ERROR] stderr: %s", stderrStr)
		}
		if !exited {
			return stdout.String(), fmt.Errorf("exit code %d: %w\nstderr: %s", exitCode, err, stderrStr)
		}
		return stdout.String(), fmt.Errorf("exit code %d\nstderr: %s", exitCode, stderrStr)
	}

	m.logger.Printf("[DEBUG] Command completed: %s %s (took %.1fs)", m.composeCmd, strings.Join(args, " "), time.Since(start).Seconds())
	return stdout.String(), nil
}
// execCommand runs an arbitrary command and returns its stdout. On failure
// it returns an empty string and an error that wraps the run error and
// includes the command line and the captured stderr.
func (m *Manager) execCommand(name string, args ...string) (string, error) {
	var outBuf, errBuf bytes.Buffer

	proc := exec.Command(name, args...)
	proc.Stdout, proc.Stderr = &outBuf, &errBuf

	runErr := proc.Run()
	if runErr == nil {
		return outBuf.String(), nil
	}
	return "", fmt.Errorf("exec %s %s: %w\nstderr: %s", name, strings.Join(args, " "), runErr, errBuf.String())
}
// isDebug reports whether the configured logging level is exactly "debug".
func (m *Manager) isDebug() bool {
	const debugLevel = "debug"
	return m.cfg.Logging.Level == debugLevel
}
// truncateStr trims surrounding whitespace from s and limits the result to
// at most maxLen bytes, appending "..." when something was cut off.
//
// The cut never lands inside a multi-byte UTF-8 character: if byte maxLen is
// a continuation byte, the cut moves back to the start of that character, so
// the result stays valid UTF-8 and may be slightly shorter than maxLen. A
// negative maxLen is treated as 0 instead of panicking.
func truncateStr(s string, maxLen int) string {
	s = strings.TrimSpace(s)
	if maxLen < 0 {
		maxLen = 0
	}
	if len(s) <= maxLen {
		return s
	}

	// s[cut] is the first byte that gets dropped. A UTF-8 character has at
	// most three continuation bytes (10xxxxxx), so stepping back at most
	// three times reaches a character boundary for well-formed input.
	cut := maxLen
	for back := 0; back < 3 && cut > 0 && s[cut]&0xC0 == 0x80; back++ {
		cut--
	}
	return s[:cut] + "..."
}
// logPostStartStatus logs the container states of a stack shortly after a
// start/deploy operation. The work happens in a background goroutine that
// waits three seconds and then runs `compose ps -a`, so the caller (typically
// an HTTP handler) is not blocked. env is copied up front so later changes
// to the caller's slice cannot affect the goroutine.
func (m *Manager) logPostStartStatus(name, stackDir string, env []string) {
	snapshot := make([]string, len(env))
	copy(snapshot, env)

	report := func() {
		time.Sleep(3 * time.Second)

		table, err := m.composeExecCustomEnv(stackDir, snapshot, "ps", "-a", "--format", "table {{.Name}}\t{{.Image}}\t{{.State}}\t{{.Status}}")
		if err != nil {
			m.logger.Printf("[WARN] Post-start status check failed for %s: %v", name, err)
			return
		}

		rows := strings.Split(strings.TrimSpace(table), "\n")
		if len(rows) <= 1 {
			m.logger.Printf("[WARN] Post-start status for %s: no containers found", name)
			return
		}

		m.logger.Printf("[INFO] Stack %s post-start status:", name)
		// Row 0 is the table header; log only the container rows.
		for i := 1; i < len(rows); i++ {
			m.logger.Printf("[INFO] %s", rows[i])
		}
	}
	go report()
}
// checkLocalImages scans the stack's compose file (docker-compose.yml, then
// docker-compose.yaml) line by line for `image:` entries and logs, for each
// one, whether `docker image inspect` finds it locally. References that are
// empty or contain "${" (variable interpolation) are skipped. Purely
// informational: problems are logged and never returned.
func (m *Manager) checkLocalImages(name, stackDir string) {
	var (
		content []byte
		readErr error
	)
	for _, fileName := range []string{"docker-compose.yml", "docker-compose.yaml"} {
		content, readErr = os.ReadFile(filepath.Join(stackDir, fileName))
		if readErr == nil {
			break
		}
	}
	if readErr != nil {
		m.logger.Printf("[DEBUG] Could not read compose file for image check: %v", readErr)
		return
	}

	var refs []string
	for _, rawLine := range strings.Split(string(content), "\n") {
		entry := strings.TrimSpace(rawLine)
		if !strings.HasPrefix(entry, "image:") {
			continue
		}
		ref := strings.TrimSpace(strings.TrimPrefix(entry, "image:"))
		ref = strings.Trim(ref, "\"'")
		if ref == "" || strings.Contains(ref, "${") {
			continue
		}
		refs = append(refs, ref)
	}
	if len(refs) == 0 {
		m.logger.Printf("[DEBUG] No static image references found in %s compose file", name)
		return
	}

	m.logger.Printf("[INFO] Deploying stack %s — checking %d images...", name, len(refs))
	for _, ref := range refs {
		inspectErr := exec.Command("docker", "image", "inspect", ref).Run()
		if inspectErr == nil {
			m.logger.Printf("[DEBUG] %s — found locally", ref)
			continue
		}
		m.logger.Printf("[DEBUG] %s — not found locally, will pull", ref)
	}
}
// --- Memory helpers ---
// ParseMemoryMB converts a memory size such as "500M", "1G", "1.5G", "2GB",
// "512mb" or "768" into whole megabytes (fraction truncated). Recognized
// unit suffixes are GB, G, MB and M in any letter case; a value without a
// suffix is taken as megabytes. Empty or unparseable input yields 0.
func ParseMemoryMB(s string) int {
	s = strings.TrimSpace(s)
	if s == "" {
		return 0
	}

	// Longer suffixes come first so "GB" is not mistaken for a bare "B"-less
	// "G" match; the first suffix that matches decides the outcome.
	units := []struct {
		suffix   string
		mbFactor float64
	}{
		{"GB", 1024},
		{"G", 1024},
		{"MB", 1},
		{"M", 1},
	}

	upper := strings.ToUpper(s)
	for _, u := range units {
		if !strings.HasSuffix(upper, u.suffix) {
			continue
		}
		amount, err := strconv.ParseFloat(strings.TrimSuffix(upper, u.suffix), 64)
		if err != nil {
			return 0
		}
		return int(amount * u.mbFactor)
	}

	// No unit: a plain number is interpreted as megabytes.
	amount, err := strconv.ParseFloat(s, 64)
	if err != nil {
		return 0
	}
	return int(amount)
}
// CommittedMemory sums Meta.Resources.MemRequest and MemLimit (in MB, parsed
// with ParseMemoryMB) over all deployed stacks whose state is neither
// stopped nor exited. Stopped and exited apps are left out since they do not
// consume memory.
func (m *Manager) CommittedMemory() (requestMB int, limitMB int) {
	m.mu.RLock()
	defer m.mu.RUnlock()

	for _, st := range m.stacks {
		idle := st.State == StateStopped || st.State == StateExited
		if idle || !st.Deployed {
			continue
		}
		requestMB += ParseMemoryMB(st.Meta.Resources.MemRequest)
		limitMB += ParseMemoryMB(st.Meta.Resources.MemLimit)
	}
	return requestMB, limitMB
}
// StackMemoryMB reports the mem_request of the named stack in megabytes, or
// 0 when the stack is unknown or the value cannot be parsed.
func (m *Manager) StackMemoryMB(name string) int {
	m.mu.RLock()
	defer m.mu.RUnlock()

	st, found := m.stacks[name]
	if !found {
		return 0
	}
	return ParseMemoryMB(st.Meta.Resources.MemRequest)
}
// getCatalogTemplateSlugs lists the templates present in the synced catalog
// cache (<DataDir>/catalog-cache/templates). A slug is the name of a
// sub-directory that contains a docker-compose.yml. The result is a set of
// slugs, or nil (after logging a warning) when the cache directory cannot be
// read.
func (m *Manager) getCatalogTemplateSlugs() map[string]bool {
	templatesDir := filepath.Join(m.cfg.Paths.DataDir, "catalog-cache", "templates")

	dirEntries, readErr := os.ReadDir(templatesDir)
	if readErr != nil {
		m.logger.Printf("[WARN] Cannot read catalog cache for orphan detection: %v", readErr)
		return nil
	}

	found := make(map[string]bool, len(dirEntries))
	for _, de := range dirEntries {
		if !de.IsDir() {
			continue
		}
		slug := de.Name()
		if _, statErr := os.Stat(filepath.Join(templatesDir, slug, "docker-compose.yml")); statErr != nil {
			continue
		}
		found[slug] = true
	}

	if m.isDebug() {
		m.logger.Printf("[DEBUG] [stacks] getCatalogTemplateSlugs: found %d template slugs in %s", len(found), templatesDir)
	}
	return found
}