db83db383c
Critical: watchdog mutex panic safety, SetGeoAppOverride nil guard, SSD-only app DB restore fallback. High: double deploy race (atomic Deploying flag), delete/remove during deploy guard, ScanStacks overwrite protection, FileBrowser mount mutex, PushEvent history, PushOnce error handling, DB dump sync+close before rename, restic retry fresh context, encrypt failure logging, cross-backup path traversal validation, deepCopyStack completeness. Security: constant-time API key comparison, login rate limiting (5/min), git credential masking in logs, storage path prefix traversal fix. Concurrency: MigrateEncryption lock ordering, SubdomainInUse I/O outside lock, scheduler late-registered jobs, SQLite WAL verification, metrics shutdown context, telemetry scan error logging, asset sync lock scope. Optimization: streaming file copy for DB dumps, restic stats dedup, atomic infra config copy. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
978 lines
28 KiB
Go
978 lines
28 KiB
Go
package stacks
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
|
|
"gitea.dooplex.hu/admin/felhom-controller/internal/crypto"
|
|
)
|
|
|
|
// ContainerState is the state label reported for a single container and, in
// aggregated form, for a whole stack.
type ContainerState string

// Known ContainerState values. The string forms are what ends up in JSON.
const (
	// StateRunning means the container is up (healthy, or without a healthcheck).
	StateRunning ContainerState = "running"
	// StateStarting means the container is running but its health is still "starting".
	StateStarting ContainerState = "starting"
	// StateUnhealthy means the container is running but reported unhealthy.
	StateUnhealthy ContainerState = "unhealthy"
	// StateStopped means nothing is running for the stack/container.
	StateStopped ContainerState = "stopped"
	// StateRestarting means Docker is restarting the container.
	StateRestarting ContainerState = "restarting"
	// StateExited means the container process has exited.
	StateExited ContainerState = "exited"
	// StatePaused means the container is paused.
	StatePaused ContainerState = "paused"
	// StateUnknown is used for Docker states this package does not recognise.
	StateUnknown ContainerState = "unknown"
	// StateNotDeployed means the stack exists on disk but has not been deployed.
	StateNotDeployed ContainerState = "not_deployed"
	// StateDeploying means a compose up is in progress (image pull, etc.).
	StateDeploying ContainerState = "deploying"
	// StateOrphaned is the label for orphaned stacks.
	StateOrphaned ContainerState = "orphaned"
)
|
|
|
|
// ContainerInfo holds status info about a single container within a stack.
|
|
type ContainerInfo struct {
|
|
Name string `json:"name"`
|
|
Image string `json:"image"`
|
|
State ContainerState `json:"state"`
|
|
Status string `json:"status"` // e.g. "Up 3 hours (healthy)"
|
|
}
|
|
|
|
// HealthProbeResult holds the latest controller-side health probe result.
|
|
type HealthProbeResult struct {
|
|
Healthy bool `json:"healthy"`
|
|
LastCheck time.Time `json:"last_check"`
|
|
Details []HealthCheckDetail `json:"details"`
|
|
}
|
|
|
|
// HealthCheckDetail is the outcome of one individual health check item.
type HealthCheckDetail struct {
	// Type is the kind of check: "http", "api" or "tcp".
	Type string `json:"type"`
	// Target is what was probed, e.g. ":3456/api/v1/info".
	Target string `json:"target"`
	// Healthy reports whether this single check passed.
	Healthy bool `json:"healthy"`
	// Status is the HTTP status code (http/api checks); omitted from JSON when zero.
	Status int `json:"status,omitempty"`
	// Latency is the formatted round-trip time, e.g. "45ms".
	Latency string `json:"latency"`
	// Error is the failure message for an unhealthy check; omitted from JSON when empty.
	Error string `json:"error,omitempty"`
}
|
|
|
|
// Stack represents a docker compose stack on disk.
|
|
type Stack struct {
|
|
Name string `json:"name"`
|
|
Meta Metadata `json:"meta"`
|
|
ComposePath string `json:"compose_path"`
|
|
State ContainerState `json:"state"`
|
|
Deployed bool `json:"deployed"` // Has app.yaml with deployed=true
|
|
Protected bool `json:"protected"`
|
|
Orphaned bool `json:"orphaned"` // Deployed but no catalog template
|
|
Containers []ContainerInfo `json:"containers"`
|
|
AppConfig *AppConfig `json:"app_config,omitempty"`
|
|
Deploying bool `json:"deploying"` // compose up in progress
|
|
DeployError string `json:"deploy_error,omitempty"` // last async deploy error
|
|
HealthProbe *HealthProbeResult `json:"health_probe,omitempty"` // controller-side probe result
|
|
LastUpdated time.Time `json:"last_updated"`
|
|
}
|
|
|
|
// Manager handles all docker compose stack operations.
|
|
type Manager struct {
|
|
cfg *config.Config
|
|
logger *log.Logger
|
|
composeCmd string
|
|
stacks map[string]*Stack
|
|
mu sync.RWMutex
|
|
encKey []byte // AES-256 key for encrypting sensitive values in app.yaml
|
|
}
|
|
|
|
// NewManager creates a new stack manager.
|
|
func NewManager(cfg *config.Config, logger *log.Logger) (*Manager, error) {
|
|
composeCmd := cfg.Stacks.ComposeCommand
|
|
if composeCmd == "" {
|
|
composeCmd = detectComposeCommand()
|
|
}
|
|
if composeCmd == "" {
|
|
return nil, fmt.Errorf("docker compose not found (tried 'docker compose' and 'docker-compose')")
|
|
}
|
|
|
|
logger.Printf("[INFO] Using compose command: %s", composeCmd)
|
|
|
|
if err := os.MkdirAll(cfg.Paths.StacksDir, 0755); err != nil {
|
|
return nil, fmt.Errorf("creating stacks directory %s: %w", cfg.Paths.StacksDir, err)
|
|
}
|
|
|
|
return &Manager{
|
|
cfg: cfg,
|
|
logger: logger,
|
|
composeCmd: composeCmd,
|
|
stacks: make(map[string]*Stack),
|
|
}, nil
|
|
}
|
|
|
|
// SetEncryptionKey sets the AES-256 key used to encrypt/decrypt sensitive values in app.yaml.
|
|
func (m *Manager) SetEncryptionKey(key []byte) {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
m.encKey = key
|
|
}
|
|
|
|
// MigrateEncryption re-saves app.yaml for deployed stacks that still have
|
|
// plaintext values in sensitive fields. Called once on startup.
|
|
func (m *Manager) MigrateEncryption() {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
if m.encKey == nil {
|
|
return
|
|
}
|
|
|
|
migrated := 0
|
|
for _, s := range m.stacks {
|
|
if !s.Deployed {
|
|
continue
|
|
}
|
|
stackDir := filepath.Dir(s.ComposePath)
|
|
appCfg := LoadAppConfig(stackDir)
|
|
if appCfg == nil {
|
|
continue
|
|
}
|
|
meta := LoadMetadata(stackDir)
|
|
sensitive := SensitiveEnvVars(&meta)
|
|
if len(sensitive) == 0 {
|
|
continue
|
|
}
|
|
needsMigration := false
|
|
for _, envVar := range sensitive {
|
|
if v, ok := appCfg.Env[envVar]; ok && v != "" && !crypto.IsEncrypted(v) {
|
|
needsMigration = true
|
|
break
|
|
}
|
|
}
|
|
if needsMigration {
|
|
if err := SaveAppConfig(stackDir, appCfg, m.encKey, sensitive); err != nil {
|
|
m.logger.Printf("[WARN] Encryption migration failed for %s: %v", s.Name, err)
|
|
} else {
|
|
migrated++
|
|
}
|
|
}
|
|
}
|
|
if migrated > 0 {
|
|
m.logger.Printf("[INFO] Encrypted sensitive values in %d app.yaml file(s)", migrated)
|
|
}
|
|
}
|
|
|
|
// toTitleCase upper-cases the first letter of every whitespace-separated word
// in s and joins the words with single spaces. Leading, trailing and repeated
// whitespace is therefore collapsed.
//
// The first letter is handled as a full UTF-8 rune, so words starting with a
// multi-byte character (e.g. "élet") are capitalised correctly instead of
// being corrupted by a one-byte slice.
func toTitleCase(s string) string {
	words := strings.Fields(s)
	for i, w := range words {
		// Byte offset where the second rune starts (or len(w) for a
		// single-rune word); that is the width of the first rune.
		first := len(w)
		for j := range w {
			if j > 0 {
				first = j
				break
			}
		}
		words[i] = strings.ToUpper(w[:first]) + w[first:]
	}
	return strings.Join(words, " ")
}
|
|
|
|
// detectComposeCommand probes the host for a usable compose command.
// It returns "docker compose" when `docker compose version` succeeds,
// otherwise "docker-compose" when that binary is on PATH, otherwise "".
func detectComposeCommand() string {
	pluginProbe := exec.Command("docker", "compose", "version")
	if pluginProbe.Run() == nil {
		return "docker compose"
	}

	_, lookErr := exec.LookPath("docker-compose")
	if lookErr != nil {
		return ""
	}
	return "docker-compose"
}
|
|
|
|
// DeployedStackNames returns the names of all deployed stacks.
|
|
func (m *Manager) DeployedStackNames() []string {
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
var names []string
|
|
for name, stack := range m.stacks {
|
|
if stack.Deployed {
|
|
names = append(names, name)
|
|
}
|
|
}
|
|
return names
|
|
}
|
|
|
|
// ScanStacks discovers all compose stacks in the stacks directory.
|
|
func (m *Manager) ScanStacks() error {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
entries, err := os.ReadDir(m.cfg.Paths.StacksDir)
|
|
if err != nil {
|
|
return fmt.Errorf("reading stacks directory: %w", err)
|
|
}
|
|
|
|
found := make(map[string]bool)
|
|
|
|
for _, entry := range entries {
|
|
if !entry.IsDir() {
|
|
continue
|
|
}
|
|
|
|
name := entry.Name()
|
|
stackDir := filepath.Join(m.cfg.Paths.StacksDir, name)
|
|
composePath := filepath.Join(stackDir, "docker-compose.yml")
|
|
|
|
if _, err := os.Stat(composePath); os.IsNotExist(err) {
|
|
composePath = filepath.Join(stackDir, "docker-compose.yaml")
|
|
if _, err := os.Stat(composePath); os.IsNotExist(err) {
|
|
continue
|
|
}
|
|
}
|
|
|
|
found[name] = true
|
|
|
|
meta := LoadMetadata(stackDir)
|
|
appCfg := LoadAppConfig(stackDir)
|
|
deployed := appCfg != nil && appCfg.Deployed
|
|
|
|
if existing, ok := m.stacks[name]; ok {
|
|
existing.ComposePath = composePath
|
|
existing.Meta = meta
|
|
existing.Protected = m.cfg.IsProtectedStack(name)
|
|
// Don't overwrite Deployed/AppConfig while an async deploy is in
|
|
// progress — the goroutine manages these fields (H3 fix).
|
|
if !existing.Deploying {
|
|
existing.Deployed = deployed
|
|
existing.AppConfig = appCfg
|
|
}
|
|
} else {
|
|
m.stacks[name] = &Stack{
|
|
Name: name,
|
|
Meta: meta,
|
|
ComposePath: composePath,
|
|
State: StateNotDeployed,
|
|
Deployed: deployed,
|
|
Protected: m.cfg.IsProtectedStack(name),
|
|
AppConfig: appCfg,
|
|
}
|
|
}
|
|
}
|
|
|
|
// Remove stacks no longer on disk
|
|
for name := range m.stacks {
|
|
if !found[name] {
|
|
delete(m.stacks, name)
|
|
}
|
|
}
|
|
|
|
// Detect orphaned stacks (deployed but no longer in catalog)
|
|
catalogTemplates := m.getCatalogTemplateSlugs()
|
|
if catalogTemplates != nil {
|
|
orphanCount := 0
|
|
for _, stack := range m.stacks {
|
|
if stack.Protected || !stack.Deployed {
|
|
stack.Orphaned = false
|
|
continue
|
|
}
|
|
stack.Orphaned = !catalogTemplates[stack.Name]
|
|
if stack.Orphaned {
|
|
orphanCount++
|
|
}
|
|
}
|
|
if orphanCount > 0 {
|
|
m.logger.Printf("[INFO] Detected %d orphaned stack(s)", orphanCount)
|
|
}
|
|
}
|
|
|
|
deployedCount := 0
|
|
for _, s := range m.stacks {
|
|
if s.Deployed {
|
|
deployedCount++
|
|
}
|
|
}
|
|
m.logger.Printf("[INFO] Scanned stacks: %d found (%d deployed, %d available)",
|
|
len(m.stacks), deployedCount, len(m.stacks)-deployedCount)
|
|
return m.refreshStatusLocked()
|
|
}
|
|
|
|
// RefreshStatus updates container status for all known stacks.
|
|
func (m *Manager) RefreshStatus() error {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
return m.refreshStatusLocked()
|
|
}
|
|
|
|
func (m *Manager) refreshStatusLocked() error {
|
|
output, err := m.execCommand("docker", "ps", "-a",
|
|
"--format", "{{.Names}}\t{{.Image}}\t{{.State}}\t{{.Status}}\t{{.Label \"com.docker.compose.project\"}}",
|
|
"--no-trunc")
|
|
if err != nil {
|
|
return fmt.Errorf("docker ps: %w", err)
|
|
}
|
|
|
|
projectContainers := make(map[string][]ContainerInfo)
|
|
|
|
for _, line := range strings.Split(strings.TrimSpace(output), "\n") {
|
|
if line == "" {
|
|
continue
|
|
}
|
|
parts := strings.SplitN(line, "\t", 5)
|
|
if len(parts) < 5 || parts[4] == "" {
|
|
continue
|
|
}
|
|
|
|
ci := ContainerInfo{
|
|
Name: parts[0],
|
|
Image: parts[1],
|
|
State: resolveContainerState(parts[2], parts[3]),
|
|
Status: parts[3],
|
|
}
|
|
projectContainers[parts[4]] = append(projectContainers[parts[4]], ci)
|
|
}
|
|
|
|
for name, stack := range m.stacks {
|
|
containers, exists := projectContainers[name]
|
|
if !exists {
|
|
stack.Containers = nil
|
|
if stack.Deploying {
|
|
stack.State = StateDeploying
|
|
} else if stack.Deployed {
|
|
stack.State = StateStopped
|
|
} else {
|
|
stack.State = StateNotDeployed
|
|
}
|
|
} else {
|
|
stack.Containers = containers
|
|
stack.State = aggregateState(containers)
|
|
}
|
|
|
|
// Re-apply controller-side health probe results: if the last probe
|
|
// failed and Docker thinks the container is running, override to unhealthy.
|
|
if stack.State == StateRunning && stack.HealthProbe != nil && !stack.HealthProbe.Healthy {
|
|
stack.State = StateUnhealthy
|
|
}
|
|
|
|
stack.LastUpdated = time.Now()
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// resolveContainerState determines the effective state by combining Docker's
|
|
// State field (running/exited/etc.) with the Status field that contains health info.
|
|
//
|
|
// Docker State: "running", "exited", "restarting", "paused", "created", "dead", "removing"
|
|
// Docker Status: "Up 3 hours (healthy)", "Up 9 seconds (health: starting)", "Up 2 min (unhealthy)"
|
|
func resolveContainerState(dockerState, dockerStatus string) ContainerState {
|
|
state := strings.ToLower(strings.TrimSpace(dockerState))
|
|
status := strings.ToLower(dockerStatus)
|
|
|
|
switch state {
|
|
case "running":
|
|
// Check health sub-status for containers with healthchecks
|
|
if strings.Contains(status, "(health: starting)") {
|
|
return StateStarting
|
|
}
|
|
if strings.Contains(status, "(unhealthy)") {
|
|
return StateUnhealthy
|
|
}
|
|
// "(healthy)" or no healthcheck = running
|
|
return StateRunning
|
|
|
|
case "exited":
|
|
return StateExited
|
|
case "restarting":
|
|
return StateRestarting
|
|
case "paused":
|
|
return StatePaused
|
|
case "created", "dead", "removing":
|
|
return StateStopped
|
|
default:
|
|
return StateUnknown
|
|
}
|
|
}
|
|
|
|
// aggregateState determines the overall stack state from its containers.
|
|
// Priority: unhealthy/starting > restarting > all-running > stopped
|
|
func aggregateState(containers []ContainerInfo) ContainerState {
|
|
if len(containers) == 0 {
|
|
return StateNotDeployed
|
|
}
|
|
|
|
running := 0
|
|
starting := 0
|
|
unhealthy := 0
|
|
restarting := 0
|
|
stopped := 0
|
|
|
|
for _, c := range containers {
|
|
switch c.State {
|
|
case StateRunning:
|
|
running++
|
|
case StateStarting:
|
|
starting++
|
|
case StateUnhealthy:
|
|
unhealthy++
|
|
case StateRestarting:
|
|
restarting++
|
|
case StateStopped, StateExited:
|
|
stopped++
|
|
}
|
|
}
|
|
|
|
total := len(containers)
|
|
|
|
// Any unhealthy → whole stack is unhealthy
|
|
if unhealthy > 0 {
|
|
return StateUnhealthy
|
|
}
|
|
// Any still starting → stack is starting
|
|
if starting > 0 {
|
|
return StateStarting
|
|
}
|
|
// Any restarting → stack is restarting
|
|
if restarting > 0 {
|
|
return StateRestarting
|
|
}
|
|
// All running (and healthy) → stack is running
|
|
if running == total {
|
|
return StateRunning
|
|
}
|
|
// All stopped → stack is stopped
|
|
if stopped == total {
|
|
return StateStopped
|
|
}
|
|
// Mix (some running, some stopped) — report as running (partial)
|
|
if running > 0 {
|
|
return StateRunning
|
|
}
|
|
|
|
return StateStopped
|
|
}
|
|
|
|
// --- Stack accessors ---
|
|
|
|
func (m *Manager) GetStacks() []Stack {
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
result := make([]Stack, 0, len(m.stacks))
|
|
for _, s := range m.stacks {
|
|
result = append(result, deepCopyStack(s))
|
|
}
|
|
|
|
// Sort alphabetically by display name for consistent UI ordering
|
|
sort.Slice(result, func(i, j int) bool {
|
|
return result[i].Meta.DisplayName < result[j].Meta.DisplayName
|
|
})
|
|
|
|
return result
|
|
}
|
|
|
|
func (m *Manager) GetStack(name string) (*Stack, bool) {
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
s, ok := m.stacks[name]
|
|
if !ok {
|
|
return nil, false
|
|
}
|
|
cp := deepCopyStack(s)
|
|
return &cp, true
|
|
}
|
|
|
|
// deepCopyStack creates a deep copy of a Stack, including pointer fields.
|
|
func deepCopyStack(s *Stack) Stack {
|
|
cp := *s
|
|
|
|
// Deep-copy Containers slice
|
|
if s.Containers != nil {
|
|
cp.Containers = make([]ContainerInfo, len(s.Containers))
|
|
copy(cp.Containers, s.Containers)
|
|
}
|
|
|
|
// Deep-copy AppConfig pointer
|
|
if s.AppConfig != nil {
|
|
acCopy := *s.AppConfig
|
|
if s.AppConfig.Env != nil {
|
|
acCopy.Env = make(map[string]string, len(s.AppConfig.Env))
|
|
for k, v := range s.AppConfig.Env {
|
|
acCopy.Env[k] = v
|
|
}
|
|
}
|
|
if s.AppConfig.LockedFields != nil {
|
|
acCopy.LockedFields = make([]string, len(s.AppConfig.LockedFields))
|
|
copy(acCopy.LockedFields, s.AppConfig.LockedFields)
|
|
}
|
|
cp.AppConfig = &acCopy
|
|
}
|
|
|
|
// Deep-copy HealthProbe pointer
|
|
if s.HealthProbe != nil {
|
|
hpCopy := *s.HealthProbe
|
|
if s.HealthProbe.Details != nil {
|
|
hpCopy.Details = make([]HealthCheckDetail, len(s.HealthProbe.Details))
|
|
copy(hpCopy.Details, s.HealthProbe.Details)
|
|
}
|
|
cp.HealthProbe = &hpCopy
|
|
}
|
|
|
|
// Deep-copy Meta.DeployFields slice (including nested Options)
|
|
if s.Meta.DeployFields != nil {
|
|
cp.Meta.DeployFields = make([]DeployField, len(s.Meta.DeployFields))
|
|
copy(cp.Meta.DeployFields, s.Meta.DeployFields)
|
|
for i, f := range s.Meta.DeployFields {
|
|
if f.Options != nil {
|
|
cp.Meta.DeployFields[i].Options = make([]SelectOption, len(f.Options))
|
|
copy(cp.Meta.DeployFields[i].Options, f.Options)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Deep-copy Meta.OptionalConfig (slice of groups with nested Fields slices)
|
|
if s.Meta.OptionalConfig != nil {
|
|
cp.Meta.OptionalConfig = make([]OptionalConfigGroup, len(s.Meta.OptionalConfig))
|
|
copy(cp.Meta.OptionalConfig, s.Meta.OptionalConfig)
|
|
for i, g := range s.Meta.OptionalConfig {
|
|
if g.Fields != nil {
|
|
cp.Meta.OptionalConfig[i].Fields = make([]OptionalConfigField, len(g.Fields))
|
|
copy(cp.Meta.OptionalConfig[i].Fields, g.Fields)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Deep-copy Meta.HealthCheck pointer
|
|
if s.Meta.HealthCheck != nil {
|
|
hcCopy := *s.Meta.HealthCheck
|
|
if s.Meta.HealthCheck.Checks != nil {
|
|
hcCopy.Checks = make([]HealthCheckItem, len(s.Meta.HealthCheck.Checks))
|
|
copy(hcCopy.Checks, s.Meta.HealthCheck.Checks)
|
|
for i, c := range s.Meta.HealthCheck.Checks {
|
|
if c.Expect != nil {
|
|
eCopy := *c.Expect
|
|
hcCopy.Checks[i].Expect = &eCopy
|
|
}
|
|
}
|
|
}
|
|
cp.Meta.HealthCheck = &hcCopy
|
|
}
|
|
|
|
return cp
|
|
}
|
|
|
|
// --- Stack operations ---
|
|
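// StartStack, RestartStack and UpdateStack build the env (process env + DOMAIN + app.yaml values) and pass it to compose explicitly; StopStack and GetLogs get the same env through composeExec.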
|
|
|
|
func (m *Manager) StartStack(name string) error {
|
|
stack, ok := m.GetStack(name)
|
|
if !ok {
|
|
return fmt.Errorf("stack %q not found", name)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Starting stack: %s", name)
|
|
start := time.Now()
|
|
|
|
dir := filepath.Dir(stack.ComposePath)
|
|
env := m.stackEnv(dir)
|
|
|
|
if _, err := m.composeExecCustomEnv(dir, env, "up", "-d"); err != nil {
|
|
m.logger.Printf("[ERROR] Stack %s start failed after %.1fs: %v", name, time.Since(start).Seconds(), err)
|
|
return fmt.Errorf("starting stack %s: %w", name, err)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Stack %s started successfully (took %.1fs)", name, time.Since(start).Seconds())
|
|
m.logPostStartStatus(name, dir, env)
|
|
return m.RefreshStatus()
|
|
}
|
|
|
|
func (m *Manager) StopStack(name string) error {
|
|
if m.cfg.IsProtectedStack(name) {
|
|
return fmt.Errorf("stack %q is protected and cannot be stopped", name)
|
|
}
|
|
|
|
stack, ok := m.GetStack(name)
|
|
if !ok {
|
|
return fmt.Errorf("stack %q not found", name)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Stopping stack: %s", name)
|
|
start := time.Now()
|
|
dir := filepath.Dir(stack.ComposePath)
|
|
|
|
if _, err := m.composeExec(dir, "down"); err != nil {
|
|
m.logger.Printf("[ERROR] Stack %s stop failed after %.1fs: %v", name, time.Since(start).Seconds(), err)
|
|
return fmt.Errorf("stopping stack %s: %w", name, err)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Stack %s stopped successfully (took %.1fs)", name, time.Since(start).Seconds())
|
|
return m.RefreshStatus()
|
|
}
|
|
|
|
func (m *Manager) RestartStack(name string) error {
|
|
stack, ok := m.GetStack(name)
|
|
if !ok {
|
|
return fmt.Errorf("stack %q not found", name)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Restarting stack: %s", name)
|
|
start := time.Now()
|
|
dir := filepath.Dir(stack.ComposePath)
|
|
env := m.stackEnv(dir)
|
|
|
|
// Use "up -d" instead of bare "restart" so that env vars from app.yaml
|
|
// are injected and any template changes (new images, healthchecks) are
|
|
// picked up. Plain "docker compose restart" only sends SIGTERM+start
|
|
// to existing containers without re-reading the compose file or env.
|
|
if _, err := m.composeExecCustomEnv(dir, env, "up", "-d"); err != nil {
|
|
m.logger.Printf("[ERROR] Stack %s restart failed after %.1fs: %v", name, time.Since(start).Seconds(), err)
|
|
return fmt.Errorf("restarting stack %s: %w", name, err)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Stack %s restarted successfully (took %.1fs)", name, time.Since(start).Seconds())
|
|
m.logPostStartStatus(name, dir, env)
|
|
return m.RefreshStatus()
|
|
}
|
|
|
|
func (m *Manager) UpdateStack(name string) error {
|
|
stack, ok := m.GetStack(name)
|
|
if !ok {
|
|
return fmt.Errorf("stack %q not found", name)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Updating stack: %s", name)
|
|
start := time.Now()
|
|
dir := filepath.Dir(stack.ComposePath)
|
|
env := m.stackEnv(dir)
|
|
|
|
if m.isDebug() {
|
|
m.checkLocalImages(name, dir)
|
|
}
|
|
|
|
if _, err := m.composeExecCustomEnv(dir, env, "pull"); err != nil {
|
|
m.logger.Printf("[ERROR] Stack %s update (pull) failed after %.1fs: %v", name, time.Since(start).Seconds(), err)
|
|
return fmt.Errorf("pulling images for %s: %w", name, err)
|
|
}
|
|
|
|
if _, err := m.composeExecCustomEnv(dir, env, "up", "-d", "--remove-orphans"); err != nil {
|
|
m.logger.Printf("[ERROR] Stack %s update (up) failed after %.1fs: %v", name, time.Since(start).Seconds(), err)
|
|
return fmt.Errorf("recreating %s: %w", name, err)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Stack %s updated successfully (took %.1fs)", name, time.Since(start).Seconds())
|
|
m.logPostStartStatus(name, dir, env)
|
|
return m.RefreshStatus()
|
|
}
|
|
|
|
func (m *Manager) GetLogs(name string, lines int) (string, error) {
|
|
stack, ok := m.GetStack(name)
|
|
if !ok {
|
|
return "", fmt.Errorf("stack %q not found", name)
|
|
}
|
|
|
|
if lines <= 0 {
|
|
lines = 100
|
|
}
|
|
if lines > 1000 {
|
|
lines = 1000
|
|
}
|
|
|
|
m.logger.Printf("[DEBUG] Fetching logs for %s (tail %d)", name, lines)
|
|
|
|
dir := filepath.Dir(stack.ComposePath)
|
|
output, err := m.composeExec(dir, "logs", "--tail", fmt.Sprintf("%d", lines), "--no-color")
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] Failed to fetch logs for %s: %v", name, err)
|
|
return "", fmt.Errorf("getting logs for %s: %w", name, err)
|
|
}
|
|
|
|
if len(output) == 0 {
|
|
m.logger.Printf("[DEBUG] Logs result for %s: 0 bytes returned (empty)", name)
|
|
} else {
|
|
m.logger.Printf("[DEBUG] Logs result for %s: %d bytes returned", name, len(output))
|
|
}
|
|
return output, nil
|
|
}
|
|
|
|
// --- Env and compose helpers ---
|
|
|
|
// stackEnv builds the full OS env slice for a stack, merging app.yaml values.
|
|
func (m *Manager) stackEnv(stackDir string) []string {
|
|
env := os.Environ()
|
|
|
|
// Always inject DOMAIN
|
|
env = append(env, fmt.Sprintf("DOMAIN=%s", m.cfg.Customer.Domain))
|
|
|
|
// Load app.yaml if it exists — merge its env vars (decrypted for docker-compose)
|
|
appCfg := LoadAppConfigDecrypted(stackDir, m.encKey)
|
|
if appCfg != nil {
|
|
for k, v := range appCfg.Env {
|
|
env = append(env, fmt.Sprintf("%s=%s", k, v))
|
|
}
|
|
}
|
|
|
|
return env
|
|
}
|
|
|
|
func (m *Manager) composeExec(dir string, args ...string) (string, error) {
|
|
return m.composeExecCustomEnv(dir, nil, args...)
|
|
}
|
|
|
|
func (m *Manager) composeExecCustomEnv(dir string, env []string, args ...string) (string, error) {
|
|
var cmd *exec.Cmd
|
|
|
|
if m.composeCmd == "docker compose" {
|
|
fullArgs := append([]string{"compose"}, args...)
|
|
cmd = exec.Command("docker", fullArgs...)
|
|
} else {
|
|
cmd = exec.Command("docker-compose", args...)
|
|
}
|
|
|
|
cmd.Dir = dir
|
|
|
|
if env != nil {
|
|
cmd.Env = env
|
|
} else {
|
|
env = m.stackEnv(dir)
|
|
cmd.Env = env
|
|
}
|
|
|
|
// Log env var keys at debug level
|
|
if m.isDebug() {
|
|
var appKeys []string
|
|
sysCount := 0
|
|
for _, e := range env {
|
|
parts := strings.SplitN(e, "=", 2)
|
|
if len(parts) == 2 {
|
|
key := parts[0]
|
|
// Only log non-system env vars (skip PATH, HOME, etc.)
|
|
if strings.ToUpper(key) == key && !strings.HasPrefix(key, "_") {
|
|
appKeys = append(appKeys, key)
|
|
} else {
|
|
sysCount++
|
|
}
|
|
}
|
|
}
|
|
if len(appKeys) > 0 {
|
|
m.logger.Printf("[DEBUG] Env vars for compose: [%s] (%d app + %d system)",
|
|
strings.Join(appKeys, ", "), len(appKeys), sysCount)
|
|
}
|
|
}
|
|
|
|
var stdout, stderr bytes.Buffer
|
|
cmd.Stdout = &stdout
|
|
cmd.Stderr = &stderr
|
|
|
|
m.logger.Printf("[DEBUG] Running: %s %s (in %s)", m.composeCmd, strings.Join(args, " "), dir)
|
|
|
|
start := time.Now()
|
|
if err := cmd.Run(); err != nil {
|
|
elapsed := time.Since(start)
|
|
exitCode := -1
|
|
if exitErr, ok := err.(*exec.ExitError); ok {
|
|
exitCode = exitErr.ExitCode()
|
|
}
|
|
m.logger.Printf("[ERROR] Command failed: %s %s (in %s) — exit code %d (took %.1fs)",
|
|
m.composeCmd, strings.Join(args, " "), dir, exitCode, elapsed.Seconds())
|
|
if stdoutStr := truncateStr(stdout.String(), 500); stdoutStr != "" {
|
|
m.logger.Printf("[ERROR] stdout: %s", stdoutStr)
|
|
}
|
|
if stderrStr := truncateStr(stderr.String(), 500); stderrStr != "" {
|
|
m.logger.Printf("[ERROR] stderr: %s", stderrStr)
|
|
}
|
|
return stdout.String(), fmt.Errorf("exit code %d\nstderr: %s", exitCode, truncateStr(stderr.String(), 500))
|
|
}
|
|
|
|
m.logger.Printf("[DEBUG] Command completed: %s %s (took %.1fs)", m.composeCmd, strings.Join(args, " "), time.Since(start).Seconds())
|
|
return stdout.String(), nil
|
|
}
|
|
|
|
func (m *Manager) execCommand(name string, args ...string) (string, error) {
|
|
cmd := exec.Command(name, args...)
|
|
|
|
var stdout, stderr bytes.Buffer
|
|
cmd.Stdout = &stdout
|
|
cmd.Stderr = &stderr
|
|
|
|
if err := cmd.Run(); err != nil {
|
|
return "", fmt.Errorf("exec %s %s: %w\nstderr: %s", name, strings.Join(args, " "), err, stderr.String())
|
|
}
|
|
|
|
return stdout.String(), nil
|
|
}
|
|
|
|
// isDebug returns true if logging level is "debug".
|
|
func (m *Manager) isDebug() bool {
|
|
return m.cfg.Logging.Level == "debug"
|
|
}
|
|
|
|
// truncateStr trims surrounding whitespace from s and, if the result is
// longer than maxLen bytes, cuts it and appends "...".
//
// The cut never lands inside a multi-byte UTF-8 sequence: it is moved back to
// the previous rune boundary, so the kept part may be a few bytes shorter
// than maxLen but is always valid text. A negative maxLen is treated as 0
// instead of panicking.
func truncateStr(s string, maxLen int) string {
	s = strings.TrimSpace(s)
	if maxLen < 0 {
		maxLen = 0
	}
	if len(s) <= maxLen {
		return s
	}

	// len(s) > maxLen, so s[cut] is in range. Bytes of the form 10xxxxxx are
	// UTF-8 continuation bytes; step back until the cut sits on a rune start.
	cut := maxLen
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + "..."
}
|
|
|
|
// logPostStartStatus queries container states after a start/deploy operation
|
|
// and logs them. This runs asynchronously to avoid blocking the HTTP response.
|
|
func (m *Manager) logPostStartStatus(name, stackDir string, env []string) {
|
|
envCopy := make([]string, len(env))
|
|
copy(envCopy, env)
|
|
go func() {
|
|
time.Sleep(3 * time.Second)
|
|
|
|
output, err := m.composeExecCustomEnv(stackDir, envCopy, "ps", "-a", "--format", "table {{.Name}}\t{{.Image}}\t{{.State}}\t{{.Status}}")
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] Post-start status check failed for %s: %v", name, err)
|
|
return
|
|
}
|
|
|
|
lines := strings.Split(strings.TrimSpace(output), "\n")
|
|
if len(lines) <= 1 {
|
|
m.logger.Printf("[WARN] Post-start status for %s: no containers found", name)
|
|
return
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Stack %s post-start status:", name)
|
|
// Skip header line
|
|
for _, line := range lines[1:] {
|
|
m.logger.Printf("[INFO] %s", line)
|
|
}
|
|
}()
|
|
}
|
|
|
|
// checkLocalImages parses docker-compose.yml for image: lines and checks which
|
|
// are available locally. Informational only — logs results but never fails.
|
|
func (m *Manager) checkLocalImages(name, stackDir string) {
|
|
composePath := filepath.Join(stackDir, "docker-compose.yml")
|
|
data, err := os.ReadFile(composePath)
|
|
if err != nil {
|
|
composePath = filepath.Join(stackDir, "docker-compose.yaml")
|
|
data, err = os.ReadFile(composePath)
|
|
if err != nil {
|
|
m.logger.Printf("[DEBUG] Could not read compose file for image check: %v", err)
|
|
return
|
|
}
|
|
}
|
|
|
|
var images []string
|
|
for _, line := range strings.Split(string(data), "\n") {
|
|
trimmed := strings.TrimSpace(line)
|
|
if strings.HasPrefix(trimmed, "image:") {
|
|
img := strings.TrimSpace(strings.TrimPrefix(trimmed, "image:"))
|
|
img = strings.Trim(img, "\"'")
|
|
if img != "" && !strings.Contains(img, "${") {
|
|
images = append(images, img)
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(images) == 0 {
|
|
m.logger.Printf("[DEBUG] No static image references found in %s compose file", name)
|
|
return
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Deploying stack %s — checking %d images...", name, len(images))
|
|
for _, img := range images {
|
|
cmd := exec.Command("docker", "image", "inspect", img)
|
|
if err := cmd.Run(); err != nil {
|
|
m.logger.Printf("[DEBUG] %s — not found locally, will pull", img)
|
|
} else {
|
|
m.logger.Printf("[DEBUG] %s — found locally", img)
|
|
}
|
|
}
|
|
}
|
|
|
|
// --- Memory helpers ---
|
|
|
|
// ParseMemoryMB converts a memory size string into whole megabytes.
//
// Accepted forms (case-insensitive, surrounding whitespace ignored):
// "500M", "256MB", "1G", "2GB", "1.5G", and a bare number such as "768",
// which is taken as megabytes. Whitespace between the number and the unit
// ("1.5 G") is tolerated. One G counts as 1024 M. Fractions are truncated
// toward zero.
//
// It returns 0 for empty or unparseable input, and also for values that are
// negative, NaN, infinite or too large for an int, so a bad entry can never
// produce a negative or garbage amount.
func ParseMemoryMB(s string) int {
	const maxInt = int(^uint(0) >> 1)

	upper := strings.ToUpper(strings.TrimSpace(s))
	if upper == "" {
		return 0
	}

	// Two-letter suffixes come first so "GB" is not mistaken for "B"-less "G"
	// with a stray character left in the number.
	units := []struct {
		suffix    string
		mbPerUnit float64
	}{
		{"GB", 1024},
		{"G", 1024},
		{"MB", 1},
		{"M", 1},
	}

	number, scale := upper, 1.0 // no suffix: plain number, assumed MB
	for _, u := range units {
		if strings.HasSuffix(upper, u.suffix) {
			number = strings.TrimSpace(strings.TrimSuffix(upper, u.suffix))
			scale = u.mbPerUnit
			break
		}
	}

	val, err := strconv.ParseFloat(number, 64)
	if err != nil {
		return 0
	}

	// ParseFloat accepts "NaN" and "Inf"; the negated range check rejects
	// those (comparisons with NaN are false) along with negatives/overflow.
	mb := val * scale
	if !(mb >= 0 && mb < float64(maxInt)) {
		return 0
	}
	return int(mb)
}
|
|
|
|
// CommittedMemory returns the sum of mem_request and mem_limit across all
|
|
// deployed stacks that are currently running (or starting/unhealthy/restarting).
|
|
// Stopped and exited apps are excluded since they do not consume memory.
|
|
func (m *Manager) CommittedMemory() (requestMB int, limitMB int) {
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
for _, s := range m.stacks {
|
|
if !s.Deployed {
|
|
continue
|
|
}
|
|
if s.State == StateStopped || s.State == StateExited {
|
|
continue
|
|
}
|
|
requestMB += ParseMemoryMB(s.Meta.Resources.MemRequest)
|
|
limitMB += ParseMemoryMB(s.Meta.Resources.MemLimit)
|
|
}
|
|
return
|
|
}
|
|
|
|
// StackMemoryMB returns the mem_request for a specific stack.
|
|
func (m *Manager) StackMemoryMB(name string) int {
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
if s, ok := m.stacks[name]; ok {
|
|
return ParseMemoryMB(s.Meta.Resources.MemRequest)
|
|
}
|
|
return 0
|
|
}
|
|
|
|
// getCatalogTemplateSlugs reads the synced catalog cache and returns a set of
|
|
// template slugs (directory names) that have a docker-compose.yml.
|
|
func (m *Manager) getCatalogTemplateSlugs() map[string]bool {
|
|
cacheDir := filepath.Join(m.cfg.Paths.DataDir, "catalog-cache", "templates")
|
|
entries, err := os.ReadDir(cacheDir)
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] Cannot read catalog cache for orphan detection: %v", err)
|
|
return nil
|
|
}
|
|
slugs := make(map[string]bool, len(entries))
|
|
for _, e := range entries {
|
|
if e.IsDir() {
|
|
composePath := filepath.Join(cacheDir, e.Name(), "docker-compose.yml")
|
|
if _, err := os.Stat(composePath); err == nil {
|
|
slugs[e.Name()] = true
|
|
}
|
|
}
|
|
}
|
|
return slugs
|
|
} |