95c821deb2
Add detailed [DEBUG] logging to every controller module when logging.level is set to "debug". Each module with stateful debug uses SetDebug(bool) wired from main.go. Covers stacks, backup, cloudflare, integrations, system, monitor, settings, scheduler, web handlers, storage, metrics, API, selfupdate, and assets. Also includes the app export/import (.fab bundles) feature from v0.32.0 and its debug page integration. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1070 lines
30 KiB
Go
1070 lines
30 KiB
Go
package stacks
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
|
|
"gitea.dooplex.hu/admin/felhom-controller/internal/crypto"
|
|
)
|
|
|
|
// ContainerState is the string label used for the state of a single
// container and, after aggregation, of a whole stack.
type ContainerState string

// Known container and stack states.
const (
	// StateRunning means the container is up (healthy, or has no healthcheck).
	StateRunning ContainerState = "running"

	// StateStarting means the container is up but its health is still "starting".
	StateStarting ContainerState = "starting"

	// StateUnhealthy means the container is up but reported unhealthy.
	StateUnhealthy ContainerState = "unhealthy"

	// StateStopped means nothing is running for the stack or container.
	StateStopped ContainerState = "stopped"

	// StateRestarting means Docker is restarting the container.
	StateRestarting ContainerState = "restarting"

	// StateExited means the container process has exited.
	StateExited ContainerState = "exited"

	// StatePaused means the container is paused.
	StatePaused ContainerState = "paused"

	// StateUnknown is used for any Docker state that is not recognised.
	StateUnknown ContainerState = "unknown"

	// StateNotDeployed means the stack exists on disk but has not been deployed.
	StateNotDeployed ContainerState = "not_deployed"

	// StateDeploying means a compose up is in progress (image pull, etc.).
	StateDeploying ContainerState = "deploying"

	// StateOrphaned is the label for an orphaned stack.
	StateOrphaned ContainerState = "orphaned"
)
|
|
|
|
// ContainerInfo holds status info about a single container within a stack.
|
|
type ContainerInfo struct {
|
|
Name string `json:"name"`
|
|
Image string `json:"image"`
|
|
State ContainerState `json:"state"`
|
|
Status string `json:"status"` // e.g. "Up 3 hours (healthy)"
|
|
}
|
|
|
|
// HealthProbeResult holds the latest controller-side health probe result.
|
|
type HealthProbeResult struct {
|
|
Healthy bool `json:"healthy"`
|
|
LastCheck time.Time `json:"last_check"`
|
|
Details []HealthCheckDetail `json:"details"`
|
|
}
|
|
|
|
// HealthCheckDetail is the outcome of one individual health check item.
type HealthCheckDetail struct {
	// Type is the kind of check: "http", "api" or "tcp".
	Type string `json:"type"`

	// Target is what was probed, e.g. ":3456/api/v1/info".
	Target string `json:"target"`

	// Healthy reports whether this check passed.
	Healthy bool `json:"healthy"`

	// Status is the HTTP status code (for http/api checks).
	Status int `json:"status,omitempty"`

	// Latency is the formatted duration of the check, e.g. "45ms".
	Latency string `json:"latency"`

	// Error is the error message when the check is unhealthy.
	Error string `json:"error,omitempty"`
}
|
|
|
|
// Stack represents a docker compose stack on disk.
|
|
type Stack struct {
|
|
Name string `json:"name"`
|
|
Meta Metadata `json:"meta"`
|
|
ComposePath string `json:"compose_path"`
|
|
State ContainerState `json:"state"`
|
|
Deployed bool `json:"deployed"` // Has app.yaml with deployed=true
|
|
Protected bool `json:"protected"`
|
|
Orphaned bool `json:"orphaned"` // Deployed but no catalog template
|
|
Containers []ContainerInfo `json:"containers"`
|
|
AppConfig *AppConfig `json:"app_config,omitempty"`
|
|
Deploying bool `json:"deploying"` // compose up in progress
|
|
DeployError string `json:"deploy_error,omitempty"` // last async deploy error
|
|
HealthProbe *HealthProbeResult `json:"health_probe,omitempty"` // controller-side probe result
|
|
LastUpdated time.Time `json:"last_updated"`
|
|
}
|
|
|
|
// Manager handles all docker compose stack operations.
|
|
type Manager struct {
|
|
cfg *config.Config
|
|
logger *log.Logger
|
|
composeCmd string
|
|
stacks map[string]*Stack
|
|
mu sync.RWMutex
|
|
encKey []byte // AES-256 key for encrypting sensitive values in app.yaml
|
|
}
|
|
|
|
// NewManager creates a new stack manager.
|
|
func NewManager(cfg *config.Config, logger *log.Logger) (*Manager, error) {
|
|
composeCmd := cfg.Stacks.ComposeCommand
|
|
if composeCmd == "" {
|
|
composeCmd = detectComposeCommand()
|
|
}
|
|
if composeCmd == "" {
|
|
return nil, fmt.Errorf("docker compose not found (tried 'docker compose' and 'docker-compose')")
|
|
}
|
|
|
|
logger.Printf("[INFO] Using compose command: %s", composeCmd)
|
|
|
|
if err := os.MkdirAll(cfg.Paths.StacksDir, 0755); err != nil {
|
|
return nil, fmt.Errorf("creating stacks directory %s: %w", cfg.Paths.StacksDir, err)
|
|
}
|
|
|
|
return &Manager{
|
|
cfg: cfg,
|
|
logger: logger,
|
|
composeCmd: composeCmd,
|
|
stacks: make(map[string]*Stack),
|
|
}, nil
|
|
}
|
|
|
|
// SetEncryptionKey sets the AES-256 key used to encrypt/decrypt sensitive values in app.yaml.
|
|
func (m *Manager) SetEncryptionKey(key []byte) {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
m.encKey = key
|
|
}
|
|
|
|
// GetStacksBaseDir returns the base directory where stacks live.
|
|
func (m *Manager) GetStacksBaseDir() string {
|
|
return m.cfg.Paths.StacksDir
|
|
}
|
|
|
|
// MigrateEncryption re-saves app.yaml for deployed stacks that still have
|
|
// plaintext values in sensitive fields. Called once on startup.
|
|
func (m *Manager) MigrateEncryption() {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
if m.encKey == nil {
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] [stacks] MigrateEncryption: no encryption key set, skipping")
|
|
}
|
|
return
|
|
}
|
|
|
|
if m.isDebug() {
|
|
deployedCount := 0
|
|
for _, s := range m.stacks {
|
|
if s.Deployed {
|
|
deployedCount++
|
|
}
|
|
}
|
|
m.logger.Printf("[DEBUG] [stacks] MigrateEncryption: checking %d deployed stacks for plaintext sensitive values", deployedCount)
|
|
}
|
|
|
|
migrated := 0
|
|
for _, s := range m.stacks {
|
|
if !s.Deployed {
|
|
continue
|
|
}
|
|
stackDir := filepath.Dir(s.ComposePath)
|
|
appCfg := LoadAppConfig(stackDir)
|
|
if appCfg == nil {
|
|
continue
|
|
}
|
|
meta := LoadMetadata(stackDir)
|
|
sensitive := SensitiveEnvVars(&meta)
|
|
if len(sensitive) == 0 {
|
|
continue
|
|
}
|
|
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] [stacks] MigrateEncryption: checking stack %q (%d sensitive fields)", s.Name, len(sensitive))
|
|
}
|
|
|
|
needsMigration := false
|
|
for _, envVar := range sensitive {
|
|
if v, ok := appCfg.Env[envVar]; ok && v != "" && !crypto.IsEncrypted(v) {
|
|
needsMigration = true
|
|
break
|
|
}
|
|
}
|
|
if needsMigration {
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] [stacks] MigrateEncryption: stack %q needs migration — re-saving with encryption", s.Name)
|
|
}
|
|
if err := SaveAppConfig(stackDir, appCfg, m.encKey, sensitive); err != nil {
|
|
m.logger.Printf("[WARN] Encryption migration failed for %s: %v", s.Name, err)
|
|
} else {
|
|
migrated++
|
|
}
|
|
}
|
|
}
|
|
if migrated > 0 {
|
|
m.logger.Printf("[INFO] Encrypted sensitive values in %d app.yaml file(s)", migrated)
|
|
}
|
|
}
|
|
|
|
// toTitleCase upper-cases the first character of every whitespace-separated
// word in s and joins the words with single spaces. Leading, trailing and
// repeated whitespace is therefore collapsed.
//
// The first character is handled as a whole UTF-8 rune, so words that start
// with a multi-byte letter (for example "é" or "ö") are capitalized correctly
// instead of being split in the middle of the rune.
func toTitleCase(s string) string {
	words := strings.Fields(s)
	for i, w := range words {
		// Find the byte length of the first rune: it ends where the second
		// rune starts, or at the end of the word for single-rune words.
		firstLen := len(w)
		for idx := range w {
			if idx > 0 {
				firstLen = idx
				break
			}
		}
		words[i] = strings.ToUpper(w[:firstLen]) + w[firstLen:]
	}
	return strings.Join(words, " ")
}
|
|
|
|
// detectComposeCommand returns the compose invocation available on this host:
// "docker compose" when `docker compose version` runs successfully, otherwise
// "docker-compose" when that binary is on PATH, otherwise "".
func detectComposeCommand() string {
	pluginErr := exec.Command("docker", "compose", "version").Run()
	if pluginErr == nil {
		return "docker compose"
	}

	_, lookErr := exec.LookPath("docker-compose")
	if lookErr == nil {
		return "docker-compose"
	}

	return ""
}
|
|
|
|
// DeployedStackNames returns the names of all deployed stacks.
|
|
func (m *Manager) DeployedStackNames() []string {
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
var names []string
|
|
for name, stack := range m.stacks {
|
|
if stack.Deployed {
|
|
names = append(names, name)
|
|
}
|
|
}
|
|
return names
|
|
}
|
|
|
|
// ScanStacks discovers all compose stacks in the stacks directory.
|
|
func (m *Manager) ScanStacks() error {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
entries, err := os.ReadDir(m.cfg.Paths.StacksDir)
|
|
if err != nil {
|
|
return fmt.Errorf("reading stacks directory: %w", err)
|
|
}
|
|
|
|
found := make(map[string]bool)
|
|
|
|
for _, entry := range entries {
|
|
if !entry.IsDir() {
|
|
continue
|
|
}
|
|
|
|
name := entry.Name()
|
|
stackDir := filepath.Join(m.cfg.Paths.StacksDir, name)
|
|
composePath := filepath.Join(stackDir, "docker-compose.yml")
|
|
|
|
if _, err := os.Stat(composePath); os.IsNotExist(err) {
|
|
composePath = filepath.Join(stackDir, "docker-compose.yaml")
|
|
if _, err := os.Stat(composePath); os.IsNotExist(err) {
|
|
continue
|
|
}
|
|
}
|
|
|
|
found[name] = true
|
|
|
|
meta := LoadMetadata(stackDir)
|
|
appCfg := LoadAppConfig(stackDir)
|
|
deployed := appCfg != nil && appCfg.Deployed
|
|
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] [stacks] ScanStacks: found stack %q deployed=%v composePath=%s", name, deployed, composePath)
|
|
}
|
|
|
|
if existing, ok := m.stacks[name]; ok {
|
|
existing.ComposePath = composePath
|
|
existing.Meta = meta
|
|
existing.Protected = m.cfg.IsProtectedStack(name)
|
|
// Don't overwrite Deployed/AppConfig while an async deploy is in
|
|
// progress — the goroutine manages these fields (H3 fix).
|
|
if !existing.Deploying {
|
|
existing.Deployed = deployed
|
|
existing.AppConfig = appCfg
|
|
}
|
|
} else {
|
|
m.stacks[name] = &Stack{
|
|
Name: name,
|
|
Meta: meta,
|
|
ComposePath: composePath,
|
|
State: StateNotDeployed,
|
|
Deployed: deployed,
|
|
Protected: m.cfg.IsProtectedStack(name),
|
|
AppConfig: appCfg,
|
|
}
|
|
}
|
|
}
|
|
|
|
// Remove stacks no longer on disk
|
|
for name := range m.stacks {
|
|
if !found[name] {
|
|
delete(m.stacks, name)
|
|
}
|
|
}
|
|
|
|
// Detect orphaned stacks (deployed but no longer in catalog)
|
|
catalogTemplates := m.getCatalogTemplateSlugs()
|
|
if m.isDebug() {
|
|
if catalogTemplates != nil {
|
|
m.logger.Printf("[DEBUG] [stacks] ScanStacks: catalog has %d template slugs for orphan detection", len(catalogTemplates))
|
|
} else {
|
|
m.logger.Printf("[DEBUG] [stacks] ScanStacks: catalog templates unavailable, skipping orphan detection")
|
|
}
|
|
}
|
|
if catalogTemplates != nil {
|
|
orphanCount := 0
|
|
for _, stack := range m.stacks {
|
|
if stack.Protected || !stack.Deployed {
|
|
stack.Orphaned = false
|
|
continue
|
|
}
|
|
stack.Orphaned = !catalogTemplates[stack.Name]
|
|
if stack.Orphaned {
|
|
orphanCount++
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] [stacks] ScanStacks: stack %q is orphaned (deployed but not in catalog)", stack.Name)
|
|
}
|
|
}
|
|
}
|
|
if orphanCount > 0 {
|
|
m.logger.Printf("[INFO] Detected %d orphaned stack(s)", orphanCount)
|
|
}
|
|
}
|
|
|
|
deployedCount := 0
|
|
for _, s := range m.stacks {
|
|
if s.Deployed {
|
|
deployedCount++
|
|
}
|
|
}
|
|
m.logger.Printf("[INFO] Scanned stacks: %d found (%d deployed, %d available)",
|
|
len(m.stacks), deployedCount, len(m.stacks)-deployedCount)
|
|
return m.refreshStatusLocked()
|
|
}
|
|
|
|
// RefreshStatus updates container status for all known stacks.
|
|
func (m *Manager) RefreshStatus() error {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
return m.refreshStatusLocked()
|
|
}
|
|
|
|
func (m *Manager) refreshStatusLocked() error {
|
|
output, err := m.execCommand("docker", "ps", "-a",
|
|
"--format", "{{.Names}}\t{{.Image}}\t{{.State}}\t{{.Status}}\t{{.Label \"com.docker.compose.project\"}}",
|
|
"--no-trunc")
|
|
if err != nil {
|
|
return fmt.Errorf("docker ps: %w", err)
|
|
}
|
|
|
|
projectContainers := make(map[string][]ContainerInfo)
|
|
|
|
totalContainers := 0
|
|
for _, line := range strings.Split(strings.TrimSpace(output), "\n") {
|
|
if line == "" {
|
|
continue
|
|
}
|
|
parts := strings.SplitN(line, "\t", 5)
|
|
if len(parts) < 5 || parts[4] == "" {
|
|
continue
|
|
}
|
|
|
|
ci := ContainerInfo{
|
|
Name: parts[0],
|
|
Image: parts[1],
|
|
State: resolveContainerState(parts[2], parts[3]),
|
|
Status: parts[3],
|
|
}
|
|
projectContainers[parts[4]] = append(projectContainers[parts[4]], ci)
|
|
totalContainers++
|
|
}
|
|
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] [stacks] refreshStatusLocked: docker ps returned %d containers across %d projects", totalContainers, len(projectContainers))
|
|
}
|
|
|
|
for name, stack := range m.stacks {
|
|
containers, exists := projectContainers[name]
|
|
if !exists {
|
|
stack.Containers = nil
|
|
if stack.Deploying {
|
|
stack.State = StateDeploying
|
|
} else if stack.Deployed {
|
|
stack.State = StateStopped
|
|
} else {
|
|
stack.State = StateNotDeployed
|
|
}
|
|
} else {
|
|
stack.Containers = containers
|
|
stack.State = aggregateState(containers)
|
|
}
|
|
|
|
// Re-apply controller-side health probe results: if the last probe
|
|
// failed and Docker thinks the container is running, override to unhealthy.
|
|
if stack.State == StateRunning && stack.HealthProbe != nil && !stack.HealthProbe.Healthy {
|
|
stack.State = StateUnhealthy
|
|
}
|
|
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] [stacks] refreshStatusLocked: stack %q → state=%s containers=%d", name, stack.State, len(stack.Containers))
|
|
}
|
|
|
|
stack.LastUpdated = time.Now()
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// resolveContainerState determines the effective state by combining Docker's
|
|
// State field (running/exited/etc.) with the Status field that contains health info.
|
|
//
|
|
// Docker State: "running", "exited", "restarting", "paused", "created", "dead", "removing"
|
|
// Docker Status: "Up 3 hours (healthy)", "Up 9 seconds (health: starting)", "Up 2 min (unhealthy)"
|
|
func resolveContainerState(dockerState, dockerStatus string) ContainerState {
|
|
state := strings.ToLower(strings.TrimSpace(dockerState))
|
|
status := strings.ToLower(dockerStatus)
|
|
|
|
switch state {
|
|
case "running":
|
|
// Check health sub-status for containers with healthchecks
|
|
if strings.Contains(status, "(health: starting)") {
|
|
return StateStarting
|
|
}
|
|
if strings.Contains(status, "(unhealthy)") {
|
|
return StateUnhealthy
|
|
}
|
|
// "(healthy)" or no healthcheck = running
|
|
return StateRunning
|
|
|
|
case "exited":
|
|
return StateExited
|
|
case "restarting":
|
|
return StateRestarting
|
|
case "paused":
|
|
return StatePaused
|
|
case "created", "dead", "removing":
|
|
return StateStopped
|
|
default:
|
|
return StateUnknown
|
|
}
|
|
}
|
|
|
|
// aggregateState determines the overall stack state from its containers.
|
|
// Priority: unhealthy/starting > restarting > all-running > stopped
|
|
func aggregateState(containers []ContainerInfo) ContainerState {
|
|
if len(containers) == 0 {
|
|
return StateNotDeployed
|
|
}
|
|
|
|
running := 0
|
|
starting := 0
|
|
unhealthy := 0
|
|
restarting := 0
|
|
stopped := 0
|
|
|
|
for _, c := range containers {
|
|
switch c.State {
|
|
case StateRunning:
|
|
running++
|
|
case StateStarting:
|
|
starting++
|
|
case StateUnhealthy:
|
|
unhealthy++
|
|
case StateRestarting:
|
|
restarting++
|
|
case StateStopped, StateExited:
|
|
stopped++
|
|
}
|
|
}
|
|
|
|
total := len(containers)
|
|
|
|
// Any unhealthy → whole stack is unhealthy
|
|
if unhealthy > 0 {
|
|
return StateUnhealthy
|
|
}
|
|
// Any still starting → stack is starting
|
|
if starting > 0 {
|
|
return StateStarting
|
|
}
|
|
// Any restarting → stack is restarting
|
|
if restarting > 0 {
|
|
return StateRestarting
|
|
}
|
|
// All running (and healthy) → stack is running
|
|
if running == total {
|
|
return StateRunning
|
|
}
|
|
// All stopped → stack is stopped
|
|
if stopped == total {
|
|
return StateStopped
|
|
}
|
|
// Mix (some running, some stopped) — report as running (partial)
|
|
if running > 0 {
|
|
return StateRunning
|
|
}
|
|
|
|
return StateStopped
|
|
}
|
|
|
|
// --- Stack accessors ---
|
|
|
|
func (m *Manager) GetStacks() []Stack {
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
result := make([]Stack, 0, len(m.stacks))
|
|
for _, s := range m.stacks {
|
|
result = append(result, deepCopyStack(s))
|
|
}
|
|
|
|
// Sort alphabetically by display name for consistent UI ordering
|
|
sort.Slice(result, func(i, j int) bool {
|
|
return result[i].Meta.DisplayName < result[j].Meta.DisplayName
|
|
})
|
|
|
|
return result
|
|
}
|
|
|
|
func (m *Manager) GetStack(name string) (*Stack, bool) {
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
s, ok := m.stacks[name]
|
|
if !ok {
|
|
return nil, false
|
|
}
|
|
cp := deepCopyStack(s)
|
|
return &cp, true
|
|
}
|
|
|
|
// deepCopyStack creates a deep copy of a Stack, including pointer fields.
|
|
func deepCopyStack(s *Stack) Stack {
|
|
cp := *s
|
|
|
|
// Deep-copy Containers slice
|
|
if s.Containers != nil {
|
|
cp.Containers = make([]ContainerInfo, len(s.Containers))
|
|
copy(cp.Containers, s.Containers)
|
|
}
|
|
|
|
// Deep-copy AppConfig pointer
|
|
if s.AppConfig != nil {
|
|
acCopy := *s.AppConfig
|
|
if s.AppConfig.Env != nil {
|
|
acCopy.Env = make(map[string]string, len(s.AppConfig.Env))
|
|
for k, v := range s.AppConfig.Env {
|
|
acCopy.Env[k] = v
|
|
}
|
|
}
|
|
if s.AppConfig.LockedFields != nil {
|
|
acCopy.LockedFields = make([]string, len(s.AppConfig.LockedFields))
|
|
copy(acCopy.LockedFields, s.AppConfig.LockedFields)
|
|
}
|
|
cp.AppConfig = &acCopy
|
|
}
|
|
|
|
// Deep-copy HealthProbe pointer
|
|
if s.HealthProbe != nil {
|
|
hpCopy := *s.HealthProbe
|
|
if s.HealthProbe.Details != nil {
|
|
hpCopy.Details = make([]HealthCheckDetail, len(s.HealthProbe.Details))
|
|
copy(hpCopy.Details, s.HealthProbe.Details)
|
|
}
|
|
cp.HealthProbe = &hpCopy
|
|
}
|
|
|
|
// Deep-copy Meta.DeployFields slice (including nested Options)
|
|
if s.Meta.DeployFields != nil {
|
|
cp.Meta.DeployFields = make([]DeployField, len(s.Meta.DeployFields))
|
|
copy(cp.Meta.DeployFields, s.Meta.DeployFields)
|
|
for i, f := range s.Meta.DeployFields {
|
|
if f.Options != nil {
|
|
cp.Meta.DeployFields[i].Options = make([]SelectOption, len(f.Options))
|
|
copy(cp.Meta.DeployFields[i].Options, f.Options)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Deep-copy Meta.OptionalConfig (slice of groups with nested Fields slices)
|
|
if s.Meta.OptionalConfig != nil {
|
|
cp.Meta.OptionalConfig = make([]OptionalConfigGroup, len(s.Meta.OptionalConfig))
|
|
copy(cp.Meta.OptionalConfig, s.Meta.OptionalConfig)
|
|
for i, g := range s.Meta.OptionalConfig {
|
|
if g.Fields != nil {
|
|
cp.Meta.OptionalConfig[i].Fields = make([]OptionalConfigField, len(g.Fields))
|
|
copy(cp.Meta.OptionalConfig[i].Fields, g.Fields)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Deep-copy Meta.Integrations
|
|
if s.Meta.Integrations != nil {
|
|
cp.Meta.Integrations = make([]IntegrationDef, len(s.Meta.Integrations))
|
|
copy(cp.Meta.Integrations, s.Meta.Integrations)
|
|
}
|
|
|
|
// Deep-copy Meta.HealthCheck pointer
|
|
if s.Meta.HealthCheck != nil {
|
|
hcCopy := *s.Meta.HealthCheck
|
|
if s.Meta.HealthCheck.Checks != nil {
|
|
hcCopy.Checks = make([]HealthCheckItem, len(s.Meta.HealthCheck.Checks))
|
|
copy(hcCopy.Checks, s.Meta.HealthCheck.Checks)
|
|
for i, c := range s.Meta.HealthCheck.Checks {
|
|
if c.Expect != nil {
|
|
eCopy := *c.Expect
|
|
hcCopy.Checks[i].Expect = &eCopy
|
|
}
|
|
}
|
|
}
|
|
cp.Meta.HealthCheck = &hcCopy
|
|
}
|
|
|
|
return cp
|
|
}
|
|
|
|
// --- Stack operations ---
|
|
// StartStack, StopStack, etc. now load app.yaml env for deployed stacks.
|
|
|
|
func (m *Manager) StartStack(name string) error {
|
|
stack, ok := m.GetStack(name)
|
|
if !ok {
|
|
return fmt.Errorf("stack %q not found", name)
|
|
}
|
|
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] [stacks] StartStack %s: current state=%s deployed=%v", name, stack.State, stack.Deployed)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Starting stack: %s", name)
|
|
start := time.Now()
|
|
|
|
dir := filepath.Dir(stack.ComposePath)
|
|
env := m.stackEnv(dir)
|
|
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] [stacks] StartStack %s: prepared %d env vars for compose", name, len(env))
|
|
}
|
|
|
|
if _, err := m.composeExecCustomEnv(dir, env, "up", "-d"); err != nil {
|
|
m.logger.Printf("[ERROR] Stack %s start failed after %.1fs: %v", name, time.Since(start).Seconds(), err)
|
|
return fmt.Errorf("starting stack %s: %w", name, err)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Stack %s started successfully (took %.1fs)", name, time.Since(start).Seconds())
|
|
m.logPostStartStatus(name, dir, env)
|
|
|
|
// Clear stale health probe so refreshStatus won't re-apply an old unhealthy override.
|
|
// The next health-probes tick (≤10s) will run a fresh probe.
|
|
m.mu.Lock()
|
|
if s, ok := m.stacks[name]; ok {
|
|
s.HealthProbe = nil
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
return m.RefreshStatus()
|
|
}
|
|
|
|
func (m *Manager) StopStack(name string) error {
|
|
if m.cfg.IsProtectedStack(name) {
|
|
return fmt.Errorf("stack %q is protected and cannot be stopped", name)
|
|
}
|
|
|
|
stack, ok := m.GetStack(name)
|
|
if !ok {
|
|
return fmt.Errorf("stack %q not found", name)
|
|
}
|
|
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] [stacks] StopStack %s: current state=%s deployed=%v containers=%d", name, stack.State, stack.Deployed, len(stack.Containers))
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Stopping stack: %s", name)
|
|
start := time.Now()
|
|
dir := filepath.Dir(stack.ComposePath)
|
|
|
|
if _, err := m.composeExec(dir, "down"); err != nil {
|
|
m.logger.Printf("[ERROR] Stack %s stop failed after %.1fs: %v", name, time.Since(start).Seconds(), err)
|
|
return fmt.Errorf("stopping stack %s: %w", name, err)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Stack %s stopped successfully (took %.1fs)", name, time.Since(start).Seconds())
|
|
return m.RefreshStatus()
|
|
}
|
|
|
|
func (m *Manager) RestartStack(name string) error {
|
|
stack, ok := m.GetStack(name)
|
|
if !ok {
|
|
return fmt.Errorf("stack %q not found", name)
|
|
}
|
|
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] [stacks] RestartStack %s: current state=%s deployed=%v containers=%d", name, stack.State, stack.Deployed, len(stack.Containers))
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Restarting stack: %s", name)
|
|
start := time.Now()
|
|
dir := filepath.Dir(stack.ComposePath)
|
|
env := m.stackEnv(dir)
|
|
|
|
// Use "up -d" instead of bare "restart" so that env vars from app.yaml
|
|
// are injected and any template changes (new images, healthchecks) are
|
|
// picked up. Plain "docker compose restart" only sends SIGTERM+start
|
|
// to existing containers without re-reading the compose file or env.
|
|
if _, err := m.composeExecCustomEnv(dir, env, "up", "-d"); err != nil {
|
|
m.logger.Printf("[ERROR] Stack %s restart failed after %.1fs: %v", name, time.Since(start).Seconds(), err)
|
|
return fmt.Errorf("restarting stack %s: %w", name, err)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Stack %s restarted successfully (took %.1fs)", name, time.Since(start).Seconds())
|
|
m.logPostStartStatus(name, dir, env)
|
|
|
|
// Clear stale health probe so refreshStatus won't re-apply an old unhealthy override.
|
|
m.mu.Lock()
|
|
if s, ok := m.stacks[name]; ok {
|
|
s.HealthProbe = nil
|
|
}
|
|
m.mu.Unlock()
|
|
|
|
return m.RefreshStatus()
|
|
}
|
|
|
|
func (m *Manager) UpdateStack(name string) error {
|
|
stack, ok := m.GetStack(name)
|
|
if !ok {
|
|
return fmt.Errorf("stack %q not found", name)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Updating stack: %s", name)
|
|
start := time.Now()
|
|
dir := filepath.Dir(stack.ComposePath)
|
|
env := m.stackEnv(dir)
|
|
|
|
if m.isDebug() {
|
|
m.checkLocalImages(name, dir)
|
|
}
|
|
|
|
if _, err := m.composeExecCustomEnv(dir, env, "pull"); err != nil {
|
|
m.logger.Printf("[ERROR] Stack %s update (pull) failed after %.1fs: %v", name, time.Since(start).Seconds(), err)
|
|
return fmt.Errorf("pulling images for %s: %w", name, err)
|
|
}
|
|
|
|
if _, err := m.composeExecCustomEnv(dir, env, "up", "-d", "--remove-orphans"); err != nil {
|
|
m.logger.Printf("[ERROR] Stack %s update (up) failed after %.1fs: %v", name, time.Since(start).Seconds(), err)
|
|
return fmt.Errorf("recreating %s: %w", name, err)
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Stack %s updated successfully (took %.1fs)", name, time.Since(start).Seconds())
|
|
m.logPostStartStatus(name, dir, env)
|
|
return m.RefreshStatus()
|
|
}
|
|
|
|
func (m *Manager) GetLogs(name string, lines int) (string, error) {
|
|
stack, ok := m.GetStack(name)
|
|
if !ok {
|
|
return "", fmt.Errorf("stack %q not found", name)
|
|
}
|
|
|
|
if lines <= 0 {
|
|
lines = 100
|
|
}
|
|
if lines > 1000 {
|
|
lines = 1000
|
|
}
|
|
|
|
m.logger.Printf("[DEBUG] Fetching logs for %s (tail %d)", name, lines)
|
|
|
|
dir := filepath.Dir(stack.ComposePath)
|
|
output, err := m.composeExec(dir, "logs", "--tail", fmt.Sprintf("%d", lines), "--no-color")
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] Failed to fetch logs for %s: %v", name, err)
|
|
return "", fmt.Errorf("getting logs for %s: %w", name, err)
|
|
}
|
|
|
|
if len(output) == 0 {
|
|
m.logger.Printf("[DEBUG] Logs result for %s: 0 bytes returned (empty)", name)
|
|
} else {
|
|
m.logger.Printf("[DEBUG] Logs result for %s: %d bytes returned", name, len(output))
|
|
}
|
|
return output, nil
|
|
}
|
|
|
|
// --- Env and compose helpers ---
|
|
|
|
// stackEnv builds the full OS env slice for a stack, merging app.yaml values.
|
|
func (m *Manager) stackEnv(stackDir string) []string {
|
|
env := os.Environ()
|
|
|
|
// Always inject DOMAIN
|
|
env = append(env, fmt.Sprintf("DOMAIN=%s", m.cfg.Customer.Domain))
|
|
|
|
// Load app.yaml if it exists — merge its env vars (decrypted for docker-compose)
|
|
appCfg := LoadAppConfigDecrypted(stackDir, m.encKey)
|
|
if appCfg != nil {
|
|
for k, v := range appCfg.Env {
|
|
env = append(env, fmt.Sprintf("%s=%s", k, v))
|
|
}
|
|
}
|
|
|
|
return env
|
|
}
|
|
|
|
func (m *Manager) composeExec(dir string, args ...string) (string, error) {
|
|
return m.composeExecCustomEnv(dir, nil, args...)
|
|
}
|
|
|
|
func (m *Manager) composeExecCustomEnv(dir string, env []string, args ...string) (string, error) {
|
|
var cmd *exec.Cmd
|
|
|
|
if m.composeCmd == "docker compose" {
|
|
fullArgs := append([]string{"compose"}, args...)
|
|
cmd = exec.Command("docker", fullArgs...)
|
|
} else {
|
|
cmd = exec.Command("docker-compose", args...)
|
|
}
|
|
|
|
cmd.Dir = dir
|
|
|
|
if env != nil {
|
|
cmd.Env = env
|
|
} else {
|
|
env = m.stackEnv(dir)
|
|
cmd.Env = env
|
|
}
|
|
|
|
// Log env var keys at debug level
|
|
if m.isDebug() {
|
|
var appKeys []string
|
|
sysCount := 0
|
|
for _, e := range env {
|
|
parts := strings.SplitN(e, "=", 2)
|
|
if len(parts) == 2 {
|
|
key := parts[0]
|
|
// Only log non-system env vars (skip PATH, HOME, etc.)
|
|
if strings.ToUpper(key) == key && !strings.HasPrefix(key, "_") {
|
|
appKeys = append(appKeys, key)
|
|
} else {
|
|
sysCount++
|
|
}
|
|
}
|
|
}
|
|
if len(appKeys) > 0 {
|
|
m.logger.Printf("[DEBUG] Env vars for compose: [%s] (%d app + %d system)",
|
|
strings.Join(appKeys, ", "), len(appKeys), sysCount)
|
|
}
|
|
}
|
|
|
|
var stdout, stderr bytes.Buffer
|
|
cmd.Stdout = &stdout
|
|
cmd.Stderr = &stderr
|
|
|
|
m.logger.Printf("[DEBUG] Running: %s %s (in %s)", m.composeCmd, strings.Join(args, " "), dir)
|
|
|
|
start := time.Now()
|
|
if err := cmd.Run(); err != nil {
|
|
elapsed := time.Since(start)
|
|
exitCode := -1
|
|
if exitErr, ok := err.(*exec.ExitError); ok {
|
|
exitCode = exitErr.ExitCode()
|
|
}
|
|
m.logger.Printf("[ERROR] Command failed: %s %s (in %s) — exit code %d (took %.1fs)",
|
|
m.composeCmd, strings.Join(args, " "), dir, exitCode, elapsed.Seconds())
|
|
if stdoutStr := truncateStr(stdout.String(), 500); stdoutStr != "" {
|
|
m.logger.Printf("[ERROR] stdout: %s", stdoutStr)
|
|
}
|
|
if stderrStr := truncateStr(stderr.String(), 500); stderrStr != "" {
|
|
m.logger.Printf("[ERROR] stderr: %s", stderrStr)
|
|
}
|
|
return stdout.String(), fmt.Errorf("exit code %d\nstderr: %s", exitCode, truncateStr(stderr.String(), 500))
|
|
}
|
|
|
|
m.logger.Printf("[DEBUG] Command completed: %s %s (took %.1fs)", m.composeCmd, strings.Join(args, " "), time.Since(start).Seconds())
|
|
return stdout.String(), nil
|
|
}
|
|
|
|
func (m *Manager) execCommand(name string, args ...string) (string, error) {
|
|
cmd := exec.Command(name, args...)
|
|
|
|
var stdout, stderr bytes.Buffer
|
|
cmd.Stdout = &stdout
|
|
cmd.Stderr = &stderr
|
|
|
|
if err := cmd.Run(); err != nil {
|
|
return "", fmt.Errorf("exec %s %s: %w\nstderr: %s", name, strings.Join(args, " "), err, stderr.String())
|
|
}
|
|
|
|
return stdout.String(), nil
|
|
}
|
|
|
|
// isDebug returns true if logging level is "debug".
|
|
func (m *Manager) isDebug() bool {
|
|
return m.cfg.Logging.Level == "debug"
|
|
}
|
|
|
|
// truncateStr trims surrounding whitespace from s and, if the result is
// longer than maxLen bytes, shortens it to at most maxLen bytes and appends
// "...".
//
// The cut never lands inside a multi-byte UTF-8 character: it is moved back
// to the start of that character, so the truncated output stays valid UTF-8.
// A negative maxLen is treated as 0.
func truncateStr(s string, maxLen int) string {
	s = strings.TrimSpace(s)
	if maxLen < 0 {
		maxLen = 0
	}
	if len(s) <= maxLen {
		return s
	}

	// Step back over UTF-8 continuation bytes (10xxxxxx). A rune has at most
	// three of them, so the loop is bounded even for malformed input.
	cut := maxLen
	for back := 0; back < 3 && cut > 0 && s[cut]&0xC0 == 0x80; back++ {
		cut--
	}
	return s[:cut] + "..."
}
|
|
|
|
// logPostStartStatus queries container states after a start/deploy operation
|
|
// and logs them. This runs asynchronously to avoid blocking the HTTP response.
|
|
func (m *Manager) logPostStartStatus(name, stackDir string, env []string) {
|
|
envCopy := make([]string, len(env))
|
|
copy(envCopy, env)
|
|
go func() {
|
|
time.Sleep(3 * time.Second)
|
|
|
|
output, err := m.composeExecCustomEnv(stackDir, envCopy, "ps", "-a", "--format", "table {{.Name}}\t{{.Image}}\t{{.State}}\t{{.Status}}")
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] Post-start status check failed for %s: %v", name, err)
|
|
return
|
|
}
|
|
|
|
lines := strings.Split(strings.TrimSpace(output), "\n")
|
|
if len(lines) <= 1 {
|
|
m.logger.Printf("[WARN] Post-start status for %s: no containers found", name)
|
|
return
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Stack %s post-start status:", name)
|
|
// Skip header line
|
|
for _, line := range lines[1:] {
|
|
m.logger.Printf("[INFO] %s", line)
|
|
}
|
|
}()
|
|
}
|
|
|
|
// checkLocalImages parses docker-compose.yml for image: lines and checks which
|
|
// are available locally. Informational only — logs results but never fails.
|
|
func (m *Manager) checkLocalImages(name, stackDir string) {
|
|
composePath := filepath.Join(stackDir, "docker-compose.yml")
|
|
data, err := os.ReadFile(composePath)
|
|
if err != nil {
|
|
composePath = filepath.Join(stackDir, "docker-compose.yaml")
|
|
data, err = os.ReadFile(composePath)
|
|
if err != nil {
|
|
m.logger.Printf("[DEBUG] Could not read compose file for image check: %v", err)
|
|
return
|
|
}
|
|
}
|
|
|
|
var images []string
|
|
for _, line := range strings.Split(string(data), "\n") {
|
|
trimmed := strings.TrimSpace(line)
|
|
if strings.HasPrefix(trimmed, "image:") {
|
|
img := strings.TrimSpace(strings.TrimPrefix(trimmed, "image:"))
|
|
img = strings.Trim(img, "\"'")
|
|
if img != "" && !strings.Contains(img, "${") {
|
|
images = append(images, img)
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(images) == 0 {
|
|
m.logger.Printf("[DEBUG] No static image references found in %s compose file", name)
|
|
return
|
|
}
|
|
|
|
m.logger.Printf("[INFO] Deploying stack %s — checking %d images...", name, len(images))
|
|
for _, img := range images {
|
|
cmd := exec.Command("docker", "image", "inspect", img)
|
|
if err := cmd.Run(); err != nil {
|
|
m.logger.Printf("[DEBUG] %s — not found locally, will pull", img)
|
|
} else {
|
|
m.logger.Printf("[DEBUG] %s — found locally", img)
|
|
}
|
|
}
|
|
}
|
|
|
|
// --- Memory helpers ---
|
|
|
|
// ParseMemoryMB converts a memory string such as "500M", "1G", "1.5G",
// "1024MB" or "768" into whole megabytes. Suffixes are case-insensitive:
// G/GB are multiplied by 1024, M/MB are taken as-is, and a bare number is
// assumed to be megabytes. Fractions are truncated. Empty or unparseable
// input yields 0.
func ParseMemoryMB(s string) int {
	s = strings.TrimSpace(s)
	if s == "" {
		return 0
	}

	// Two-letter suffixes come first so "GB" is not mistaken for a value
	// ending in "B" followed by the "G" rule.
	units := []struct {
		suffix string
		mbPer  float64
	}{
		{"GB", 1024},
		{"G", 1024},
		{"MB", 1},
		{"M", 1},
	}

	upper := strings.ToUpper(s)
	for _, unit := range units {
		if !strings.HasSuffix(upper, unit.suffix) {
			continue
		}
		amount, err := strconv.ParseFloat(strings.TrimSuffix(upper, unit.suffix), 64)
		if err != nil {
			return 0
		}
		return int(amount * unit.mbPer)
	}

	// Plain number — assume MB
	amount, err := strconv.ParseFloat(s, 64)
	if err != nil {
		return 0
	}
	return int(amount)
}
|
|
|
|
// CommittedMemory returns the sum of mem_request and mem_limit across all
|
|
// deployed stacks that are currently running (or starting/unhealthy/restarting).
|
|
// Stopped and exited apps are excluded since they do not consume memory.
|
|
func (m *Manager) CommittedMemory() (requestMB int, limitMB int) {
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
for _, s := range m.stacks {
|
|
if !s.Deployed {
|
|
continue
|
|
}
|
|
if s.State == StateStopped || s.State == StateExited {
|
|
continue
|
|
}
|
|
requestMB += ParseMemoryMB(s.Meta.Resources.MemRequest)
|
|
limitMB += ParseMemoryMB(s.Meta.Resources.MemLimit)
|
|
}
|
|
return
|
|
}
|
|
|
|
// StackMemoryMB returns the mem_request for a specific stack.
|
|
func (m *Manager) StackMemoryMB(name string) int {
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
if s, ok := m.stacks[name]; ok {
|
|
return ParseMemoryMB(s.Meta.Resources.MemRequest)
|
|
}
|
|
return 0
|
|
}
|
|
|
|
// getCatalogTemplateSlugs reads the synced catalog cache and returns a set of
|
|
// template slugs (directory names) that have a docker-compose.yml.
|
|
func (m *Manager) getCatalogTemplateSlugs() map[string]bool {
|
|
cacheDir := filepath.Join(m.cfg.Paths.DataDir, "catalog-cache", "templates")
|
|
entries, err := os.ReadDir(cacheDir)
|
|
if err != nil {
|
|
m.logger.Printf("[WARN] Cannot read catalog cache for orphan detection: %v", err)
|
|
return nil
|
|
}
|
|
slugs := make(map[string]bool, len(entries))
|
|
for _, e := range entries {
|
|
if e.IsDir() {
|
|
composePath := filepath.Join(cacheDir, e.Name(), "docker-compose.yml")
|
|
if _, err := os.Stat(composePath); err == nil {
|
|
slugs[e.Name()] = true
|
|
}
|
|
}
|
|
}
|
|
if m.isDebug() {
|
|
m.logger.Printf("[DEBUG] [stacks] getCatalogTemplateSlugs: found %d template slugs in %s", len(slugs), cacheDir)
|
|
}
|
|
return slugs
|
|
} |