fix: deep bug hunt II — concurrency, security & optimization (25 files)
Critical: watchdog mutex panic safety, SetGeoAppOverride nil guard, SSD-only app DB restore fallback. High: double deploy race (atomic Deploying flag), delete/remove during deploy guard, ScanStacks overwrite protection, FileBrowser mount mutex, PushEvent history, PushOnce error handling, DB dump sync+close before rename, restic retry fresh context, encrypt failure logging, cross-backup path traversal validation, deepCopyStack completeness. Security: constant-time API key comparison, login rate limiting (5/min), git credential masking in logs, storage path prefix traversal fix. Concurrency: MigrateEncryption lock ordering, SubdomainInUse I/O outside lock, scheduler late-registered jobs, SQLite WAL verification, metrics shutdown context, telemetry scan error logging, asset sync lock scope. Optimization: streaming file copy for DB dumps, restic stats dedup, atomic infra config copy. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -14,6 +14,7 @@ import (
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"crypto/subtle"
|
||||
"strings"
|
||||
|
||||
"gitea.dooplex.hu/admin/felhom-controller/internal/api"
|
||||
@@ -688,7 +689,8 @@ func main() {
|
||||
}
|
||||
dc.HubConnectivityTest = func() (int, int64, error) {
|
||||
start := time.Now()
|
||||
resp, err := http.Get(cfg.Hub.URL + "/healthz")
|
||||
client := &http.Client{Timeout: 10 * time.Second}
|
||||
resp, err := client.Get(cfg.Hub.URL + "/healthz")
|
||||
latency := time.Since(start).Milliseconds()
|
||||
if err != nil {
|
||||
return 0, latency, err
|
||||
@@ -817,7 +819,7 @@ func selfUpdateAuthMiddleware(cfg *config.Config, webServer *web.Server, next ht
|
||||
// Check bearer token first (for external API calls: hub, build scripts)
|
||||
if auth := r.Header.Get("Authorization"); strings.HasPrefix(auth, "Bearer ") {
|
||||
token := strings.TrimPrefix(auth, "Bearer ")
|
||||
if token != "" && cfg.Hub.APIKey != "" && token == cfg.Hub.APIKey {
|
||||
if token != "" && cfg.Hub.APIKey != "" && subtle.ConstantTimeCompare([]byte(token), []byte(cfg.Hub.APIKey)) == 1 {
|
||||
next.ServeHTTP(w, r)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -49,6 +49,7 @@ type Syncer struct {
|
||||
logger *log.Logger
|
||||
debug bool
|
||||
mu sync.Mutex
|
||||
running bool
|
||||
status SyncStatus
|
||||
}
|
||||
|
||||
@@ -70,7 +71,18 @@ func New(hubURL, apiKey, assetsDir, fallbackDir string, logger *log.Logger, debu
|
||||
// changed/new files. It also removes local files not in the Hub manifest.
|
||||
func (s *Syncer) Sync(ctx context.Context) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if s.running {
|
||||
s.mu.Unlock()
|
||||
return fmt.Errorf("asset sync already in progress")
|
||||
}
|
||||
s.running = true
|
||||
s.mu.Unlock()
|
||||
|
||||
defer func() {
|
||||
s.mu.Lock()
|
||||
s.running = false
|
||||
s.mu.Unlock()
|
||||
}()
|
||||
|
||||
s.logger.Println("[INFO] Asset sync starting...")
|
||||
|
||||
@@ -145,13 +157,15 @@ func (s *Syncer) Sync(ctx context.Context) error {
|
||||
// 5. Save local manifest copy
|
||||
s.saveLocalManifest(manifest)
|
||||
|
||||
// 6. Update status
|
||||
// 6. Update status (under lock)
|
||||
s.mu.Lock()
|
||||
s.status = SyncStatus{
|
||||
LastSync: time.Now().UTC().Format(time.RFC3339),
|
||||
LastStatus: "ok",
|
||||
FileCount: len(manifest.Files),
|
||||
TotalBytes: totalBytes,
|
||||
}
|
||||
s.mu.Unlock()
|
||||
|
||||
s.logger.Printf("[INFO] Asset sync complete: %d downloaded, %d unchanged, %d removed (%d total files)",
|
||||
downloaded, skipped, removed, len(manifest.Files))
|
||||
@@ -187,6 +201,7 @@ func (s *Syncer) Status() SyncStatus {
|
||||
}
|
||||
|
||||
func (s *Syncer) setError(err error) {
|
||||
s.mu.Lock()
|
||||
s.status = SyncStatus{
|
||||
LastSync: time.Now().UTC().Format(time.RFC3339),
|
||||
LastStatus: "error",
|
||||
@@ -194,6 +209,7 @@ func (s *Syncer) setError(err error) {
|
||||
FileCount: s.status.FileCount,
|
||||
TotalBytes: s.status.TotalBytes,
|
||||
}
|
||||
s.mu.Unlock()
|
||||
s.logger.Printf("[WARN] Asset sync failed: %v", err)
|
||||
}
|
||||
|
||||
|
||||
@@ -68,6 +68,7 @@ type DriveRepoInfo struct {
|
||||
TotalSize string
|
||||
TotalSizeBytes int64
|
||||
SnapshotCount int
|
||||
LatestSnapshot *SnapshotInfo `json:"-"` // used for aggregation, not serialized
|
||||
}
|
||||
|
||||
// CrossDriveSummaryItem holds display data for one app's cross-drive backup.
|
||||
@@ -860,11 +861,33 @@ func (m *Manager) perDriveRepoStats() []DriveRepoInfo {
|
||||
TotalSize: stats.TotalSize,
|
||||
TotalSizeBytes: stats.TotalSizeBytes,
|
||||
SnapshotCount: stats.SnapshotCount,
|
||||
LatestSnapshot: stats.LatestSnapshot,
|
||||
})
|
||||
}
|
||||
return infos
|
||||
}
|
||||
|
||||
// aggregateFromDriveStats derives aggregate stats from already-computed per-drive stats,
|
||||
// avoiding a second round of restic subprocess calls.
|
||||
func aggregateFromDriveStats(drives []DriveRepoInfo, m *Manager) *RepoStats {
|
||||
agg := &RepoStats{}
|
||||
var totalBytes int64
|
||||
for _, d := range drives {
|
||||
agg.SnapshotCount += d.SnapshotCount
|
||||
totalBytes += d.TotalSizeBytes
|
||||
if d.LatestSnapshot != nil {
|
||||
if agg.LatestSnapshot == nil || d.LatestSnapshot.Time.After(agg.LatestSnapshot.Time) {
|
||||
agg.LatestSnapshot = d.LatestSnapshot
|
||||
}
|
||||
}
|
||||
}
|
||||
agg.TotalSizeBytes = totalBytes
|
||||
if totalBytes > 0 {
|
||||
agg.TotalSize = humanizeBytes(totalBytes)
|
||||
}
|
||||
return agg
|
||||
}
|
||||
|
||||
// aggregateRepoStats combines stats from all primary restic repos.
|
||||
func (m *Manager) aggregateRepoStats() *RepoStats {
|
||||
drives := m.activeDrives()
|
||||
@@ -1066,9 +1089,9 @@ func (m *Manager) RefreshCache(nextDBDump, nextBackup time.Time) {
|
||||
Retention: m.cfg.Backup.Retention,
|
||||
}
|
||||
|
||||
// Expensive calls (outside lock)
|
||||
status.RepoStats = m.aggregateRepoStats()
|
||||
// Expensive calls (outside lock) — compute per-drive stats once, derive aggregate
|
||||
status.PerDriveRepoStats = m.perDriveRepoStats()
|
||||
status.RepoStats = aggregateFromDriveStats(status.PerDriveRepoStats, m)
|
||||
|
||||
// Scan dump files from per-drive per-stack paths
|
||||
files := m.listAllDumpFiles()
|
||||
|
||||
@@ -3,6 +3,7 @@ package backup
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
@@ -475,12 +476,8 @@ func (r *CrossDriveRunner) copyStackDBDumps(stackName, destDir string) error {
|
||||
}
|
||||
src := filepath.Join(dumpDir, e.Name())
|
||||
dst := filepath.Join(destDir, e.Name())
|
||||
data, err := os.ReadFile(src)
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading %s: %w", e.Name(), err)
|
||||
}
|
||||
if err := os.WriteFile(dst, data, 0644); err != nil {
|
||||
return fmt.Errorf("writing %s: %w", e.Name(), err)
|
||||
if err := copyFile(src, dst); err != nil {
|
||||
return fmt.Errorf("copying %s: %w", e.Name(), err)
|
||||
}
|
||||
copied++
|
||||
}
|
||||
@@ -523,14 +520,11 @@ func (r *CrossDriveRunner) syncInfraConfig(ctx context.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
// Copy controller.yaml → _infra/controller.yaml
|
||||
// Copy controller.yaml → _infra/controller.yaml (atomic via copyFile)
|
||||
if _, err := os.Stat(r.controllerYAMLPath); err == nil {
|
||||
yamlDest := filepath.Join(infraDir, "controller.yaml")
|
||||
data, err := os.ReadFile(r.controllerYAMLPath)
|
||||
if err != nil {
|
||||
r.logger.Printf("[WARN] Cannot read controller.yaml for infra backup: %v", err)
|
||||
} else if err := os.WriteFile(yamlDest, data, 0644); err != nil {
|
||||
r.logger.Printf("[WARN] Cannot write controller.yaml to %s: %v", yamlDest, err)
|
||||
if err := copyFile(r.controllerYAMLPath, yamlDest); err != nil {
|
||||
r.logger.Printf("[WARN] Cannot copy controller.yaml to %s: %v", yamlDest, err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -628,6 +622,32 @@ func (r *CrossDriveRunner) updateStatus(stackName, status, errMsg string, durati
|
||||
})
|
||||
}
|
||||
|
||||
// copyFile copies src to dst using buffered streaming I/O (no full-file memory allocation).
|
||||
func copyFile(src, dst string) error {
|
||||
in, err := os.Open(src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer in.Close()
|
||||
|
||||
tmp := dst + ".tmp"
|
||||
out, err := os.Create(tmp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err := io.Copy(out, in); err != nil {
|
||||
out.Close()
|
||||
os.Remove(tmp)
|
||||
return err
|
||||
}
|
||||
if err := out.Close(); err != nil {
|
||||
os.Remove(tmp)
|
||||
return err
|
||||
}
|
||||
return os.Rename(tmp, dst)
|
||||
}
|
||||
|
||||
// dirSizeBytes returns the total byte size of all files under path.
|
||||
// H7: Walk errors are now propagated instead of silently swallowed.
|
||||
func dirSizeBytes(path string) (int64, error) {
|
||||
|
||||
@@ -256,6 +256,20 @@ func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.L
|
||||
return result
|
||||
}
|
||||
|
||||
// Close and sync tmpFile before rename to ensure data is flushed to disk (H8 fix).
|
||||
if err := tmpFile.Sync(); err != nil {
|
||||
os.Remove(tmpPath)
|
||||
result.Error = fmt.Errorf("syncing dump file: %w", err)
|
||||
result.Duration = time.Since(start)
|
||||
return result
|
||||
}
|
||||
if err := tmpFile.Close(); err != nil {
|
||||
os.Remove(tmpPath)
|
||||
result.Error = fmt.Errorf("closing dump file: %w", err)
|
||||
result.Duration = time.Since(start)
|
||||
return result
|
||||
}
|
||||
|
||||
// Check file size
|
||||
stat, err := os.Stat(tmpPath)
|
||||
if err != nil || stat.Size() == 0 {
|
||||
|
||||
@@ -145,8 +145,10 @@ func (r *ResticManager) Snapshot(repoPath string, paths []string, tags []string)
|
||||
if unlockErr := unlockCmd.Run(); unlockErr != nil {
|
||||
r.logger.Printf("[WARN] Restic unlock failed: %v", unlockErr)
|
||||
}
|
||||
// Retry once
|
||||
cmd = r.command(ctx, repoPath, args...)
|
||||
// Retry once with a fresh context (H9 fix — original may be nearly expired).
|
||||
retryCtx, retryCancel := context.WithTimeout(context.Background(), 30*time.Minute)
|
||||
defer retryCancel()
|
||||
cmd = r.command(retryCtx, repoPath, args...)
|
||||
out, err = cmd.Output()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("restic backup failed after unlock: %v", err)
|
||||
|
||||
@@ -154,11 +154,22 @@ func restoreUserData(ctx context.Context, app *RestorableApp, logger *log.Logger
|
||||
|
||||
// restoreDBDumps copies DB dump files from cross-drive backup to the primary dump dir.
|
||||
func restoreDBDumps(app *RestorableApp, logger *log.Logger) error {
|
||||
if app.DBDumpPath == "" || app.HDDPath == "" {
|
||||
if app.DBDumpPath == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
destDir := AppDBDumpPath(app.HDDPath, app.Name)
|
||||
// Use HDDPath for apps with HDD data, fall back to DrivePath (system data path)
|
||||
// for SSD-only apps whose DB dumps live under the system drive.
|
||||
drivePath := app.HDDPath
|
||||
if drivePath == "" {
|
||||
drivePath = app.DrivePath
|
||||
}
|
||||
if drivePath == "" {
|
||||
logger.Printf("[WARN] Cannot restore DB dumps for %s: no drive path", app.Name)
|
||||
return nil
|
||||
}
|
||||
|
||||
destDir := AppDBDumpPath(drivePath, app.Name)
|
||||
if err := os.MkdirAll(destDir, 0755); err != nil {
|
||||
return fmt.Errorf("creating dump dir: %w", err)
|
||||
}
|
||||
|
||||
@@ -54,25 +54,25 @@ func (c *MetricsCollector) loop(ctx context.Context) {
|
||||
defer ticker.Stop()
|
||||
|
||||
// Sample immediately on start
|
||||
c.sample()
|
||||
c.sampleWith(ctx)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
c.sample()
|
||||
c.sampleWith(ctx)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (c *MetricsCollector) sample() {
|
||||
func (c *MetricsCollector) sampleWith(ctx context.Context) {
|
||||
sys := c.sampleSystem()
|
||||
if err := c.store.InsertSystemMetrics(sys); err != nil {
|
||||
c.logger.Printf("[WARN] Failed to store system metrics: %v", err)
|
||||
}
|
||||
|
||||
containers := c.sampleContainers()
|
||||
containers := c.sampleContainers(ctx)
|
||||
if err := c.store.InsertContainerMetrics(containers); err != nil {
|
||||
c.logger.Printf("[WARN] Failed to store container metrics: %v", err)
|
||||
}
|
||||
@@ -96,8 +96,8 @@ func (c *MetricsCollector) sampleSystem() SystemSample {
|
||||
}
|
||||
}
|
||||
|
||||
func (c *MetricsCollector) sampleContainers() []ContainerSample {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
func (c *MetricsCollector) sampleContainers(parentCtx context.Context) []ContainerSample {
|
||||
ctx, cancel := context.WithTimeout(parentCtx, 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
cmd := exec.CommandContext(ctx, "docker", "stats", "--no-stream",
|
||||
|
||||
@@ -22,9 +22,18 @@ func NewMetricsStore(dbPath string, logger *log.Logger) (*MetricsStore, error) {
|
||||
return nil, fmt.Errorf("open sqlite: %w", err)
|
||||
}
|
||||
|
||||
// Set pragmas for performance and concurrency
|
||||
// Enable WAL mode and verify it took effect
|
||||
var walMode string
|
||||
if err := db.QueryRow("PRAGMA journal_mode=WAL").Scan(&walMode); err != nil {
|
||||
db.Close()
|
||||
return nil, fmt.Errorf("set WAL mode: %w", err)
|
||||
}
|
||||
if walMode != "wal" {
|
||||
db.Close()
|
||||
return nil, fmt.Errorf("WAL mode not supported on this filesystem (got %q)", walMode)
|
||||
}
|
||||
// Set remaining pragmas for performance and concurrency
|
||||
pragmas := []string{
|
||||
"PRAGMA journal_mode=WAL",
|
||||
"PRAGMA synchronous=NORMAL",
|
||||
"PRAGMA busy_timeout=5000",
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"log"
|
||||
"time"
|
||||
)
|
||||
|
||||
@@ -38,6 +39,7 @@ func (s *MetricsStore) GetContainerTelemetry(since time.Time) ([]ContainerTeleme
|
||||
var ct ContainerTelemetry
|
||||
if err := rows.Scan(&ct.ContainerName, &ct.MemoryAvgMB, &ct.MemoryPeakMB,
|
||||
&ct.CPUAvgPercent, &ct.SampleCount); err != nil {
|
||||
log.Printf("[WARN] telemetry row scan failed: %v", err)
|
||||
continue
|
||||
}
|
||||
results = append(results, ct)
|
||||
|
||||
@@ -232,9 +232,14 @@ func (w *StorageWatchdog) handleConnectedProbe(sp settings.StoragePath, state *p
|
||||
sp.Path, state.consecutiveFailures, probeThreshold, result.Err)
|
||||
|
||||
if state.consecutiveFailures >= probeThreshold {
|
||||
state.mu.Unlock()
|
||||
w.handleDisconnect(sp, state, result)
|
||||
state.mu.Lock() // re-acquire for deferred Unlock
|
||||
// Release state.mu before calling handleDisconnect (which re-acquires it
|
||||
// internally). Re-acquire afterwards so the deferred Unlock stays balanced.
|
||||
// Wrap in a func to guarantee re-lock even if handleDisconnect panics.
|
||||
func() {
|
||||
state.mu.Unlock()
|
||||
defer state.mu.Lock()
|
||||
w.handleDisconnect(sp, state, result)
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -210,11 +210,17 @@ func (n *Notifier) PushEvent(eventType, severity, message string, details interf
|
||||
n.logger.Printf("[DEBUG] PushEvent: %s pushed OK (HTTP %d)", eventType, resp.StatusCode)
|
||||
}
|
||||
n.logger.Printf("[INFO] Event pushed: %s (%s) — %s", eventType, severity, message)
|
||||
n.recordHistory(eventType, severity, message, resp.StatusCode, "")
|
||||
return
|
||||
}
|
||||
lastErr = fmt.Errorf("HTTP %d", resp.StatusCode)
|
||||
}
|
||||
n.logger.Printf("[WARN] Event push failed after 3 attempts (%s/%s): %v", eventType, severity, lastErr)
|
||||
errMsg := ""
|
||||
if lastErr != nil {
|
||||
errMsg = lastErr.Error()
|
||||
}
|
||||
n.recordHistory(eventType, severity, message, 0, errMsg)
|
||||
}()
|
||||
}
|
||||
|
||||
|
||||
@@ -219,6 +219,7 @@ func (p *Pusher) PushOnce(report *Report) error {
|
||||
|
||||
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
|
||||
p.logger.Printf("[INFO] Hub push-once sent (%d bytes)", len(data))
|
||||
return nil
|
||||
}
|
||||
return nil
|
||||
return fmt.Errorf("hub push-once: HTTP %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
@@ -41,12 +41,13 @@ type Job struct {
|
||||
|
||||
// Scheduler manages periodic and daily jobs.
|
||||
type Scheduler struct {
|
||||
mu sync.Mutex
|
||||
jobs []*Job
|
||||
logger *log.Logger
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
wg sync.WaitGroup
|
||||
mu sync.Mutex
|
||||
jobs []*Job
|
||||
logger *log.Logger
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
wg sync.WaitGroup
|
||||
started bool
|
||||
}
|
||||
|
||||
// New creates a new Scheduler.
|
||||
@@ -57,6 +58,7 @@ func New(logger *log.Logger) *Scheduler {
|
||||
}
|
||||
|
||||
// Every registers a periodic job that runs every interval.
|
||||
// If the scheduler is already started, the job's goroutine is launched immediately.
|
||||
func (s *Scheduler) Every(name string, interval time.Duration, fn JobFunc) {
|
||||
if interval <= 0 {
|
||||
s.logger.Printf("[ERROR] Periodic job %s has invalid interval %s — job not registered", name, interval)
|
||||
@@ -66,15 +68,22 @@ func (s *Scheduler) Every(name string, interval time.Duration, fn JobFunc) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
s.jobs = append(s.jobs, &Job{
|
||||
job := &Job{
|
||||
Name: name,
|
||||
Fn: fn,
|
||||
Interval: interval,
|
||||
})
|
||||
}
|
||||
s.jobs = append(s.jobs, job)
|
||||
s.logger.Printf("[SCHED] Registered periodic job: %s (every %s)", name, interval)
|
||||
|
||||
if s.started {
|
||||
s.wg.Add(1)
|
||||
go s.runPeriodicJob(job)
|
||||
}
|
||||
}
|
||||
|
||||
// Daily registers a job that runs once per day at the specified time (HH:MM) in Europe/Budapest timezone.
|
||||
// If the scheduler is already started, the job's goroutine is launched immediately.
|
||||
func (s *Scheduler) Daily(name string, timeStr string, fn JobFunc) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
@@ -85,14 +94,20 @@ func (s *Scheduler) Daily(name string, timeStr string, fn JobFunc) {
|
||||
return
|
||||
}
|
||||
|
||||
s.jobs = append(s.jobs, &Job{
|
||||
job := &Job{
|
||||
Name: name,
|
||||
Fn: fn,
|
||||
Schedule: timeStr,
|
||||
})
|
||||
}
|
||||
s.jobs = append(s.jobs, job)
|
||||
|
||||
nextRun := nextDailyRun(timeStr)
|
||||
s.logger.Printf("[SCHED] Daily job %s scheduled for %s", name, nextRun.Format("2006-01-02 15:04 MST"))
|
||||
|
||||
if s.started {
|
||||
s.wg.Add(1)
|
||||
go s.runDailyJob(job)
|
||||
}
|
||||
}
|
||||
|
||||
// Start begins running all registered jobs. Safe to call only once.
|
||||
@@ -104,6 +119,7 @@ func (s *Scheduler) Start(ctx context.Context) {
|
||||
return
|
||||
}
|
||||
s.ctx, s.cancel = context.WithCancel(ctx)
|
||||
s.started = true
|
||||
|
||||
for _, job := range s.jobs {
|
||||
if job.Interval > 0 {
|
||||
|
||||
@@ -873,9 +873,17 @@ func (s *Settings) SetGeoRestriction(geo *GeoRestriction) error {
|
||||
}
|
||||
|
||||
// SetGeoAppOverride sets a per-app geo override. Creates the GeoRestriction if nil.
|
||||
// Pass override=nil to remove the override (same as RemoveGeoAppOverride).
|
||||
func (s *Settings) SetGeoAppOverride(appName string, override *AppGeoOverride) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if override == nil {
|
||||
// nil override = remove (fall back to global)
|
||||
if s.GeoRestriction != nil && s.GeoRestriction.AppOverrides != nil {
|
||||
delete(s.GeoRestriction.AppOverrides, appName)
|
||||
}
|
||||
return s.save()
|
||||
}
|
||||
if s.GeoRestriction == nil {
|
||||
s.GeoRestriction = &GeoRestriction{AllowedCountries: []string{"HU"}}
|
||||
}
|
||||
|
||||
@@ -86,6 +86,11 @@ func (m *Manager) DeleteStack(name string, removeHDDData bool) (*DeleteResponse,
|
||||
return nil, fmt.Errorf("stack %q is not orphaned — only orphaned stacks can be deleted", name)
|
||||
}
|
||||
|
||||
// Must not be deploying (H2 fix)
|
||||
if stack.Deploying {
|
||||
return nil, fmt.Errorf("stack %q is currently being deployed — wait for deployment to finish", name)
|
||||
}
|
||||
|
||||
// Must be stopped (not running)
|
||||
if stack.State == StateRunning || stack.State == StateStarting || stack.State == StateRestarting {
|
||||
return nil, fmt.Errorf("stack %q is still running — stop it first before deleting", name)
|
||||
@@ -239,6 +244,11 @@ func (m *Manager) RemoveStack(name string, removeHDDData bool, backupPathsToRemo
|
||||
return nil, fmt.Errorf("stack %q is not deployed", name)
|
||||
}
|
||||
|
||||
// Must not be deploying (H2 fix)
|
||||
if stack.Deploying {
|
||||
return nil, fmt.Errorf("stack %q is currently being deployed — wait for deployment to finish", name)
|
||||
}
|
||||
|
||||
// Must be stopped (not running)
|
||||
if stack.State == StateRunning || stack.State == StateStarting || stack.State == StateRestarting {
|
||||
return nil, fmt.Errorf("stack %q is still running — stop it first before removing", name)
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"encoding/base64"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"log"
|
||||
"math/big"
|
||||
"os"
|
||||
"path/filepath"
|
||||
@@ -56,24 +57,35 @@ func validateSubdomain(s string) error {
|
||||
// SubdomainInUse checks if a subdomain is already used by any deployed stack
|
||||
// other than excludeStack.
|
||||
func (m *Manager) SubdomainInUse(subdomain, excludeStack string) bool {
|
||||
// Collect stack dirs and metadata under lock, then do I/O outside the lock.
|
||||
type candidate struct {
|
||||
dir string
|
||||
metaSubdomain string
|
||||
}
|
||||
var candidates []candidate
|
||||
|
||||
m.mu.RLock()
|
||||
defer m.mu.RUnlock()
|
||||
for name, stack := range m.stacks {
|
||||
if name == excludeStack || !stack.Deployed {
|
||||
continue
|
||||
}
|
||||
stackDir := filepath.Dir(stack.ComposePath)
|
||||
appCfg := LoadAppConfig(stackDir)
|
||||
candidates = append(candidates, candidate{
|
||||
dir: filepath.Dir(stack.ComposePath),
|
||||
metaSubdomain: stack.Meta.Subdomain,
|
||||
})
|
||||
}
|
||||
m.mu.RUnlock()
|
||||
|
||||
for _, c := range candidates {
|
||||
appCfg := LoadAppConfig(c.dir)
|
||||
if appCfg == nil {
|
||||
continue
|
||||
}
|
||||
// Check stored SUBDOMAIN first
|
||||
if sd, ok := appCfg.Env["SUBDOMAIN"]; ok && sd == subdomain {
|
||||
return true
|
||||
}
|
||||
// Backward compat: check metadata subdomain for apps without SUBDOMAIN in env
|
||||
if _, hasSub := appCfg.Env["SUBDOMAIN"]; !hasSub {
|
||||
if stack.Meta.Subdomain == subdomain {
|
||||
if c.metaSubdomain == subdomain {
|
||||
return true
|
||||
}
|
||||
}
|
||||
@@ -107,20 +119,43 @@ type DeployRequest struct {
|
||||
// 7. Run docker compose up -d with env vars
|
||||
// 8. Update in-memory stack state
|
||||
func (m *Manager) DeployStack(req DeployRequest) (string, error) {
|
||||
// Atomically check and set the Deploying flag to prevent concurrent deploys (H1 fix).
|
||||
m.mu.Lock()
|
||||
sPtr, sOk := m.stacks[req.StackName]
|
||||
if !sOk {
|
||||
m.mu.Unlock()
|
||||
return "", fmt.Errorf("stack %q not found", req.StackName)
|
||||
}
|
||||
if sPtr.Deploying {
|
||||
m.mu.Unlock()
|
||||
return "", fmt.Errorf("stack %q is already being deployed — please wait", req.StackName)
|
||||
}
|
||||
if sPtr.Deployed {
|
||||
m.mu.Unlock()
|
||||
return "", fmt.Errorf("stack %q is already deployed; use update instead", req.StackName)
|
||||
}
|
||||
sPtr.Deploying = true
|
||||
sPtr.DeployError = ""
|
||||
m.mu.Unlock()
|
||||
|
||||
// If any validation below fails, clear the Deploying flag.
|
||||
clearDeploying := func() {
|
||||
m.mu.Lock()
|
||||
if s, ok := m.stacks[req.StackName]; ok {
|
||||
s.Deploying = false
|
||||
}
|
||||
m.mu.Unlock()
|
||||
}
|
||||
|
||||
stack, ok := m.GetStack(req.StackName)
|
||||
if !ok {
|
||||
clearDeploying()
|
||||
return "", fmt.Errorf("stack %q not found", req.StackName)
|
||||
}
|
||||
|
||||
stackDir := filepath.Dir(stack.ComposePath)
|
||||
meta := LoadMetadata(stackDir)
|
||||
|
||||
// Check if already deployed
|
||||
existing := LoadAppConfig(stackDir)
|
||||
if existing != nil && existing.Deployed {
|
||||
return "", fmt.Errorf("stack %q is already deployed; use update instead", req.StackName)
|
||||
}
|
||||
|
||||
// --- Memory validation ---
|
||||
var deployWarning string
|
||||
reservedMB := m.cfg.System.ReservedMemoryMB
|
||||
@@ -136,6 +171,7 @@ func (m *Manager) DeployStack(req DeployRequest) (string, error) {
|
||||
|
||||
// Hard block: real used + new request exceeds usable memory
|
||||
if newReqMB > 0 && usedMB+newReqMB > usableMB {
|
||||
clearDeploying()
|
||||
return "", fmt.Errorf(
|
||||
"Nincs elég memória az alkalmazás telepítéséhez. "+
|
||||
"Szükséges: %d MB, Elérhető: %d MB "+
|
||||
@@ -187,12 +223,15 @@ func (m *Manager) DeployStack(req DeployRequest) (string, error) {
|
||||
value = field.Default
|
||||
}
|
||||
if err := validateSubdomain(value); err != nil {
|
||||
clearDeploying()
|
||||
return "", err
|
||||
}
|
||||
if reservedSubdomains[value] {
|
||||
clearDeploying()
|
||||
return "", fmt.Errorf("a(z) %q aldomain foglalt rendszer számára", value)
|
||||
}
|
||||
if m.SubdomainInUse(value, req.StackName) {
|
||||
clearDeploying()
|
||||
return "", fmt.Errorf("a(z) %q aldomain már használatban van egy másik alkalmazásban", value)
|
||||
}
|
||||
|
||||
@@ -204,6 +243,7 @@ func (m *Manager) DeployStack(req DeployRequest) (string, error) {
|
||||
} else {
|
||||
generated, err := generateValue(field.Generate)
|
||||
if err != nil {
|
||||
clearDeploying()
|
||||
return "", fmt.Errorf("generating %s: %w", field.EnvVar, err)
|
||||
}
|
||||
value = generated
|
||||
@@ -215,6 +255,7 @@ func (m *Manager) DeployStack(req DeployRequest) (string, error) {
|
||||
if userVal, ok := req.Values[field.EnvVar]; ok && userVal != "" {
|
||||
value = userVal
|
||||
} else {
|
||||
clearDeploying()
|
||||
return "", fmt.Errorf("a(z) %q mező kitöltése kötelező — használja a Generálás gombot vagy írjon be egy jelszót", field.Label)
|
||||
}
|
||||
|
||||
@@ -229,12 +270,14 @@ func (m *Manager) DeployStack(req DeployRequest) (string, error) {
|
||||
|
||||
// Validate required fields
|
||||
if field.Required && value == "" {
|
||||
clearDeploying()
|
||||
return "", fmt.Errorf("a(z) %q (%s) mező kitöltése kötelező", field.Label, field.EnvVar)
|
||||
}
|
||||
|
||||
// Validate path fields exist on disk (inside the container's filesystem)
|
||||
// Validate path fields exist on the host filesystem
|
||||
if field.Type == "path" && value != "" {
|
||||
if _, err := os.Stat(value); os.IsNotExist(err) {
|
||||
clearDeploying()
|
||||
return "", fmt.Errorf("path %q does not exist for field %q", value, field.Label)
|
||||
}
|
||||
}
|
||||
@@ -257,6 +300,7 @@ func (m *Manager) DeployStack(req DeployRequest) (string, error) {
|
||||
}
|
||||
|
||||
if err := SaveAppConfig(stackDir, appCfg, m.encKey, SensitiveEnvVars(&meta)); err != nil {
|
||||
clearDeploying()
|
||||
return "", fmt.Errorf("saving app config: %w", err)
|
||||
}
|
||||
|
||||
@@ -272,14 +316,12 @@ func (m *Manager) DeployStack(req DeployRequest) (string, error) {
|
||||
m.checkLocalImages(req.StackName, stackDir)
|
||||
}
|
||||
|
||||
// Update in-memory stack state and mark as deploying. The compose-up
|
||||
// runs in a goroutine so the API can return immediately and the UI
|
||||
// shows progress via polling (image pull can take 30-60s).
|
||||
// Update in-memory stack state. Deploying was already set at the top (H1 fix).
|
||||
// The compose-up runs in a goroutine so the API can return immediately
|
||||
// and the UI shows progress via polling (image pull can take 30-60s).
|
||||
m.mu.Lock()
|
||||
if s, ok := m.stacks[req.StackName]; ok {
|
||||
s.Deployed = true
|
||||
s.Deploying = true
|
||||
s.DeployError = ""
|
||||
s.AppConfig = appCfg
|
||||
}
|
||||
m.mu.Unlock()
|
||||
@@ -544,6 +586,9 @@ func SaveAppConfig(stackDir string, cfg *AppConfig, encKey []byte, sensitiveVars
|
||||
if enc, err := crypto.Encrypt(encKey, v); err == nil {
|
||||
saveCfg.Env[k] = enc
|
||||
continue
|
||||
} else {
|
||||
// H10 fix: log encryption failure — value will be saved in plaintext.
|
||||
log.Printf("[WARN] Failed to encrypt env var %q: %v — saving as plaintext", k, err)
|
||||
}
|
||||
}
|
||||
saveCfg.Env[k] = v
|
||||
|
||||
@@ -120,11 +120,11 @@ func (m *Manager) SetEncryptionKey(key []byte) {
|
||||
// MigrateEncryption re-saves app.yaml for deployed stacks that still have
|
||||
// plaintext values in sensitive fields. Called once on startup.
|
||||
func (m *Manager) MigrateEncryption() {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
if m.encKey == nil {
|
||||
return
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
migrated := 0
|
||||
for _, s := range m.stacks {
|
||||
@@ -233,8 +233,12 @@ func (m *Manager) ScanStacks() error {
|
||||
existing.ComposePath = composePath
|
||||
existing.Meta = meta
|
||||
existing.Protected = m.cfg.IsProtectedStack(name)
|
||||
existing.Deployed = deployed
|
||||
existing.AppConfig = appCfg
|
||||
// Don't overwrite Deployed/AppConfig while an async deploy is in
|
||||
// progress — the goroutine manages these fields (H3 fix).
|
||||
if !existing.Deploying {
|
||||
existing.Deployed = deployed
|
||||
existing.AppConfig = appCfg
|
||||
}
|
||||
} else {
|
||||
m.stacks[name] = &Stack{
|
||||
Name: name,
|
||||
@@ -507,10 +511,44 @@ func deepCopyStack(s *Stack) Stack {
|
||||
cp.HealthProbe = &hpCopy
|
||||
}
|
||||
|
||||
// Deep-copy Meta.DeployFields slice
|
||||
// Deep-copy Meta.DeployFields slice (including nested Options)
|
||||
if s.Meta.DeployFields != nil {
|
||||
cp.Meta.DeployFields = make([]DeployField, len(s.Meta.DeployFields))
|
||||
copy(cp.Meta.DeployFields, s.Meta.DeployFields)
|
||||
for i, f := range s.Meta.DeployFields {
|
||||
if f.Options != nil {
|
||||
cp.Meta.DeployFields[i].Options = make([]SelectOption, len(f.Options))
|
||||
copy(cp.Meta.DeployFields[i].Options, f.Options)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Deep-copy Meta.OptionalConfig (slice of groups with nested Fields slices)
|
||||
if s.Meta.OptionalConfig != nil {
|
||||
cp.Meta.OptionalConfig = make([]OptionalConfigGroup, len(s.Meta.OptionalConfig))
|
||||
copy(cp.Meta.OptionalConfig, s.Meta.OptionalConfig)
|
||||
for i, g := range s.Meta.OptionalConfig {
|
||||
if g.Fields != nil {
|
||||
cp.Meta.OptionalConfig[i].Fields = make([]OptionalConfigField, len(g.Fields))
|
||||
copy(cp.Meta.OptionalConfig[i].Fields, g.Fields)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Deep-copy Meta.HealthCheck pointer
|
||||
if s.Meta.HealthCheck != nil {
|
||||
hcCopy := *s.Meta.HealthCheck
|
||||
if s.Meta.HealthCheck.Checks != nil {
|
||||
hcCopy.Checks = make([]HealthCheckItem, len(s.Meta.HealthCheck.Checks))
|
||||
copy(hcCopy.Checks, s.Meta.HealthCheck.Checks)
|
||||
for i, c := range s.Meta.HealthCheck.Checks {
|
||||
if c.Expect != nil {
|
||||
eCopy := *c.Expect
|
||||
hcCopy.Checks[i].Expect = &eCopy
|
||||
}
|
||||
}
|
||||
}
|
||||
cp.Meta.HealthCheck = &hcCopy
|
||||
}
|
||||
|
||||
return cp
|
||||
|
||||
@@ -424,7 +424,7 @@ func (s *Syncer) runGitInDir(dir string, args ...string) error {
|
||||
cmd.Stdout = io.Discard
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
s.logger.Printf("[SYNC] Running: git %s", strings.Join(args, " "))
|
||||
s.logger.Printf("[SYNC] Running: git %s", maskRepoURL(strings.Join(args, " ")))
|
||||
|
||||
if err := cmd.Run(); err != nil {
|
||||
return fmt.Errorf("git %s: %w\nstderr: %s", strings.Join(args, " "), err, stderr.String())
|
||||
|
||||
@@ -17,9 +17,17 @@ type session struct {
|
||||
csrfToken string
|
||||
}
|
||||
|
||||
// loginAttempt tracks failed login attempts for rate limiting.
|
||||
type loginAttempt struct {
|
||||
count int
|
||||
lastFail time.Time
|
||||
}
|
||||
|
||||
const (
|
||||
sessionCookieName = "felhom_session"
|
||||
sessionMaxAge = 7 * 24 * time.Hour
|
||||
sessionCookieName = "felhom_session"
|
||||
sessionMaxAge = 7 * 24 * time.Hour
|
||||
loginMaxAttempts = 5
|
||||
loginWindowDuration = 1 * time.Minute
|
||||
)
|
||||
|
||||
// effectivePasswordHash returns the active password hash using the priority:
|
||||
@@ -98,13 +106,47 @@ func (s *Server) handleLogin(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
// Rate limit: check failed attempts from this IP
|
||||
ip := r.RemoteAddr
|
||||
if fwd := r.Header.Get("X-Forwarded-For"); fwd != "" {
|
||||
ip = strings.Split(fwd, ",")[0]
|
||||
}
|
||||
ip = strings.TrimSpace(ip)
|
||||
|
||||
s.loginAttemptMu.Lock()
|
||||
attempt := s.loginAttempts[ip]
|
||||
if attempt != nil && time.Since(attempt.lastFail) > loginWindowDuration {
|
||||
// Window expired — reset
|
||||
attempt = nil
|
||||
delete(s.loginAttempts, ip)
|
||||
}
|
||||
if attempt != nil && attempt.count >= loginMaxAttempts {
|
||||
s.loginAttemptMu.Unlock()
|
||||
s.logger.Printf("[WARN] Login rate limited for %s (%d attempts)", ip, attempt.count)
|
||||
s.renderLogin(w, "Túl sok sikertelen próbálkozás, próbálja újra 1 perc múlva", "")
|
||||
return
|
||||
}
|
||||
s.loginAttemptMu.Unlock()
|
||||
|
||||
effectiveHash := s.effectivePasswordHash()
|
||||
if err := bcrypt.CompareHashAndPassword([]byte(effectiveHash), []byte(password)); err != nil {
|
||||
s.logger.Printf("[WARN] Failed login from %s", r.RemoteAddr)
|
||||
s.loginAttemptMu.Lock()
|
||||
if s.loginAttempts[ip] == nil {
|
||||
s.loginAttempts[ip] = &loginAttempt{}
|
||||
}
|
||||
s.loginAttempts[ip].count++
|
||||
s.loginAttempts[ip].lastFail = time.Now()
|
||||
s.loginAttemptMu.Unlock()
|
||||
s.renderLogin(w, "Hibás jelszó", "")
|
||||
return
|
||||
}
|
||||
|
||||
// Successful login — clear rate limit for this IP
|
||||
s.loginAttemptMu.Lock()
|
||||
delete(s.loginAttempts, ip)
|
||||
s.loginAttemptMu.Unlock()
|
||||
|
||||
token := s.createSession()
|
||||
isSecure := r.TLS != nil || r.Header.Get("X-Forwarded-Proto") == "https"
|
||||
http.SetCookie(w, &http.Cookie{
|
||||
|
||||
@@ -965,6 +965,23 @@ func (s *Server) settingsCrossBackupHandler(w http.ResponseWriter, r *http.Reque
|
||||
schedule = existing.Schedule
|
||||
}
|
||||
|
||||
// Validate destination path against registered storage paths (H11 fix — matches API handler).
|
||||
if enabled && destPath != "" {
|
||||
registeredPaths := s.settings.GetStoragePaths()
|
||||
validDest := false
|
||||
for _, sp := range registeredPaths {
|
||||
if destPath == sp.Path {
|
||||
validDest = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !validDest {
|
||||
s.logger.Printf("[WARN] Cross-drive backup: rejected invalid dest path %q for %s", destPath, name)
|
||||
http.Redirect(w, r, "/stacks/"+name+"/deploy?flash_error="+url.QueryEscape("Érvénytelen célútvonal: "+destPath), http.StatusFound)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
var cfg *settings.CrossDriveBackup
|
||||
if destPath != "" || existing != nil {
|
||||
cfg = &settings.CrossDriveBackup{
|
||||
@@ -1543,6 +1560,10 @@ func (s *Server) settingsStorageLabelHandler(w http.ResponseWriter, r *http.Requ
|
||||
// SyncFileBrowserMounts regenerates FileBrowser's docker-compose.yml and config.yaml
|
||||
// with volume mounts and sources for all registered storage paths, then recreates the container.
|
||||
func (s *Server) SyncFileBrowserMounts() {
|
||||
// Prevent concurrent syncs — multiple callers can race on the same files (H5 fix).
|
||||
s.fileBrowserMu.Lock()
|
||||
defer s.fileBrowserMu.Unlock()
|
||||
|
||||
stackDir := "/opt/docker/stacks/filebrowser"
|
||||
composePath := stackDir + "/docker-compose.yml"
|
||||
|
||||
|
||||
@@ -41,10 +41,12 @@ type Server struct {
|
||||
encKey []byte // AES-256 key for decrypting app.yaml values
|
||||
tmpl *template.Template
|
||||
|
||||
sessions map[string]*session
|
||||
sessionsMu sync.RWMutex
|
||||
done chan struct{}
|
||||
closeOnce sync.Once
|
||||
sessions map[string]*session
|
||||
sessionsMu sync.RWMutex
|
||||
loginAttempts map[string]*loginAttempt
|
||||
loginAttemptMu sync.Mutex
|
||||
done chan struct{}
|
||||
closeOnce sync.Once
|
||||
|
||||
// Disk operation state (format/migrate jobs)
|
||||
diskJobMu sync.Mutex
|
||||
@@ -53,6 +55,9 @@ type Server struct {
|
||||
// Active raw mount for the attach wizard (empty when not in use)
|
||||
activeRawMount string
|
||||
|
||||
// Guard for FileBrowser sync — prevents concurrent file writes (H5 fix)
|
||||
fileBrowserMu sync.Mutex
|
||||
|
||||
// Drive migration
|
||||
driveMigrator *storage.DriveMigrator
|
||||
|
||||
@@ -90,6 +95,7 @@ func NewServer(cfg *config.Config, stackMgr *stacks.Manager, cpuCollector *syste
|
||||
logger: logger,
|
||||
version: version,
|
||||
sessions: make(map[string]*session),
|
||||
loginAttempts: make(map[string]*loginAttempt),
|
||||
done: make(chan struct{}),
|
||||
}
|
||||
s.loadTemplates()
|
||||
@@ -111,6 +117,7 @@ func NewServer(cfg *config.Config, stackMgr *stacks.Manager, cpuCollector *syste
|
||||
}
|
||||
|
||||
// SetEncryptionKey sets the AES-256 key used to decrypt app.yaml values for display.
|
||||
// Must be called before ListenAndServe (all Set* methods are init-time only).
|
||||
func (s *Server) SetEncryptionKey(key []byte) {
|
||||
s.encKey = key
|
||||
}
|
||||
|
||||
@@ -952,7 +952,7 @@ func (s *Server) storageAttachBrowseHandler(w http.ResponseWriter, r *http.Reque
|
||||
|
||||
// Security: validate path is under the raw mount staging area
|
||||
cleanPath := filepath.Clean(browsePath)
|
||||
if !strings.HasPrefix(cleanPath, storage.RawMountBase) {
|
||||
if cleanPath != storage.RawMountBase && !strings.HasPrefix(cleanPath, storage.RawMountBase+"/") {
|
||||
jsonError(w, "Érvénytelen útvonal", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user