v0.24.0 — Pre-testing observability: debug logging, diagnostic dump, startup self-test

- Add [DEBUG] logging across all modules (backup, storage, sync, selfupdate,
  monitor, notify, report, assets, setup) gated behind logging.level: "debug"
- Add /api/debug/dump endpoint returning full controller state JSON (debug only)
- Add startup self-test validating 9 subsystems (Docker, dirs, storage, hub,
  restic repos, metrics DB) with pass/warn/fail summary
- New packages: internal/selftest, internal/util
- Constructor/signature changes: debug bool params, logger params on
  RunHealthCheck and BuildReport, smart watchdog probe logging

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-21 18:32:26 +01:00
parent 6f02536243
commit be7803c0ac
30 changed files with 1281 additions and 67 deletions
+214
View File
@@ -2,6 +2,8 @@ package api
import (
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"io"
@@ -17,12 +19,16 @@ import (
"gitea.dooplex.hu/admin/felhom-controller/internal/backup"
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
"gitea.dooplex.hu/admin/felhom-controller/internal/metrics"
"gitea.dooplex.hu/admin/felhom-controller/internal/monitor"
"gitea.dooplex.hu/admin/felhom-controller/internal/notify"
"gitea.dooplex.hu/admin/felhom-controller/internal/report"
"gitea.dooplex.hu/admin/felhom-controller/internal/scheduler"
"gitea.dooplex.hu/admin/felhom-controller/internal/selfupdate"
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
"gitea.dooplex.hu/admin/felhom-controller/internal/stacks"
catalogsync "gitea.dooplex.hu/admin/felhom-controller/internal/sync"
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
"gitea.dooplex.hu/admin/felhom-controller/internal/web"
)
// Router handles all /api/* requests.
@@ -45,6 +51,13 @@ type Router struct {
// Asset syncer for on-demand Hub asset sync
assetsSyncer *assets.Syncer
// Debug dump dependencies (set via setters)
scheduler *scheduler.Scheduler
hubPusher *report.Pusher
alertMgr *web.AlertManager
version string
startTime time.Time
}
// SetAssetsSyncer sets the Hub asset syncer for on-demand sync triggers.
@@ -52,6 +65,15 @@ func (r *Router) SetAssetsSyncer(as *assets.Syncer) {
r.assetsSyncer = as
}
// SetDebugDumpDeps sets optional dependencies for the /api/debug/dump endpoint.
func (r *Router) SetDebugDumpDeps(sched *scheduler.Scheduler, pusher *report.Pusher, alertMgr *web.AlertManager, version string, startTime time.Time) {
r.scheduler = sched
r.hubPusher = pusher
r.alertMgr = alertMgr
r.version = version
r.startTime = startTime
}
func NewRouter(cfg *config.Config, configPath string, sett *settings.Settings, stackMgr *stacks.Manager, syncer *catalogsync.Syncer, cpuCollector *system.CPUCollector, backupMgr *backup.Manager, crossDrive *backup.CrossDriveRunner, metricsStore *metrics.MetricsStore, updater *selfupdate.Updater, notif *notify.Notifier, logger *log.Logger) *Router {
return &Router{cfg: cfg, configPath: configPath, sett: sett, stackMgr: stackMgr, syncer: syncer, cpuCollector: cpuCollector, backupMgr: backupMgr, crossDriveRunner: crossDrive, metricsStore: metricsStore, updater: updater, notifier: notif, logger: logger}
}
@@ -214,6 +236,10 @@ func (r *Router) ServeHTTP(w http.ResponseWriter, req *http.Request) {
case path == "/assets/status" && req.Method == http.MethodGet:
r.assetSyncStatus(w, req)
// GET /api/debug/dump — diagnostic JSON dump (debug mode only)
case path == "/debug/dump" && req.Method == http.MethodGet:
r.debugDump(w, req)
default:
writeJSON(w, http.StatusNotFound, apiResponse{OK: false, Error: "endpoint not found"})
}
@@ -1046,6 +1072,194 @@ func (r *Router) assetSyncStatus(w http.ResponseWriter, _ *http.Request) {
writeJSON(w, http.StatusOK, apiResponse{OK: true, Data: r.assetsSyncer.Status()})
}
// --- Debug dump handler ---
func (r *Router) debugDump(w http.ResponseWriter, req *http.Request) {
if r.cfg.Logging.Level != "debug" {
writeJSON(w, http.StatusNotFound, apiResponse{OK: false, Error: "endpoint not found"})
return
}
dump := make(map[string]interface{})
// Controller info
configHash := ""
if data, err := os.ReadFile(r.configPath); err == nil {
h := sha256.Sum256(data)
configHash = hex.EncodeToString(h[:])
}
dump["controller"] = map[string]interface{}{
"version": r.version,
"uptime_seconds": int(time.Since(r.startTime).Seconds()),
"config_hash": configHash,
"logging_level": r.cfg.Logging.Level,
"pid": os.Getpid(),
}
// Storage
storagePaths := r.sett.GetStoragePaths()
storageEntries := make([]map[string]interface{}, 0, len(storagePaths))
for _, sp := range storagePaths {
entry := map[string]interface{}{
"path": sp.Path,
"label": sp.Label,
"disconnected": sp.Disconnected,
"decommissioned": sp.Decommissioned,
}
if !sp.Disconnected && !sp.Decommissioned {
if di := system.GetDiskUsage(sp.Path); di != nil {
entry["total_gb"] = di.TotalGB
entry["used_gb"] = di.UsedGB
entry["used_percent"] = di.UsedPercent
}
}
storageEntries = append(storageEntries, entry)
}
dump["storage"] = storageEntries
// Stacks
allStacks := r.stackMgr.GetStacks()
deployed := 0
running := 0
stopped := 0
stackList := make([]map[string]interface{}, 0)
for _, s := range allStacks {
if !s.Deployed {
continue
}
deployed++
info := map[string]interface{}{
"name": s.Name,
"state": string(s.State),
}
if s.Meta.DisplayName != "" {
info["display_name"] = s.Meta.DisplayName
}
containerNames := make([]string, 0, len(s.Containers))
for _, c := range s.Containers {
containerNames = append(containerNames, c.Name)
switch c.State {
case stacks.StateRunning, stacks.StateStarting, stacks.StateUnhealthy:
running++
default:
stopped++
}
}
info["containers"] = containerNames
stackList = append(stackList, info)
}
dump["stacks"] = map[string]interface{}{
"deployed": deployed,
"running": running,
"stopped": stopped,
"list": stackList,
}
// Backup
if r.backupMgr != nil {
backupInfo := map[string]interface{}{
"enabled": true,
"running": r.backupMgr.IsRunning(),
}
dbDump, backupSt := r.backupMgr.GetStatus()
if dbDump != nil {
backupInfo["last_db_dump"] = map[string]interface{}{
"time": dbDump.LastRun,
"success": dbDump.Success,
}
}
if backupSt != nil {
backupInfo["last_backup"] = map[string]interface{}{
"time": backupSt.LastRun,
"success": backupSt.Success,
}
if backupSt.RepoStats != nil {
backupInfo["repo_size"] = backupSt.RepoStats.TotalSize
backupInfo["snapshot_count"] = backupSt.RepoStats.SnapshotCount
}
}
dump["backup"] = backupInfo
} else {
dump["backup"] = map[string]interface{}{"enabled": false}
}
// Hub
hubInfo := map[string]interface{}{
"url": r.cfg.Hub.URL,
"enabled": r.cfg.Hub.Enabled,
}
if r.hubPusher != nil {
s := r.hubPusher.GetStatus()
hubInfo["last_attempt"] = s.LastAttempt
hubInfo["last_success"] = s.LastSuccess
hubInfo["last_error"] = s.LastError
hubInfo["consecutive_failures"] = s.Consecutive
}
dump["hub"] = hubInfo
// Scheduler
if r.scheduler != nil {
jobs := r.scheduler.GetJobs()
jobList := make([]map[string]interface{}, 0, len(jobs))
for _, j := range jobs {
entry := map[string]interface{}{
"name": j.Name,
"running": j.Running,
}
if j.Interval > 0 {
entry["type"] = "every"
entry["interval"] = j.Interval.String()
} else if j.Schedule != "" {
entry["type"] = "daily"
entry["schedule"] = j.Schedule
}
if !j.LastRun.IsZero() {
entry["last_run"] = j.LastRun
}
if j.LastErr != nil {
entry["last_error"] = j.LastErr.Error()
}
jobList = append(jobList, entry)
}
dump["scheduler"] = jobList
}
// Health (fresh check)
healthReport := monitor.RunHealthCheck(r.cfg, r.cpuCollector, storagePaths, r.logger)
dump["health"] = map[string]interface{}{
"status": healthReport.Status,
"issues": healthReport.Issues,
"warnings": healthReport.Warnings,
}
// Notifications
prefs := r.sett.GetNotificationPrefs()
dump["notifications"] = map[string]interface{}{
"email": prefs.Email,
"enabled_events": prefs.EnabledEvents,
"cooldown_hours": prefs.CooldownHours,
}
// Self-update
if r.updater != nil {
status := r.updater.GetStatus()
dump["self_update"] = map[string]interface{}{
"enabled": true,
"auto": r.cfg.SelfUpdate.AutoUpdate,
"last_check": status.LastCheck,
}
} else {
dump["self_update"] = map[string]interface{}{"enabled": false}
}
// Alerts
if r.alertMgr != nil {
dump["alerts"] = r.alertMgr.GetAlerts()
}
writeJSON(w, http.StatusOK, dump)
}
func writeJSON(w http.ResponseWriter, status int, v interface{}) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
+18 -1
View File
@@ -47,12 +47,13 @@ type Syncer struct {
fallbackDir string // /usr/share/felhom/assets — baked-in fallback
httpClient *http.Client
logger *log.Logger
debug bool
mu sync.Mutex
status SyncStatus
}
// New creates a Syncer that downloads assets from the Hub.
func New(hubURL, apiKey, assetsDir, fallbackDir string, logger *log.Logger) *Syncer {
func New(hubURL, apiKey, assetsDir, fallbackDir string, logger *log.Logger, debug bool) *Syncer {
return &Syncer{
hubURL: strings.TrimSuffix(hubURL, "/"),
apiKey: apiKey,
@@ -60,6 +61,7 @@ func New(hubURL, apiKey, assetsDir, fallbackDir string, logger *log.Logger) *Syn
fallbackDir: fallbackDir,
httpClient: &http.Client{Timeout: 60 * time.Second},
logger: logger,
debug: debug,
status: SyncStatus{LastStatus: "never"},
}
}
@@ -78,11 +80,17 @@ func (s *Syncer) Sync(ctx context.Context) error {
}
// 1. Fetch Hub manifest
if s.debug {
s.logger.Printf("[DEBUG] Asset sync: fetching manifest from %s/api/v1/assets/manifest", s.hubURL)
}
manifest, err := s.fetchManifest(ctx)
if err != nil {
s.setError(fmt.Errorf("fetch manifest: %w", err))
return err
}
if s.debug {
s.logger.Printf("[DEBUG] Asset sync: manifest has %d files", len(manifest.Files))
}
// 2. Build local hash map
localHashes, err := s.buildLocalHashes()
@@ -90,6 +98,9 @@ func (s *Syncer) Sync(ctx context.Context) error {
s.setError(fmt.Errorf("scan local assets: %w", err))
return err
}
if s.debug {
s.logger.Printf("[DEBUG] Asset sync: %d local files found", len(localHashes))
}
// 3. Download changed/new files
hubFiles := make(map[string]bool, len(manifest.Files))
@@ -105,6 +116,9 @@ func (s *Syncer) Sync(ctx context.Context) error {
continue
}
if s.debug {
s.logger.Printf("[DEBUG] Asset sync: downloading %s (remote sha256=%s)", entry.Filename, entry.SHA256[:12]+"...")
}
if err := s.downloadFile(ctx, entry.Filename); err != nil {
s.logger.Printf("[WARN] Failed to download asset %s: %v", entry.Filename, err)
continue
@@ -117,6 +131,9 @@ func (s *Syncer) Sync(ctx context.Context) error {
for name := range localHashes {
if !hubFiles[name] {
path := filepath.Join(s.assetsDir, name)
if s.debug {
s.logger.Printf("[DEBUG] Asset sync: removing stale file %s", name)
}
if err := os.Remove(path); err != nil {
s.logger.Printf("[WARN] Failed to remove stale asset %s: %v", name, err)
} else {
+65 -5
View File
@@ -190,6 +190,15 @@ func (m *Manager) groupStacksByDrive() map[string][]StackSummary {
drive := m.GetAppDrivePath(stack.Name)
result[drive] = append(result[drive], stack)
}
if m.isDebug() {
for drive, stacks := range result {
names := make([]string, len(stacks))
for i, s := range stacks {
names[i] = s.Name
}
m.logger.Printf("[DEBUG] groupStacksByDrive: %s → [%s]", drive, strings.Join(names, ", "))
}
}
return result
}
@@ -197,10 +206,18 @@ func (m *Manager) groupStacksByDrive() map[string][]StackSummary {
func (m *Manager) activeDrives() []string {
groups := m.groupStacksByDrive()
var drives []string
var disconnected []string
for d := range groups {
if m.settings != nil && (m.settings.IsDisconnected(d) || m.settings.IsDecommissioned(d)) {
disconnected = append(disconnected, d)
}
drives = append(drives, d)
}
sort.Strings(drives)
if m.isDebug() {
m.logger.Printf("[DEBUG] activeDrives: %d total (%s), %d disconnected/decommissioned",
len(drives), strings.Join(drives, ", "), len(disconnected))
}
return drives
}
@@ -218,7 +235,7 @@ func (m *Manager) runDBDumpsInternal(ctx context.Context) error {
start := time.Now()
m.logger.Printf("[INFO] Starting database dump run")
dbs, err := DiscoverDatabases(ctx, m.logger)
dbs, err := DiscoverDatabases(ctx, m.logger, m.isDebug())
if err != nil {
m.logger.Printf("[ERROR] Database discovery failed: %v", err)
return err
@@ -261,7 +278,7 @@ func (m *Manager) runDBDumpsInternal(ctx context.Context) error {
dumpDir := AppDBDumpPath(drivePath, db.StackName)
result := DumpOne(ctx, db, dumpDir, m.logger)
result := DumpOne(ctx, db, dumpDir, m.logger, m.isDebug())
results = append(results, result)
if result.Error != nil {
@@ -354,6 +371,9 @@ func (m *Manager) runBackupInternal(ctx context.Context) error {
driveCount := 0
for drivePath, stacks := range driveStacks {
if m.isDebug() {
m.logger.Printf("[DEBUG] runBackupInternal: processing drive %s (%d stacks)", drivePath, len(stacks))
}
result, err := m.backupDrive(ctx, drivePath, stacks, infraPaths)
if err != nil {
anyErr = err
@@ -473,6 +493,13 @@ func (m *Manager) backupDrive(ctx context.Context, drivePath string, stacks []St
// Deduplicate paths
paths = dedup(paths)
if m.isDebug() {
m.logger.Printf("[DEBUG] backupDrive %s: repo=%s, %d include paths:", drivePath, repoPath, len(paths))
for _, p := range paths {
m.logger.Printf("[DEBUG] %s", p)
}
}
tags := []string{"felhom", m.cfg.Customer.ID, filepath.Base(drivePath)}
m.logger.Printf("[INFO] Backing up drive %s (%d apps, %d paths)", drivePath, len(stacks), len(paths))
@@ -549,15 +576,27 @@ func (m *Manager) RunIntegrityCheck(ctx context.Context) error {
return nil
}
if m.isDebug() {
m.logger.Printf("[DEBUG] RunIntegrityCheck: checking %d drives", len(drives))
}
var checkErr error
for _, drive := range drives {
repoPath := PrimaryResticRepoPath(drive)
if !m.restic.RepoExists(repoPath) {
if m.isDebug() {
m.logger.Printf("[DEBUG] RunIntegrityCheck: skipping %s (repo does not exist)", repoPath)
}
continue
}
if m.isDebug() {
m.logger.Printf("[DEBUG] RunIntegrityCheck: checking repo %s", repoPath)
}
if err := m.restic.Check(repoPath); err != nil {
m.logger.Printf("[ERROR] Restic check failed for %s: %v", repoPath, err)
checkErr = err
} else if m.isDebug() {
m.logger.Printf("[DEBUG] RunIntegrityCheck: repo %s OK", repoPath)
}
}
@@ -587,12 +626,28 @@ func (m *Manager) RunFullBackup(ctx context.Context) error {
}
defer m.releaseRunning()
if m.isDebug() {
drives := m.activeDrives()
driveStacks := m.groupStacksByDrive()
totalStacks := 0
for _, s := range driveStacks {
totalStacks += len(s)
}
m.logger.Printf("[DEBUG] RunFullBackup: starting full backup — %d active drives, %d stacks", len(drives), totalStacks)
}
// Step 1: DB dumps
if m.isDebug() {
m.logger.Printf("[DEBUG] RunFullBackup: phase 1 — database dumps")
}
if err := m.runDBDumpsInternal(ctx); err != nil {
m.logger.Printf("[WARN] DB dump had errors, continuing with backup anyway")
}
// Step 2: Restic backup
if m.isDebug() {
m.logger.Printf("[DEBUG] RunFullBackup: phase 2 — restic snapshots")
}
return m.runBackupInternal(ctx)
}
@@ -737,7 +792,7 @@ func (m *Manager) GetStackHDDMounts(name string) []string {
// DumpStackDB runs a database dump for containers belonging to a specific stack.
// Dumps to the stack's home drive: <drive>/backups/primary/<stack>/db-dumps/.
func (m *Manager) DumpStackDB(ctx context.Context, stackName string) error {
dbs, err := DiscoverDatabases(ctx, m.logger)
dbs, err := DiscoverDatabases(ctx, m.logger, m.isDebug())
if err != nil {
return fmt.Errorf("database discovery failed: %w", err)
}
@@ -762,7 +817,7 @@ func (m *Manager) DumpStackDB(ctx context.Context, stackName string) error {
m.logger.Printf("[INFO] Running pre-backup DB dump for %s (%d database(s)) → %s", stackName, len(stackDBs), dumpDir)
for _, db := range stackDBs {
result := DumpOne(ctx, db, dumpDir, m.logger)
result := DumpOne(ctx, db, dumpDir, m.logger, m.isDebug())
if result.Error != nil {
return fmt.Errorf("DB dump failed for %s: %w", result.DB.ContainerName, result.Error)
}
@@ -1019,7 +1074,7 @@ func (m *Manager) RefreshCache(nextDBDump, nextBackup time.Time) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
if dbs, err := DiscoverDatabases(ctx, m.logger); err == nil {
if dbs, err := DiscoverDatabases(ctx, m.logger, m.isDebug()); err == nil {
status.DiscoveredDBs = dbs
}
@@ -1172,6 +1227,11 @@ func (m *Manager) GetFullStatus(nextDBDump, nextBackup time.Time) *FullBackupSta
}
}
// isDebug returns true if logging level is "debug".
func (m *Manager) isDebug() bool {
return m.cfg.Logging.Level == "debug"
}
func dbNames(dbs []DiscoveredDB) string {
var names []string
for _, db := range dbs {
+101 -2
View File
@@ -13,6 +13,7 @@ import (
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
"gitea.dooplex.hu/admin/felhom-controller/internal/util"
)
// DBDumper can run a database dump for a specific stack.
@@ -29,12 +30,13 @@ type CrossDriveRunner struct {
stacksDir string // path to stacks dir (for infra backup)
controllerYAMLPath string // path to controller.yaml (for infra backup)
logger *log.Logger
debug bool
mu sync.Mutex
running map[string]bool // per-app running state
}
// NewCrossDriveRunner creates a new CrossDriveRunner.
func NewCrossDriveRunner(sett *settings.Settings, provider StackDataProvider, systemDataPath, stacksDir string, logger *log.Logger) *CrossDriveRunner {
func NewCrossDriveRunner(sett *settings.Settings, provider StackDataProvider, systemDataPath, stacksDir string, logger *log.Logger, debug bool) *CrossDriveRunner {
return &CrossDriveRunner{
sett: sett,
stackProvider: provider,
@@ -42,6 +44,7 @@ func NewCrossDriveRunner(sett *settings.Settings, provider StackDataProvider, sy
stacksDir: stacksDir,
controllerYAMLPath: "/opt/docker/felhom-controller/controller.yaml",
logger: logger,
debug: debug,
running: make(map[string]bool),
}
}
@@ -67,6 +70,11 @@ func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) e
return fmt.Errorf("cross-drive backup not configured or disabled for %s", stackName)
}
if r.debug {
r.logger.Printf("[DEBUG] RunAppBackup: starting for %s, dest=%s, schedule=%s, method=%s",
stackName, cfg.DestinationPath, cfg.Schedule, cfg.Method)
}
// Prevent concurrent runs for the same app
r.mu.Lock()
if r.running[stackName] {
@@ -84,12 +92,18 @@ func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) e
// Check if source or destination drive is disconnected
srcDrive := r.stackProvider.GetStackHDDPath(stackName)
if srcDrive != "" && r.sett.IsDisconnected(srcDrive) {
if r.debug {
r.logger.Printf("[DEBUG] RunAppBackup: source drive disconnected for %s: %s", stackName, srcDrive)
}
r.mu.Lock()
r.running[stackName] = false
r.mu.Unlock()
return fmt.Errorf("source drive disconnected: %s", srcDrive)
}
if r.sett.IsDisconnected(cfg.DestinationPath) {
if r.debug {
r.logger.Printf("[DEBUG] RunAppBackup: destination drive disconnected for %s: %s", stackName, cfg.DestinationPath)
}
r.mu.Lock()
r.running[stackName] = false
r.mu.Unlock()
@@ -107,6 +121,9 @@ func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) e
// Trigger fresh DB dump for this app before cross-drive backup
if r.dbDumper != nil {
if r.debug {
r.logger.Printf("[DEBUG] RunAppBackup: triggering pre-backup DB dump for %s", stackName)
}
if err := r.dbDumper.DumpStackDB(ctx, stackName); err != nil {
r.logger.Printf("[WARN] Pre-backup DB dump failed for %s: %v — proceeding with user data backup", stackName, err)
// Non-fatal: user data backup is still valuable without fresh dump
@@ -120,6 +137,9 @@ func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) e
// Resolve HDD mounts for this app (may be empty for config-only apps)
mounts := r.stackProvider.GetStackHDDMounts(stackName)
if r.debug {
r.logger.Printf("[DEBUG] RunAppBackup: %s has %d HDD mount(s): %v", stackName, len(mounts), mounts)
}
// Safety: destination must not overlap with any source
for _, m := range mounts {
@@ -145,6 +165,9 @@ func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) e
destDir := AppSecondaryRsyncPath(cfg.DestinationPath, stackName)
if sz, err := dirSizeBytes(destDir); err == nil {
sizeHuman = humanizeBytes(sz)
if r.debug {
r.logger.Printf("[DEBUG] RunAppBackup: %s backup size at destination: %s", stackName, sizeHuman)
}
}
r.logger.Printf("[INFO] Cross-drive backup completed: %s (%s)", stackName, duration.Round(time.Second))
@@ -155,6 +178,10 @@ func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) e
// RunAllScheduled runs cross-drive backups for all apps matching the schedule.
// Runs sequentially (disk I/O bound).
func (r *CrossDriveRunner) RunAllScheduled(ctx context.Context, schedule string) error {
if r.debug {
r.logger.Printf("[DEBUG] RunAllScheduled: starting for schedule=%s", schedule)
}
// Auto-enable Tier 2 for small apps (no HDD mounts) before running backups
r.AutoEnableSmallApps()
@@ -163,18 +190,39 @@ func (r *CrossDriveRunner) RunAllScheduled(ctx context.Context, schedule string)
configs := r.sett.GetAllCrossDriveConfigs()
if len(configs) == 0 {
if r.debug {
r.logger.Printf("[DEBUG] RunAllScheduled: no cross-drive configs found")
}
return nil
}
if r.debug {
r.logger.Printf("[DEBUG] RunAllScheduled: %d total cross-drive config(s) found", len(configs))
}
var errs []string
var scheduled, skippedDisabled, skippedWrongSchedule int
for stackName, cfg := range configs {
if !cfg.Enabled {
if r.debug {
r.logger.Printf("[DEBUG] RunAllScheduled: skipping %s — disabled", stackName)
}
skippedDisabled++
continue
}
if cfg.Schedule != schedule {
if r.debug {
r.logger.Printf("[DEBUG] RunAllScheduled: skipping %s — schedule mismatch (has=%s, want=%s)", stackName, cfg.Schedule, schedule)
}
skippedWrongSchedule++
continue
}
if r.debug {
r.logger.Printf("[DEBUG] RunAllScheduled: queuing %s for backup (dest=%s)", stackName, cfg.DestinationPath)
}
scheduled++
select {
case <-ctx.Done():
return ctx.Err()
@@ -186,6 +234,11 @@ func (r *CrossDriveRunner) RunAllScheduled(ctx context.Context, schedule string)
}
}
if r.debug {
r.logger.Printf("[DEBUG] RunAllScheduled: done — %d scheduled, %d disabled, %d wrong schedule, %d errors",
scheduled, skippedDisabled, skippedWrongSchedule, len(errs))
}
if len(errs) > 0 {
return fmt.Errorf("cross-drive backup errors: %s", strings.Join(errs, "; "))
}
@@ -216,6 +269,9 @@ func (r *CrossDriveRunner) AnyRunning() bool {
// (≥10 GB free, <90% used) to protect OS stability; external drives just need
// ≥100 MB. Non-mount-point destinations are allowed with a logged warning.
func (r *CrossDriveRunner) ValidateDestination(path string) error {
if r.debug {
r.logger.Printf("[DEBUG] ValidateDestination: checking path=%s", path)
}
if path == "" {
return fmt.Errorf("destination path is empty")
}
@@ -226,6 +282,9 @@ func (r *CrossDriveRunner) ValidateDestination(path string) error {
return fmt.Errorf("destination %s does not exist", path)
}
onSystemDrive := !system.IsMountPoint(path)
if r.debug {
r.logger.Printf("[DEBUG] ValidateDestination: path=%s, isMountPoint=%v", path, !onSystemDrive)
}
if onSystemDrive {
r.logger.Printf("[WARN] Destination %s is not a separate mount point (system drive) — backup will proceed but data is not protected against drive failure", path)
}
@@ -237,6 +296,10 @@ func (r *CrossDriveRunner) ValidateDestination(path string) error {
r.logger.Printf("[WARN] Cannot determine disk usage for %s — proceeding without space verification", path)
return nil
}
if r.debug {
r.logger.Printf("[DEBUG] ValidateDestination: path=%s, availGB=%.1f, usedPct=%.0f%%, onSystemDrive=%v",
path, di.AvailGB, di.UsedPercent, onSystemDrive)
}
if onSystemDrive {
// System drive: protect OS stability — require ≥10 GB free and <90% used
if di.AvailGB < 10 {
@@ -251,6 +314,9 @@ func (r *CrossDriveRunner) ValidateDestination(path string) error {
return fmt.Errorf("destination %s has insufficient free space (%.1f GB free)", path, di.AvailGB)
}
}
if r.debug {
r.logger.Printf("[DEBUG] ValidateDestination: path=%s passed all checks", path)
}
return nil
}
@@ -258,6 +324,9 @@ func (r *CrossDriveRunner) ValidateDestination(path string) error {
func (r *CrossDriveRunner) runRsyncBackup(ctx context.Context, stackName, destBase string, mounts []string) error {
destDir := AppSecondaryRsyncPath(destBase, stackName)
if r.debug {
r.logger.Printf("[DEBUG] runRsyncBackup: stack=%s, destBase=%s, destDir=%s, %d mount(s)", stackName, destBase, destDir, len(mounts))
}
if err := os.MkdirAll(destDir, 0755); err != nil {
return fmt.Errorf("creating rsync dest dir: %w", err)
}
@@ -296,9 +365,16 @@ func (r *CrossDriveRunner) runRsyncBackup(ctx context.Context, stackName, destBa
"--exclude", "backups/*.dump",
src, dst)
r.logger.Printf("[DEBUG] rsync: %s → %s", src, dst)
if out, err := cmd.CombinedOutput(); err != nil {
out, err := cmd.CombinedOutput()
if err != nil {
if r.debug {
r.logger.Printf("[DEBUG] runRsyncBackup: rsync failed for %s: %s", srcMount, util.TruncateStr(strings.TrimSpace(string(out)), 500))
}
return fmt.Errorf("rsync failed for %s: %v (%s)", srcMount, err, strings.TrimSpace(string(out)))
}
if r.debug {
r.logger.Printf("[DEBUG] runRsyncBackup: rsync OK for mount %s → %s", src, dst)
}
}
// --- Copy DB dumps for this stack from its home drive ---
@@ -423,20 +499,35 @@ func (r *CrossDriveRunner) syncInfraConfig(ctx context.Context) {
func (r *CrossDriveRunner) AutoEnableSmallApps() {
storagePaths := r.sett.GetStoragePaths()
if len(storagePaths) < 2 {
if r.debug {
r.logger.Printf("[DEBUG] AutoEnableSmallApps: fewer than 2 storage paths (%d) — skipping", len(storagePaths))
}
return // no secondary drive available
}
deployed := r.stackProvider.ListDeployedStacks()
existingConfigs := r.sett.GetAllCrossDriveConfigs()
if r.debug {
r.logger.Printf("[DEBUG] AutoEnableSmallApps: %d deployed stacks, %d existing configs, %d storage paths",
len(deployed), len(existingConfigs), len(storagePaths))
}
var autoEnabled int
for _, stack := range deployed {
// Skip if already has cross-drive config (user has touched it)
if _, exists := existingConfigs[stack.Name]; exists {
if r.debug {
r.logger.Printf("[DEBUG] AutoEnableSmallApps: skipping %s — already has cross-drive config", stack.Name)
}
continue
}
// Skip if app has HDD mounts (large user data — needs manual config)
if mounts := r.stackProvider.GetStackHDDMounts(stack.Name); len(mounts) > 0 {
if r.debug {
r.logger.Printf("[DEBUG] AutoEnableSmallApps: skipping %s — has %d HDD mount(s)", stack.Name, len(mounts))
}
continue
}
@@ -450,6 +541,9 @@ func (r *CrossDriveRunner) AutoEnableSmallApps() {
}
}
if destPath == "" {
if r.debug {
r.logger.Printf("[DEBUG] AutoEnableSmallApps: skipping %s — no suitable destination found", stack.Name)
}
continue // no suitable destination found
}
@@ -464,8 +558,13 @@ func (r *CrossDriveRunner) AutoEnableSmallApps() {
r.logger.Printf("[WARN] Auto-enable Tier 2 failed for %s: %v", stack.Name, err)
continue
}
autoEnabled++
r.logger.Printf("[INFO] Auto-enabled Tier 2 backup for %s → %s (no HDD mounts, daily rsync)", stack.Name, destPath)
}
if r.debug && autoEnabled > 0 {
r.logger.Printf("[DEBUG] AutoEnableSmallApps: auto-enabled %d app(s)", autoEnabled)
}
}
// --- helpers ---
+68 -4
View File
@@ -11,6 +11,8 @@ import (
"path/filepath"
"strings"
"time"
"gitea.dooplex.hu/admin/felhom-controller/internal/util"
)
// DBType represents a database engine type.
@@ -61,14 +63,22 @@ type DumpFileInfo struct {
}
// DiscoverDatabases finds running database containers via docker ps.
func DiscoverDatabases(ctx context.Context, logger *log.Logger) ([]DiscoveredDB, error) {
func DiscoverDatabases(ctx context.Context, logger *log.Logger, debug bool) ([]DiscoveredDB, error) {
if debug {
logger.Printf("[DEBUG] DiscoverDatabases: running docker ps to find database containers")
}
cmd := exec.CommandContext(ctx, "docker", "ps", "--format", "{{.ID}}\t{{.Names}}\t{{.Image}}", "--filter", "status=running")
out, err := cmd.Output()
if err != nil {
return nil, fmt.Errorf("docker ps failed: %w", err)
}
if debug {
logger.Printf("[DEBUG] DiscoverDatabases: docker ps output: %s", util.TruncateStr(strings.TrimSpace(string(out)), 500))
}
var dbs []DiscoveredDB
var skipped int
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
if line == "" {
@@ -87,9 +97,17 @@ func DiscoverDatabases(ctx context.Context, logger *log.Logger) ([]DiscoveredDB,
} else if strings.Contains(image, "mariadb") || strings.Contains(image, "mysql") {
dbType = DBTypeMariaDB
} else {
if debug {
logger.Printf("[DEBUG] DiscoverDatabases: skipping container %s (image=%s, not a database)", name, image)
}
skipped++
continue
}
if debug {
logger.Printf("[DEBUG] DiscoverDatabases: found %s container: %s (id=%s)", dbType, name, id[:12])
}
db := DiscoveredDB{
ContainerID: id,
ContainerName: name,
@@ -100,33 +118,49 @@ func DiscoverDatabases(ctx context.Context, logger *log.Logger) ([]DiscoveredDB,
// Get env vars from container
if err := populateDBEnv(ctx, &db); err != nil {
logger.Printf("[WARN] Could not read env vars for %s: %v", name, err)
if debug {
logger.Printf("[DEBUG] DiscoverDatabases: skipping %s — env read failed", name)
}
continue
}
if debug {
logger.Printf("[DEBUG] DiscoverDatabases: %s → stack=%s, dbUser=%s, dbName=%s", name, db.StackName, db.DBUser, db.DBName)
}
dbs = append(dbs, db)
}
if debug {
logger.Printf("[DEBUG] DiscoverDatabases: found %d database(s), skipped %d non-DB container(s)", len(dbs), skipped)
}
return dbs, nil
}
// DumpAll dumps all discovered databases.
func DumpAll(ctx context.Context, dbs []DiscoveredDB, dumpDir string, logger *log.Logger) []DumpResult {
func DumpAll(ctx context.Context, dbs []DiscoveredDB, dumpDir string, logger *log.Logger, debug bool) []DumpResult {
// Clean up old .tmp files (older than 1 hour)
cleanupTmpFiles(dumpDir, logger)
var results []DumpResult
for _, db := range dbs {
result := DumpOne(ctx, db, dumpDir, logger)
result := DumpOne(ctx, db, dumpDir, logger, debug)
results = append(results, result)
}
return results
}
// DumpOne dumps a single database.
func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.Logger) DumpResult {
func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.Logger, debug bool) DumpResult {
start := time.Now()
result := DumpResult{DB: db}
if debug {
logger.Printf("[DEBUG] DumpOne: starting dump for container=%s, stack=%s, dbType=%s, dumpDir=%s",
db.ContainerName, db.StackName, db.DBType, dumpDir)
}
// Ensure dump directory exists
if err := os.MkdirAll(dumpDir, 0755); err != nil {
result.Error = fmt.Errorf("creating dump dir: %w", err)
@@ -148,6 +182,9 @@ func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.L
if err != nil || strings.TrimSpace(string(checkOut)) != "true" {
result.Error = fmt.Errorf("container %s no longer running", db.ContainerName)
result.Duration = time.Since(start)
if debug {
logger.Printf("[DEBUG] DumpOne: container %s is no longer running — skipping", db.ContainerName)
}
return result
}
@@ -158,14 +195,29 @@ func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.L
cmd = exec.CommandContext(dumpCtx, "docker", "exec", db.ContainerID,
"pg_dump", "-U", db.DBUser, "-d", db.DBName,
"--clean", "--if-exists", "--no-owner", "--no-privileges")
if debug {
logger.Printf("[DEBUG] DumpOne: pg_dump command: docker exec %s pg_dump -U %s -d %s --clean --if-exists --no-owner --no-privileges",
db.ContainerID[:12], db.DBUser, db.DBName)
}
case DBTypeMariaDB:
// Get root password from container env
password := getMariaDBPassword(dumpCtx, db.ContainerID)
if password == "" {
result.Error = fmt.Errorf("could not determine MariaDB root password for %s", db.ContainerName)
result.Duration = time.Since(start)
if debug {
logger.Printf("[DEBUG] DumpOne: MariaDB root password not found for %s — skipping", db.ContainerName)
}
return result
}
cmd = exec.CommandContext(dumpCtx, "docker", "exec", db.ContainerID,
"mariadb-dump", "-u", "root", "-p***",
"--single-transaction", "--routines", "--triggers", db.DBName)
if debug {
logger.Printf("[DEBUG] DumpOne: mariadb-dump command: docker exec %s mariadb-dump -u root -p*** --single-transaction --routines --triggers %s",
db.ContainerID[:12], db.DBName)
}
// Actual command with real password (not logged)
cmd = exec.CommandContext(dumpCtx, "docker", "exec", db.ContainerID,
"mariadb-dump", "-u", "root", "-p"+password,
"--single-transaction", "--routines", "--triggers", db.DBName)
@@ -198,6 +250,9 @@ func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.L
}
result.Error = fmt.Errorf("dump failed: %v — %s", err, errMsg)
result.Duration = time.Since(start)
if debug {
logger.Printf("[DEBUG] DumpOne: dump command failed for %s: %v", db.ContainerName, result.Error)
}
return result
}
@@ -207,6 +262,9 @@ func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.L
os.Remove(tmpPath)
result.Error = fmt.Errorf("dump produced empty file for %s", db.ContainerName)
result.Duration = time.Since(start)
if debug {
logger.Printf("[DEBUG] DumpOne: dump produced empty file for %s", db.ContainerName)
}
return result
}
@@ -225,6 +283,12 @@ func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.L
// Run validation on the dump file
result.Validation = ValidateDump(finalPath, db.DBType)
if debug {
logger.Printf("[DEBUG] DumpOne: completed %s → %s (size=%s, valid=%v, tables=%d, duration=%s)",
db.ContainerName, filename, humanizeBytes(stat.Size()),
result.Validation.Valid, result.Validation.TableCount, result.Duration.Round(time.Millisecond))
}
logger.Printf("[INFO] DB dump: %s → %s (%s, %s, %d tables)", db.ContainerName, filename,
humanizeBytes(stat.Size()), result.Duration.Round(time.Millisecond), result.Validation.TableCount)
+11 -1
View File
@@ -24,12 +24,16 @@ type InfraMetadata struct {
// WriteLocalInfraBackup writes the infra backup to .felhom-infra-backup/ on each drive.
// Individual drive failures are logged but not returned — the function is best-effort.
func WriteLocalInfraBackup(backupJSON []byte, customerID, controllerVersion, timestamp string, drives []string, logger *log.Logger) {
func WriteLocalInfraBackup(backupJSON []byte, customerID, controllerVersion, timestamp string, drives []string, logger *log.Logger, debug bool) {
if len(drives) == 0 {
logger.Printf("[DEBUG] No drives configured for local infra backup")
return
}
if debug {
logger.Printf("[DEBUG] WriteLocalInfraBackup: payload size=%d bytes, %d target drive(s): %v", len(backupJSON), len(drives), drives)
}
// Compute checksum of backup data
hash := sha256.Sum256(backupJSON)
checksum := hex.EncodeToString(hash[:])
@@ -51,10 +55,16 @@ func WriteLocalInfraBackup(backupJSON []byte, customerID, controllerVersion, tim
written := 0
for _, drive := range drives {
dir := InfraBackupDir(drive)
if debug {
logger.Printf("[DEBUG] WriteLocalInfraBackup: writing to drive=%s, dir=%s", drive, dir)
}
if err := writeInfraToDir(dir, backupJSON, metaJSON); err != nil {
logger.Printf("[WARN] Local infra backup: failed to write to %s: %v", drive, err)
continue
}
if debug {
logger.Printf("[DEBUG] WriteLocalInfraBackup: write OK to %s", drive)
}
written++
}
@@ -20,7 +20,7 @@ func TestWriteAndReadLocalInfraBackup(t *testing.T) {
backupJSON := []byte(`{"customer_id":"test-123","domain":"test.hu","controller_version":"v0.21.0","timestamp":"2026-02-21T10:00:00Z"}`)
logger := testLogger(t)
WriteLocalInfraBackup(backupJSON, "test-123", "v0.21.0", "2026-02-21T10:00:00Z", []string{drive}, logger)
WriteLocalInfraBackup(backupJSON, "test-123", "v0.21.0", "2026-02-21T10:00:00Z", []string{drive}, logger, false)
// Verify files exist
dir := InfraBackupDir(drive)
@@ -124,7 +124,7 @@ func TestWriteLocalInfraBackup_MultipleDrives(t *testing.T) {
backupJSON := []byte(`{"test":"multi"}`)
logger := testLogger(t)
WriteLocalInfraBackup(backupJSON, "multi-test", "v1.0", "2026-01-01T00:00:00Z", drives, logger)
WriteLocalInfraBackup(backupJSON, "multi-test", "v1.0", "2026-01-01T00:00:00Z", drives, logger, false)
// All 3 should succeed
for _, d := range drives {
@@ -142,7 +142,7 @@ func TestWriteLocalInfraBackup_MultipleDrives(t *testing.T) {
func TestWriteLocalInfraBackup_NoDrives(t *testing.T) {
logger := testLogger(t)
// Should not panic
WriteLocalInfraBackup([]byte(`{}`), "test", "v1.0", "2026-01-01T00:00:00Z", nil, logger)
WriteLocalInfraBackup([]byte(`{}`), "test", "v1.0", "2026-01-01T00:00:00Z", nil, logger, false)
}
func contains(s, substr string) bool {
+36
View File
@@ -21,6 +21,10 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error {
return fmt.Errorf("invalid snapshot ID: must be 8-64 lowercase hex characters")
}
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: stack=%s, snapshotID=%s", stackName, snapshotID)
}
// Prevent concurrent operations
m.mu.Lock()
if m.running {
@@ -39,6 +43,10 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error {
hddMounts := m.stackProvider.GetStackHDDMounts(stackName)
hasHDD := len(hddMounts) > 0
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: %s has %d HDD mount(s), hasHDD=%v", stackName, len(hddMounts), hasHDD)
}
// Build list of paths to restore from the snapshot
var restorePaths []string
@@ -47,16 +55,25 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error {
if ok {
stackDir := filepath.Dir(composePath)
restorePaths = append(restorePaths, stackDir)
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: will restore config dir: %s", stackDir)
}
}
// Restore DB dump files for this stack (per-drive path)
drivePath := m.GetAppDrivePath(stackName)
dumpDir := AppDBDumpPath(drivePath, stackName)
restorePaths = append(restorePaths, dumpDir)
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: will restore DB dump dir: %s", dumpDir)
}
// Restore HDD data (always included for apps that have it — backup is mandatory)
if hasHDD {
restorePaths = append(restorePaths, hddMounts...)
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: will restore HDD data: %v", hddMounts)
}
}
if len(restorePaths) == 0 {
@@ -66,17 +83,30 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error {
// Use the app's primary restic repo
repoPath := PrimaryResticRepoPath(drivePath)
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: using repo=%s, %d restore path(s)", repoPath, len(restorePaths))
}
m.logger.Printf("[WARN] RESTORE starting: stack=%s, snapshot=%s, repo=%s, paths=%v, hasHDD=%v",
stackName, snapshotID, repoPath, restorePaths, hasHDD)
// Stop the app before restore
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: step 1/4 — stopping app %s", stackName)
}
if err := m.stackProvider.StopStack(stackName); err != nil {
m.logger.Printf("[WARN] RESTORE could not stop %s: %v (proceeding anyway)", stackName, err)
}
// Execute restore via restic
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: step 2/4 — restoring data from snapshot %s", snapshotID)
}
if err := m.restic.RestoreAppData(repoPath, snapshotID, restorePaths); err != nil {
m.logger.Printf("[ERROR] RESTORE failed for %s: %v", stackName, err)
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: step 3/4 — restarting app %s after failure", stackName)
}
if startErr := m.stackProvider.StartStack(stackName); startErr != nil {
m.logger.Printf("[WARN] RESTORE could not restart %s after failure: %v", stackName, startErr)
}
@@ -84,6 +114,9 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error {
}
// Restart the app
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: step 3/4 — restarting app %s after successful restore", stackName)
}
if err := m.stackProvider.StartStack(stackName); err != nil {
m.logger.Printf("[WARN] RESTORE could not restart %s after restore: %v", stackName, err)
}
@@ -92,6 +125,9 @@ func (m *Manager) RestoreApp(stackName, snapshotID string) error {
if hasHDD {
restoreType = "full (config+DB+userdata)"
}
if m.isDebug() {
m.logger.Printf("[DEBUG] RestoreApp: step 4/4 — restore completed, type=%s", restoreType)
}
m.logger.Printf("[INFO] RESTORE completed: stack=%s, snapshot=%s, type=%s", stackName, snapshotID, restoreType)
return nil
}
+59 -1
View File
@@ -2,6 +2,7 @@ package monitor
import (
"fmt"
"log"
"os"
"os/exec"
"strings"
@@ -22,26 +23,44 @@ type HealthReport struct {
}
// RunHealthCheck runs system checks and returns a diagnostic report.
func RunHealthCheck(cfg *config.Config, cpuCollector *system.CPUCollector, storagePaths []settings.StoragePath) *HealthReport {
func RunHealthCheck(cfg *config.Config, cpuCollector *system.CPUCollector, storagePaths []settings.StoragePath, logger *log.Logger) *HealthReport {
report := &HealthReport{
Status: "ok",
Timestamp: time.Now(),
}
debug := cfg.Logging.Level == "debug" && logger != nil
hddPath := cfg.Paths.HDDPath
if len(storagePaths) > 0 {
hddPath = storagePaths[0].Path
}
sysInfo := system.GetInfo(hddPath, cpuCollector)
if debug {
logger.Printf("[DEBUG] [HEALTH] Raw values: disk=%.1f%%, hdd=%.1f%% (configured=%v), mem=%.1f%% (%dMB/%dMB), cpu=%.1f%%, temp=%.1f°C (%s)",
sysInfo.DiskPercent, sysInfo.HDDPercent, sysInfo.HDDConfigured,
sysInfo.MemPercent, sysInfo.UsedMemMB, sysInfo.TotalMemMB,
sysInfo.CPUPercent, sysInfo.TemperatureCelsius, sysInfo.TemperatureSource)
}
// 1. Disk usage (SSD)
if sysInfo.DiskPercent > 0 {
if sysInfo.DiskPercent >= float64(cfg.Monitoring.Thresholds.DiskCritPercent) {
report.Issues = append(report.Issues, fmt.Sprintf("SSD disk usage critical: %.0f%%", sysInfo.DiskPercent))
if debug {
logger.Printf("[DEBUG] [HEALTH] SSD disk: CRITICAL (%.0f%% >= %d%%)", sysInfo.DiskPercent, cfg.Monitoring.Thresholds.DiskCritPercent)
}
} else if sysInfo.DiskPercent >= float64(cfg.Monitoring.Thresholds.DiskWarnPercent) {
report.Warnings = append(report.Warnings, fmt.Sprintf("SSD disk usage high: %.0f%%", sysInfo.DiskPercent))
if debug {
logger.Printf("[DEBUG] [HEALTH] SSD disk: WARN (%.0f%% >= %d%%)", sysInfo.DiskPercent, cfg.Monitoring.Thresholds.DiskWarnPercent)
}
} else {
report.Info = append(report.Info, fmt.Sprintf("SSD: %.0f%% used", sysInfo.DiskPercent))
if debug {
logger.Printf("[DEBUG] [HEALTH] SSD disk: OK (%.0f%%)", sysInfo.DiskPercent)
}
}
}
@@ -58,8 +77,14 @@ func RunHealthCheck(cfg *config.Config, cpuCollector *system.CPUCollector, stora
if sysInfo.MemPercent > 0 {
if sysInfo.MemPercent >= float64(cfg.Monitoring.Thresholds.MemoryWarnPercent) {
report.Warnings = append(report.Warnings, fmt.Sprintf("Memory usage high: %.0f%%", sysInfo.MemPercent))
if debug {
logger.Printf("[DEBUG] [HEALTH] Memory: WARN (%.0f%% >= %d%%)", sysInfo.MemPercent, cfg.Monitoring.Thresholds.MemoryWarnPercent)
}
} else {
report.Info = append(report.Info, fmt.Sprintf("Memory: %.0f%% used", sysInfo.MemPercent))
if debug {
logger.Printf("[DEBUG] [HEALTH] Memory: OK (%.0f%%)", sysInfo.MemPercent)
}
}
}
@@ -67,8 +92,14 @@ func RunHealthCheck(cfg *config.Config, cpuCollector *system.CPUCollector, stora
if sysInfo.CPUPercent > 0 {
if sysInfo.CPUPercent >= float64(cfg.Monitoring.Thresholds.CPUWarnPercent) {
report.Warnings = append(report.Warnings, fmt.Sprintf("CPU usage high: %.0f%%", sysInfo.CPUPercent))
if debug {
logger.Printf("[DEBUG] [HEALTH] CPU: WARN (%.0f%% >= %d%%)", sysInfo.CPUPercent, cfg.Monitoring.Thresholds.CPUWarnPercent)
}
} else {
report.Info = append(report.Info, fmt.Sprintf("CPU: %.0f%%", sysInfo.CPUPercent))
if debug {
logger.Printf("[DEBUG] [HEALTH] CPU: OK (%.0f%%)", sysInfo.CPUPercent)
}
}
}
@@ -76,23 +107,45 @@ func RunHealthCheck(cfg *config.Config, cpuCollector *system.CPUCollector, stora
if sysInfo.TemperatureCelsius > 0 {
if sysInfo.TemperatureCelsius >= float64(cfg.Monitoring.Thresholds.TemperatureWarnCelsius) {
report.Warnings = append(report.Warnings, fmt.Sprintf("Temperature high: %.0f°C (%s)", sysInfo.TemperatureCelsius, sysInfo.TemperatureSource))
if debug {
logger.Printf("[DEBUG] [HEALTH] Temperature: WARN (%.0f°C >= %d°C)", sysInfo.TemperatureCelsius, cfg.Monitoring.Thresholds.TemperatureWarnCelsius)
}
} else {
report.Info = append(report.Info, fmt.Sprintf("Temperature: %.0f°C", sysInfo.TemperatureCelsius))
if debug {
logger.Printf("[DEBUG] [HEALTH] Temperature: OK (%.0f°C)", sysInfo.TemperatureCelsius)
}
}
}
// 5. Docker health
if err := checkDocker(); err != nil {
report.Issues = append(report.Issues, fmt.Sprintf("Docker: %v", err))
if debug {
logger.Printf("[DEBUG] [HEALTH] Docker daemon: FAIL (%v)", err)
}
} else {
report.Info = append(report.Info, "Docker: reachable")
if debug {
logger.Printf("[DEBUG] [HEALTH] Docker daemon: OK")
}
}
// 6. Protected containers
if debug {
logger.Printf("[DEBUG] [HEALTH] Checking %d protected containers: %v", len(cfg.Stacks.Protected), cfg.Stacks.Protected)
}
missingProtected := checkProtectedContainers(cfg.Stacks.Protected)
for _, name := range missingProtected {
report.Issues = append(report.Issues, fmt.Sprintf("Protected container not running: %s", name))
}
if debug {
if len(missingProtected) > 0 {
logger.Printf("[DEBUG] [HEALTH] Protected containers missing: %v", missingProtected)
} else {
logger.Printf("[DEBUG] [HEALTH] All protected containers running")
}
}
// 7. Storage paths
storageIssues, storageWarnings := checkStoragePaths(storagePaths)
@@ -106,6 +159,11 @@ func RunHealthCheck(cfg *config.Config, cpuCollector *system.CPUCollector, stora
report.Status = "warn"
}
if debug {
logger.Printf("[DEBUG] [HEALTH] Final status: %s (issues=%d, warnings=%d, info=%d)",
report.Status, len(report.Issues), len(report.Warnings), len(report.Info))
}
return report
}
+54
View File
@@ -58,6 +58,11 @@ type pathProbeState struct {
lastStatus string // "connected", "disconnected"
lastProbeTime time.Time
probeInterval time.Duration
// Debug counters for summary logging
probeCount int
probeOKCount int
lastSummaryTime time.Time
totalLatency time.Duration
}
// StorageWatchdog monitors registered storage paths and reacts to disconnection/reconnection.
@@ -95,6 +100,9 @@ func NewStorageWatchdog(
}
}
// isDebug returns true if the logging level is set to "debug".
func (w *StorageWatchdog) isDebug() bool { return w.cfg.Logging.Level == "debug" }
// SetAlertRefresh sets the callback to trigger alert refresh.
func (w *StorageWatchdog) SetAlertRefresh(fn func()) {
w.alertRefresh = fn
@@ -164,17 +172,45 @@ func (w *StorageWatchdog) getOrCreateState(path string) *pathProbeState {
// handleConnectedProbe probes a connected drive and triggers disconnect if needed.
func (w *StorageWatchdog) handleConnectedProbe(sp settings.StoragePath, state *pathProbeState) {
probeStart := time.Now()
result := system.ProbeStoragePath(sp.Path)
probeLatency := time.Since(probeStart)
if w.isDebug() {
state.probeCount++
state.totalLatency += probeLatency
}
if result.Status == system.ProbeConnected {
if state.consecutiveFailures > 0 {
w.logger.Printf("[DEBUG] [STORAGE] Probe recovered for %s after %d failures", sp.Path, state.consecutiveFailures)
}
state.consecutiveFailures = 0
state.lastStatus = "connected"
if w.isDebug() {
state.probeOKCount++
// Every 60 probes (~5 minutes at 5s interval): emit summary
if state.probeCount >= 60 {
avgLatency := state.totalLatency / time.Duration(state.probeCount)
w.logger.Printf("[DEBUG] [STORAGE] Storage watchdog: %s — %d/%d probes OK (last 5m, avg %dms)",
sp.Path, state.probeOKCount, state.probeCount, avgLatency.Milliseconds())
state.probeCount = 0
state.probeOKCount = 0
state.totalLatency = 0
state.lastSummaryTime = time.Now()
}
}
return
}
state.consecutiveFailures++
// Debug: log immediately on unexpected failure (was connected, now failing)
if w.isDebug() && state.lastStatus == "connected" {
w.logger.Printf("[DEBUG] [STORAGE] Storage probe failed for %s (%d/%d before disconnect): %v",
sp.Path, state.consecutiveFailures, probeThreshold, result.Err)
}
w.logger.Printf("[WARN] [STORAGE] Probe failed for %s (%d/%d): %v",
sp.Path, state.consecutiveFailures, probeThreshold, result.Err)
@@ -239,6 +275,11 @@ func (w *StorageWatchdog) handleReconnectCheck(ctx context.Context, sp settings.
return
}
if w.isDebug() {
w.logger.Printf("[DEBUG] [STORAGE] Reconnect check for %s: UUID=%s, mountPath=%s, isAttachWizard=%v",
sp.Path, uuid, mountPath, isAttachWizard)
}
// Check if the UUID block device is present
uuidPath := filepath.Join(hostDevUUIDPath, uuid)
if _, err := os.Stat(uuidPath); err != nil {
@@ -251,6 +292,11 @@ func (w *StorageWatchdog) handleReconnectCheck(ctx context.Context, sp settings.
}
w.logger.Printf("[INFO] [STORAGE] Drive reconnected (UUID found), attempting remount: %s (%s)", sp.Path, label)
if w.isDebug() {
w.logger.Printf("[DEBUG] [STORAGE] UUID %s found at %s, mounting %s (raw=%s, attachWizard=%v)",
uuid, uuidPath, sp.Path, rawPath, isAttachWizard)
}
// Attempt remount
if err := w.remount(sp.Path, rawPath, isAttachWizard); err != nil {
w.logger.Printf("[ERROR] [STORAGE] Remount failed for %s: %v", sp.Path, err)
@@ -261,9 +307,17 @@ func (w *StorageWatchdog) handleReconnectCheck(ctx context.Context, sp settings.
verifyResult := system.ProbeStoragePath(sp.Path)
if verifyResult.Status != system.ProbeConnected {
w.logger.Printf("[ERROR] [STORAGE] Post-remount probe failed for %s: %v", sp.Path, verifyResult.Err)
if w.isDebug() {
w.logger.Printf("[DEBUG] [STORAGE] Post-mount verification failed for %s: status=%v, err=%v",
sp.Path, verifyResult.Status, verifyResult.Err)
}
return
}
if w.isDebug() {
w.logger.Printf("[DEBUG] [STORAGE] Post-mount verification succeeded for %s", sp.Path)
}
w.logger.Printf("[INFO] [STORAGE] Drive successfully remounted: %s (%s)", sp.Path, label)
// Clean stale restic locks
+19 -3
View File
@@ -23,6 +23,7 @@ type Notifier struct {
httpClient *http.Client
logger *log.Logger
enabled bool
debug bool
settings *settings.Settings
mu sync.Mutex
@@ -30,7 +31,7 @@ type Notifier struct {
}
// New creates a new Notifier. Returns a no-op notifier if hub is not enabled.
func New(hubURL, apiKey, customerID string, sett *settings.Settings, logger *log.Logger) *Notifier {
func New(hubURL, apiKey, customerID string, sett *settings.Settings, logger *log.Logger, debug bool) *Notifier {
enabled := hubURL != "" && apiKey != ""
if enabled {
logger.Printf("[INFO] Notifier enabled (hub: %s)", hubURL)
@@ -45,6 +46,7 @@ func New(hubURL, apiKey, customerID string, sett *settings.Settings, logger *log
httpClient: &http.Client{Timeout: 10 * time.Second},
logger: logger,
enabled: enabled,
debug: debug,
settings: sett,
}
}
@@ -162,6 +164,9 @@ func (n *Notifier) PushEvent(eventType, severity, message string, details interf
go func() {
url := n.hubURL + "/api/v1/event"
if n.debug {
n.logger.Printf("[DEBUG] PushEvent: type=%s severity=%s url=%s", eventType, severity, url)
}
var lastErr error
for attempt := 0; attempt < 3; attempt++ {
if attempt > 0 {
@@ -185,6 +190,9 @@ func (n *Notifier) PushEvent(eventType, severity, message string, details interf
resp.Body.Close()
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
if n.debug {
n.logger.Printf("[DEBUG] PushEvent: %s pushed OK (HTTP %d)", eventType, resp.StatusCode)
}
n.logger.Printf("[INFO] Event pushed: %s (%s) — %s", eventType, severity, message)
return
}
@@ -284,9 +292,10 @@ func (n *Notifier) NotifyControllerUpdated(fromVer, toVer string, success bool)
}
// NotifyControllerStarted sends a controller startup event.
func (n *Notifier) NotifyControllerStarted(version string) {
// details may include self-test summary (e.g., {"selftest_pass": 8, "selftest_warn": 1, "selftest_fail": 0}).
func (n *Notifier) NotifyControllerStarted(version string, details map[string]interface{}) {
n.PushEvent("controller_started", "info",
fmt.Sprintf("Controller elindult (%s)", version), nil)
fmt.Sprintf("Controller elindult (%s)", version), details)
}
// NotifyStorageDisconnected sends a drive disconnection event.
@@ -375,6 +384,10 @@ func (n *Notifier) SyncPreferences(email string, enabledEvents []string, cooldow
}
url := n.hubURL + "/api/v1/preferences"
if n.debug {
n.logger.Printf("[DEBUG] SyncPreferences: url=%s email=%s events=%v cooldown=%dh",
url, email, enabledEvents, cooldownHours)
}
req, err := http.NewRequest("POST", url, bytes.NewReader(jsonData))
if err != nil {
return fmt.Errorf("request: %w", err)
@@ -393,6 +406,9 @@ func (n *Notifier) SyncPreferences(email string, enabledEvents []string, cooldow
return fmt.Errorf("hub hiba (%d): %s", resp.StatusCode, string(body))
}
if n.debug {
n.logger.Printf("[DEBUG] SyncPreferences: response HTTP %d", resp.StatusCode)
}
n.logger.Printf("[INFO] Notification preferences synced to hub: email=%s, events=%v, cooldown=%dh", email, enabledEvents, cooldownHours)
return nil
}
+22 -1
View File
@@ -4,6 +4,7 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"log"
"os"
"strconv"
"strings"
@@ -29,7 +30,13 @@ func BuildReport(
metricsStore *metrics.MetricsStore,
version string,
storagePaths []settings.StoragePath,
logger *log.Logger,
) *Report {
debug := cfg.Logging.Level == "debug"
if debug && logger != nil {
logger.Printf("[DEBUG] BuildReport: starting — version=%s, storagePaths=%d", version, len(storagePaths))
}
r := &Report{
Version: 1,
CustomerID: cfg.Customer.ID,
@@ -48,6 +55,9 @@ func BuildReport(
if data, err := os.ReadFile(configPath); err == nil {
h := sha256.Sum256(data)
r.ConfigHash = hex.EncodeToString(h[:])
if debug && logger != nil {
logger.Printf("[DEBUG] BuildReport: configHash=%s (%d bytes)", r.ConfigHash[:12]+"...", len(data))
}
}
}
@@ -111,6 +121,12 @@ func BuildReport(
})
}
if debug && logger != nil {
logger.Printf("[DEBUG] BuildReport: system info collected — cpu=%.1f%%, mem=%d/%dMB, temp=%.1fC",
sysInfo.CPUPercent, sysInfo.UsedMemMB, sysInfo.TotalMemMB, sysInfo.TemperatureCelsius)
logger.Printf("[DEBUG] BuildReport: storage entries=%d", len(r.Storage))
}
// Containers
r.Containers = buildContainerReport(stackMgr, metricsStore)
@@ -118,7 +134,7 @@ func BuildReport(
r.Backup = buildBackupReport(cfg, backupMgr)
// Health
healthReport := monitor.RunHealthCheck(cfg, cpuCollector, storagePaths)
healthReport := monitor.RunHealthCheck(cfg, cpuCollector, storagePaths, logger)
r.Health = HealthReport{
Status: healthReport.Status,
Issues: healthReport.Issues,
@@ -134,6 +150,11 @@ func BuildReport(
// Stacks
r.Stacks = buildStacksReport(stackMgr)
if debug && logger != nil {
logger.Printf("[DEBUG] BuildReport: complete — containers=%d, health=%s, deployed=%d, available=%d",
r.Containers.Total, r.Health.Status, len(r.Stacks.Deployed), len(r.Stacks.Available))
}
return r
}
+12 -1
View File
@@ -35,6 +35,7 @@ type Pusher struct {
httpClient *http.Client
logger *log.Logger
enabled bool
debug bool
statusMu sync.RWMutex
status PushStatus
@@ -45,7 +46,7 @@ type Pusher struct {
}
// NewPusher creates a new report pusher from hub configuration.
func NewPusher(cfg *config.HubConfig, logger *log.Logger) *Pusher {
func NewPusher(cfg *config.HubConfig, logger *log.Logger, debug bool) *Pusher {
return &Pusher{
hubURL: strings.TrimRight(cfg.URL, "/"),
apiKey: cfg.APIKey,
@@ -54,6 +55,7 @@ func NewPusher(cfg *config.HubConfig, logger *log.Logger) *Pusher {
},
logger: logger,
enabled: cfg.Enabled,
debug: debug,
}
}
@@ -69,6 +71,9 @@ func (p *Pusher) Push(report *Report) error {
}
url := p.hubURL + "/api/v1/report"
if p.debug {
p.logger.Printf("[DEBUG] Push: url=%s payload=%d bytes", url, len(data))
}
p.statusMu.Lock()
p.status.LastAttempt = time.Now()
@@ -143,6 +148,9 @@ func (p *Pusher) PushInfraBackup(data []byte) error {
}
url := p.hubURL + "/api/v1/infra-backup"
if p.debug {
p.logger.Printf("[DEBUG] PushInfraBackup: url=%s payload=%d bytes", url, len(data))
}
var lastErr error
for attempt := 0; attempt < 3; attempt++ {
@@ -173,6 +181,9 @@ func (p *Pusher) PushInfraBackup(data []byte) error {
return nil
}
lastErr = fmt.Errorf("HTTP %d", resp.StatusCode)
if p.debug {
p.logger.Printf("[DEBUG] PushInfraBackup: attempt %d failed — HTTP %d", attempt+1, resp.StatusCode)
}
}
return fmt.Errorf("infra backup push failed after 3 attempts: %w", lastErr)
+256
View File
@@ -0,0 +1,256 @@
package selftest
import (
"context"
"fmt"
"log"
"net/http"
"os"
"os/exec"
"path/filepath"
"time"
"gitea.dooplex.hu/admin/felhom-controller/internal/backup"
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
)
// CheckResult represents the outcome of a single self-test check.
type CheckResult struct {
Name string `json:"name"`
Status string `json:"status"` // "pass", "warn", "fail"
Message string `json:"message"`
}
// Summary contains all self-test results.
type Summary struct {
Results []CheckResult `json:"results"`
Pass int `json:"pass"`
Warn int `json:"warn"`
Fail int `json:"fail"`
}
// Run executes all startup self-test checks and returns a summary.
// It never blocks startup — each check has an individual timeout.
func Run(cfg *config.Config, sett *settings.Settings, logger *log.Logger) *Summary {
s := &Summary{}
checks := []func() CheckResult{
func() CheckResult { return checkDockerSocket() },
func() CheckResult { return checkStacksDir(cfg.Paths.StacksDir) },
func() CheckResult { return checkDataDir(cfg.Paths.DataDir) },
func() CheckResult { return checkSystemDataPath(cfg.Paths.SystemDataPath) },
func() CheckResult { return checkStoragePaths(sett) },
func() CheckResult { return checkGitCatalog(cfg.Paths.StacksDir) },
func() CheckResult { return checkHubConnectivity(cfg) },
func() CheckResult { return checkResticRepos(sett) },
func() CheckResult { return checkMetricsDB(cfg.Paths.DataDir) },
}
logger.Println("[INFO] ========== Startup Self-Test ==========")
for _, check := range checks {
result := runSafe(check)
s.Results = append(s.Results, result)
switch result.Status {
case "pass":
s.Pass++
case "warn":
s.Warn++
case "fail":
s.Fail++
}
tag := "[PASS]"
if result.Status == "warn" {
tag = "[WARN]"
} else if result.Status == "fail" {
tag = "[FAIL]"
}
logger.Printf("[INFO] %s %s: %s", tag, result.Name, result.Message)
}
logger.Println("[INFO] ========================================")
logger.Printf("[INFO] Self-test complete: %d passed, %d warnings, %d failed", s.Pass, s.Warn, s.Fail)
return s
}
// runSafe executes a check function, recovering from panics.
func runSafe(fn func() CheckResult) (result CheckResult) {
defer func() {
if r := recover(); r != nil {
result = CheckResult{
Name: "unknown",
Status: "fail",
Message: fmt.Sprintf("panic: %v", r),
}
}
}()
return fn()
}
func checkDockerSocket() CheckResult {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
out, err := exec.CommandContext(ctx, "docker", "info", "--format", "{{.ServerVersion}}").Output()
if err != nil {
return CheckResult{Name: "Docker socket", Status: "fail", Message: fmt.Sprintf("docker info failed: %v", err)}
}
return CheckResult{Name: "Docker socket", Status: "pass", Message: fmt.Sprintf("reachable (v%s)", string(out[:len(out)-1]))}
}
func checkStacksDir(stacksDir string) CheckResult {
info, err := os.Stat(stacksDir)
if err != nil {
return CheckResult{Name: "Stacks directory", Status: "fail", Message: fmt.Sprintf("not accessible: %v", err)}
}
if !info.IsDir() {
return CheckResult{Name: "Stacks directory", Status: "fail", Message: "path exists but is not a directory"}
}
return CheckResult{Name: "Stacks directory", Status: "pass", Message: stacksDir}
}
func checkDataDir(dataDir string) CheckResult {
info, err := os.Stat(dataDir)
if err != nil {
return CheckResult{Name: "Data directory", Status: "fail", Message: fmt.Sprintf("not accessible: %v", err)}
}
if !info.IsDir() {
return CheckResult{Name: "Data directory", Status: "fail", Message: "path exists but is not a directory"}
}
// Write test: create and remove a temp file
tmpFile := filepath.Join(dataDir, ".selftest-"+fmt.Sprintf("%d", time.Now().UnixNano()))
if err := os.WriteFile(tmpFile, []byte("test"), 0644); err != nil {
return CheckResult{Name: "Data directory", Status: "warn", Message: fmt.Sprintf("not writable: %v", err)}
}
os.Remove(tmpFile)
return CheckResult{Name: "Data directory", Status: "pass", Message: fmt.Sprintf("%s (writable)", dataDir)}
}
func checkSystemDataPath(systemDataPath string) CheckResult {
if systemDataPath == "" {
return CheckResult{Name: "System data path", Status: "warn", Message: "not configured"}
}
info, err := os.Stat(systemDataPath)
if err != nil {
return CheckResult{Name: "System data path", Status: "warn", Message: fmt.Sprintf("not accessible: %v", err)}
}
if !info.IsDir() {
return CheckResult{Name: "System data path", Status: "fail", Message: "path exists but is not a directory"}
}
if !system.IsMountPoint(systemDataPath) {
return CheckResult{Name: "System data path", Status: "warn", Message: fmt.Sprintf("%s exists but is not a mount point", systemDataPath)}
}
return CheckResult{Name: "System data path", Status: "pass", Message: systemDataPath}
}
func checkStoragePaths(sett *settings.Settings) CheckResult {
paths := sett.GetStoragePaths()
if len(paths) == 0 {
return CheckResult{Name: "Storage paths", Status: "warn", Message: "no storage paths registered"}
}
connected := 0
disconnected := 0
for _, sp := range paths {
if sp.Decommissioned {
continue
}
if sp.Disconnected {
disconnected++
continue
}
if _, err := os.Stat(sp.Path); err == nil && system.IsMountPoint(sp.Path) {
connected++
} else {
disconnected++
}
}
msg := fmt.Sprintf("%d connected, %d disconnected", connected, disconnected)
if connected == 0 && disconnected > 0 {
return CheckResult{Name: "Storage paths", Status: "warn", Message: msg}
}
return CheckResult{Name: "Storage paths", Status: "pass", Message: msg}
}
func checkGitCatalog(stacksDir string) CheckResult {
matches, err := filepath.Glob(filepath.Join(stacksDir, "*", ".felhom.yml"))
if err != nil {
return CheckResult{Name: "Git catalog", Status: "warn", Message: fmt.Sprintf("glob error: %v", err)}
}
if len(matches) == 0 {
return CheckResult{Name: "Git catalog", Status: "warn", Message: "no .felhom.yml files found in stacks dir"}
}
return CheckResult{Name: "Git catalog", Status: "pass", Message: fmt.Sprintf("%d app definitions found", len(matches))}
}
func checkHubConnectivity(cfg *config.Config) CheckResult {
if !cfg.Hub.Enabled || cfg.Hub.URL == "" {
return CheckResult{Name: "Hub connectivity", Status: "pass", Message: "hub disabled, skipped"}
}
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
url := cfg.Hub.URL + "/healthz"
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
return CheckResult{Name: "Hub connectivity", Status: "warn", Message: fmt.Sprintf("request error: %v", err)}
}
resp, err := http.DefaultClient.Do(req)
if err != nil {
return CheckResult{Name: "Hub connectivity", Status: "warn", Message: fmt.Sprintf("unreachable: %v", err)}
}
resp.Body.Close()
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
return CheckResult{Name: "Hub connectivity", Status: "pass", Message: fmt.Sprintf("%s reachable (HTTP %d)", cfg.Hub.URL, resp.StatusCode)}
}
return CheckResult{Name: "Hub connectivity", Status: "warn", Message: fmt.Sprintf("HTTP %d from %s", resp.StatusCode, url)}
}
func checkResticRepos(sett *settings.Settings) CheckResult {
paths := sett.GetStoragePaths()
if len(paths) == 0 {
return CheckResult{Name: "Restic repos", Status: "pass", Message: "no storage paths, skipped"}
}
found := 0
missing := 0
for _, sp := range paths {
if sp.Disconnected || sp.Decommissioned {
continue
}
repoPath := backup.PrimaryResticRepoPath(sp.Path)
if _, err := os.Stat(repoPath); err == nil {
found++
} else {
missing++
}
}
if found == 0 && missing > 0 {
return CheckResult{Name: "Restic repos", Status: "warn", Message: fmt.Sprintf("0 repos found, %d expected", missing)}
}
msg := fmt.Sprintf("%d repos found", found)
if missing > 0 {
msg += fmt.Sprintf(", %d missing", missing)
}
return CheckResult{Name: "Restic repos", Status: "pass", Message: msg}
}
func checkMetricsDB(dataDir string) CheckResult {
dbPath := filepath.Join(dataDir, "metrics.db")
info, err := os.Stat(dbPath)
if err != nil {
if os.IsNotExist(err) {
return CheckResult{Name: "Metrics DB", Status: "warn", Message: "metrics.db not found (will be created)"}
}
return CheckResult{Name: "Metrics DB", Status: "warn", Message: fmt.Sprintf("not accessible: %v", err)}
}
return CheckResult{Name: "Metrics DB", Status: "pass", Message: fmt.Sprintf("%.1f MB", float64(info.Size())/1024/1024)}
}
+27 -2
View File
@@ -40,6 +40,7 @@ type Updater struct {
dataDir string
composePath string // e.g., "/opt/docker/felhom-controller/docker-compose.yml"
logger *log.Logger
debug bool
mu sync.Mutex
latestVersion string
@@ -49,7 +50,7 @@ type Updater struct {
}
// NewUpdater creates a new Updater instance.
func NewUpdater(cfg *config.SelfUpdateConfig, gitCfg *config.GitConfig, currentVersion, dataDir, composePath string, logger *log.Logger) *Updater {
func NewUpdater(cfg *config.SelfUpdateConfig, gitCfg *config.GitConfig, currentVersion, dataDir, composePath string, logger *log.Logger, debug bool) *Updater {
return &Updater{
cfg: cfg,
gitCfg: gitCfg,
@@ -57,6 +58,7 @@ func NewUpdater(cfg *config.SelfUpdateConfig, gitCfg *config.GitConfig, currentV
dataDir: dataDir,
composePath: composePath,
logger: logger,
debug: debug,
}
}
@@ -131,10 +133,16 @@ func (u *Updater) CheckForUpdate() CheckResult {
return result
}
if latestVer.Compare(currentVer) > 0 {
cmp := latestVer.Compare(currentVer)
if cmp > 0 {
result.UpdateAvailable = true
}
if u.debug {
u.logger.Printf("[DEBUG] [SELFUPDATE] Version comparison: current=%s, latest=%s, cmp=%d, updateAvailable=%v",
u.currentVer, latestStr, cmp, result.UpdateAvailable)
}
u.mu.Lock()
u.latestVersion = latestStr
u.lastCheck = &result
@@ -153,6 +161,10 @@ func (u *Updater) queryRegistry() (string, error) {
// Gitea registry V2: GET /v2/<owner>/<repo>/tags/list
url := fmt.Sprintf("https://gitea.dooplex.hu/v2/%s/tags/list", registryImagePath(u.cfg.Image))
if u.debug {
u.logger.Printf("[DEBUG] [SELFUPDATE] Registry API URL: %s (user: %s)", url, u.gitCfg.Username)
}
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return "", fmt.Errorf("creating request: %w", err)
@@ -181,6 +193,10 @@ func (u *Updater) queryRegistry() (string, error) {
return "", fmt.Errorf("decoding response: %w", err)
}
if u.debug {
u.logger.Printf("[DEBUG] [SELFUPDATE] Registry returned %d tags: %v", len(tagsResp.Tags), tagsResp.Tags)
}
// Find highest semver tag
var highest *Version
for _, tag := range tagsResp.Tags {
@@ -342,6 +358,15 @@ func (u *Updater) updateComposeFile(newImage string) error {
// Replace image line: "image: gitea.dooplex.hu/admin/felhom-controller:..." → new image
re := regexp.MustCompile(`(image:\s*)gitea\.dooplex\.hu/admin/felhom-controller:\S+`)
if u.debug {
// Log old image line for debugging
oldMatch := re.Find(data)
if oldMatch != nil {
u.logger.Printf("[DEBUG] [SELFUPDATE] Compose file edit: %q → %q", string(oldMatch), "image: "+newImage)
}
}
newData := re.ReplaceAll(data, []byte("${1}"+newImage))
if bytes.Equal(data, newData) {
+18
View File
@@ -73,6 +73,11 @@ func NewServer(cfg *config.Config, dataDir string, logger *log.Logger, version s
return s
}
// isDebug returns true if logging level is "debug".
func (s *Server) isDebug() bool {
return s.cfg != nil && s.cfg.Logging.Level == "debug"
}
func (s *Server) loadTemplates() {
s.tmpl = template.Must(
template.New("").Funcs(template.FuncMap{
@@ -311,6 +316,9 @@ func (s *Server) processHubRestore(w http.ResponseWriter, r *http.Request) {
return
}
if s.isDebug() {
s.logger.Printf("[DEBUG] Setup: hub restore — pulling recovery from %s for customer %s", hubURL, customerID)
}
recovery, err := report.PullRecovery(hubURL, customerID, password)
if err != nil {
var msg string
@@ -328,6 +336,10 @@ func (s *Server) processHubRestore(w http.ResponseWriter, r *http.Request) {
return
}
if s.isDebug() {
s.logger.Printf("[DEBUG] Setup: hub recovery received — hasInfra=%v, configLen=%d", recovery.HasInfraBackup, len(recovery.ConfigYAML))
}
// Store recovery data in state for restore execution
s.state.SelectedBackup = &SelectedBackup{
Source: "hub",
@@ -392,6 +404,9 @@ func (s *Server) processFreshHub(w http.ResponseWriter, r *http.Request) {
s.logger.Printf("[INFO] Setup: config downloaded (%d bytes), writing config...", len(configYAML))
// Write config and finish setup
if s.isDebug() {
s.logger.Printf("[DEBUG] Setup: writing fresh config (%d bytes)", len(configYAML))
}
s.state.SetFormField("retrieval_password", password)
if err := s.writeFreshConfig(configYAML, password); err != nil {
s.logger.Printf("[ERROR] Setup: writeFreshConfig failed: %v", err)
@@ -447,6 +462,9 @@ func (s *Server) processManual(w http.ResponseWriter, r *http.Request) {
// Generate controller.yaml
configYAML := s.generateManualConfig()
if s.isDebug() {
s.logger.Printf("[DEBUG] Setup: generated manual config (%d bytes) for customer %s", len(configYAML), customerID)
}
if err := s.writeFreshConfig(configYAML, ""); err != nil {
csrf := ensureCSRFToken(w, r)
data := map[string]interface{}{
+10 -2
View File
@@ -45,7 +45,7 @@ type lsblkDevice struct {
}
// ScanDrivesForInfraBackups scans all block devices for .felhom-infra-backup/ directories.
func ScanDrivesForInfraBackups(logger *log.Logger) ([]DriveBackup, error) {
func ScanDrivesForInfraBackups(logger *log.Logger, debug bool) ([]DriveBackup, error) {
logger.Printf("[INFO] Setup: scanning drives for infra backups...")
// Read currently mounted filesystems
@@ -68,6 +68,10 @@ func ScanDrivesForInfraBackups(logger *log.Logger) ([]DriveBackup, error) {
return nil, fmt.Errorf("parsing lsblk: %w", err)
}
if debug {
logger.Printf("[DEBUG] Setup scan: lsblk returned %d block devices", len(lsblk.Blockdevices))
}
var results []DriveBackup
// Flatten all partitions
@@ -83,6 +87,10 @@ func ScanDrivesForInfraBackups(logger *log.Logger) ([]DriveBackup, error) {
}
}
if debug {
logger.Printf("[DEBUG] Setup scan: found %d partitions to check, %d root devices to skip", len(partitions), len(rootDevices))
}
for _, part := range partitions {
// Skip partitions without filesystem
if part.FSType == nil || *part.FSType == "" || *part.FSType == "swap" {
@@ -256,7 +264,7 @@ func countValid(results []DriveBackup) int {
// runDriveScan runs the scan asynchronously and stores results on the Server.
func (s *Server) runDriveScan() {
results, err := ScanDrivesForInfraBackups(s.logger)
results, err := ScanDrivesForInfraBackups(s.logger, s.isDebug())
s.scanMu.Lock()
defer s.scanMu.Unlock()
+9 -5
View File
@@ -1,13 +1,17 @@
package storage
import "log"
// AttachRequest holds parameters for attaching an existing partition
// without formatting — using a bind mount from a subfolder.
type AttachRequest struct {
DevicePath string // "/dev/sdb1" — must have an existing filesystem
MountName string // "hdd_1" → bind-mounts at /mnt/hdd_1
SubPath string // full path on raw mount to bind-mount (e.g., "/mnt/.felhom-raw/hdd_1/felhom_data")
Label string // Display label for the UI
SetDefault bool // Register as default storage path
DevicePath string // "/dev/sdb1" — must have an existing filesystem
MountName string // "hdd_1" → bind-mounts at /mnt/hdd_1
SubPath string // full path on raw mount to bind-mount (e.g., "/mnt/.felhom-raw/hdd_1/felhom_data")
Label string // Display label for the UI
SetDefault bool // Register as default storage path
Logger *log.Logger // Optional logger for debug output
Debug bool // Enable debug logging
}
// DirEntry represents a directory entry returned by ListDirectories.
@@ -182,8 +182,14 @@ func FinalizeAttach(req AttachRequest, progress chan<- FormatProgress) (string,
progress <- FormatProgress{Step: "error", Message: msg, Error: errStr, Percent: 0}
return fmt.Errorf("%s: %w", msg, err)
}
dbg := func(format string, args ...interface{}) {
if req.Logger != nil && req.Debug {
req.Logger.Printf("[DEBUG] FinalizeAttach: "+format, args...)
}
}
mountPath := "/mnt/" + req.MountName
dbg("starting: device=%s mountName=%s subPath=%s", req.DevicePath, req.MountName, req.SubPath)
// --- Step 1: Validate ---
send("validating", "Paraméterek ellenőrzése...", 5)
@@ -244,6 +250,7 @@ func FinalizeAttach(req AttachRequest, progress chan<- FormatProgress) (string,
subRel := strings.TrimPrefix(cleanSub, rawMountPath)
subRel = strings.TrimPrefix(subRel, "/")
dbg("raw mount path: %s, sub relative: %q", rawMountPath, subRel)
send("mounting", "fstab bejegyzések hozzáadása...", 35)
// Backup fstab (non-fatal)
@@ -251,13 +258,17 @@ func FinalizeAttach(req AttachRequest, progress chan<- FormatProgress) (string,
// Fstab entry 1: raw partition mount
// Use nofail so a missing disk doesn't block boot
dbg("fstab entry 1: UUID=%s → %s (fstype=%s)", uuid, rawMountPath, fsType)
if err := AppendFstabEntry(FstabPath, uuid, rawMountPath, fsType, "defaults,nofail,noatime"); err != nil {
dbg("fstab raw mount entry failed: %v", err)
return "", fail("mounting", "fstab bejegyzés hozzáadása sikertelen (raw mount)", err)
}
// Fstab entry 2: bind mount from subfolder to final path
bindSource := cleanSub
dbg("fstab entry 2: bind %s → %s", bindSource, mountPath)
if err := appendBindFstabEntry(FstabPath, bindSource, mountPath); err != nil {
dbg("fstab bind entry failed: %v", err)
// Roll back the raw mount fstab entry
_ = RemoveFstabEntry(FstabPath, uuid)
return "", fail("mounting", "fstab bejegyzés hozzáadása sikertelen (bind mount)", err)
@@ -272,20 +283,25 @@ func FinalizeAttach(req AttachRequest, progress chan<- FormatProgress) (string,
return "", fail("mounting", "Csatlakoztatási mappa nem hozható létre: "+mountPath, err)
}
dbg("bind mount: mount --bind %s %s", cleanSub, mountPath)
if out, err := exec.Command("mount", "--bind", cleanSub, mountPath).CombinedOutput(); err != nil {
dbg("bind mount failed: %s", string(out))
_ = RemoveFstabEntry(FstabPath, uuid)
_ = removeBindFstabEntry(FstabPath, mountPath)
return "", fail("mounting", "Bind mount sikertelen: "+string(out), err)
}
// Verify bind mount
dbg("verifying bind mount with findmnt")
verifyOut, verifyErr := exec.Command("findmnt", "-n", "-o", "SOURCE", "--target", mountPath).Output()
if verifyErr != nil || strings.TrimSpace(string(verifyOut)) == "" {
dbg("bind mount verification failed: findmnt returned %q err=%v", string(verifyOut), verifyErr)
_ = exec.Command("umount", mountPath).Run()
_ = RemoveFstabEntry(FstabPath, uuid)
_ = removeBindFstabEntry(FstabPath, mountPath)
return "", fail("mounting", "A bind mount nem ellenőrizhető", fmt.Errorf("mount point %s not found after bind mount", mountPath))
}
dbg("bind mount verified: source=%q", strings.TrimSpace(string(verifyOut)))
send("mounting", "Csatlakoztatva: "+mountPath, 70)
@@ -301,6 +317,7 @@ func FinalizeAttach(req AttachRequest, progress chan<- FormatProgress) (string,
}
}
dbg("attach completed successfully: %s", mountPath)
send("done", "Meghajtó sikeresen csatolva: "+mountPath, 100)
return mountPath, nil
+8 -5
View File
@@ -3,16 +3,19 @@ package storage
import (
"bufio"
"fmt"
"log"
"strings"
)
// FormatRequest holds parameters for formatting and mounting a disk.
type FormatRequest struct {
DevicePath string // "/dev/sdb" or "/dev/sdb1"
MountName string // "hdd_1" → mounts at /mnt/hdd_1
Label string // Display label for the UI
CreatePartition bool // If true, create a single partition first (wipes disk)
SetDefault bool // Register as default storage path
DevicePath string // "/dev/sdb" or "/dev/sdb1"
MountName string // "hdd_1" → mounts at /mnt/hdd_1
Label string // Display label for the UI
CreatePartition bool // If true, create a single partition first (wipes disk)
SetDefault bool // Register as default storage path
Logger *log.Logger // Optional logger for debug output
Debug bool // Enable debug logging
}
// FormatProgress tracks the formatting/mounting progress.
@@ -10,6 +10,8 @@ import (
"path/filepath"
"strings"
"time"
"gitea.dooplex.hu/admin/felhom-controller/internal/util"
)
// FormatAndMount formats a disk/partition and mounts it.
@@ -27,8 +29,14 @@ func FormatAndMount(req FormatRequest, progress chan<- FormatProgress) (string,
progress <- FormatProgress{Step: "error", Message: msg, Error: errStr, Percent: 0}
return fmt.Errorf("%s: %w", msg, err)
}
dbg := func(format string, args ...interface{}) {
if req.Logger != nil && req.Debug {
req.Logger.Printf("[DEBUG] FormatAndMount: "+format, args...)
}
}
mountPath := "/mnt/" + req.MountName
dbg("starting: device=%s mountName=%s createPartition=%v", req.DevicePath, req.MountName, req.CreatePartition)
// --- Step 1: Validate ---
send("validating", "Eszköz ellenőrzése...", 5)
@@ -78,10 +86,14 @@ func FormatAndMount(req FormatRequest, progress chan<- FormatProgress) (string,
if req.CreatePartition {
// Wipe existing partition table and filesystem signatures first
// H18: Log wipefs errors instead of silently discarding them.
dbg("step wipefs: wipefs -a %s", HostDevicePath(req.DevicePath))
send("partitioning", fmt.Sprintf("wipefs -a %s ...", HostDevicePath(req.DevicePath)), 12)
if err := exec.Command("wipefs", "-a", HostDevicePath(req.DevicePath)).Run(); err != nil {
// Non-fatal: some systems don't have wipefs; continue anyway
dbg("wipefs failed (non-fatal): %v", err)
send("partitioning", fmt.Sprintf("[WARN] wipefs sikertelen %s: %v (folytatás)", req.DevicePath, err), 13)
} else {
dbg("wipefs completed successfully")
}
time.Sleep(500 * time.Millisecond)
@@ -89,12 +101,16 @@ func FormatAndMount(req FormatRequest, progress chan<- FormatProgress) (string,
// ",," = start=default, size=default(fill disk), type=default(Linux filesystem GUID).
// --force: overwrite even if device appears busy.
// --wipe always: wipe filesystem signatures from newly created partitions.
dbg("step sfdisk: sfdisk --force --wipe always %s", HostDevicePath(req.DevicePath))
send("partitioning", fmt.Sprintf("sfdisk --force --wipe always %s ...", HostDevicePath(req.DevicePath)), 15)
sfdiskInput := "label: gpt\n,,\n"
cmd := exec.Command("sfdisk", "--force", "--wipe", "always", HostDevicePath(req.DevicePath))
cmd.Stdin = strings.NewReader(sfdiskInput)
if out, err := cmd.CombinedOutput(); err != nil {
dbg("sfdisk failed: %s", util.TruncateStr(string(out), 500))
return "", fail("partitioning", "Partícionálás sikertelen: "+string(out), err)
} else {
dbg("sfdisk output: %s", util.TruncateStr(string(out), 500))
}
_ = exec.Command("partprobe", HostDevicePath(req.DevicePath)).Run()
@@ -119,14 +135,17 @@ func FormatAndMount(req FormatRequest, progress chan<- FormatProgress) (string,
fsLabel = fsLabel[:16]
}
dbg("step mkfs.ext4: mkfs.ext4 -L %s -F %s", fsLabel, HostDevicePath(partDev))
send("formatting", fmt.Sprintf("mkfs.ext4 -L %s -F %s ...", fsLabel, HostDevicePath(partDev)), 30)
mkfsCmd := exec.Command("mkfs.ext4", "-L", fsLabel, "-F", HostDevicePath(partDev))
var mkfsOut bytes.Buffer
mkfsCmd.Stdout = &mkfsOut
mkfsCmd.Stderr = &mkfsOut
if err := mkfsCmd.Run(); err != nil {
dbg("mkfs.ext4 failed: %s", util.TruncateStr(mkfsOut.String(), 500))
return "", fail("formatting", "Formázás sikertelen: "+mkfsOut.String(), err)
}
dbg("mkfs.ext4 output: %s", util.TruncateStr(mkfsOut.String(), 500))
send("formatting", "Formázás kész", 60)
@@ -135,12 +154,15 @@ func FormatAndMount(req FormatRequest, progress chan<- FormatProgress) (string,
return "", fail("mounting", "Csatlakoztatási mappa nem hozható létre: "+mountPath, err)
}
dbg("step blkid: blkid -s UUID -o value %s", HostDevicePath(partDev))
send("mounting", fmt.Sprintf("UUID lekérése: blkid %s ...", HostDevicePath(partDev)), 65)
uuidOut, err := exec.Command("blkid", "-s", "UUID", "-o", "value", HostDevicePath(partDev)).Output()
if err != nil {
dbg("blkid UUID failed: %v", err)
return "", fail("mounting", "UUID lekérése sikertelen", err)
}
uuid := strings.TrimSpace(string(uuidOut))
dbg("blkid returned UUID=%q", uuid)
if uuid == "" {
return "", fail("mounting", "UUID üres a formázás után", fmt.Errorf("empty UUID"))
}
@@ -148,28 +170,36 @@ func FormatAndMount(req FormatRequest, progress chan<- FormatProgress) (string,
// Backup fstab (non-fatal)
_ = BackupFstab(FstabPath)
dbg("step fstab: appending UUID=%s mountPath=%s fstype=ext4", uuid, mountPath)
if err := AppendFstabEntry(FstabPath, uuid, mountPath, "ext4", "defaults,nofail,noatime"); err != nil {
dbg("fstab append failed: %v", err)
return "", fail("mounting", "fstab bejegyzés hozzáadása sikertelen", err)
}
dbg("fstab entry added successfully")
// Mount by device path explicitly — container's /etc/fstab != host fstab,
// so "mount /mnt/hdd_1" (fstab lookup) won't work.
dbg("step mount: mount -t ext4 -o defaults,noatime %s %s", HostDevicePath(partDev), mountPath)
send("mounting", fmt.Sprintf("mount -t ext4 %s %s ...", HostDevicePath(partDev), mountPath), 70)
if out, err := exec.Command("mount", "-t", "ext4", "-o", "defaults,noatime",
HostDevicePath(partDev), mountPath).CombinedOutput(); err != nil {
dbg("mount failed: %s", util.TruncateStr(string(out), 500))
// H19: Roll back fstab entry to prevent orphaned entry that hangs system on reboot.
_ = RemoveFstabEntry(FstabPath, uuid)
return "", fail("mounting", "Csatlakoztatás sikertelen: "+string(out), err)
}
// Verify mount actually worked (don't just trust exit code)
dbg("step verify: findmnt -n -o SOURCE --target %s", mountPath)
verifyOut, verifyErr := exec.Command("findmnt", "-n", "-o", "SOURCE", "--target", mountPath).Output()
if verifyErr != nil || strings.TrimSpace(string(verifyOut)) == "" {
dbg("mount verification failed: findmnt returned %q err=%v", string(verifyOut), verifyErr)
// H19: Also roll back fstab if mount verify fails.
_ = RemoveFstabEntry(FstabPath, uuid)
return "", fail("mounting", "A csatlakoztatás nem ellenőrizhető: mount sikerült, de a meghajtó nem látható",
fmt.Errorf("mount point %s not found after mount", mountPath))
}
dbg("mount verified: findmnt source=%q", strings.TrimSpace(string(verifyOut)))
send("mounting", "Csatlakoztatva: "+mountPath, 80)
@@ -185,6 +215,7 @@ func FormatAndMount(req FormatRequest, progress chan<- FormatProgress) (string,
}
}
dbg("format and mount completed successfully: %s", mountPath)
send("done", "Meghajtó sikeresen inicializálva: "+mountPath, 100)
return mountPath, nil
+23 -5
View File
@@ -18,11 +18,13 @@ import (
// MigrateRequest holds parameters for migrating app data.
type MigrateRequest struct {
StackName string // e.g., "immich"
DisplayName string // e.g., "Immich"
CurrentHDDPath string // e.g., "/mnt/hdd_placeholder"
TargetPath string // e.g., "/mnt/hdd_1"
HDDMounts []string // host-side paths to rsync (e.g., ["/mnt/hdd_placeholder/storage/immich"])
StackName string // e.g., "immich"
DisplayName string // e.g., "Immich"
CurrentHDDPath string // e.g., "/mnt/hdd_placeholder"
TargetPath string // e.g., "/mnt/hdd_1"
HDDMounts []string // host-side paths to rsync (e.g., ["/mnt/hdd_placeholder/storage/immich"])
Logger *log.Logger // Optional logger for debug output
Debug bool // Enable debug logging
}
// MigrateProgress tracks migration state.
@@ -82,6 +84,14 @@ func MigrateAppData(
return fmt.Errorf("%s: %w", msg, err)
}
dbg := func(format string, args ...interface{}) {
if req.Logger != nil && req.Debug {
req.Logger.Printf("[DEBUG] MigrateAppData: "+format, args...)
}
}
dbg("starting migration: stack=%s from=%s to=%s mounts=%d", req.StackName, req.CurrentHDDPath, req.TargetPath, len(req.HDDMounts))
// --- Step 1: Validate ---
if req.CurrentHDDPath == "" {
return fail("validating", "A jelenlegi tárhely nem megadott", fmt.Errorf("empty current HDD path"))
@@ -107,8 +117,11 @@ func MigrateAppData(
}
}
dbg("estimated total size: %s (%d bytes)", bytesHuman(totalBytes), totalBytes)
// Check free space on target
freeBytes := getFreeBytes(req.TargetPath)
dbg("target free space: %s (%d bytes)", bytesHuman(freeBytes), freeBytes)
if freeBytes > 0 && totalBytes > 0 && int64(float64(totalBytes)*1.05) > freeBytes {
return fail("validating", fmt.Sprintf(
"Nincs elég szabad hely a céltárolón: szükséges ~%s, szabad %s",
@@ -126,15 +139,18 @@ func MigrateAppData(
send("stopping", "Alkalmazás leállítva", 10, 0, totalBytes)
// --- Step 3: rsync ---
dbg("starting rsync phase: %d mount(s) to copy", len(req.HDDMounts))
var bytesCopied int64
for i, srcPath := range req.HDDMounts {
// Determine destination path: replace CurrentHDDPath prefix with TargetPath.
// H13: Require trailing separator to prevent /mnt/hdd matching /mnt/hdd_backup/data.
if srcPath != req.CurrentHDDPath && !strings.HasPrefix(srcPath, req.CurrentHDDPath+"/") {
dbg("skipping mount %s (not under %s)", srcPath, req.CurrentHDDPath)
continue
}
relPath := strings.TrimPrefix(srcPath, req.CurrentHDDPath)
dstPath := filepath.Join(req.TargetPath, relPath)
dbg("rsync %d/%d: %s → %s", i+1, len(req.HDDMounts), srcPath, dstPath)
// Ensure destination parent exists
if err := os.MkdirAll(filepath.Dir(dstPath), 0755); err != nil {
@@ -162,6 +178,7 @@ func MigrateAppData(
send("updating", "Konfiguráció frissítése...", 75, bytesCopied, totalBytes)
// --- Step 4: Update app.yaml HDD_PATH ---
dbg("updating config: HDD_PATH %s → %s for stack %s", req.CurrentHDDPath, req.TargetPath, req.StackName)
if err := updateFn(req.StackName, req.TargetPath); err != nil {
send("rolling_back", "Konfiguráció frissítése sikertelen, visszaállítás...", 0, bytesCopied, totalBytes)
_ = startFn(req.StackName)
@@ -178,6 +195,7 @@ func MigrateAppData(
return fail("starting", "Alkalmazás indítása sikertelen az új tárolóról", err)
}
dbg("migration completed: stack=%s bytesCopied=%d elapsed=%ds", req.StackName, bytesCopied, int(time.Since(start).Seconds()))
send("done",
fmt.Sprintf("Áthelyezés kész! Az alkalmazás az új tárolóról fut. (Régi adat: %s, idő: %ds)",
req.CurrentHDDPath, int(time.Since(start).Seconds())),
+43 -4
View File
@@ -5,11 +5,14 @@ package storage
import (
"encoding/json"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"gitea.dooplex.hu/admin/felhom-controller/internal/util"
)
// lsblkOutput matches the top-level JSON from lsblk -J.
@@ -193,7 +196,13 @@ func partitionToParentDisk(devPath string) string {
// enrichWithBlkid fills in missing FSType, UUID, and Label on partitions using blkid.
// Probes each partition individually via /host-dev (Docker overrides /dev with its own
// tmpfs, so the host block devices are accessible at /host-dev instead).
func enrichWithBlkid(disks []BlockDevice) {
func enrichWithBlkid(disks []BlockDevice, logger *log.Logger, debug bool) {
dbg := func(format string, args ...interface{}) {
if debug && logger != nil {
logger.Printf("[DEBUG] enrichWithBlkid: "+format, args...)
}
}
for i := range disks {
for j := range disks[i].Partitions {
p := &disks[i].Partitions[j]
@@ -202,16 +211,25 @@ func enrichWithBlkid(disks []BlockDevice) {
if p.FSType == "" {
if out, err := exec.Command("blkid", "-o", "value", "-s", "TYPE", hostPath).Output(); err == nil {
p.FSType = strings.TrimSpace(string(out))
dbg("blkid TYPE %s → %q", hostPath, p.FSType)
} else {
dbg("blkid TYPE %s failed: %v", hostPath, err)
}
}
if p.UUID == "" {
if out, err := exec.Command("blkid", "-o", "value", "-s", "UUID", hostPath).Output(); err == nil {
p.UUID = strings.TrimSpace(string(out))
dbg("blkid UUID %s → %q", hostPath, p.UUID)
} else {
dbg("blkid UUID %s failed: %v", hostPath, err)
}
}
if p.Label == "" {
if out, err := exec.Command("blkid", "-o", "value", "-s", "LABEL", hostPath).Output(); err == nil {
p.Label = strings.TrimSpace(string(out))
dbg("blkid LABEL %s → %q", hostPath, p.Label)
} else {
dbg("blkid LABEL %s failed: %v", hostPath, err)
}
}
}
@@ -220,7 +238,13 @@ func enrichWithBlkid(disks []BlockDevice) {
// ScanDisks detects all block devices and classifies them into
// available (not mounted, not system) and system/mounted disks.
func ScanDisks() (*ScanResult, error) {
func ScanDisks(logger *log.Logger, debug bool) (*ScanResult, error) {
dbg := func(format string, args ...interface{}) {
if debug && logger != nil {
logger.Printf("[DEBUG] ScanDisks: "+format, args...)
}
}
out, err := exec.Command(
"lsblk", "-J", "-b",
"-o", "NAME,PATH,SIZE,TYPE,FSTYPE,MOUNTPOINT,MODEL,RM",
@@ -229,11 +253,15 @@ func ScanDisks() (*ScanResult, error) {
return nil, fmt.Errorf("lsblk failed: %w", err)
}
dbg("raw lsblk JSON: %s", util.TruncateStr(string(out), 500))
var parsed lsblkOutput
if err := json.Unmarshal(out, &parsed); err != nil {
return nil, fmt.Errorf("lsblk JSON parse failed: %w", err)
}
dbg("lsblk returned %d block devices", len(parsed.BlockDevices))
// Get system disk names from host fstab (works correctly inside container)
systemDiskNames := getSystemDiskNames()
@@ -287,16 +315,27 @@ func ScanDisks() (*ScanResult, error) {
isSystem := systemDiskNames[dev.Name]
bd.Mounted = anyMounted || isSystem
classification := "available"
if isSystem || anyMounted {
classification = "system"
result.SystemDisks = append(result.SystemDisks, bd)
} else {
result.AvailableDisks = append(result.AvailableDisks, bd)
}
dbg("disk %s: model=%q size=%s partitions=%d removable=%v classification=%s",
bd.Name, bd.Model, bd.Size, len(bd.Partitions), bd.Removable, classification)
for _, p := range bd.Partitions {
dbg(" partition %s: fstype=%q mountpoint=%q size=%s", p.Name, p.FSType, p.MountPoint, p.Size)
}
}
dbg("classification result: %d available, %d system", len(result.AvailableDisks), len(result.SystemDisks))
// Enrich FSType, UUID, Label from blkid (lsblk can't probe fstype in container)
enrichWithBlkid(result.AvailableDisks)
enrichWithBlkid(result.SystemDisks)
enrichWithBlkid(result.AvailableDisks, logger, debug)
enrichWithBlkid(result.SystemDisks, logger, debug)
return result, nil
}
+5 -2
View File
@@ -2,9 +2,12 @@
package storage
import "fmt"
import (
"fmt"
"log"
)
// ScanDisks is not supported on non-Linux platforms.
func ScanDisks() (*ScanResult, error) {
func ScanDisks(logger *log.Logger, debug bool) (*ScanResult, error) {
return nil, fmt.Errorf("storage init is only supported on Linux")
}
+46
View File
@@ -3,12 +3,14 @@ package sync
import (
"bytes"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"log"
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
"sync"
"time"
@@ -60,6 +62,17 @@ func New(cfg *config.Config, logger *log.Logger, rescanFn func() error, postSync
}
}
// isDebug returns true if the logging level is set to "debug".
func (s *Syncer) isDebug() bool { return s.cfg.Logging.Level == "debug" }
// maskRepoURL masks credentials in a git URL for safe logging.
// e.g., "https://user:token@host/path" → "https://user:***@host/path"
var reURLCreds = regexp.MustCompile(`(https?://)([^:]+):([^@]+)@`)
func maskRepoURL(url string) string {
return reURLCreds.ReplaceAllString(url, "${1}${2}:***@")
}
// Start begins the periodic sync loop. Call Stop() to terminate.
func (s *Syncer) Start() {
if s.cfg.Git.RepoURL == "" {
@@ -192,6 +205,9 @@ func (s *Syncer) doSync() SyncResult {
// Step 4: Inject missing deploy fields for updated stacks
if len(updated) > 0 && s.postSyncHook != nil {
if s.isDebug() {
s.logger.Printf("[DEBUG] [SYNC] Post-sync hook: triggering missing field injection for %d stack(s): %v", len(updated), updated)
}
s.postSyncHook(updated)
}
@@ -230,11 +246,17 @@ func (s *Syncer) gitCloneOrPull() error {
args := []string{"clone", "--depth", "1", "--branch", s.cfg.Git.Branch}
repoURL := s.buildRepoURL()
args = append(args, repoURL, s.cacheDir)
if s.isDebug() {
s.logger.Printf("[DEBUG] [SYNC] git clone URL: %s, branch: %s, cacheDir: %s", maskRepoURL(repoURL), s.cfg.Git.Branch, s.cacheDir)
}
return s.runGit(args...)
}
// Pull
s.logger.Printf("[SYNC] Pulling latest from %s (branch: %s)", s.cfg.Git.RepoURL, s.cfg.Git.Branch)
if s.isDebug() {
s.logger.Printf("[DEBUG] [SYNC] git fetch --depth 1 origin %s in %s", s.cfg.Git.Branch, s.cacheDir)
}
if err := s.runGitInDir(s.cacheDir, "fetch", "--depth", "1", "origin", s.cfg.Git.Branch); err != nil {
return fmt.Errorf("git fetch: %w", err)
}
@@ -290,6 +312,9 @@ func (s *Syncer) copyTemplates() (newApps []string, updated []string, err error)
dst := filepath.Join(dstDir, filename)
if _, err := os.Stat(src); os.IsNotExist(err) {
if s.isDebug() {
s.logger.Printf("[DEBUG] [SYNC] %s/%s: source not found, skipping", appName, filename)
}
continue
}
@@ -301,6 +326,11 @@ func (s *Syncer) copyTemplates() (newApps []string, updated []string, err error)
if changed {
anyChanged = true
s.logger.Printf("[SYNC] Updated %s/%s", appName, filename)
if s.isDebug() {
s.logFileHashes(appName, filename, src, dst)
}
} else if s.isDebug() {
s.logger.Printf("[DEBUG] [SYNC] %s/%s: hash match, skipped", appName, filename)
}
}
@@ -314,6 +344,22 @@ func (s *Syncer) copyTemplates() (newApps []string, updated []string, err error)
return newApps, updated, nil
}
// logFileHashes logs the source and destination file hashes for debugging.
func (s *Syncer) logFileHashes(appName, filename, src, dst string) {
srcData, err := os.ReadFile(src)
if err != nil {
return
}
srcHash := sha256.Sum256(srcData)
dstData, err := os.ReadFile(dst)
if err != nil {
s.logger.Printf("[DEBUG] [SYNC] %s/%s: src=%s, dst=new file", appName, filename, hex.EncodeToString(srcHash[:8]))
return
}
dstHash := sha256.Sum256(dstData)
s.logger.Printf("[DEBUG] [SYNC] %s/%s: src=%s, dst=%s (changed)", appName, filename, hex.EncodeToString(srcHash[:8]), hex.EncodeToString(dstHash[:8]))
}
// copyIfChanged copies src to dst only if the content differs.
// Returns true if the file was actually written.
func copyIfChanged(src, dst string) (bool, error) {
+12
View File
@@ -0,0 +1,12 @@
package util
import "strings"
// TruncateStr truncates a string to maxLen characters, appending "..." if truncated.
func TruncateStr(s string, maxLen int) string {
s = strings.TrimSpace(s)
if len(s) <= maxLen {
return s
}
return s[:maxLen] + "..."
}
+8 -2
View File
@@ -197,7 +197,7 @@ func (s *Server) storageAPIHandler(w http.ResponseWriter, r *http.Request) {
// storageScanAPIHandler handles POST /api/storage/scan.
func (s *Server) storageScanAPIHandler(w http.ResponseWriter, r *http.Request) {
result, err := storage.ScanDisks()
result, err := storage.ScanDisks(s.logger, s.cfg.Logging.Level == "debug")
if err != nil {
s.logger.Printf("[ERROR] storageScan: %v", err)
jsonError(w, "Meghajtók keresése sikertelen: "+err.Error(), http.StatusInternalServerError)
@@ -249,12 +249,14 @@ func (s *Server) storageInitAPIHandler(w http.ResponseWriter, r *http.Request) {
Label: req.Label,
CreatePartition: req.CreatePartition,
SetDefault: req.SetDefault,
Logger: s.logger,
Debug: s.cfg.Logging.Level == "debug",
}
// Smart partition: if disk has exactly 1 partition with no filesystem,
// skip destructive repartitioning and format the existing partition directly.
if fmtReq.CreatePartition {
if scanResult, scanErr := storage.ScanDisks(); scanErr == nil {
if scanResult, scanErr := storage.ScanDisks(s.logger, s.cfg.Logging.Level == "debug"); scanErr == nil {
for _, disk := range scanResult.AvailableDisks {
if disk.Path == req.DevicePath && len(disk.Partitions) == 1 && disk.Partitions[0].FSType == "" {
s.logger.Printf("[INFO] Disk %s has 1 empty partition (%s) — skipping repartition",
@@ -493,6 +495,8 @@ func (s *Server) storageMigrateAPIHandler(w http.ResponseWriter, r *http.Request
CurrentHDDPath: currentHDDPath,
TargetPath: req.TargetPath,
HDDMounts: mounts,
Logger: s.logger,
Debug: s.cfg.Logging.Level == "debug",
}
stopFn := func(name string) error {
@@ -1037,6 +1041,8 @@ func (s *Server) storageAttachAPIHandler(w http.ResponseWriter, r *http.Request)
SubPath: req.SubPath,
Label: req.Label,
SetDefault: req.SetDefault,
Logger: s.logger,
Debug: s.cfg.Logging.Level == "debug",
}
go func() {