diff --git a/TASK.md b/TASK.md index 6f0cd08..14aebe8 100644 --- a/TASK.md +++ b/TASK.md @@ -1,340 +1,357 @@ -# TASK: v0.16.0 — Controller Self-Update +# TASK: Hub Update Trigger + Controller URL Reporting + +**Controller:** v0.15.6 → v0.16.1 (v0.16.0 self-update already deployed) +**Hub:** v0.1.7 → v0.1.8 ## Overview -Implement a Watchtower-style self-update mechanism so the controller can update itself from the Settings page (or automatically on schedule). No automatic rollback — Docker's `restart: unless-stopped` + healthchecks.io monitoring is the safety net. +Add the ability to trigger a controller self-update from the hub's customer detail page. This requires: -**Flow:** Check Gitea registry for new tags → pull image → update compose file → `docker compose up -d` → process dies → new container starts. +1. **Controller** sends its URL in periodic reports (`controller_url` field) +2. **Hub** stores the controller URL, checks the Gitea registry for the latest controller image version, and shows a "Trigger Update" button when an update is available +3. **Hub** proxies the update trigger request to the controller's existing `/api/selfupdate/update` endpoint using the shared API key -The `SelfUpdateConfig` struct already exists in `config.go` but has zero implementation. This task implements everything. +**Two repositories involved:** +- `deploy-felhom-compose/controller/` — add `controller_url` to reports (Part 1) +- `felhom.eu/hub/` — version checker, trigger button, URL tracking (Part 2) --- -## Part 1: New Package `controller/internal/selfupdate/` +## Part 1: Controller Changes (v0.16.1) -Create directory `controller/internal/selfupdate/` with 3 files. +Minimal changes — just add `controller_url` to the report payload so the hub knows where to reach the controller. 
-### 1.1 `version.go` — Version parsing/comparison +### 1.1 `controller/internal/report/types.go` +**Add `ControllerURL` field to the Report struct (after `ControllerVersion`, line 10):** + +Change: ```go -package selfupdate - -import ( - "fmt" - "strconv" - "strings" -) - -// Version represents a semantic version (Major.Minor.Patch). -type Version struct { - Major int - Minor int - Patch int - Raw string -} - -// ParseVersion parses "X.Y.Z" or "vX.Y.Z". Returns error for "dev", "latest", or invalid formats. -func ParseVersion(s string) (Version, error) { - s = strings.TrimPrefix(s, "v") - if s == "dev" || s == "latest" || s == "" { - return Version{}, fmt.Errorf("invalid version: %q", s) - } - parts := strings.SplitN(s, ".", 3) - if len(parts) != 3 { - return Version{}, fmt.Errorf("invalid version format: %q (expected X.Y.Z)", s) - } - major, err := strconv.Atoi(parts[0]) - if err != nil { - return Version{}, fmt.Errorf("invalid major version: %w", err) - } - minor, err := strconv.Atoi(parts[1]) - if err != nil { - return Version{}, fmt.Errorf("invalid minor version: %w", err) - } - patch, err := strconv.Atoi(parts[2]) - if err != nil { - return Version{}, fmt.Errorf("invalid patch version: %w", err) - } - return Version{Major: major, Minor: minor, Patch: patch, Raw: s}, nil -} - -// Compare returns -1 if a < b, 0 if a == b, 1 if a > b. -func (a Version) Compare(b Version) int { - if a.Major != b.Major { - if a.Major < b.Major { - return -1 - } - return 1 - } - if a.Minor != b.Minor { - if a.Minor < b.Minor { - return -1 - } - return 1 - } - if a.Patch != b.Patch { - if a.Patch < b.Patch { - return -1 - } - return 1 - } - return 0 -} - -// String returns the version as "X.Y.Z". 
-func (v Version) String() string { - return fmt.Sprintf("%d.%d.%d", v.Major, v.Minor, v.Patch) -} +type Report struct { + Version int `json:"version"` + CustomerID string `json:"customer_id"` + CustomerName string `json:"customer_name"` + ControllerVersion string `json:"controller_version"` + Timestamp time.Time `json:"timestamp"` ``` -### 1.2 `state.go` — Update audit state +To: +```go +type Report struct { + Version int `json:"version"` + CustomerID string `json:"customer_id"` + CustomerName string `json:"customer_name"` + ControllerVersion string `json:"controller_version"` + ControllerURL string `json:"controller_url,omitempty"` + Timestamp time.Time `json:"timestamp"` +``` -The state file is an **audit log** persisted at `{dataDir}/update-state.json`. Used for startup verification and UI display. NOT for rollback. +### 1.2 `controller/internal/report/builder.go` + +**Add `"fmt"` to imports and set `ControllerURL` in `BuildReport()`.** + +After the `Report` literal that contains line 33 (`ControllerVersion: version,`) is closed, add the following. It is a statement, so it cannot be placed between the literal's fields; the snippet assumes the report variable is named `r` — adjust to the actual name: +```go + // Controller URL for hub callbacks (self-update trigger, etc.) + if cfg.Customer.Domain != "" { + r.ControllerURL = fmt.Sprintf("https://felhom.%s", cfg.Customer.Domain) + } +``` + +Add `"fmt"` to the import block. + +### 1.3 `controller/cmd/controller/main.go` + +**Bump the version variable:** + +Change: +```go +var Version = "0.16.0" +``` +To: +```go +var Version = "0.16.1" +``` + +(Or wherever the `Version` variable is defined — check the exact location.) 
+ +--- + +## Part 2: Hub Changes (v0.1.8) + +### 2.1 `hub/cmd/hub/main.go` — Config + wiring + +**A) Add `Registry` section to Config struct (after the `Alerting` section, line 44):** ```go -package selfupdate + Registry struct { + Image string `yaml:"image"` + Username string `yaml:"username"` + Token string `yaml:"token"` + CheckInterval string `yaml:"check_interval"` + } `yaml:"registry"` +``` -import ( - "encoding/json" - "fmt" - "log" - "os" - "path/filepath" -) +**B) Add defaults in `loadConfig()` (at the end of the defaults section):** -const stateFileName = "update-state.json" +```go + if cfg.Registry.Image == "" { + cfg.Registry.Image = "gitea.dooplex.hu/admin/felhom-controller" + } + if cfg.Registry.CheckInterval == "" { + cfg.Registry.CheckInterval = "6h" + } +``` -// UpdateState tracks the last update attempt. Persisted to disk as audit log. -type UpdateState struct { - Status string `json:"status"` // "pending", "success", "failed" - PreviousVersion string `json:"previous_version"` - PreviousImage string `json:"previous_image"` - TargetVersion string `json:"target_version"` - TargetImage string `json:"target_image"` - InitiatedAt string `json:"initiated_at"` // RFC3339 - InitiatedBy string `json:"initiated_by"` // "manual" or "auto" - CompletedAt string `json:"completed_at,omitempty"` - Error string `json:"error,omitempty"` -} +**C) Create version checker and pass API key to web server.** -// LoadState reads the update state file. Returns nil, nil if file doesn't exist. 
-func LoadState(dataDir string) (*UpdateState, error) { - path := filepath.Join(dataDir, stateFileName) - data, err := os.ReadFile(path) - if err != nil { - if os.IsNotExist(err) { - return nil, nil +After `webServer := web.New(...)`, add version checker setup: + +```go + // Initialize version checker for controller image registry + var versionChecker *web.VersionChecker + if cfg.Registry.Username != "" && cfg.Registry.Token != "" { + checkInterval, err := time.ParseDuration(cfg.Registry.CheckInterval) + if err != nil { + checkInterval = 6 * time.Hour } - return nil, fmt.Errorf("reading state file: %w", err) + versionChecker = web.NewVersionChecker(cfg.Registry.Image, cfg.Registry.Username, cfg.Registry.Token, checkInterval, logger) + go versionChecker.Run(ctx) + logger.Printf("[INFO] Registry version checker started (every %s)", cfg.Registry.CheckInterval) + } else { + logger.Printf("[INFO] Registry version checker disabled (no credentials configured)") } +``` - var state UpdateState - if err := json.Unmarshal(data, &state); err != nil { - return nil, fmt.Errorf("parsing state file: %w", err) - } - return &state, nil -} +**D) Update `web.New()` call to include API key and version checker:** -// SaveState writes the state file atomically (write to .tmp, then rename). 
-func SaveState(dataDir string, state *UpdateState) error { - path := filepath.Join(dataDir, stateFileName) - tmpPath := path + ".tmp" +Change from: +```go + webServer := web.New(dataStore, cfg.Auth.PasswordHash, staleThreshold, logger) +``` - data, err := json.MarshalIndent(state, "", " ") +To: +```go + webServer := web.New(dataStore, cfg.Auth.PasswordHash, cfg.API.ReportAPIKey, staleThreshold, logger) +``` + +After creating the version checker, set it on the web server: +```go + webServer.SetVersionChecker(versionChecker) +``` + +### 2.2 `hub/internal/store/store.go` — Add controller_url tracking + +**A) Add `ControllerURL` field to `CustomerSummary` struct (after `ReportJSON`, line 31):** + +```go + ControllerURL string +``` + +**B) Add migration for `controller_url` column at the end of the `migrate()` function:** + +The function currently ends with `return err` directly after the existing `_, err := s.db.Exec(...)` block. Change the function to: + +```go +func (s *Store) migrate() error { + _, err := s.db.Exec(` + // ... existing SQL unchanged ... + `) if err != nil { - return fmt.Errorf("marshaling state: %w", err) + return err } - if err := os.WriteFile(tmpPath, data, 0644); err != nil { - return fmt.Errorf("writing temp state file: %w", err) - } - - if err := os.Rename(tmpPath, path); err != nil { - return fmt.Errorf("renaming state file: %w", err) - } + // v0.1.8: add controller_url column (idempotent — ignore error if already exists) + s.db.Exec("ALTER TABLE reports ADD COLUMN controller_url TEXT") return nil } +``` -// ClearState removes the state file. Used for cleanup. -func ClearState(dataDir string, logger *log.Logger) { - path := filepath.Join(dataDir, stateFileName) - if err := os.Remove(path); err != nil && !os.IsNotExist(err) { - logger.Printf("[WARN] Failed to clear update state file: %v", err) - } +(Move `return err` to be right after the first Exec block, then add the ALTER TABLE, then `return nil`.) 
+ +**C) Update `SaveReport()` to extract and store `controller_url`:** + +In the `parsed` struct (around line 202), add the field: +```go + ControllerURL string `json:"controller_url"` +``` + +Update the INSERT query and args: + +```go + _, err := s.db.Exec(` + INSERT INTO reports (customer_id, report_json, health_status, cpu_percent, + memory_percent, container_total, container_running, + backup_last_snapshot, controller_version, controller_url) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + customerID, string(reportJSON), + parsed.Health.Status, parsed.System.CPUPercent, + parsed.System.MemoryPercent, parsed.Containers.Total, + parsed.Containers.Running, backupSnapshot, + parsed.ControllerVersion, parsed.ControllerURL, + ) +``` + +**D) Update `GetCustomers()` to scan `controller_url`:** + +Add `r.controller_url` to the SELECT query: +```sql + SELECT r.customer_id, r.received_at, r.report_json, + r.health_status, r.cpu_percent, r.memory_percent, + r.container_total, r.container_running, + r.backup_last_snapshot, r.controller_version, r.controller_url + FROM reports r +``` + +Add a `controllerURL sql.NullString` variable and update the Scan: +```go + var controllerURL sql.NullString + if err := rows.Scan(&c.CustomerID, &receivedAt, &c.ReportJSON, + &c.HealthStatus, &c.CPUPercent, &c.MemoryPercent, + &c.ContainerTotal, &c.ContainerRunning, + &backupSnapshot, &c.ControllerVersion, &controllerURL); err != nil { + return nil, err + } +``` + +After scanning, assign: +```go + if controllerURL.Valid { + c.ControllerURL = controllerURL.String + } +``` + +**E) Update `GetCustomer()` identically** — add `r.controller_url` to SELECT, `controllerURL sql.NullString`, update Scan, assign. + +**F) Update `GetCustomerHistory()` identically** — add `r.controller_url` to SELECT, `controllerURL sql.NullString`, update Scan, assign. 
+ +### 2.3 `hub/internal/web/server.go` — Version checker, trigger handler, template data + +**A) Add fields to Server struct:** + +```go +type Server struct { + store *store.Store + passwordHash string + apiKey string // report API key — used for controller callbacks + logger *log.Logger + templates *template.Template + staleThreshold time.Duration + versionChecker *VersionChecker } ``` -### 1.3 `updater.go` — Core logic - -This is the main file. Contains: -- Registry check (HTTP GET to Gitea registry V2 API) -- Update trigger (pull → replace compose → docker compose up -d) -- Startup verification (check state file after restart) -- Status for API/UI +**B) Add imports:** ```go -package selfupdate - -import ( - "bytes" - "encoding/json" - "fmt" - "log" - "net/http" - "os" - "os/exec" - "regexp" - "strings" + "io" "sync" - "time" +``` - "gitea.dooplex.hu/admin/felhom-controller/internal/config" -) +(`fmt`, `encoding/json`, `net/http`, `time` should already be present.) -// CheckResult holds the result of a version check. -type CheckResult struct { - CurrentVersion string `json:"current_version"` - LatestVersion string `json:"latest_version"` - UpdateAvailable bool `json:"update_available"` - Error string `json:"error,omitempty"` - CheckedAt string `json:"checked_at"` +**C) Update `New()` constructor to accept `apiKey`:** + +```go +func New(store *store.Store, passwordHash, apiKey string, staleThreshold time.Duration, logger *log.Logger) *Server { +``` + +Add to struct init: `apiKey: apiKey,` + +**D) Add `SetVersionChecker` method:** + +```go +// SetVersionChecker sets the version checker (optional, may be nil if no registry credentials). +func (s *Server) SetVersionChecker(vc *VersionChecker) { + s.versionChecker = vc } +``` -// UpdateStatus is the complete status returned by the API. 
-type UpdateStatus struct { - Running bool `json:"running"` - LastCheck *CheckResult `json:"last_check,omitempty"` - LastState *UpdateState `json:"last_state,omitempty"` -} +**E) Add the `VersionChecker` type (at the bottom of `server.go` or in a new file `hub/internal/web/version.go` — implementer's choice):** -// Updater manages controller self-updates. -type Updater struct { - cfg *config.SelfUpdateConfig - gitCfg *config.GitConfig - currentVer string - dataDir string - composePath string // e.g., "/opt/docker/felhom-controller/docker-compose.yml" - logger *log.Logger +```go +// VersionChecker periodically queries the Gitea Docker Registry V2 API +// for the latest controller image version tag. +type VersionChecker struct { + image string // e.g., "gitea.dooplex.hu/admin/felhom-controller" + username string + token string + checkInterval time.Duration + logger *log.Logger - mu sync.Mutex + mu sync.RWMutex latestVersion string - lastCheck *CheckResult - updateRunning bool - backupRunning func() bool + lastCheck time.Time + lastError string } -// NewUpdater creates a new Updater instance. -func NewUpdater(cfg *config.SelfUpdateConfig, gitCfg *config.GitConfig, currentVersion, dataDir, composePath string, logger *log.Logger) *Updater { - return &Updater{ - cfg: cfg, - gitCfg: gitCfg, - currentVer: currentVersion, - dataDir: dataDir, - composePath: composePath, - logger: logger, +// NewVersionChecker creates a new VersionChecker. +func NewVersionChecker(image, username, token string, checkInterval time.Duration, logger *log.Logger) *VersionChecker { + return &VersionChecker{ + image: image, + username: username, + token: token, + checkInterval: checkInterval, + logger: logger, } } -// SetBackupRunningCheck sets the callback to check if a backup is in progress. -func (u *Updater) SetBackupRunningCheck(fn func() bool) { - u.backupRunning = fn +// Run starts the periodic version check loop. Call in a goroutine. 
+// It checks immediately on start, then every checkInterval. +func (vc *VersionChecker) Run(ctx context.Context) { + vc.check() + ticker := time.NewTicker(vc.checkInterval) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + vc.check() + } + } } -// IsUpdateRunning returns true if an update is currently in progress. -func (u *Updater) IsUpdateRunning() bool { - u.mu.Lock() - defer u.mu.Unlock() - return u.updateRunning -} - -// GetStatus returns the current update status for API/UI. -func (u *Updater) GetStatus() UpdateStatus { - u.mu.Lock() - lastCheck := u.lastCheck - running := u.updateRunning - u.mu.Unlock() - - state, err := LoadState(u.dataDir) +func (vc *VersionChecker) check() { + latest, err := vc.queryRegistry() + vc.mu.Lock() + defer vc.mu.Unlock() + vc.lastCheck = time.Now() if err != nil { - u.logger.Printf("[WARN] Failed to load update state: %v", err) - } - - return UpdateStatus{ - Running: running, - LastCheck: lastCheck, - LastState: state, + vc.lastError = err.Error() + vc.logger.Printf("[WARN] Registry version check failed: %v", err) + return } + vc.lastError = "" + vc.latestVersion = latest + vc.logger.Printf("[DEBUG] Registry version check: latest = %s", latest) } -// CheckForUpdate queries the Gitea registry for the latest version tag. -// Caches the result. Thread-safe. 
-func (u *Updater) CheckForUpdate() CheckResult { - result := CheckResult{ - CurrentVersion: u.currentVer, - CheckedAt: time.Now().UTC().Format(time.RFC3339), - } - - // Dev version can't check for updates - currentVer, err := ParseVersion(u.currentVer) - if err != nil { - result.Error = "Dev verzió nem ellenőrizhető" - u.mu.Lock() - u.lastCheck = &result - u.mu.Unlock() - return result - } - - // Query registry - latestStr, err := u.queryRegistry() - if err != nil { - result.Error = fmt.Sprintf("Registry lekérdezés sikertelen: %v", err) - u.logger.Printf("[WARN] Registry check failed: %v", err) - u.mu.Lock() - u.lastCheck = &result - u.mu.Unlock() - return result - } - - result.LatestVersion = latestStr - - latestVer, err := ParseVersion(latestStr) - if err != nil { - result.Error = fmt.Sprintf("Érvénytelen verzió a registry-ben: %s", latestStr) - u.mu.Lock() - u.lastCheck = &result - u.mu.Unlock() - return result - } - - if latestVer.Compare(currentVer) > 0 { - result.UpdateAvailable = true - } - - u.mu.Lock() - u.latestVersion = latestStr - u.lastCheck = &result - u.mu.Unlock() - - return result +// LatestVersion returns the cached latest version string (e.g., "0.16.1"), or "" if unknown. +func (vc *VersionChecker) LatestVersion() string { + vc.mu.RLock() + defer vc.mu.RUnlock() + return vc.latestVersion } -// queryRegistry queries the Gitea Docker Registry V2 API for available tags. -// Returns the highest valid semver tag found. -func (u *Updater) queryRegistry() (string, error) { - if u.gitCfg.Username == "" || u.gitCfg.Token == "" { - return "", fmt.Errorf("registry hitelesítő adatok hiányoznak") +// queryRegistry queries the Gitea Docker Registry V2 API for available tags +// and returns the highest valid semver tag. 
+func (vc *VersionChecker) queryRegistry() (string, error) { + // Extract "owner/repo" from image reference + // e.g., "gitea.dooplex.hu/admin/felhom-controller" → "admin/felhom-controller" + imagePath := vc.image + if parts := strings.SplitN(vc.image, "/", 2); len(parts) == 2 { + imagePath = parts[1] } - // Gitea registry V2: GET /v2///tags/list - registryBase := strings.TrimSuffix(u.cfg.Image, "/"+imageName(u.cfg.Image)) - url := fmt.Sprintf("https://gitea.dooplex.hu/v2/%s/tags/list", registryImagePath(u.cfg.Image)) + url := fmt.Sprintf("https://gitea.dooplex.hu/v2/%s/tags/list", imagePath) req, err := http.NewRequest("GET", url, nil) if err != nil { return "", fmt.Errorf("creating request: %w", err) } - req.SetBasicAuth(u.gitCfg.Username, u.gitCfg.Token) + req.SetBasicAuth(vc.username, vc.token) client := &http.Client{Timeout: 15 * time.Second} resp, err := client.Do(req) @@ -351,933 +368,453 @@ func (u *Updater) queryRegistry() (string, error) { } var tagsResp struct { - Name string `json:"name"` Tags []string `json:"tags"` } if err := json.NewDecoder(resp.Body).Decode(&tagsResp); err != nil { return "", fmt.Errorf("decoding response: %w", err) } - // Find highest semver tag - var highest *Version + // Find the highest valid semver tag + var bestMajor, bestMinor, bestPatch int + found := false for _, tag := range tagsResp.Tags { - v, err := ParseVersion(tag) - if err != nil { - continue // skip non-semver tags ("latest", "dev", etc.) 
+ tag = strings.TrimPrefix(tag, "v") + parts := strings.SplitN(tag, ".", 3) + if len(parts) != 3 { + continue } - if highest == nil || v.Compare(*highest) > 0 { - highest = &v + major, e1 := strconv.Atoi(parts[0]) + minor, e2 := strconv.Atoi(parts[1]) + patch, e3 := strconv.Atoi(parts[2]) + if e1 != nil || e2 != nil || e3 != nil { + continue + } + if !found || major > bestMajor || + (major == bestMajor && minor > bestMinor) || + (major == bestMajor && minor == bestMinor && patch > bestPatch) { + bestMajor, bestMinor, bestPatch = major, minor, patch + found = true } } - if highest == nil { + if !found { return "", fmt.Errorf("no valid semver tags found") } - return highest.String(), nil -} - -// registryImagePath extracts the "owner/repo" from a full image reference. -// e.g., "gitea.dooplex.hu/admin/felhom-controller" → "admin/felhom-controller" -func registryImagePath(image string) string { - // Remove registry host - parts := strings.SplitN(image, "/", 2) - if len(parts) == 2 { - return parts[1] - } - return image -} - -// imageName extracts the repo name from a full image reference. -// e.g., "gitea.dooplex.hu/admin/felhom-controller" → "felhom-controller" -func imageName(image string) string { - parts := strings.Split(image, "/") - return parts[len(parts)-1] -} - -// TriggerUpdate starts the self-update process. Returns error immediately if -// preconditions fail. The actual update runs in a goroutine. 
-func (u *Updater) TriggerUpdate(initiatedBy string) error { - u.mu.Lock() - if u.updateRunning { - u.mu.Unlock() - return fmt.Errorf("Frissítés már folyamatban") - } - - // Dev version check - if _, err := ParseVersion(u.currentVer); err != nil { - u.mu.Unlock() - return fmt.Errorf("Dev verzió nem frissíthető") - } - - // Backup running check - if u.backupRunning != nil && u.backupRunning() { - u.mu.Unlock() - return fmt.Errorf("Mentés fut, próbálja később") - } - - // Compose file accessible check - if _, err := os.Stat(u.composePath); err != nil { - u.mu.Unlock() - return fmt.Errorf("docker-compose.yml nem elérhető: %w", err) - } - - u.updateRunning = true - u.mu.Unlock() - - // Check for update (or use cached) - result := u.CheckForUpdate() - if !result.UpdateAvailable { - u.mu.Lock() - u.updateRunning = false - u.mu.Unlock() - return fmt.Errorf("Nincs elérhető frissítés") - } - - targetVersion := result.LatestVersion - targetImage := fmt.Sprintf("%s:%s", u.cfg.Image, targetVersion) - previousImage := fmt.Sprintf("%s:%s", u.cfg.Image, u.currentVer) - - u.logger.Printf("[INFO] Starting self-update: %s → %s (initiated by: %s)", u.currentVer, targetVersion, initiatedBy) - - go u.performUpdate(targetVersion, targetImage, previousImage, initiatedBy) - - return nil -} - -// performUpdate runs the actual update steps in a goroutine. -func (u *Updater) performUpdate(targetVersion, targetImage, previousImage, initiatedBy string) { - defer func() { - u.mu.Lock() - u.updateRunning = false - u.mu.Unlock() - }() - - // 1. Write pending state - state := &UpdateState{ - Status: "pending", - PreviousVersion: u.currentVer, - PreviousImage: previousImage, - TargetVersion: targetVersion, - TargetImage: targetImage, - InitiatedAt: time.Now().UTC().Format(time.RFC3339), - InitiatedBy: initiatedBy, - } - if err := SaveState(u.dataDir, state); err != nil { - u.logger.Printf("[ERROR] Failed to save update state: %v", err) - return - } - - // 2. 
Docker pull - u.logger.Printf("[INFO] Pulling image: %s", targetImage) - pullOut, pullErr := runCommand("docker", "pull", targetImage) - if pullErr != nil { - state.Status = "failed" - state.Error = fmt.Sprintf("docker pull failed: %v — %s", pullErr, pullOut) - state.CompletedAt = time.Now().UTC().Format(time.RFC3339) - SaveState(u.dataDir, state) - u.logger.Printf("[ERROR] Docker pull failed: %v — %s", pullErr, pullOut) - return - } - u.logger.Printf("[INFO] Image pulled successfully: %s", targetImage) - - // 3. Update compose file (replace image tag) - if err := u.updateComposeFile(targetImage); err != nil { - state.Status = "failed" - state.Error = fmt.Sprintf("compose update failed: %v", err) - state.CompletedAt = time.Now().UTC().Format(time.RFC3339) - SaveState(u.dataDir, state) - u.logger.Printf("[ERROR] Compose file update failed: %v", err) - return - } - u.logger.Printf("[INFO] Compose file updated with new image: %s", targetImage) - - // 4. Docker compose up -d (this kills the current container) - u.logger.Printf("[INFO] Running docker compose up -d — container will restart") - composeDir := strings.TrimSuffix(u.composePath, "/docker-compose.yml") - upOut, upErr := runCommand("docker", "compose", "-f", u.composePath, "-p", "felhom-controller", "up", "-d") - if upErr != nil { - // If we get here, compose up failed but we already changed the image tag. - // Log the error — the state file remains "pending" for manual investigation. - u.logger.Printf("[ERROR] docker compose up -d failed: %v — %s (dir: %s)", upErr, upOut, composeDir) - return - } - - // If we're still alive after compose up -d, log it. - // Normally this process should be killed when Docker replaces the container. 
- u.logger.Printf("[WARN] Still running after docker compose up -d — expected to be replaced") - time.Sleep(30 * time.Second) - u.logger.Printf("[WARN] Still alive 30s after docker compose up -d") -} - -// updateComposeFile reads the compose file, replaces the image tag, and writes it back atomically. -func (u *Updater) updateComposeFile(newImage string) error { - data, err := os.ReadFile(u.composePath) - if err != nil { - return fmt.Errorf("reading compose file: %w", err) - } - - // Replace image line: "image: gitea.dooplex.hu/admin/felhom-controller:..." → new image - re := regexp.MustCompile(`(image:\s*)gitea\.dooplex\.hu/admin/felhom-controller:\S+`) - newData := re.ReplaceAll(data, []byte("${1}"+newImage)) - - if bytes.Equal(data, newData) { - return fmt.Errorf("no image line found to replace in compose file") - } - - // Atomic write: write to .tmp, then rename - tmpPath := u.composePath + ".tmp" - if err := os.WriteFile(tmpPath, newData, 0644); err != nil { - return fmt.Errorf("writing temp compose file: %w", err) - } - if err := os.Rename(tmpPath, u.composePath); err != nil { - return fmt.Errorf("renaming compose file: %w", err) - } - - return nil -} - -// VerifyStartup checks the update state file on startup. -// Called once from main.go before the scheduler starts. -// Returns the state if a pending update was detected, nil otherwise. 
-func (u *Updater) VerifyStartup() *UpdateState { - state, err := LoadState(u.dataDir) - if err != nil { - u.logger.Printf("[WARN] Failed to load update state on startup: %v — clearing", err) - ClearState(u.dataDir, u.logger) - return nil - } - if state == nil || state.Status != "pending" { - return nil - } - - // Compare current version with target - currentVer, curErr := ParseVersion(u.currentVer) - targetVer, tgtErr := ParseVersion(state.TargetVersion) - - if curErr != nil || tgtErr != nil { - state.Status = "failed" - state.Error = "Version parse error on startup verification" - state.CompletedAt = time.Now().UTC().Format(time.RFC3339) - SaveState(u.dataDir, state) - u.logger.Printf("[WARN] Post-update startup: version parse error (current=%s, target=%s)", u.currentVer, state.TargetVersion) - return state - } - - if currentVer.Compare(targetVer) == 0 { - // Success — we're running the target version - state.Status = "success" - state.CompletedAt = time.Now().UTC().Format(time.RFC3339) - SaveState(u.dataDir, state) - u.logger.Printf("[INFO] Post-update startup: update successful (%s → %s)", state.PreviousVersion, state.TargetVersion) - } else { - // Version mismatch — update may have failed - state.Status = "failed" - state.Error = fmt.Sprintf("Version mismatch: expected %s, running %s", state.TargetVersion, u.currentVer) - state.CompletedAt = time.Now().UTC().Format(time.RFC3339) - SaveState(u.dataDir, state) - u.logger.Printf("[WARN] Post-update startup: version mismatch (expected %s, running %s)", state.TargetVersion, u.currentVer) - } - - return state -} - -// runCommand executes a command and returns combined stdout+stderr and error. -func runCommand(name string, args ...string) (string, error) { - cmd := exec.Command(name, args...) 
- var out bytes.Buffer - cmd.Stdout = &out - cmd.Stderr = &out - err := cmd.Run() - return out.String(), err + return fmt.Sprintf("%d.%d.%d", bestMajor, bestMinor, bestPatch), nil } ``` -**IMPORTANT:** The `registryBase` variable on the line in `queryRegistry()` is unused — remove it. The URL construction should just use the hardcoded gitea.dooplex.hu host and `registryImagePath()`. Here's the corrected `queryRegistry` URL line: +**F) Add imports needed by VersionChecker:** + +If placing in `server.go`, add `"strconv"` and `"context"` to imports (some may already be present). + +If placing in a new `version.go` file, include all needed imports: ```go -url := fmt.Sprintf("https://gitea.dooplex.hu/v2/%s/tags/list", registryImagePath(u.cfg.Image)) +package web + +import ( + "context" + "encoding/json" + "fmt" + "log" + "net/http" + "strconv" + "strings" + "sync" + "time" +) ``` -(Remove the `registryBase` line entirely.) ---- +**G) Add route for trigger-update in `ServeHTTP`.** -## Part 2: Changes to Existing Files +This case MUST come BEFORE the general `/customers/` case. 
Reorder the switch: -### 2.1 `controller/internal/config/config.go` +```go + case strings.HasPrefix(path, "/customers/") && strings.HasSuffix(path, "/trigger-update"): + customerID := strings.TrimPrefix(path, "/customers/") + customerID = strings.TrimSuffix(customerID, "/trigger-update") + if r.Method == http.MethodPost { + s.handleTriggerUpdate(w, r, customerID) + } else { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + } + case strings.HasPrefix(path, "/customers/"): + customerID := strings.TrimPrefix(path, "/customers/") + s.handleCustomerDetail(w, r, customerID) +``` -**A) Add `AutoUpdateTime` field to `SelfUpdateConfig` (line ~121):** +**H) Add `handleTriggerUpdate` handler:** + +```go +func (s *Server) handleTriggerUpdate(w http.ResponseWriter, r *http.Request, customerID string) { + customer, err := s.store.GetCustomer(customerID) + if err != nil { + s.logger.Printf("[ERROR] Trigger update — get customer %s: %v", customerID, err) + http.Error(w, "Internal error", http.StatusInternalServerError) + return + } + if customer == nil { + http.NotFound(w, r) + return + } + + // Get controller URL — from denormalized field or report JSON fallback + controllerURL := customer.ControllerURL + if controllerURL == "" { + var rpt struct { + ControllerURL string `json:"controller_url"` + } + json.Unmarshal([]byte(customer.ReportJSON), &rpt) + controllerURL = rpt.ControllerURL + } + if controllerURL == "" { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte(`{"ok":false,"error":"Controller URL not available — waiting for next report"}`)) + return + } + + if s.apiKey == "" { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(`{"ok":false,"error":"API key not configured"}`)) + return + } + + // POST to controller's self-update endpoint + updateURL := controllerURL + "/api/selfupdate/update" + req, err := http.NewRequest("POST", 
updateURL, nil) + if err != nil { + s.logger.Printf("[ERROR] Trigger update — create request for %s: %v", updateURL, err) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(`{"ok":false,"error":"Failed to create request"}`)) + return + } + req.Header.Set("Authorization", "Bearer "+s.apiKey) + + client := &http.Client{Timeout: 30 * time.Second} + resp, err := client.Do(req) + if err != nil { + s.logger.Printf("[ERROR] Trigger update — request to %s failed: %v", updateURL, err) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusBadGateway) + json.NewEncoder(w).Encode(map[string]interface{}{"ok": false, "error": fmt.Sprintf("Controller unreachable: %v", err)}) + return + } + defer resp.Body.Close() + + // Forward the controller's response + body, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<16)) + s.logger.Printf("[INFO] Trigger update for %s — controller responded %d: %s", customerID, resp.StatusCode, string(body)) + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(resp.StatusCode) + w.Write(body) +} +``` + +**I) Update `handleCustomerDetail` to include version + URL data for the template:** + +Add fields to `detailData` struct: +```go + type detailData struct { + Customer *store.CustomerSummary + Report map[string]interface{} + History []store.CustomerSummary + OverallStatus string + NotifPrefs *store.NotificationPrefs + RecentNotifications []store.NotificationLogEntry + InfraBackup *store.InfraBackupMeta + InfraBackupAge string + ControllerURL string // controller's external URL + LatestVersion string // latest controller image version from registry + UpdateAvailable bool // true if latest > current + } +``` + +Before the `data := detailData{...}` assignment, add: + +```go + // Get controller URL (from denormalized field or report JSON fallback) + controllerURL := customer.ControllerURL + if controllerURL == "" { + var rpt struct { + ControllerURL 
string `json:"controller_url"` + } + json.Unmarshal([]byte(customer.ReportJSON), &rpt) + controllerURL = rpt.ControllerURL + } + + // Check if update is available + var latestVersion string + var updateAvailable bool + if s.versionChecker != nil { + latestVersion = s.versionChecker.LatestVersion() + if latestVersion != "" && customer.ControllerVersion != "" { + updateAvailable = latestVersion != customer.ControllerVersion && compareVersions(latestVersion, customer.ControllerVersion) > 0 + } + } +``` + +In the `data := detailData{...}` assignment, add: +```go + ControllerURL: controllerURL, + LatestVersion: latestVersion, + UpdateAvailable: updateAvailable, +``` + +**J) Add `compareVersions` helper function:** + +```go +// compareVersions returns >0 if a > b, 0 if equal, <0 if a < b. +// Accepts "X.Y.Z" format. Returns 0 on parse error. +func compareVersions(a, b string) int { + a = strings.TrimPrefix(a, "v") + b = strings.TrimPrefix(b, "v") + aParts := strings.SplitN(a, ".", 3) + bParts := strings.SplitN(b, ".", 3) + if len(aParts) != 3 || len(bParts) != 3 { + return 0 + } + for i := 0; i < 3; i++ { + ai, e1 := strconv.Atoi(aParts[i]) + bi, e2 := strconv.Atoi(bParts[i]) + if e1 != nil || e2 != nil { + return 0 + } + if ai != bi { + return ai - bi + } + } + return 0 +} +``` + +### 2.4 `hub/internal/web/templates/customer.html` — Update trigger section + +**Add "Controller Update" section after the "Health" section (after line 184, before the "Notifications" section).** + +Insert between the Health `` and ``: + +```html + +
+

Controller Update

+
+
+ Current version + v{{.Customer.ControllerVersion}} +
+ {{if .LatestVersion}} +
+ Latest version + + v{{.LatestVersion}} + {{if .UpdateAvailable}} + ● update available + {{else}} + — up to date + {{end}} + +
+ {{end}} + {{if .ControllerURL}} +
+ Controller URL + {{.ControllerURL}} +
+ {{end}} +
+ {{if and .ControllerURL .UpdateAvailable}} +
+ + +
+ {{else if and .ControllerURL (not .LatestVersion)}} +
+ + +

Registry check not configured — cannot verify if update is available

+
+ {{end}} +
+ + +``` + +**Button visibility logic:** +- `ControllerURL` set AND `UpdateAvailable` = true → show "Trigger Update" button (green dot, update available) +- `ControllerURL` set AND `LatestVersion` empty (no registry config) → show button with a note that version can't be verified +- `ControllerURL` set AND version is up to date → show version info only, no button +- `ControllerURL` not set → whole "Controller Update" card still shows version info, but no button + +### 2.5 `hub/internal/api/handler.go` — Include `controller_url` in API response + +**In `handleCustomers()`, add `ControllerURL` to the `customerJSON` struct:** + +```go + type customerJSON struct { + ID string `json:"id"` + Name string `json:"name"` + ControllerVersion string `json:"controller_version"` + ControllerURL string `json:"controller_url,omitempty"` + HealthStatus string `json:"health_status"` + LastSeen time.Time `json:"last_seen"` + CPUPercent float64 `json:"cpu_percent"` + MemoryPercent float64 `json:"memory_percent"` + ContainerTotal int `json:"container_total"` + ContainerRunning int `json:"container_running"` + BackupLastSnapshot *time.Time `json:"backup_last_snapshot"` + } +``` + +In the loop, add `ControllerURL: c.ControllerURL,` to the struct literal. 
+ +### 2.6 `hub/cmd/hub/main.go` — Bump version Change: ```go -type SelfUpdateConfig struct { - Enabled bool `yaml:"enabled"` - CheckInterval string `yaml:"check_interval"` - Image string `yaml:"image"` - AutoUpdate bool `yaml:"auto_update"` - HealthTimeoutSeconds int `yaml:"health_timeout_seconds"` -} +var ( + Version = "dev" ``` -To: -```go -type SelfUpdateConfig struct { - Enabled bool `yaml:"enabled"` - CheckInterval string `yaml:"check_interval"` - Image string `yaml:"image"` - AutoUpdate bool `yaml:"auto_update"` - AutoUpdateTime string `yaml:"auto_update_time"` - HealthTimeoutSeconds int `yaml:"health_timeout_seconds"` -} -``` - -**B) Add defaults in `applyDefaults()` (after line 208, near `SelfUpdate.CheckInterval`):** - -Add these two lines: -```go - d(&cfg.SelfUpdate.Image, "gitea.dooplex.hu/admin/felhom-controller") - d(&cfg.SelfUpdate.AutoUpdateTime, "04:30") -``` - -### 2.2 `controller/internal/notify/notifier.go` - -Add two convenience methods after the existing `NotifyIntegrityFailed` (around line 243): - -```go -// NotifyUpdateSuccess sends a notification about a successful controller update. -func (n *Notifier) NotifyUpdateSuccess(fromVer, toVer string) { - n.Notify("update_success", "info", - fmt.Sprintf("Controller frissítve: %s → %s", fromVer, toVer), "") -} - -// NotifyUpdateFailed sends a notification about a failed controller update. 
-func (n *Notifier) NotifyUpdateFailed(targetVer, errMsg string) { - n.Notify("update_failed", "warning", - fmt.Sprintf("Controller frissítés sikertelen: %s — %s", targetVer, errMsg), "") -} -``` - -### 2.3 `controller/internal/api/router.go` - -**A) Add import for selfupdate package:** - -Add to imports: -```go - "gitea.dooplex.hu/admin/felhom-controller/internal/selfupdate" -``` - -**B) Add `updater` field to Router struct (line ~32):** - -Add after the `metricsStore` field: -```go - updater *selfupdate.Updater -``` - -**C) Update `NewRouter` constructor (line 35):** - -Change from: -```go -func NewRouter(cfg *config.Config, sett *settings.Settings, stackMgr *stacks.Manager, syncer *catalogsync.Syncer, cpuCollector *system.CPUCollector, backupMgr *backup.Manager, crossDrive *backup.CrossDriveRunner, metricsStore *metrics.MetricsStore, logger *log.Logger) *Router { - return &Router{cfg: cfg, sett: sett, stackMgr: stackMgr, syncer: syncer, cpuCollector: cpuCollector, backupMgr: backupMgr, crossDriveRunner: crossDrive, metricsStore: metricsStore, logger: logger} -} -``` - -To: -```go -func NewRouter(cfg *config.Config, sett *settings.Settings, stackMgr *stacks.Manager, syncer *catalogsync.Syncer, cpuCollector *system.CPUCollector, backupMgr *backup.Manager, crossDrive *backup.CrossDriveRunner, metricsStore *metrics.MetricsStore, updater *selfupdate.Updater, logger *log.Logger) *Router { - return &Router{cfg: cfg, sett: sett, stackMgr: stackMgr, syncer: syncer, cpuCollector: cpuCollector, backupMgr: backupMgr, crossDriveRunner: crossDrive, metricsStore: metricsStore, updater: updater, logger: logger} -} -``` - -**D) Add 3 route cases in `ServeHTTP` switch (before the `default:` case, around line 155):** - -```go - // GET /api/selfupdate/status - case path == "/selfupdate/status" && req.Method == http.MethodGet: - r.selfupdateStatus(w, req) - - // POST /api/selfupdate/check - case path == "/selfupdate/check" && req.Method == http.MethodPost: - r.selfupdateCheck(w, 
req) - - // POST /api/selfupdate/update - case path == "/selfupdate/update" && req.Method == http.MethodPost: - r.selfupdateTrigger(w, req) -``` - -**E) Add 3 handler methods (at the end of the file, before `writeJSON`):** - -```go -func (r *Router) selfupdateStatus(w http.ResponseWriter, _ *http.Request) { - if r.updater == nil { - writeJSON(w, http.StatusOK, apiResponse{OK: true, Data: map[string]interface{}{"enabled": false}}) - return - } - writeJSON(w, http.StatusOK, apiResponse{OK: true, Data: r.updater.GetStatus()}) -} - -func (r *Router) selfupdateCheck(w http.ResponseWriter, _ *http.Request) { - if r.updater == nil { - writeJSON(w, http.StatusBadRequest, apiResponse{OK: false, Error: "Self-update not configured"}) - return - } - result := r.updater.CheckForUpdate() - writeJSON(w, http.StatusOK, apiResponse{OK: true, Data: result}) -} - -func (r *Router) selfupdateTrigger(w http.ResponseWriter, _ *http.Request) { - if r.updater == nil { - writeJSON(w, http.StatusBadRequest, apiResponse{OK: false, Error: "Self-update not configured"}) - return - } - if err := r.updater.TriggerUpdate("manual"); err != nil { - writeJSON(w, http.StatusConflict, apiResponse{OK: false, Error: err.Error()}) - return - } - r.logger.Println("[API] Manual self-update triggered") - writeJSON(w, http.StatusOK, apiResponse{OK: true, Message: "Frissítés elindítva"}) -} -``` - -### 2.4 `controller/internal/web/server.go` - -**A) Add import for selfupdate:** - -```go - "gitea.dooplex.hu/admin/felhom-controller/internal/selfupdate" -``` - -**B) Add `updater` field to Server struct (after `notifier`, around line 31):** - -```go - updater *selfupdate.Updater -``` - -**C) Update `NewServer` constructor (line 52):** - -Change from: -```go -func NewServer(cfg *config.Config, stackMgr *stacks.Manager, cpuCollector *system.CPUCollector, backupMgr *backup.Manager, crossDrive *backup.CrossDriveRunner, sched *scheduler.Scheduler, sett *settings.Settings, alertMgr *AlertManager, notif 
*notify.Notifier, logger *log.Logger, version string) *Server { -``` - -To: -```go -func NewServer(cfg *config.Config, stackMgr *stacks.Manager, cpuCollector *system.CPUCollector, backupMgr *backup.Manager, crossDrive *backup.CrossDriveRunner, sched *scheduler.Scheduler, sett *settings.Settings, alertMgr *AlertManager, notif *notify.Notifier, updater *selfupdate.Updater, logger *log.Logger, version string) *Server { -``` - -And add in the struct initialization: -```go - updater: updater, -``` - -### 2.5 `controller/internal/web/handlers.go` - -**In `settingsData()` (line 897), add self-update data after `data["HubEnabled"]` (line 908):** - -```go - // Self-update status - data["SelfUpdateEnabled"] = s.cfg.SelfUpdate.Enabled - if s.updater != nil { - status := s.updater.GetStatus() - data["UpdateRunning"] = status.Running - if status.LastCheck != nil { - data["UpdateAvailable"] = status.LastCheck.UpdateAvailable - data["LatestVersion"] = status.LastCheck.LatestVersion - data["LastCheckTime"] = status.LastCheck.CheckedAt - data["LastCheckError"] = status.LastCheck.Error - } - if status.LastState != nil { - data["LastUpdateState"] = status.LastState - } - data["AutoUpdateEnabled"] = s.cfg.SelfUpdate.AutoUpdate - data["AutoUpdateTime"] = s.cfg.SelfUpdate.AutoUpdateTime - } -``` - -### 2.6 `controller/internal/web/alerts.go` - -**A) Change `Refresh()` signature (line 43):** - -Change from: -```go -func (am *AlertManager) Refresh(report *monitor.HealthReport, cfg *config.Config, backupMgr *backup.Manager) { -``` - -To: -```go -func (am *AlertManager) Refresh(report *monitor.HealthReport, cfg *config.Config, backupMgr *backup.Manager, updateAvailable bool, latestVersion string) { -``` - -**B) Add update-available alert at the end of the function, before `sortAlerts` (around line 98):** - -After the "Backup disabled" alert block, add: -```go - // Update available - if updateAvailable && latestVersion != "" { - alerts = append(alerts, Alert{ - ID: "update-available", - 
Level: "info", - Message: fmt.Sprintf("Új controller verzió elérhető: %s", latestVersion), - Link: "/settings", - LinkText: "Frissítés", - }) - } -``` - -### 2.7 `controller/internal/web/templates/settings.html` - -**A) Remove the version row from Section A (lines 59-62):** - -Delete these lines: -```html -
- Controller verzió - {{.Version}} -
-``` - -**B) Add new "Verzió és frissítés" card between Section A (`` on line 64) and the "Adattárolók" section (line 66):** - -Insert this after the closing `` of Section A and before ``: - -```html - -
-

Verzió és frissítés

-
-
- Jelenlegi verzió - {{.Version}} -
- {{if .SelfUpdateEnabled}} - {{if .LatestVersion}} -
- Legújabb verzió - - {{.LatestVersion}} - {{if .UpdateAvailable}} - ● Frissítés elérhető - {{else}} - — naprakész - {{end}} - -
- {{end}} - {{if .LastCheckTime}} -
- Utolsó ellenőrzés - {{.LastCheckTime}} -
- {{end}} - {{if .LastCheckError}} -
- Hiba - {{.LastCheckError}} -
- {{end}} -
- Automatikus frissítés - - {{if .AutoUpdateEnabled}}✅ Aktív ({{.AutoUpdateTime}}){{else}}–{{end}} - -
- {{with .LastUpdateState}} -
- Utolsó frissítés - - {{if eq .Status "success"}}✅ Sikeres ({{.PreviousVersion}} → {{.TargetVersion}}) - {{else if eq .Status "failed"}}❌ Sikertelen — {{.Error}} - {{else if eq .Status "pending"}}⏳ Folyamatban - {{end}} - -
- {{end}} -
- - - - {{if .UpdateAvailable}} - - {{end}} - - -
- {{end}} -
-
- - -``` - -### 2.8 `controller/cmd/controller/main.go` - -**A) Add import:** - -```go - "gitea.dooplex.hu/admin/felhom-controller/internal/selfupdate" -``` - -And add `"path/filepath"` to imports if not already present. - -**B) Create updater instance and verify startup (add after notifier creation, around line 221, before scheduler init):** - -```go - // --- Initialize self-updater --- - var updater *selfupdate.Updater - if cfg.SelfUpdate.Enabled { - composePath := filepath.Join(filepath.Dir(cfg.Paths.DataDir), "docker-compose.yml") - updater = selfupdate.NewUpdater(&cfg.SelfUpdate, &cfg.Git, Version, cfg.Paths.DataDir, composePath, logger) - updater.SetBackupRunningCheck(func() bool { - return backupMgr != nil && backupMgr.IsRunning() - }) - // Check for post-update state (did a previous update succeed or fail?) - if state := updater.VerifyStartup(); state != nil { - if state.Status == "success" { - notifier.NotifyUpdateSuccess(state.PreviousVersion, state.TargetVersion) - } else if state.Status == "failed" { - notifier.NotifyUpdateFailed(state.TargetVersion, state.Error) - } - } - logger.Printf("[INFO] Self-update enabled (check every %s, auto-update: %v, auto-update time: %s)", - cfg.SelfUpdate.CheckInterval, cfg.SelfUpdate.AutoUpdate, cfg.SelfUpdate.AutoUpdateTime) - } -``` - -**C) Register scheduler jobs (add after the existing monitoring/backup scheduler jobs, before the hub pusher section):** - -```go - // Self-update scheduler jobs - if cfg.SelfUpdate.Enabled && updater != nil { - // Periodic version check (populates UI, never triggers update) - checkInterval, ciErr := time.ParseDuration(cfg.SelfUpdate.CheckInterval) - if ciErr != nil { - checkInterval = 6 * time.Hour - } - sched.Every("selfupdate-check", checkInterval, func(ctx context.Context) error { - result := updater.CheckForUpdate() - if result.UpdateAvailable { - logger.Printf("[INFO] Update available: %s -> %s", result.CurrentVersion, result.LatestVersion) - } - return nil - }) - - // Auto-update 
(daily, fires after typical backup completion) - if cfg.SelfUpdate.AutoUpdate { - sched.Daily("selfupdate-auto", cfg.SelfUpdate.AutoUpdateTime, func(ctx context.Context) error { - result := updater.CheckForUpdate() - if !result.UpdateAvailable { - return nil - } - if err := updater.TriggerUpdate("auto"); err != nil { - logger.Printf("[WARN] Auto-update skipped: %v", err) - } - return nil - }) - } - } -``` - -**D) Add initial version check in the startup goroutine (add after the hub report section, around line 408, inside the `go func()` block):** - -```go - // Initial self-update check (so settings page shows version info quickly) - if updater != nil { - time.Sleep(25 * time.Second) // Additional delay after hub report - result := updater.CheckForUpdate() - if result.UpdateAvailable { - logger.Printf("[INFO] Startup: update available %s -> %s", result.CurrentVersion, result.LatestVersion) - } else if result.Error != "" { - logger.Printf("[DEBUG] Startup version check: %s", result.Error) - } - } -``` - -**E) Update the `alertMgr.Refresh()` calls to include update params.** - -There are 2 calls to `alertMgr.Refresh()` in main.go: - -1. In the system-health scheduler job (around line 255): -```go - alertMgr.Refresh(healthReport, cfg, backupMgr) -``` -Change to: -```go - updateAvailable := false - latestVersion := "" - if updater != nil { - status := updater.GetStatus() - if status.LastCheck != nil { - updateAvailable = status.LastCheck.UpdateAvailable - latestVersion = status.LastCheck.LatestVersion - } - } - alertMgr.Refresh(healthReport, cfg, backupMgr, updateAvailable, latestVersion) -``` - -2. 
In the initial alert refresh goroutine (around line 434): -```go - alertMgr.Refresh(report, cfg, backupMgr) -``` -Change to: -```go - alertMgr.Refresh(report, cfg, backupMgr, false, "") -``` -(On initial startup, we haven't checked for updates yet, so pass false/"") - -**F) Update `NewRouter` call (line 439):** - -Change from: -```go - apiRouter := api.NewRouter(cfg, sett, stackMgr, syncer, cpuCollector, backupMgr, crossDriveRunner, metricsStore, logger) -``` - -To: -```go - apiRouter := api.NewRouter(cfg, sett, stackMgr, syncer, cpuCollector, backupMgr, crossDriveRunner, metricsStore, updater, logger) -``` - -**G) Update `NewServer` call (line 442):** - -Change from: -```go - webServer := web.NewServer(cfg, stackMgr, cpuCollector, backupMgr, crossDriveRunner, sched, sett, alertMgr, notifier, logger, Version) -``` - -To: -```go - webServer := web.NewServer(cfg, stackMgr, cpuCollector, backupMgr, crossDriveRunner, sched, sett, alertMgr, notifier, updater, logger, Version) -``` - -### 2.9 `controller/docker-compose.yml` - -**Replace the config + data volume mounts (lines 17-20):** - -Change from: -```yaml - # Controller config - - /opt/docker/felhom-controller/controller.yaml:/opt/docker/felhom-controller/controller.yaml:ro - # Controller persistent data (sessions, restic cache, restic password) - - controller-data:/opt/docker/felhom-controller/data -``` - -To: -```yaml - # Controller directory (compose file access for self-update) - - /opt/docker/felhom-controller:/opt/docker/felhom-controller - # Controller config (read-only override on top of directory mount) - - /opt/docker/felhom-controller/controller.yaml:/opt/docker/felhom-controller/controller.yaml:ro - # Controller persistent data (named volume override on top of directory mount) - - controller-data:/opt/docker/felhom-controller/data -``` - -Mount order matters: directory mount first, then named volume overrides `data/`, then read-only file overrides `controller.yaml`. 
- -### 2.10 API Key Auth for Self-Update Endpoints - -Currently all `/api/` routes (except `/api/health`) are behind session auth. For external triggering (build workflow, hub), the selfupdate endpoints need to also accept the hub API key as a bearer token. - -**A) In `controller/cmd/controller/main.go`, register `/api/selfupdate/` separately in the mux (around line 454):** - -Add this BEFORE the generic `/api/` route (mux uses longest prefix match): - -```go - // Self-update API — accepts session auth OR hub API key (for external triggering) - mux.Handle("/api/selfupdate/", selfUpdateAuthMiddleware(cfg, webServer, http.HandlerFunc(apiRouter.ServeHTTP))) -``` - -So the mux block becomes: -```go - mux.HandleFunc("/api/health", apiRouter.HealthHandler) - mux.Handle("/api/storage/", webServer.RequireAuth(http.HandlerFunc(webServer.ServeStorageAPI))) - mux.Handle("/api/selfupdate/", selfUpdateAuthMiddleware(cfg, webServer, http.HandlerFunc(apiRouter.ServeHTTP))) - mux.Handle("/api/", webServer.RequireAuth(http.HandlerFunc(apiRouter.ServeHTTP))) -``` - -**B) Add the middleware function in `main.go` (at the bottom of the file, before or after the existing helper functions):** - -```go -// selfUpdateAuthMiddleware allows access via session auth (normal UI) OR hub API key bearer token (external). 
-func selfUpdateAuthMiddleware(cfg *config.Config, webServer *web.Server, next http.Handler) http.Handler { - return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - // Check bearer token first (for external API calls: hub, build scripts) - if auth := r.Header.Get("Authorization"); strings.HasPrefix(auth, "Bearer ") { - token := strings.TrimPrefix(auth, "Bearer ") - if token != "" && cfg.Hub.APIKey != "" && token == cfg.Hub.APIKey { - next.ServeHTTP(w, r) - return - } - } - // Fall back to session auth - webServer.RequireAuth(next).ServeHTTP(w, r) - }) -} -``` - -This means: -- **UI buttons** work via session cookie (unchanged) -- **External callers** (build script, hub) can use `Authorization: Bearer ` header -- If neither is valid, returns 401 +**Note:** The version is injected at build time via `-ldflags`, so no code change needed — just use `0.1.8` as the build script argument. --- -## Part 3: Compose Path Calculation +## Summary of All File Changes -The compose file path is computed from the data dir in main.go: -```go -composePath := filepath.Join(filepath.Dir(cfg.Paths.DataDir), "docker-compose.yml") -``` - -Since `cfg.Paths.DataDir` defaults to `/opt/docker/felhom-controller/data`, `filepath.Dir()` gives `/opt/docker/felhom-controller`, and the compose file is at `/opt/docker/felhom-controller/docker-compose.yml`. 
- ---- - -## Summary of all file changes +### Controller (`deploy-felhom-compose/controller/`) — v0.16.1 | # | File | Change | |---|------|--------| -| NEW | `controller/internal/selfupdate/version.go` | Version parsing/comparison (ParseVersion, Compare) | -| NEW | `controller/internal/selfupdate/state.go` | Update state file I/O (LoadState, SaveState, ClearState) | -| NEW | `controller/internal/selfupdate/updater.go` | Core logic: registry check, update trigger, startup verify | -| 1 | `controller/internal/config/config.go` | Add `AutoUpdateTime` field + defaults for Image and AutoUpdateTime | -| 2 | `controller/internal/notify/notifier.go` | Add `NotifyUpdateSuccess()` and `NotifyUpdateFailed()` | -| 3 | `controller/internal/api/router.go` | Add updater field + 3 API endpoints | -| 4 | `controller/internal/web/server.go` | Add updater field to struct + constructor | -| 5 | `controller/internal/web/handlers.go` | Add self-update data to `settingsData()` | -| 6 | `controller/internal/web/alerts.go` | Add updateAvailable params to `Refresh()` + info alert | -| 7 | `controller/internal/web/templates/settings.html` | Move version to new card + buttons + JS | -| 8 | `controller/cmd/controller/main.go` | Wire updater, scheduler jobs, startup verify, update callers, API key auth middleware, selfupdate mux route | -| 9 | `controller/docker-compose.yml` | Add directory mount for self-update | +| 1 | `internal/report/types.go` | Add `ControllerURL string` field to Report struct | +| 2 | `internal/report/builder.go` | Set `ControllerURL` from `cfg.Customer.Domain`, add `"fmt"` import | +| 3 | `cmd/controller/main.go` | Bump Version to `"0.16.1"` | + +### Hub (`felhom.eu/hub/`) — v0.1.8 + +| # | File | Change | +|---|------|--------| +| 1 | `cmd/hub/main.go` | Add `Registry` config section + defaults, create VersionChecker, pass API key to web.New() | +| 2 | `internal/store/store.go` | Add `controller_url` column migration + `ControllerURL` to CustomerSummary + update 
SaveReport/GetCustomers/GetCustomer/GetCustomerHistory | +| 3 | `internal/web/server.go` (or new `version.go`) | Add `apiKey` + `versionChecker` fields, `VersionChecker` struct + `Run()` + `queryRegistry()`, `handleTriggerUpdate` handler, route, `ControllerURL`/`LatestVersion`/`UpdateAvailable` in detailData, `compareVersions` helper | +| 4 | `internal/web/templates/customer.html` | Add "Controller Update" card with version comparison + conditional trigger button + JS | +| 5 | `internal/api/handler.go` | Add `controller_url` to customer API JSON response | --- -## Build & Deploy (v0.16.0) +## Hub Config Addition + +Add to `hub.yaml` on deploy: + +```yaml +registry: + image: "gitea.dooplex.hu/admin/felhom-controller" + username: "admin" # Gitea username with read access to container registry + token: "..." # Gitea access token + check_interval: "6h" # How often to check for new versions +``` + +If no registry credentials are configured, the version checker is disabled. The "Trigger Update" button still appears but with a note that version cannot be verified. + +--- + +## Build & Deploy + +### Controller (v0.16.1) ```bash SSH=/c/Windows/System32/OpenSSH/ssh.exe # 1. Commit and push cd /e/git/deploy-felhom-compose -git add -A && git commit -m "feat: add controller self-update (v0.16.0)" && git push +git add -A && git commit -m "feat: add controller_url to hub reports (v0.16.1)" && git push # 2. Build -$SSH kisfenyo@192.168.0.180 "cd ~/build/felhom-controller && git -C ~/git/deploy-felhom-compose pull && ./build.sh 0.16.0 --push" +$SSH kisfenyo@192.168.0.180 "cd ~/build/felhom-controller && git -C ~/git/deploy-felhom-compose pull && ./build.sh 0.16.1 --push" -# 3. 
Deploy — ONE-TIME: manually add directory mount to remote compose before first deploy -# SSH into demo node and edit docker-compose.yml to add the directory mount: -# - /opt/docker/felhom-controller:/opt/docker/felhom-controller -# (See Part 2.9 for exact format) -# Then deploy: -$SSH kisfenyo@192.168.0.162 "cd /opt/docker/felhom-controller && sudo docker pull gitea.dooplex.hu/admin/felhom-controller:0.16.0 && sudo sed -i 's|image: gitea.dooplex.hu/admin/felhom-controller:.*|image: gitea.dooplex.hu/admin/felhom-controller:0.16.0|' docker-compose.yml && sudo docker compose up -d" - -# 4. Verify -$SSH kisfenyo@192.168.0.162 "docker ps --filter name=felhom-controller --format '{{.Image}} {{.Status}}'" -$SSH kisfenyo@192.168.0.162 "docker logs felhom-controller --tail 30" -``` - -**After v0.16.0 is deployed with the directory mount, future updates can be triggered from the Settings page or via API.** - -### Post-v0.16.0 Build & Deploy Workflow - -After v0.16.0, steps 3-4 change. The build server push is the same, but deploy uses the self-update API instead of manual SSH docker commands: - -```bash -SSH=/c/Windows/System32/OpenSSH/ssh.exe - -# 1. Commit and push (unchanged) -cd /e/git/deploy-felhom-compose -git add -A && git commit -m "" && git push - -# 2. Build + push image (unchanged) -$SSH kisfenyo@192.168.0.180 "cd ~/build/felhom-controller && git -C ~/git/deploy-felhom-compose pull && ./build.sh --push" - -# 3. Trigger self-update via API (replaces manual docker pull + sed + compose up) +# 3. Trigger self-update via API (v0.16.0 already has self-update) curl -s -X POST https://felhom.demo-felhom.eu/api/selfupdate/update \ -H "Authorization: Bearer " -# 4. Wait ~10s for container restart, then verify -sleep 10 -curl -s https://felhom.demo-felhom.eu/api/health +# 4. 
Wait + verify +sleep 15 $SSH kisfenyo@192.168.0.162 "docker ps --filter name=felhom-controller --format '{{.Image}} {{.Status}}'" ``` -**IMPORTANT:** CLAUDE.md needs to be updated with this new workflow after v0.16.0 is deployed. The old SSH-based `docker pull + sed + docker compose up -d` deploy step should be replaced with the `curl` API call shown above. +### Hub (v0.1.8) + +```bash +# 1. Commit and push +cd /e/git/felhom.eu +git add -A && git commit -m "feat: add controller update trigger + version checker (v0.1.8)" && git push + +# 2. Build ($SSH is the variable defined in the Controller block above) +$SSH kisfenyo@192.168.0.180 "cd ~/build/felhom-hub && ./build.sh 0.1.8 --push" + +# 3. Deploy to k3s +$SSH kisfenyo@192.168.0.180 "sudo kubectl set image -n felhom-system deploy/hub hub=gitea.dooplex.hu/admin/felhom-hub:0.1.8" + +# 4. Verify +$SSH kisfenyo@192.168.0.180 "sudo kubectl get pods -n felhom-system -l app=hub && sudo kubectl logs -n felhom-system -l app=hub --tail 10" + +# 5. Add registry config to hub.yaml (one-time) +# SSH to build server and edit the hub config to add the registry section, then restart the hub so the version checker starts (it stays disabled until registry credentials are loaded) +``` + +**Deploy order:** Controller first (v0.16.1), then Hub (v0.1.8). The hub needs the controller to already send `controller_url` in its reports. --- ## Verification -1. Open Settings page → "Verzió és frissítés" card shows current version -2. Click "Frissítés keresése" → queries registry, shows latest version (or error) -3. If update available, "Frissítés telepítése" button appears -4. Startup logs show `[INFO] Self-update enabled (check every 6h, auto-update: false, auto-update time: 04:30)` -5. If a previous update was pending, startup logs show success/failure -6. Alert banner shows "Új controller verzió elérhető" with link to /settings when update is available -7. API key auth works: `curl -X POST https://felhom.demo-felhom.eu/api/selfupdate/check -H "Authorization: Bearer "` returns version info -8. Without auth: `curl -X POST https://felhom.demo-felhom.eu/api/selfupdate/check` returns 401 +### Controller +1.
After deploy, next hub report contains `"controller_url": "https://felhom.demo-felhom.eu"` field +2. Verify: `$SSH kisfenyo@192.168.0.162 "docker logs felhom-controller --tail 5"` — check report push log + +### Hub +1. Customer detail page shows "Controller Update" card with current version +2. If registry configured: shows latest version + green/gray indicator +3. If update available: "Trigger Update" button appears +4. Click button → triggers self-update on controller → shows success/error +5. If registry NOT configured: button shown with caveat note +6. Logs show `[INFO] Registry version checker started (every 6h)` on startup +7. API at `GET /api/v1/customers` includes `controller_url` field diff --git a/TASK2.md b/TASK2.md deleted file mode 100644 index 5f733e8..0000000 --- a/TASK2.md +++ /dev/null @@ -1,1442 +0,0 @@ -# TASK2: Disaster Recovery — Hub-Based Infrastructure Restore - -## Overview - -Add the ability to fully restore a Felhom deployment after a system drive failure. -The controller pushes an **infrastructure snapshot** to the central Hub during -each backup cycle. When a fresh controller is deployed on a replacement system, -it pulls the snapshot from the Hub, auto-mounts surviving drives using stored -disk UUIDs, and restores all applications and their data. - -**This is a phased implementation:** - -| Phase | Scope | Where | Status | -|-------|-------|-------|--------| -| **Phase 1** | Hub infra-backup endpoints + controller push | Hub + Controller | **DONE** | -| **Phase 2** | New-deployment detection + Hub pull + auto-mount | Controller | **DONE** | -| **Phase 3** | Restore UI + app data restoration | Controller | **DONE** | -| **Phase 4** | docker-setup.sh integration | Script | **DONE** | - -Phases 1-2 can be deployed independently. Phase 3 depends on Phase 2. -Phase 4 depends on Phase 1 (needs Hub endpoints). 
- -### Phase 1 — What was deployed - -**Hub changes** (`e:/git/felhom.eu/hub/`): -- `internal/store/store.go` — new `infra_backups` table (CREATE TABLE in migrate()), `SaveInfraBackup()`, `GetInfraBackup()`, `GetInfraBackupMeta()` + `InfraBackupMeta` struct -- `internal/api/handler.go` — `POST /api/v1/infra-backup` (push) + `GET /api/v1/infra-backup/{customer_id}` (pull), both with Bearer auth -- `internal/web/server.go` — `handleCustomerDetail()` loads `InfraBackupMeta` and passes to template -- `internal/web/templates/customer.html` — "Infra Backup" card showing last-updated age, stack count, disk count - -**Controller changes** (`controller/`): -- `internal/settings/settings.go` — new `GetCrossDriveResticPassword()` read-only getter -- `internal/report/infra_backup.go` — `InfraBackup`, `DiskLayout`, `DiskMount`, `InfraStack` types + `BuildInfraBackup()` builder -- `internal/report/infra_backup_linux.go` — `collectDiskLayout()` parses /host-fstab + blkid/lsblk for disk topology -- `internal/report/infra_backup_other.go` — no-op stub for non-Linux compilation -- `internal/report/pusher.go` — `PushInfraBackup()` method (3 retries, 5s backoff) -- `cmd/controller/main.go` — `pushInfraBackup()` helper; called after nightly backup cycle and on startup; `hubPusher` declaration moved earlier for closure access - -### Phase 2 — What was deployed - -**Controller changes** (`controller/`): -- `internal/backup/disk_layout.go` — **NEW** — `DiskLayout` and `DiskMount` types (moved from report to avoid circular import: report→backup, backup→report) -- `internal/report/infra_backup.go` — updated `DiskLayout` field to use `backup.DiskLayout` -- `internal/report/infra_backup_linux.go` — updated to return `backup.DiskLayout` -- `internal/report/infra_backup_other.go` — updated to return `backup.DiskLayout` -- `internal/report/infra_pull.go` — **NEW** — `PullInfraBackup(hubURL, apiKey, customerID)` HTTP GET from Hub, returns `*InfraBackup` or nil/nil for 404 -- 
`internal/backup/restore_drives_linux.go` — **NEW** — `MountDrivesFromLayout(ctx, layout, logger)` scans block devices by UUID, mounts using two-layer pattern (raw+bind), updates /host-fstab; includes `scanBlockDeviceUUIDs()` (lsblk+blkid), `mountDirect()`, `mountRawAndBind()`, `addDRFstabEntries()`, `isMountedPath()`, `hostDevPath()` -- `internal/backup/restore_drives_other.go` — **NEW** — no-op stub for non-Linux compilation -- `internal/settings/settings.go` — added `SetCrossDriveResticPassword(password)` setter (RWMutex + atomic save) -- `cmd/controller/main.go` — added fresh-deployment detection (`!fileExists(settings.json)`), Hub pull, password restoration, settings restoration, drive mounting (with 2min timeout), settings re-load after restore; helper functions: `fileExists()`, `restorePasswordsFromHub()`, `restoreSettingsFromHub()` - -### Phase 3 — Implementation plan - -**Context:** After Phase 2, drives are mounted and local backup data is accessible. -The Hub infra backup has the `deployed_stacks` manifest and cross-drive backup data -lives at `/backups/secondary//rsync/` with `_config/` and `_db/` subdirs. - -**Key insight:** In the common DR scenario (system drive died, HDDs survived), app data -is already on the HDD. The main thing to restore is stack configs (compose files + -app.yaml with deployed flag + env vars). Cross-drive rsync backups include `_config/` -which has the full stack directory. 
- -**Files (NEW):** -- `internal/backup/restore_scan.go` — `RestorePlan`, `RestorableApp` types + `ScanDrivesForBackups()` + `BuildRestorePlan()` -- `internal/backup/restore_app_linux.go` — `RestoreAppFromBackup()` (restore config + data + DB dump + docker compose up) -- `internal/backup/restore_app_other.go` — non-Linux stub -- `internal/web/handler_restore.go` — restore page handler + JSON API endpoints -- `internal/web/templates/restore.html` — full-page DR restore UI (standalone, no sidebar) - -**Files (MODIFIED):** -- `internal/web/server.go` — `restoreMode` + `restorePlan` state; `SetRestoreState()`; route interception (redirect all to /restore) -- `cmd/controller/main.go` — after Phase 2 drive mount, scan for backups + build restore plan + pass to web server - -**Restore page behavior:** -- When `restoreMode` is active, ALL web routes redirect to `/restore` (except `/static/*`, `/api/health`, `/api/restore/*`, `/login`, `/logout`) -- Page shows: domain/customer info, drive status, per-app table (config found, data found, DB dump found), restore all / skip buttons -- POST `/api/restore/all` starts sequential restore of all apps -- POST `/api/restore/skip` exits restore mode → normal dashboard -- GET `/api/restore/status` returns current plan with per-app status for JS polling -- All text in Hungarian - -**Per-app restore sequence:** -1. Restore stack config from `_config/` → `/opt/docker/stacks/<stack>/` -2. Verify app data exists on HDD (it should if HDD survived) -3. If app data missing but rsync backup exists → rsync data back -4. If DB dumps in `_db/` → copy to primary dump dir -5. `docker compose pull` (pull images) -6. `docker compose up -d` (start app) -7. 
Update status → next app - -**Post-restore:** re-scan stacks, clear restoreMode, normal dashboard operation - -### Phase 3 — What was deployed - -**Controller changes** (`controller/`): -- `internal/backup/restore_scan.go` — **NEW** — `RestorePlan`, `RestorableApp`, `DriveInfo`, `InfraStackInfo` types; `ScanDrivesForBackups()` scans mount paths for cross-drive backup dirs, correlates with Hub manifest; `Snapshot()` for thread-safe JSON serialization; `UpdateApp()` for progress tracking -- `internal/backup/restore_app_linux.go` — **NEW** — `RestoreAppFromBackup()` restores a single app: rsyncs `_config/` to stack dir, verifies/restores user data, copies DB dumps, runs `docker compose pull && up -d` -- `internal/backup/restore_app_other.go` — **NEW** — non-Linux stub -- `internal/web/handler_restore.go` — **NEW** — `restorePageHandler()` renders DR page; `apiRestoreStatus()` returns plan+app statuses as JSON; `apiRestoreAll()` triggers sequential restore in goroutine; `apiRestoreSkip()` exits restore mode; `executeAllRestores()` drives the restore loop with per-app timeout -- `internal/web/templates/restore.html` — **NEW** — standalone full-page DR UI (no sidebar); shows customer info, drive status cards, app table with config/data/DB columns, progress bar, restore all / skip buttons; JS polling every 2s during restore -- `internal/web/server.go` — added `restorePlan *backup.RestorePlan` + `restoreMu`; `SetRestoreState()` and `InRestoreMode()` methods; route interception in `ServeHTTP()` redirects all non-static/non-restore routes to `/restore` when in restore mode -- `internal/web/funcmap.go` — added `statusText` template function (Hungarian labels for restore status codes) -- `cmd/controller/main.go` — after Phase 2 drive mount, builds `[]InfraStackInfo` from Hub data, calls `ScanDrivesForBackups()`, sets `restorePlan` metadata, calls `webServer.SetRestoreState()` - -### Phase 4 — What was deployed - -**Script changes:** -- `scripts/docker-setup.sh` — 
`print_summary()` now shows a "Disaster Recovery" block when `$CUSTOMER_ID` is set, informing the operator that the controller will automatically contact the Hub, mount drives, and offer restore - -**README updates:** -- `controller/README.md` — version bump to v0.15.5; repo layout updated with new DR files (restore_scan.go, restore_app_linux.go, restore_drives_linux.go, infra_pull.go, handler_restore.go); roadmap marks DR as completed -- Hub README (`felhom.eu/hub/README.md`) — already had complete DR documentation, no changes needed - ---- - -## Architecture - -### The problem (catch-22) - -When the system drive dies, the backup data lives on surviving HDDs. But a freshly -installed OS doesn't know about those drives — they aren't in `/etc/fstab`, aren't -mounted, and the controller can't scan them. Even if we stored mount info in the -local backup, we can't read the local backup without mounting the drives first. - -### The solution: Hub as infra backup store - -The Hub (`hub.felhom.eu`) is always reachable. During normal operation, the -controller pushes its infrastructure state to the Hub. 
On a fresh deployment: - -``` -[1] docker-setup.sh deploys controller with Hub details (customer_id + API key) -[2] Controller starts → detects empty data dir → "I'm a fresh deployment" -[3] Controller calls Hub: GET /api/v1/infra-backup/{customer_id} -[4] Hub responds with: disk layout, controller.yaml, manifest, restic passwords -[5] Controller scans /dev/ for disks matching stored UUIDs -[6] Controller mounts surviving drives (using its existing disk management) -[7] Local backups on mounted drives are now accessible -[8] Controller auto-restores stack configs → apps appear in dashboard -[9] User opens dashboard → "Restore from backup" wizard -[10] User confirms → controller restores data + starts apps -``` - -### Fallback: local-only detection - -If the Hub is unreachable (no internet, Hub down), the controller falls back to -scanning already-mounted drives for `_infra/manifest.json` — the existing local -backup path. This is less automated (drives must be manually mounted first) but -still works. 
- ---- - -## Data stored on Hub per customer - -The infra-backup payload is a single JSON blob (~20-50KB per customer): - -```json -{ - "customer_id": "demo-felhom", - "domain": "demo-felhom.eu", - "controller_version": "v0.15.5", - "timestamp": "2026-02-19T03:05:00Z", - - "controller_config_b64": "", - "settings_json_b64": "", - - "disk_layout": { - "mounts": [ - { - "uuid": "242ee4da-d9f8-40ce-b3fa-8e4860204790", - "label": "userdate", - "mount_point": "/mnt/sys_drive", - "fs_type": "ext4", - "size_bytes": 350073856000, - "fstab_options": "defaults,noatime", - "role": "system_data", - "bind_subdir": "", - "raw_mount": "" - }, - { - "uuid": "277a2179-a764-4758-b840-9ea741517914", - "label": "hdd_1", - "mount_point": "/mnt/hdd_1", - "fs_type": "ext4", - "size_bytes": 1000204886016, - "fstab_options": "defaults,nofail,noatime", - "role": "hdd_storage", - "bind_subdir": "felhom_data", - "raw_mount": "/mnt/.felhom-raw/hdd_1" - } - ] - }, - - "deployed_stacks": [ - { - "name": "immich", - "display_name": "Immich", - "hdd_path": "/mnt/hdd_1", - "needs_hdd": true - }, - { - "name": "docmost", - "display_name": "Docmost", - "hdd_path": "", - "needs_hdd": false - } - ], - - "restic_password": "base64-encoded-primary-restic-password", - "cross_drive_password": "hex-encoded-cross-drive-password" -} -``` - -**Security:** The Hub is operator-managed infrastructure. The connection is HTTPS -with Bearer token auth. The infra backup contains sensitive data (CF tokens, -restic passwords) but the Hub already receives all system health data. The -operator trusts the Hub with this data. 
- ---- - -## Phase 1: Hub infra-backup storage + controller push - -### 1A: Hub — new SQLite table - -**File:** `hub/internal/store/store.go` - -Add migration for a new table: - -```sql -CREATE TABLE IF NOT EXISTS infra_backups ( - customer_id TEXT PRIMARY KEY, - backup_json TEXT NOT NULL, - updated_at DATETIME NOT NULL DEFAULT (datetime('now')) -); -``` - -Add store methods: - -```go -// SaveInfraBackup upserts the infra backup for a customer. -func (s *Store) SaveInfraBackup(customerID string, backupJSON []byte) error { - _, err := s.db.Exec(` - INSERT INTO infra_backups (customer_id, backup_json, updated_at) - VALUES (?, ?, datetime('now')) - ON CONFLICT(customer_id) DO UPDATE SET - backup_json = excluded.backup_json, - updated_at = datetime('now') - `, customerID, string(backupJSON)) - return err -} - -// GetInfraBackup returns the infra backup for a customer, or nil if not found. -func (s *Store) GetInfraBackup(customerID string) ([]byte, error) { - var data string - err := s.db.QueryRow(` - SELECT backup_json FROM infra_backups WHERE customer_id = ? - `, customerID).Scan(&data) - if err == sql.ErrNoRows { - return nil, nil - } - if err != nil { - return nil, err - } - return []byte(data), nil -} -``` - -### 1B: Hub — new API endpoints - -**File:** `hub/internal/api/handler.go` - -Add two endpoints to the existing router: - -```go -// POST /api/v1/infra-backup -// Controller pushes its infrastructure snapshot to the Hub. 
-func (h *Handler) handleInfraBackupPush(w http.ResponseWriter, r *http.Request) { - // Read body (limit to 1MB) - body, err := io.ReadAll(io.LimitReader(r.Body, 1<<20)) - if err != nil { - writeJSON(w, http.StatusBadRequest, map[string]string{"status": "error", "error": "read body: " + err.Error()}) - return - } - - // Validate JSON structure — extract customer_id - var payload struct { - CustomerID string `json:"customer_id"` - } - if err := json.Unmarshal(body, &payload); err != nil || payload.CustomerID == "" { - writeJSON(w, http.StatusBadRequest, map[string]string{"status": "error", "error": "invalid payload or missing customer_id"}) - return - } - - if err := h.store.SaveInfraBackup(payload.CustomerID, body); err != nil { - writeJSON(w, http.StatusInternalServerError, map[string]string{"status": "error", "error": err.Error()}) - return - } - - h.logger.Printf("[INFO] Infra backup saved for %s (%d bytes)", payload.CustomerID, len(body)) - writeJSON(w, http.StatusOK, map[string]string{"status": "ok"}) -} - -// GET /api/v1/infra-backup/{customer_id} -// Fresh controller pulls the infra backup for its customer. 
-func (h *Handler) handleInfraBackupGet(w http.ResponseWriter, r *http.Request) { - customerID := strings.TrimPrefix(r.URL.Path, "/api/v1/infra-backup/") - if customerID == "" { - writeJSON(w, http.StatusBadRequest, map[string]string{"status": "error", "error": "missing customer_id"}) - return - } - - data, err := h.store.GetInfraBackup(customerID) - if err != nil { - writeJSON(w, http.StatusInternalServerError, map[string]string{"status": "error", "error": err.Error()}) - return - } - if data == nil { - writeJSON(w, http.StatusNotFound, map[string]string{"status": "error", "error": "no infra backup found"}) - return - } - - w.Header().Set("Content-Type", "application/json") - w.Write(data) -} -``` - -Register routes in the existing `ServeHTTP()` or router setup: - -```go -case r.Method == http.MethodPost && path == "/api/v1/infra-backup": - h.handleInfraBackupPush(w, r) -case r.Method == http.MethodGet && strings.HasPrefix(path, "/api/v1/infra-backup/"): - h.handleInfraBackupGet(w, r) -``` - -Both endpoints use the existing Bearer token auth (same `report_api_key`). - -### 1C: Hub — add infra backup info to dashboard - -**File:** `hub/internal/web/templates/customer.html` - -Add a section to the customer detail page showing infra backup status: - -```html - -
-<div class="card">
-  <h3>Infra Backup</h3>
- {{if .InfraBackup}}
-  <p>Last updated: {{.InfraBackupAge}} ago</p>
-  <p>Deployed stacks: {{.InfraBackupStackCount}}</p>
-  <p>Disks: {{.InfraBackupDiskCount}}</p>
- {{else}}
-  <p>No infra backup received yet</p>
- {{end}}
-</div>
-``` - -Add store method and web handler logic to load infra backup metadata for the -customer detail page. - -### 1D: Controller — push infra snapshot to Hub - -**File:** `controller/internal/report/infra_backup.go` (NEW) - -```go -package report - -import ( - "encoding/base64" - "encoding/json" - "os" - "time" - - "gitea.dooplex.hu/admin/felhom-controller/internal/backup" - "gitea.dooplex.hu/admin/felhom-controller/internal/settings" -) - -// InfraBackup is the payload pushed to the Hub for disaster recovery. -type InfraBackup struct { - CustomerID string `json:"customer_id"` - Domain string `json:"domain"` - ControllerVersion string `json:"controller_version"` - Timestamp string `json:"timestamp"` - - ControllerConfigB64 string `json:"controller_config_b64"` - SettingsJSONB64 string `json:"settings_json_b64,omitempty"` - - DiskLayout DiskLayout `json:"disk_layout"` - DeployedStacks []InfraStack `json:"deployed_stacks"` - - ResticPassword string `json:"restic_password,omitempty"` - CrossDrivePassword string `json:"cross_drive_password,omitempty"` -} - -type DiskLayout struct { - Mounts []DiskMount `json:"mounts"` -} - -type DiskMount struct { - UUID string `json:"uuid"` - Label string `json:"label"` - MountPoint string `json:"mount_point"` - FSType string `json:"fs_type"` - SizeBytes int64 `json:"size_bytes"` - FstabOptions string `json:"fstab_options"` - Role string `json:"role"` // "system_data", "hdd_storage", "root" - BindSubdir string `json:"bind_subdir"` // e.g., "felhom_data" for HDD bind mounts - RawMount string `json:"raw_mount"` // e.g., "/mnt/.felhom-raw/hdd_1" -} - -type InfraStack struct { - Name string `json:"name"` - DisplayName string `json:"display_name"` - HDDPath string `json:"hdd_path,omitempty"` - NeedsHDD bool `json:"needs_hdd"` -} - -// BuildInfraBackup collects all infrastructure state for Hub backup. 
-func BuildInfraBackup( - customerID, domain, version string, - controllerYAMLPath string, - settingsPath string, - resticPasswordFile string, - sett *settings.Settings, - stackProvider backup.StackDataProvider, -) (*InfraBackup, error) { - ib := &InfraBackup{ - CustomerID: customerID, - Domain: domain, - ControllerVersion: version, - Timestamp: time.Now().UTC().Format(time.RFC3339), - } - - // Read and encode controller.yaml - if data, err := os.ReadFile(controllerYAMLPath); err == nil { - ib.ControllerConfigB64 = base64.StdEncoding.EncodeToString(data) - } - - // Read and encode settings.json - if data, err := os.ReadFile(settingsPath); err == nil { - ib.SettingsJSONB64 = base64.StdEncoding.EncodeToString(data) - } - - // Read restic password - if data, err := os.ReadFile(resticPasswordFile); err == nil { - ib.ResticPassword = base64.StdEncoding.EncodeToString(data) - } - - // Read cross-drive password - if pw := sett.GetCrossDriveResticPassword(); pw != "" { - ib.CrossDrivePassword = pw - } - - // Collect disk layout (see implementation note below) - ib.DiskLayout = collectDiskLayout() - - // Collect deployed stacks - deployed := stackProvider.ListDeployedStacks() - for _, s := range deployed { - ib.DeployedStacks = append(ib.DeployedStacks, InfraStack{ - Name: s.Name, - DisplayName: s.DisplayName, - HDDPath: stackProvider.GetStackHDDPath(s.Name), - NeedsHDD: s.NeedsHDD, - }) - } - - return ib, nil -} - -// collectDiskLayout reads /etc/fstab and lsblk to build the disk layout. -// This runs inside the container which has /host-fstab mounted and access to -// /host-dev/ for block device info. -func collectDiskLayout() DiskLayout { - // Implementation: parse /host-fstab (mounted from host /etc/fstab) - // and correlate with lsblk -J output. - // - // The controller already has disk management code in internal/stacks/ - // or similar — reuse the existing lsblk parsing. 
- // - // For each non-root, non-swap, non-boot mount in fstab: - // - Extract UUID, mount point, fs_type, options - // - Detect role: "system_data" if mount_point matches system_data_path, - // "hdd_storage" if it's under /mnt/.felhom-raw/ or /mnt/hdd_* - // - Detect bind mounts (type=none, options contain "bind") - // - Get size from lsblk - // - // Return the DiskLayout struct. - // - // See the detailed implementation note in the "Implementation details" section. - return DiskLayout{} -} -``` - -### 1E: Controller — push infra backup after each backup cycle - -**File:** `controller/cmd/controller/main.go` - -Add the infra backup push to the backup scheduler (after Tier1 + Tier2 complete): - -```go -// In the "backup" daily scheduler: -sched.Daily("backup", cfg.Backup.ResticSchedule, func(ctx context.Context) error { - err := backupMgr.RunBackup(ctx) - crossDriveRunner.RunAllScheduled(ctx, "daily") - if time.Now().Weekday() == time.Sunday { - crossDriveRunner.RunAllScheduled(ctx, "weekly") - } - - // NEW: Push infra backup to Hub - if hubPusher != nil && cfg.Hub.Enabled { - go pushInfraBackup(cfg, sett, stackProv, hubPusher, logger) - } - - return err -}) -``` - -```go -func pushInfraBackup(cfg *config.Config, sett *settings.Settings, - stackProv backup.StackDataProvider, pusher *report.Pusher, logger *log.Logger) { - - ib, err := report.BuildInfraBackup( - cfg.Customer.ID, cfg.Customer.Domain, Version, - "/opt/docker/felhom-controller/controller.yaml", - filepath.Join(cfg.Paths.DataDir, "settings.json"), - cfg.Backup.ResticPasswordFile, - sett, stackProv, - ) - if err != nil { - logger.Printf("[WARN] Failed to build infra backup: %v", err) - return - } - - data, err := json.Marshal(ib) - if err != nil { - logger.Printf("[WARN] Failed to marshal infra backup: %v", err) - return - } - - if err := pusher.PushInfraBackup(data); err != nil { - logger.Printf("[WARN] Failed to push infra backup to Hub: %v", err) - } else { - logger.Printf("[INFO] Infra backup pushed 
to Hub (%d bytes)", len(data)) - } -} -``` - -### 1F: Controller — add `PushInfraBackup` to Pusher - -**File:** `controller/internal/report/pusher.go` - -Add a new method alongside the existing `Push()`: - -```go -// PushInfraBackup sends the infrastructure backup to the Hub. -func (p *Pusher) PushInfraBackup(data []byte) error { - if !p.enabled { - return nil - } - - url := p.hubURL + "/api/v1/infra-backup" - - var lastErr error - for attempt := 0; attempt < 3; attempt++ { - if attempt > 0 { - time.Sleep(5 * time.Second) - } - - req, err := http.NewRequest(http.MethodPost, url, bytes.NewReader(data)) - if err != nil { - lastErr = err - continue - } - req.Header.Set("Content-Type", "application/json") - if p.apiKey != "" { - req.Header.Set("Authorization", "Bearer "+p.apiKey) - } - - resp, err := p.httpClient.Do(req) - if err != nil { - lastErr = err - continue - } - io.Copy(io.Discard, resp.Body) - resp.Body.Close() - - if resp.StatusCode >= 200 && resp.StatusCode < 300 { - return nil - } - lastErr = fmt.Errorf("HTTP %d", resp.StatusCode) - } - - return fmt.Errorf("infra backup push failed after 3 attempts: %w", lastErr) -} -``` - ---- - -## Phase 2: New-deployment detection + Hub pull + auto-mount - -### 2A: Controller — detect fresh deployment - -**File:** `controller/cmd/controller/main.go` - -The controller uses a Docker named volume (`controller-data`) at -`/opt/docker/felhom-controller/data`. On a fresh deployment, this volume is -empty — no `settings.json`, no `session_secret`, no `snapshot-history.json`. 
- -Add detection after settings initialization: - -```go -// Detect fresh deployment (empty data directory = new install) -isFreshDeployment := !fileExists(filepath.Join(cfg.Paths.DataDir, "settings.json")) - -if isFreshDeployment { - logger.Println("[INFO] Fresh deployment detected — checking Hub for infra backup") - - // Write a marker so we don't re-trigger on next restart - // (settings.json will be created by Settings.save() soon anyway) -} -``` - -**Important:** The marker to distinguish "fresh" from "restarted" is the absence -of `settings.json`. Once the Settings package creates it (on first save), subsequent -restarts won't trigger the fresh-deployment path. - -### 2B: Controller — pull infra backup from Hub - -**File:** `controller/internal/report/infra_pull.go` (NEW) - -```go -package report - -import ( - "encoding/json" - "fmt" - "io" - "net/http" - "time" -) - -// PullInfraBackup fetches the infrastructure backup from the Hub. -// Returns nil, nil if no backup exists for this customer. 
-func PullInfraBackup(hubURL, apiKey, customerID string) (*InfraBackup, error) { - url := hubURL + "/api/v1/infra-backup/" + customerID - - client := &http.Client{Timeout: 30 * time.Second} - - req, err := http.NewRequest(http.MethodGet, url, nil) - if err != nil { - return nil, err - } - if apiKey != "" { - req.Header.Set("Authorization", "Bearer "+apiKey) - } - - resp, err := client.Do(req) - if err != nil { - return nil, fmt.Errorf("hub request failed: %w", err) - } - defer resp.Body.Close() - - if resp.StatusCode == http.StatusNotFound { - return nil, nil // no backup for this customer - } - if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("hub returned HTTP %d", resp.StatusCode) - } - - body, err := io.ReadAll(io.LimitReader(resp.Body, 5<<20)) // 5MB limit - if err != nil { - return nil, fmt.Errorf("reading response: %w", err) - } - - var ib InfraBackup - if err := json.Unmarshal(body, &ib); err != nil { - return nil, fmt.Errorf("parsing infra backup: %w", err) - } - - return &ib, nil -} -``` - -### 2C: Controller — auto-mount drives from Hub disk layout - -**File:** `controller/internal/backup/restore_drives.go` (NEW) - -```go -package backup - -import ( - "context" - "encoding/json" - "fmt" - "log" - "os" - "os/exec" - "path/filepath" - "strings" - - "gitea.dooplex.hu/admin/felhom-controller/internal/report" -) - -// MountDrivesFromLayout scans block devices for disks matching the Hub's -// stored disk layout and mounts them. Uses the controller's existing -// two-layer mount pattern: raw mount → bind mount. -// -// The controller container has: -// - /host-dev:/dev (rw) — block device access -// - /host-fstab:/etc/fstab — can update fstab -// - privileged: true — can mount filesystems -// -// Returns the list of successfully mounted paths. -func MountDrivesFromLayout(ctx context.Context, layout report.DiskLayout, logger *log.Logger) ([]string, error) { - // 1. 
Get current block devices with UUIDs - lsblkDevices, err := getLsblkDevices(ctx) - if err != nil { - return nil, fmt.Errorf("scanning block devices: %w", err) - } - - var mounted []string - - for _, diskMount := range layout.Mounts { - if diskMount.UUID == "" { - continue - } - - // Skip system partitions (root, boot, swap) - if diskMount.Role == "root" || diskMount.Role == "boot" || diskMount.Role == "swap" { - continue - } - - // Find matching device by UUID - device := findDeviceByUUID(lsblkDevices, diskMount.UUID) - if device == "" { - logger.Printf("[WARN] Disk UUID %s (%s) not found — drive may be missing", - diskMount.UUID, diskMount.Label) - continue - } - - // Check if already mounted - if isMounted(diskMount.MountPoint) || isMounted(diskMount.RawMount) { - logger.Printf("[INFO] %s already mounted", diskMount.MountPoint) - mounted = append(mounted, diskMount.MountPoint) - continue - } - - logger.Printf("[INFO] Found disk %s (UUID=%s, label=%s) — mounting to %s", - device, diskMount.UUID[:12], diskMount.Label, diskMount.MountPoint) - - // Mount using the felhom two-layer pattern: - // Layer 1: raw mount → /mnt/.felhom-raw/