Files
felhom-controller/controller/internal/web/backup_handlers.go
T
admin 63484a0bd4 v0.51.0: offsite-backup UI (felhom-pbs DR) + Model-A double-nest fix
- Backups page: whole-guest backup shown as real DR — target label "Biztonsági szerver –
  külön hardver (PBS)"; app-data "Távoli mentés" card now reflects the PBS offsite tier
  (guestBackupView.Offsite) instead of "nincs beállítva".
- Model-A double-nest fix: appbackup path helpers take a felhom-data NAMESPACE ROOT (no
  internal felhom-data join); backup.Manager.namespaceRoot/AppNamespaceRoot resolve
  HDD-vs-systemDataPath provenance so a drive-resident app's backups land single-nested
  (<drive>/backups/... on the guest = <drive>/felhom-data/backups/... on the host) instead
  of .../felhom-data/felhom-data/.... Writes, deletion (GetStackBackupData/RemoveStack/
  ProtectedHDDPaths), wipe-warning scan, and export updated coherently; legacy double-nest
  dirs kept protected. New appbackup test asserts no doubled segment.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-12 20:26:52 +02:00

173 lines
6.4 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package web
import (
"context"
"errors"
"net/http"
"strings"
"time"
"gitea.dooplex.hu/admin/felhom-controller/internal/agentapi"
"gitea.dooplex.hu/admin/felhom-controller/internal/quiesce"
)
// Whole-guest backup visibility + manual trigger (spec Part 2). The agent owns whole-guest
// vzdump/PBS backup; the controller is a read-only window onto it (GET /backup/{status,due},
// /restore-test/status) plus a "Mentés most" trigger that goes through the quiesce loop (the
// CONTROLLER owns quiescing — stop stacks → POST /backup → resume — so the captured state is
// app-consistent, not the agent's crash-consistent default). Cadence/retention CONFIG is NOT here
// (hub-served policy, slice 10).
// guestBackupView carries everything the "Rendszermentés" section of the backups page renders.
// Timestamps are kept as time.Time and the size as int64 so the existing funcmap helpers
// (fmtTime/timeAgo and fmtBytes) can format them in the template.
type guestBackupView struct {
	// --- agent availability ---

	// Available is true once the agent was reachable and a status read succeeded.
	Available bool
	// Note is the message shown when Available is false (unprovisioned / unreachable).
	Note string

	// --- current job ---

	// Phase is the agent-reported phase: idle | running | snapshotted | done | failed.
	Phase string
	// Running reports that a backup job is in progress right now.
	Running bool

	// --- most recent backup record ---

	// HasBackup is set when the agent returned a backup record at all.
	HasBackup bool
	// Success mirrors the success flag of that record.
	Success bool
	// StartedAt is the record's start time (zero when it could not be parsed).
	StartedAt time.Time
	// SizeBytes is the size of the backup in bytes.
	SizeBytes int64
	// Target is the human-readable label of the backup target, e.g.
	// "Biztonsági szerver külön hardver (PBS)" or "Helyi tároló (local)".
	Target string
	// Offsite is true when the whole-guest backup landed on the PBS offsite tier
	// (separate hardware).
	Offsite bool
	// Archive is the archive identifier reported by the agent.
	Archive string
	// Mode is the backup mode: snapshot | stop.
	Mode string
	// StopMode is true when Mode is "stop", i.e. full app downtime during the backup (warn).
	StopMode bool

	// --- due window ---

	// Due reports whether a backup is currently due.
	Due bool
	// DueReason is the agent's explanation for the due state.
	DueReason string
	// AgeHours is the age of the newest successful backup in hours (for "X órája").
	AgeHours int64

	// --- restore test ---

	// HasRestoreTest is set once a restore-test result exists.
	HasRestoreTest bool
	// RestorePass mirrors the pass flag of the restore test.
	RestorePass bool
	// RestoreVerified is the restore test's "verified" value as reported by the agent.
	RestoreVerified string
	// RestoreTestedAt is when the restore test ran (zero when it could not be parsed).
	RestoreTestedAt time.Time

	// --- UI capabilities ---

	// CanTrigger is true when a backup trigger (quiesce loop) is wired.
	CanTrigger bool
}
// loadGuestBackup assembles the whole-guest backup view from the host agent on a best-effort
// basis. When the agent client cannot be obtained or the status read fails, the returned view has
// Available=false plus a Note, so the page can still render. The due-window and restore-test
// lookups are optional: an error there simply leaves the corresponding fields at their zero value.
func (s *Server) loadGuestBackup(ctx context.Context) *guestBackupView {
	view := &guestBackupView{CanTrigger: s.backupTrigger != nil}

	agent, err := s.agentClient()
	if err != nil {
		view.Note = "A host-ügynök nincs konfigurálva ezen a gépen."
		return view
	}
	status, err := agent.BackupStatus(ctx)
	if err != nil {
		view.Note = "A host-ügynök jelenleg nem elérhető."
		return view
	}

	// parseStamp reads an RFC 3339 timestamp; the bool is false for anything unparsable, in
	// which case the caller keeps the zero time.
	parseStamp := func(raw string) (time.Time, bool) {
		ts, perr := time.Parse(time.RFC3339, raw)
		return ts, perr == nil
	}

	view.Available = true
	view.Phase = status.Phase
	// The agent's running phase and "snapshotted" both count as a job in progress.
	view.Running = status.Phase == agentapi.PhaseRunning || status.Phase == "snapshotted"

	// Most recent backup record, if the agent has one.
	if rec := status.Backup; rec != nil {
		view.HasBackup = true
		view.Success = rec.Success
		view.SizeBytes = rec.SizeBytes
		view.Archive = rec.Archive
		view.Mode = rec.Mode
		view.StopMode = rec.Mode == "stop"
		view.Target = backupTargetLabel(rec)
		view.Offsite = backupIsPBS(rec)
		if started, ok := parseStamp(rec.StartedAt); ok {
			view.StartedAt = started
		}
	}

	// Due window: optional, errors are ignored on purpose.
	due, dueErr := agent.BackupDue(ctx)
	if dueErr == nil {
		view.Due = due.Due
		view.DueReason = due.Reason
		if due.AgeSecs != nil {
			view.AgeHours = *due.AgeSecs / 3600 // seconds → whole hours
		}
	}

	// Restore test (the "verified restorable" trust signal); nil until one has run.
	restore, restoreErr := agent.RestoreTestStatus(ctx)
	if restoreErr == nil && restore != nil {
		view.HasRestoreTest = true
		view.RestorePass = restore.Pass
		view.RestoreVerified = restore.Verified
		if tested, ok := parseStamp(restore.TestedAt); ok {
			view.RestoreTestedAt = tested
		}
	}

	return view
}
// backupIsPBS tells whether a whole-guest backup record points at the PBS offsite tier (separate
// hardware). The decision is a case-insensitive heuristic: the target id contains "pbs", or the
// archive starts with "felhom-pbs", or the archive contains "pbs:".
func backupIsPBS(b *agentapi.BackupRecord) bool {
	if strings.Contains(strings.ToLower(b.TargetID), "pbs") {
		return true
	}
	archive := strings.ToLower(b.Archive)
	return strings.HasPrefix(archive, "felhom-pbs") || strings.Contains(archive, "pbs:")
}
// backupTargetLabel turns the agent's backup target into the customer-facing Hungarian label.
// A PBS target is labelled as SEPARATE HARDWARE (real disaster recovery: it survives a host
// disk/hardware failure), which is the point of re-pointing the backup offsite. Any other target
// is labelled as local storage, with the target id appended in parentheses when one is set.
//
// NOTE(review): the commit message for this change spells the PBS label with a dash between
// "szerver" and "külön"; confirm the literal below matches the intended text.
func backupTargetLabel(b *agentapi.BackupRecord) string {
	switch {
	case backupIsPBS(b):
		return "Biztonsági szerver külön hardver (PBS)"
	case b.TargetID != "":
		return "Helyi tároló (" + b.TargetID + ")"
	default:
		return "Helyi tároló"
	}
}
// ServeBackupAPI routes the /api/guest-backup/* endpoints (whole-guest manual trigger and status
// poll). The prefix is deliberately distinct from apiRouter's app-data /api/backup/{run,status}.
// Wired behind RequireAuth + CsrfProtect in main.go.
//
// Routes:
//
//	POST /api/guest-backup/trigger → handleBackupTriggerAPI
//	GET  /api/guest-backup/status  → handleBackupStatusAPI
//
// Any other path, or a known path with a different method, gets a 404.
func (s *Server) ServeBackupAPI(w http.ResponseWriter, r *http.Request) {
	switch r.URL.Path {
	case "/api/guest-backup/trigger":
		if r.Method == http.MethodPost {
			s.handleBackupTriggerAPI(w, r)
			return
		}
	case "/api/guest-backup/status":
		if r.Method == http.MethodGet {
			s.handleBackupStatusAPI(w, r)
			return
		}
	}
	http.NotFound(w, r)
}
// handleBackupTriggerAPI kicks off an app-consistent whole-guest backup NOW via the quiesce loop.
// It answers right away (the backup itself runs asynchronously and takes minutes); the page then
// polls /api/guest-backup/status for progress.
//
// Responses:
//   - 503 when no backup trigger is wired on this machine
//   - 409 when a backup is already in progress (quiesce.ErrBackupInProgress)
//   - 502 with the error text for any other trigger failure (also logged)
//   - 200 with {"started": true} on success
func (s *Server) handleBackupTriggerAPI(w http.ResponseWriter, r *http.Request) {
	if s.backupTrigger == nil {
		writeDiskJSON(w, http.StatusServiceUnavailable, false, "a rendszermentés nem érhető el ezen a gépen", nil)
		return
	}

	triggerErr := s.backupTrigger.TriggerNow()
	switch {
	case triggerErr == nil:
		s.logger.Printf("[INFO] [web] manual whole-guest backup triggered (quiesce loop)")
		writeDiskJSON(w, http.StatusOK, true, "", map[string]any{"started": true})
	case errors.Is(triggerErr, quiesce.ErrBackupInProgress):
		// Expected contention, not a failure: report it without logging an error.
		writeDiskJSON(w, http.StatusConflict, false, "mentés már folyamatban van", nil)
	default:
		s.logger.Printf("[ERROR] [web] backup trigger failed: %v", triggerErr)
		writeDiskJSON(w, http.StatusBadGateway, false, triggerErr.Error(), nil)
	}
}
// handleBackupStatusAPI relays the agent's backup status to the page's progress poll. It answers
// 503 when the agent client cannot be obtained, 502 when the status read fails, and otherwise 200
// with the phase, job id, error and backup record taken from the agent's status.
func (s *Server) handleBackupStatusAPI(w http.ResponseWriter, r *http.Request) {
	agent, err := s.agentClient()
	if err != nil {
		writeDiskJSON(w, http.StatusServiceUnavailable, false, err.Error(), nil)
		return
	}

	status, err := agent.BackupStatus(r.Context())
	if err != nil {
		writeDiskJSON(w, http.StatusBadGateway, false, err.Error(), nil)
		return
	}

	payload := map[string]any{
		"phase":  status.Phase,
		"job_id": status.JobID,
		"error":  status.Error,
		"backup": status.Backup,
	}
	writeDiskJSON(w, http.StatusOK, true, "", payload)
}