13c6a0929a
Part A of the UI-fixes/storage-spike spec.
A1: enrichHostStorageTargets sorts /api/host-metrics storage_targets
server-side and attaches friendly Hungarian labels + purpose, fixing the
#host-storage-bars reorder-on-poll bug. Display labels only — PVE storage
ids are never renamed.
A2: new GET/POST /stacks/{name}/backup Tier-2 config panel; the "2. mentés"
Beállítás button is repointed there from the dead-end deploy page. Customer
can pin a target drive or disable Tier 2; preference is preserved across the
runner's status writes. Always visible (single-SSD + non-HDD apps included).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
394 lines
15 KiB
Go
394 lines
15 KiB
Go
package backup
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
|
|
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
|
|
)
|
|
|
|
// Tier 2 = an off-drive (different physical disk) copy of an HDD app's recovery unit + bulk userdata.
// It is the ONLY off-drive protection that browsable HDD userdata can get — PBS can't reach bind
// mounts. It is auto-enabled for every HDD app unless the customer disables it in the config panel.
// The target is the customer-pinned drive when that pin is still valid; otherwise it is auto-picked:
// prefer another registered user-data drive (can hold bulk), else the internal SSD for SMALL units
// only — and the SSD is the guest rootfs (~8 GB), so we REFUSE rather than fill it (a size-aware
// headroom guard). When no off-drive target fits, we record an honest "needs a 2nd HDD" status
// instead of silently doing nothing useful.
|
|
|
|
// gibibyte is the number of bytes in one GiB (2^30). It converts byte counts into the GB figures
// the headroom guard compares against.
const gibibyte = 1024 * 1024 * 1024

// Sentinel errors returned by selectTier2Target when no usable off-drive destination exists.
// tier2NoTargetReason translates them into the text recorded for the UI.
var (
	// errNoOffDiskTarget: there is no second physical disk to copy to.
	errNoOffDiskTarget = errors.New("no off-drive target (single drive, app already on the system disk)")
	// errSSDNoHeadroom: the only candidate is the internal SSD and the unit would eat into its reserve.
	errSSDNoHeadroom = errors.New("the internal SSD lacks headroom for this app's data — a 2nd drive is required for off-drive backup")
)
|
|
|
|
// Tier2Target is a resolved off-drive destination for an app's Tier 2 copy.
type Tier2Target struct {
	// NamespaceRoot is the felhom-data namespace root on the target drive.
	NamespaceRoot string
	// Label is the human-readable name of the target, shown in the UI.
	Label string
	// IsSystemDrive is true when the target is the internal SSD/system drive (DB/config only).
	IsSystemDrive bool
	// Reason explains why this target was chosen (Hungarian, for UI/logs).
	Reason string
}
|
|
|
|
// tier2FitsHeadroom reports whether a unit of unitGB fits on a system/rootfs drive while leaving a
// reserve free. Reserve = max(2 GB, 20% of total) — this is what protects the small (~8 GB) guest
// rootfs from being filled by a Tier 2 copy. Pure function (unit-tested).
//
// availGB and totalGB are the drive's free and total capacity, unitGB the size of the data to copy.
// A negative unitGB is treated as 0: a bogus measurement must never make the drive look roomier
// than it is (the guard fails closed).
func tier2FitsHeadroom(availGB, totalGB, unitGB float64) bool {
	const (
		minReserveGB    = 2.0  // absolute floor for the reserve
		reserveFraction = 0.20 // share of the drive's total capacity kept free
	)

	if unitGB < 0 {
		unitGB = 0
	}

	reserve := totalGB * reserveFraction
	if reserve < minReserveGB {
		reserve = minReserveGB
	}
	return availGB-unitGB >= reserve
}
|
|
|
|
// selectTier2Target picks the off-drive destination for an app's Tier 2 copy. A customer-pinned
|
|
// target (PreferredTarget, set from the config panel) wins when it is still valid; otherwise it
|
|
// auto-picks: another user-data drive, else the internal SSD for small units (headroom-guarded).
|
|
func (m *Manager) selectTier2Target(stackName string, unitSizeBytes int64) (*Tier2Target, error) {
|
|
sourceDrive := m.GetAppDrivePath(stackName)
|
|
if sourceDrive == "" {
|
|
return nil, fmt.Errorf("no source drive for %s", stackName)
|
|
}
|
|
|
|
// 0. Honor a customer-pinned target if it is still valid (registered, schedulable, off-disk).
|
|
// An invalid pin (gone / same physical disk) silently falls through to the auto-pick.
|
|
if m.settings != nil {
|
|
if cd := m.settings.GetCrossDriveConfig(stackName); cd != nil && cd.PreferredTarget != "" {
|
|
for _, sp := range m.settings.GetSchedulableStoragePaths() {
|
|
if sp.Path != cd.PreferredTarget {
|
|
continue
|
|
}
|
|
if sp.Path == sourceDrive || system.SamePhysicalDevice(sourceDrive, sp.Path) {
|
|
break // pinned target is on the same physical disk — not off-drive; fall through
|
|
}
|
|
label := sp.Label
|
|
if label == "" {
|
|
label = filepath.Base(sp.Path)
|
|
}
|
|
return &Tier2Target{
|
|
NamespaceRoot: NamespaceRoot(sp.Path, true),
|
|
Label: label,
|
|
IsSystemDrive: false,
|
|
Reason: "kézi választás",
|
|
}, nil
|
|
}
|
|
}
|
|
}
|
|
|
|
// 1. Prefer another registered user-data drive on a DIFFERENT physical disk (can hold bulk userdata).
|
|
if m.settings != nil {
|
|
for _, sp := range m.settings.GetSchedulableStoragePaths() {
|
|
if sp.Path == sourceDrive || system.SamePhysicalDevice(sourceDrive, sp.Path) {
|
|
continue
|
|
}
|
|
label := sp.Label
|
|
if label == "" {
|
|
label = filepath.Base(sp.Path)
|
|
}
|
|
return &Tier2Target{
|
|
NamespaceRoot: NamespaceRoot(sp.Path, true), // Model A: in-guest mount IS the namespace root
|
|
Label: label,
|
|
IsSystemDrive: false,
|
|
Reason: "másik adatmeghajtó",
|
|
}, nil
|
|
}
|
|
}
|
|
|
|
// 2. Fall back to the internal SSD (system data path) — SMALL units only.
|
|
sys := m.systemDataPath
|
|
if sys == "" || system.SamePhysicalDevice(sourceDrive, sys) {
|
|
return nil, errNoOffDiskTarget // single drive / app already on the system disk
|
|
}
|
|
if !m.tier2FitsSystemDrive(sys, unitSizeBytes) {
|
|
return nil, errSSDNoHeadroom // would fill the ~8 GB rootfs — refuse, don't fill
|
|
}
|
|
return &Tier2Target{
|
|
NamespaceRoot: NamespaceRoot(sys, false), // system path is a real root → felhom-data appended
|
|
Label: "belső SSD (rendszer)",
|
|
IsSystemDrive: true,
|
|
Reason: "nincs 2. adatmeghajtó — csak az adatbázis/konfiguráció fér a belső SSD-re; a nagy fájlokhoz 2. meghajtó kell",
|
|
}, nil
|
|
}
|
|
|
|
// tier2FitsSystemDrive checks the size-aware rootfs-headroom guard for the SSD target.
|
|
func (m *Manager) tier2FitsSystemDrive(sys string, unitSizeBytes int64) bool {
|
|
di := system.GetDiskUsage(sys)
|
|
if di == nil {
|
|
return false // can't determine free space → refuse (fail-closed for the rootfs)
|
|
}
|
|
return tier2FitsHeadroom(di.AvailGB, di.TotalGB, float64(unitSizeBytes)/gibibyte)
|
|
}
|
|
|
|
// RunTier2 makes/refreshes the off-drive copy of a single HDD app's recovery unit + userdata.
|
|
// Best-effort and idempotent (rsync mirror). Records status into settings for the UI; returns an
|
|
// error only on an actual copy failure (no valid target is a recorded status, not an error).
|
|
func (m *Manager) RunTier2(stackName string) error {
|
|
// Customer turned Tier 2 off for this app (config panel) — skip without touching status.
|
|
if m.settings != nil {
|
|
if cd := m.settings.GetCrossDriveConfig(stackName); cd != nil && cd.UserDisabled {
|
|
m.logger.Printf("[INFO] [backup] Tier 2 for %s skipped — disabled by customer", stackName)
|
|
return nil
|
|
}
|
|
}
|
|
sourceDrive := m.GetAppDrivePath(stackName)
|
|
if sourceDrive == "" {
|
|
return fmt.Errorf("no source drive for %s", stackName)
|
|
}
|
|
sourceNsRoot := m.namespaceRoot(sourceDrive)
|
|
unitDir := RecoveryUnitPath(sourceNsRoot, stackName)
|
|
appDataDir := AppDataDir(sourceNsRoot, stackName)
|
|
if _, err := os.Stat(unitDir); err != nil {
|
|
return nil // no recovery unit yet — nothing to copy
|
|
}
|
|
|
|
unitSize := dirSizeBytes(unitDir) + dirSizeBytes(appDataDir)
|
|
|
|
target, err := m.selectTier2Target(stackName, unitSize)
|
|
if err != nil {
|
|
reason := tier2NoTargetReason(err)
|
|
m.recordTier2NoTarget(stackName, reason)
|
|
m.logger.Printf("[INFO] [backup] Tier 2 for %s: no off-drive target — %s", stackName, reason)
|
|
return nil
|
|
}
|
|
// Defense-in-depth off-drive guard (selection already enforced it).
|
|
if system.SamePhysicalDevice(sourceDrive, target.NamespaceRoot) {
|
|
m.recordTier2NoTarget(stackName, "a kiválasztott cél ugyanazon a fizikai lemezen van")
|
|
return nil
|
|
}
|
|
|
|
destBase := filepath.Join(target.NamespaceRoot, "backups", "secondary", stackName)
|
|
start := time.Now()
|
|
|
|
if err := rsyncMirror(unitDir, filepath.Join(destBase, "recovery-unit")); err != nil {
|
|
m.recordTier2Failure(stackName, target, err)
|
|
if m.tier2Notify != nil {
|
|
m.tier2Notify(stackName, target.Label, time.Since(start), err)
|
|
}
|
|
return fmt.Errorf("tier2 rsync unit for %s: %w", stackName, err)
|
|
}
|
|
if _, e := os.Stat(appDataDir); e == nil {
|
|
if err := rsyncMirror(appDataDir, filepath.Join(destBase, "appdata")); err != nil {
|
|
m.recordTier2Failure(stackName, target, err)
|
|
if m.tier2Notify != nil {
|
|
m.tier2Notify(stackName, target.Label, time.Since(start), err)
|
|
}
|
|
return fmt.Errorf("tier2 rsync appdata for %s: %w", stackName, err)
|
|
}
|
|
}
|
|
|
|
dur := time.Since(start)
|
|
m.recordTier2Success(stackName, target, unitSize, dur)
|
|
if m.tier2Notify != nil {
|
|
m.tier2Notify(stackName, target.Label, dur, nil)
|
|
}
|
|
m.logger.Printf("[INFO] [backup] Tier 2 copied %s → %s (%s, %s)%s",
|
|
stackName, destBase, humanizeBytes(unitSize), dur.Round(time.Second),
|
|
map[bool]string{true: " [SSD: DB/config only]", false: ""}[target.IsSystemDrive])
|
|
return nil
|
|
}
|
|
|
|
// RunAllTier2 runs Tier 2 for every deployed HDD app (apps whose data lives on an external drive —
|
|
// non-HDD apps live on the rootfs and are already inside the PBS whole-guest snapshot).
|
|
func (m *Manager) RunAllTier2() {
|
|
if m.stackProvider == nil {
|
|
return
|
|
}
|
|
var n int
|
|
for _, stack := range m.stackProvider.ListDeployedStacks() {
|
|
if m.stackProvider.GetStackHDDPath(stack.Name) == "" {
|
|
continue // not an HDD app — its data is on the rootfs, covered by PBS
|
|
}
|
|
if m.settings != nil && (m.settings.IsDisconnected(m.GetAppDrivePath(stack.Name)) ||
|
|
m.settings.IsDecommissioned(m.GetAppDrivePath(stack.Name))) {
|
|
continue
|
|
}
|
|
if err := m.RunTier2(stack.Name); err != nil {
|
|
m.logger.Printf("[WARN] [backup] Tier 2 failed for %s: %v", stack.Name, err)
|
|
}
|
|
n++
|
|
}
|
|
m.logger.Printf("[INFO] [backup] Tier 2 run complete: %d HDD app(s) processed", n)
|
|
}
|
|
|
|
// --- per-app config-panel view (drives the Tier-2 "Beállítás" page) ---
|
|
|
|
// Tier2Option is one selectable off-drive destination in the config panel.
type Tier2Option struct {
	// Path is the registered storage path (the value persisted as PreferredTarget).
	Path string
	// Label is the human-readable name shown in the dropdown.
	Label string
}

// Tier2Info is the per-app Tier-2 view the config panel renders. It exposes the effective target
// (pinned or auto), whether that is the size-limited internal SSD, the honest no-target reason, and
// the off-disk drives the customer may pin — so the control is meaningful even with a single target.
type Tier2Info struct {
	// IsHDDApp is false when the app lives on the rootfs (already inside the PBS whole-guest snapshot).
	IsHDDApp bool
	// SourceDrive is where the app's data currently lives.
	SourceDrive string
	// Disabled is true when the customer turned Tier 2 off.
	Disabled bool
	// Preferred is the customer-pinned target path ("" = automatic).
	Preferred string
	// EffectiveLabel is the label of the target that WOULD be used right now.
	EffectiveLabel string
	// EffectiveIsSSD is true when the effective target is the internal SSD (DB/config only).
	EffectiveIsSSD bool
	// EffectiveDesc explains why this target is used (Hungarian).
	EffectiveDesc string
	// NoTarget is true when no off-drive target fits at all.
	NoTarget bool
	// NoTargetReason is the honest reason when NoTarget is set.
	NoTargetReason string
	// Alternatives lists the drives the customer may pin.
	Alternatives []Tier2Option
}
|
|
|
|
// Tier2Info builds the config-panel view for one app. Read-only (no status writes).
|
|
func (m *Manager) Tier2Info(stackName string) Tier2Info {
|
|
var info Tier2Info
|
|
if m.stackProvider != nil {
|
|
info.IsHDDApp = m.stackProvider.GetStackHDDPath(stackName) != ""
|
|
}
|
|
source := m.GetAppDrivePath(stackName)
|
|
info.SourceDrive = source
|
|
|
|
if m.settings != nil {
|
|
if cd := m.settings.GetCrossDriveConfig(stackName); cd != nil {
|
|
info.Disabled = cd.UserDisabled
|
|
info.Preferred = cd.PreferredTarget
|
|
}
|
|
// Eligible alternative drives: registered, schedulable, on a DIFFERENT physical disk.
|
|
for _, sp := range m.settings.GetSchedulableStoragePaths() {
|
|
if sp.Path == source || system.SamePhysicalDevice(source, sp.Path) {
|
|
continue
|
|
}
|
|
label := sp.Label
|
|
if label == "" {
|
|
label = filepath.Base(sp.Path)
|
|
}
|
|
info.Alternatives = append(info.Alternatives, Tier2Option{Path: sp.Path, Label: label})
|
|
}
|
|
}
|
|
|
|
// Resolve what the runner WOULD pick right now (real unit size feeds the SSD headroom guard).
|
|
sourceNsRoot := m.namespaceRoot(source)
|
|
unitSize := dirSizeBytes(RecoveryUnitPath(sourceNsRoot, stackName)) + dirSizeBytes(AppDataDir(sourceNsRoot, stackName))
|
|
target, err := m.selectTier2Target(stackName, unitSize)
|
|
if err != nil {
|
|
info.NoTarget = true
|
|
info.NoTargetReason = tier2NoTargetReason(err)
|
|
return info
|
|
}
|
|
info.EffectiveLabel = target.Label
|
|
info.EffectiveIsSSD = target.IsSystemDrive
|
|
info.EffectiveDesc = target.Reason
|
|
return info
|
|
}
|
|
|
|
// --- status persistence (drives the "2. mentés" UI card) ---
|
|
|
|
// withTier2Prefs carries the customer-preference fields (UserDisabled/PreferredTarget) from any
|
|
// existing config into a freshly-built status struct, so a runner status write never clobbers them.
|
|
func (m *Manager) withTier2Prefs(stackName string, cfg *settings.CrossDriveBackup) *settings.CrossDriveBackup {
|
|
if m.settings != nil {
|
|
if existing := m.settings.GetCrossDriveConfig(stackName); existing != nil {
|
|
cfg.UserDisabled = existing.UserDisabled
|
|
cfg.PreferredTarget = existing.PreferredTarget
|
|
}
|
|
}
|
|
return cfg
|
|
}
|
|
|
|
func (m *Manager) recordTier2Success(stackName string, target *Tier2Target, sizeBytes int64, dur time.Duration) {
|
|
if m.settings == nil {
|
|
return
|
|
}
|
|
_ = m.settings.SetCrossDriveConfig(stackName, m.withTier2Prefs(stackName, &settings.CrossDriveBackup{
|
|
Enabled: true,
|
|
Method: "rsync",
|
|
DestinationPath: target.NamespaceRoot,
|
|
Schedule: "daily",
|
|
LastRun: time.Now().Format(time.RFC3339),
|
|
LastStatus: "ok",
|
|
LastDuration: dur.Round(time.Second).String(),
|
|
LastSizeHuman: humanizeBytes(sizeBytes),
|
|
}))
|
|
}
|
|
|
|
func (m *Manager) recordTier2Failure(stackName string, target *Tier2Target, cause error) {
|
|
if m.settings == nil {
|
|
return
|
|
}
|
|
_ = m.settings.SetCrossDriveConfig(stackName, m.withTier2Prefs(stackName, &settings.CrossDriveBackup{
|
|
Enabled: true,
|
|
Method: "rsync",
|
|
DestinationPath: target.NamespaceRoot,
|
|
Schedule: "daily",
|
|
LastRun: time.Now().Format(time.RFC3339),
|
|
LastStatus: "error",
|
|
LastError: cause.Error(),
|
|
}))
|
|
}
|
|
|
|
func (m *Manager) recordTier2NoTarget(stackName, reason string) {
|
|
if m.settings == nil {
|
|
return
|
|
}
|
|
_ = m.settings.SetCrossDriveConfig(stackName, m.withTier2Prefs(stackName, &settings.CrossDriveBackup{
|
|
Enabled: false,
|
|
Method: "rsync",
|
|
Schedule: "daily",
|
|
LastStatus: "no_target",
|
|
LastError: reason,
|
|
}))
|
|
}
|
|
|
|
func tier2NoTargetReason(err error) string {
|
|
switch {
|
|
case errors.Is(err, errSSDNoHeadroom):
|
|
return "nincs elég hely a belső SSD-n — a nagy fájlok off-drive mentéséhez 2. meghajtó (vagy távoli tárhely) szükséges"
|
|
case errors.Is(err, errNoOffDiskTarget):
|
|
return "nincs másik fizikai meghajtó — a 2. mentéshez 2. meghajtó szükséges"
|
|
default:
|
|
return err.Error()
|
|
}
|
|
}
|
|
|
|
// --- helpers ---
|
|
|
|
// rsyncMirror mirrors src→dst with rsync -a --delete (exact copy, browsable on disk, no versioning).
// dst is created if missing. The copy is aborted after 60 minutes.
//
// Returns an error when either path is empty, when dst cannot be created, when rsync exits
// non-zero (the error wraps the exec error and carries rsync's combined output) or when the
// time limit is hit.
func rsyncMirror(src, dst string) error {
	const timeout = 60 * time.Minute

	// The trailing-slash normalisation below would turn an empty src into "/", i.e. mirror the
	// whole filesystem root into dst. Refuse instead.
	if src == "" || dst == "" {
		return fmt.Errorf("rsync mirror: empty path (src=%q, dst=%q)", src, dst)
	}
	if err := os.MkdirAll(dst, 0755); err != nil {
		return fmt.Errorf("mkdir %s: %w", dst, err)
	}

	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	// Trailing slashes: copy the CONTENTS of src into dst.
	from := strings.TrimRight(src, "/") + "/"
	to := strings.TrimRight(dst, "/") + "/"
	out, err := exec.CommandContext(ctx, "rsync", "-a", "--delete", from, to).CombinedOutput()
	if err != nil {
		if ctxErr := ctx.Err(); ctxErr != nil {
			// The process was killed by the deadline; the exec error alone does not say so.
			return fmt.Errorf("rsync timed out after %s: %w", timeout, ctxErr)
		}
		return fmt.Errorf("%w: %s", err, strings.TrimSpace(string(out)))
	}
	return nil
}
|
|
|
|
// dirSizeBytes returns the total size of a directory via `du -sb`. It returns 0 when the
// directory is absent, when du produces no usable total (including being killed by the 60 s
// limit before printing one) or when the reported value is not a non-negative integer.
func dirSizeBytes(dir string) int64 {
	if _, err := os.Stat(dir); err != nil {
		return 0
	}

	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	// du exits non-zero when part of the tree is unreadable but still prints the total it could
	// count. That total is a better size estimate than 0, so only give up when nothing was printed.
	out, err := exec.CommandContext(ctx, "du", "-sb", dir).Output()
	if err != nil && len(out) == 0 {
		return 0
	}

	fields := strings.Fields(string(out))
	if len(fields) == 0 {
		return 0
	}
	var size int64
	if _, err := fmt.Sscanf(fields[0], "%d", &size); err != nil || size < 0 {
		return 0
	}
	return size
}
|