d2071430ea
Tier 2 rsync-mirrors each HDD app's recovery unit + appdata to a DIFFERENT physical disk (the only off-drive protection bind-mounted userdata can get; PBS can't reach it). Auto-enabled, auto-target: prefer another registered drive (different physical disk via system.SamePhysicalDevice), else the internal SSD for SMALL units only — with a size-aware headroom guard that REFUSES rather than fill the ~8G guest rootfs, recording an honest "needs 2nd HDD" status. Status persisted via the surviving CrossDriveBackup; "2. mentés" UI card now populated. Daily tier2-backup job + POST /api/backup/tier2. - backup/tier2.go (engine+selection+headroom), tier2_test.go (headroom arithmetic) - system.SamePhysicalDevice (linux Stat_t.Dev + stub) - handlers.go Tier2 UI population + tier2DestLabel; backups.html honest no-target reason - fixed stale TestBackupCopiesOnPath (old felhom-data layout -> in-guest layout) Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
282 lines
10 KiB
Go
282 lines
10 KiB
Go
package backup
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
|
|
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
|
|
)
|
|
|
|
// Tier 2 = an off-drive (different physical disk) copy of an HDD app's recovery unit + bulk userdata.
|
|
// It is the ONLY off-drive protection that browsable HDD userdata can get — PBS can't reach bind
|
|
// mounts. Auto-enabled for every HDD app; the target is auto-picked: prefer another registered
|
|
// user-data drive (can hold bulk), else the internal SSD for SMALL units only — and the SSD is the
|
|
// guest rootfs (~8 GB), so we REFUSE rather than fill it (a size-aware headroom guard). When no
|
|
// off-drive target fits, we record an honest "needs a 2nd HDD" status instead of silently doing
|
|
// nothing useful.
|
|
|
|
// gibibyte is the number of bytes in one GiB (2^30). It is used to convert byte counts into
// the GB floats that the headroom arithmetic works with.
const gibibyte = 1 << 30
|
|
|
|
// errNoOffDiskTarget is returned by target selection when there is no destination on a
// different physical disk at all: no other data drive, and the system path is unset or sits on
// the same disk as the app.
var errNoOffDiskTarget = errors.New("no off-drive target (single drive, app already on the system disk)")

// errSSDNoHeadroom is returned by target selection when the only candidate is the internal SSD
// and the headroom guard refuses the copy.
var errSSDNoHeadroom = errors.New("the internal SSD lacks headroom for this app's data — a 2nd drive is required for off-drive backup")
|
|
|
|
// Tier2Target describes the off-drive destination that was resolved for one app's Tier 2 copy.
type Tier2Target struct {
	// NamespaceRoot is the felhom-data namespace root on the target drive.
	NamespaceRoot string

	// Label is the human-readable name of the target, shown in the UI.
	Label string

	// IsSystemDrive is true when the target is the internal SSD/system drive, which is meant
	// for DB/config-sized units only.
	IsSystemDrive bool

	// Reason explains why this target was picked (Hungarian text, used in the UI and logs).
	Reason string
}
|
|
|
|
// tier2FitsHeadroom is the pure arithmetic behind the rootfs headroom guard. It reports whether
// a unit of unitGB can be written to a drive with availGB free out of totalGB while still
// leaving a reserve untouched.
//
// The reserve is 20% of the drive's total size, but never less than 2 GB. The copy fits when
// the space left after writing the unit is at least that reserve.
func tier2FitsHeadroom(availGB, totalGB, unitGB float64) bool {
	const (
		minReserveGB = 2.0
		reserveShare = 0.20
	)

	reserveGB := totalGB * reserveShare
	if reserveGB < minReserveGB {
		reserveGB = minReserveGB
	}

	remainingGB := availGB - unitGB
	return remainingGB >= reserveGB
}
|
|
|
|
// selectTier2Target auto-picks the off-drive destination for an app's Tier 2 copy.
|
|
func (m *Manager) selectTier2Target(stackName string, unitSizeBytes int64) (*Tier2Target, error) {
|
|
sourceDrive := m.GetAppDrivePath(stackName)
|
|
if sourceDrive == "" {
|
|
return nil, fmt.Errorf("no source drive for %s", stackName)
|
|
}
|
|
|
|
// 1. Prefer another registered user-data drive on a DIFFERENT physical disk (can hold bulk userdata).
|
|
if m.settings != nil {
|
|
for _, sp := range m.settings.GetSchedulableStoragePaths() {
|
|
if sp.Path == sourceDrive || system.SamePhysicalDevice(sourceDrive, sp.Path) {
|
|
continue
|
|
}
|
|
label := sp.Label
|
|
if label == "" {
|
|
label = filepath.Base(sp.Path)
|
|
}
|
|
return &Tier2Target{
|
|
NamespaceRoot: NamespaceRoot(sp.Path, true), // Model A: in-guest mount IS the namespace root
|
|
Label: label,
|
|
IsSystemDrive: false,
|
|
Reason: "másik adatmeghajtó",
|
|
}, nil
|
|
}
|
|
}
|
|
|
|
// 2. Fall back to the internal SSD (system data path) — SMALL units only.
|
|
sys := m.systemDataPath
|
|
if sys == "" || system.SamePhysicalDevice(sourceDrive, sys) {
|
|
return nil, errNoOffDiskTarget // single drive / app already on the system disk
|
|
}
|
|
if !m.tier2FitsSystemDrive(sys, unitSizeBytes) {
|
|
return nil, errSSDNoHeadroom // would fill the ~8 GB rootfs — refuse, don't fill
|
|
}
|
|
return &Tier2Target{
|
|
NamespaceRoot: NamespaceRoot(sys, false), // system path is a real root → felhom-data appended
|
|
Label: "belső SSD (rendszer)",
|
|
IsSystemDrive: true,
|
|
Reason: "nincs 2. adatmeghajtó — csak az adatbázis/konfiguráció fér a belső SSD-re; a nagy fájlokhoz 2. meghajtó kell",
|
|
}, nil
|
|
}
|
|
|
|
// tier2FitsSystemDrive checks the size-aware rootfs-headroom guard for the SSD target.
|
|
func (m *Manager) tier2FitsSystemDrive(sys string, unitSizeBytes int64) bool {
|
|
di := system.GetDiskUsage(sys)
|
|
if di == nil {
|
|
return false // can't determine free space → refuse (fail-closed for the rootfs)
|
|
}
|
|
return tier2FitsHeadroom(di.AvailGB, di.TotalGB, float64(unitSizeBytes)/gibibyte)
|
|
}
|
|
|
|
// RunTier2 makes/refreshes the off-drive copy of a single HDD app's recovery unit + userdata.
|
|
// Best-effort and idempotent (rsync mirror). Records status into settings for the UI; returns an
|
|
// error only on an actual copy failure (no valid target is a recorded status, not an error).
|
|
func (m *Manager) RunTier2(stackName string) error {
|
|
sourceDrive := m.GetAppDrivePath(stackName)
|
|
if sourceDrive == "" {
|
|
return fmt.Errorf("no source drive for %s", stackName)
|
|
}
|
|
sourceNsRoot := m.namespaceRoot(sourceDrive)
|
|
unitDir := RecoveryUnitPath(sourceNsRoot, stackName)
|
|
appDataDir := AppDataDir(sourceNsRoot, stackName)
|
|
if _, err := os.Stat(unitDir); err != nil {
|
|
return nil // no recovery unit yet — nothing to copy
|
|
}
|
|
|
|
unitSize := dirSizeBytes(unitDir) + dirSizeBytes(appDataDir)
|
|
|
|
target, err := m.selectTier2Target(stackName, unitSize)
|
|
if err != nil {
|
|
reason := tier2NoTargetReason(err)
|
|
m.recordTier2NoTarget(stackName, reason)
|
|
m.logger.Printf("[INFO] [backup] Tier 2 for %s: no off-drive target — %s", stackName, reason)
|
|
return nil
|
|
}
|
|
// Defense-in-depth off-drive guard (selection already enforced it).
|
|
if system.SamePhysicalDevice(sourceDrive, target.NamespaceRoot) {
|
|
m.recordTier2NoTarget(stackName, "a kiválasztott cél ugyanazon a fizikai lemezen van")
|
|
return nil
|
|
}
|
|
|
|
destBase := filepath.Join(target.NamespaceRoot, "backups", "secondary", stackName)
|
|
start := time.Now()
|
|
|
|
if err := rsyncMirror(unitDir, filepath.Join(destBase, "recovery-unit")); err != nil {
|
|
m.recordTier2Failure(stackName, target, err)
|
|
if m.tier2Notify != nil {
|
|
m.tier2Notify(stackName, target.Label, time.Since(start), err)
|
|
}
|
|
return fmt.Errorf("tier2 rsync unit for %s: %w", stackName, err)
|
|
}
|
|
if _, e := os.Stat(appDataDir); e == nil {
|
|
if err := rsyncMirror(appDataDir, filepath.Join(destBase, "appdata")); err != nil {
|
|
m.recordTier2Failure(stackName, target, err)
|
|
if m.tier2Notify != nil {
|
|
m.tier2Notify(stackName, target.Label, time.Since(start), err)
|
|
}
|
|
return fmt.Errorf("tier2 rsync appdata for %s: %w", stackName, err)
|
|
}
|
|
}
|
|
|
|
dur := time.Since(start)
|
|
m.recordTier2Success(stackName, target, unitSize, dur)
|
|
if m.tier2Notify != nil {
|
|
m.tier2Notify(stackName, target.Label, dur, nil)
|
|
}
|
|
m.logger.Printf("[INFO] [backup] Tier 2 copied %s → %s (%s, %s)%s",
|
|
stackName, destBase, humanizeBytes(unitSize), dur.Round(time.Second),
|
|
map[bool]string{true: " [SSD: DB/config only]", false: ""}[target.IsSystemDrive])
|
|
return nil
|
|
}
|
|
|
|
// RunAllTier2 runs Tier 2 for every deployed HDD app (apps whose data lives on an external drive —
|
|
// non-HDD apps live on the rootfs and are already inside the PBS whole-guest snapshot).
|
|
func (m *Manager) RunAllTier2() {
|
|
if m.stackProvider == nil {
|
|
return
|
|
}
|
|
var n int
|
|
for _, stack := range m.stackProvider.ListDeployedStacks() {
|
|
if m.stackProvider.GetStackHDDPath(stack.Name) == "" {
|
|
continue // not an HDD app — its data is on the rootfs, covered by PBS
|
|
}
|
|
if m.settings != nil && (m.settings.IsDisconnected(m.GetAppDrivePath(stack.Name)) ||
|
|
m.settings.IsDecommissioned(m.GetAppDrivePath(stack.Name))) {
|
|
continue
|
|
}
|
|
if err := m.RunTier2(stack.Name); err != nil {
|
|
m.logger.Printf("[WARN] [backup] Tier 2 failed for %s: %v", stack.Name, err)
|
|
}
|
|
n++
|
|
}
|
|
m.logger.Printf("[INFO] [backup] Tier 2 run complete: %d HDD app(s) processed", n)
|
|
}
|
|
|
|
// --- status persistence (drives the "2. mentés" UI card) ---
|
|
|
|
func (m *Manager) recordTier2Success(stackName string, target *Tier2Target, sizeBytes int64, dur time.Duration) {
|
|
if m.settings == nil {
|
|
return
|
|
}
|
|
_ = m.settings.SetCrossDriveConfig(stackName, &settings.CrossDriveBackup{
|
|
Enabled: true,
|
|
Method: "rsync",
|
|
DestinationPath: target.NamespaceRoot,
|
|
Schedule: "daily",
|
|
LastRun: time.Now().Format(time.RFC3339),
|
|
LastStatus: "ok",
|
|
LastDuration: dur.Round(time.Second).String(),
|
|
LastSizeHuman: humanizeBytes(sizeBytes),
|
|
})
|
|
}
|
|
|
|
func (m *Manager) recordTier2Failure(stackName string, target *Tier2Target, cause error) {
|
|
if m.settings == nil {
|
|
return
|
|
}
|
|
_ = m.settings.SetCrossDriveConfig(stackName, &settings.CrossDriveBackup{
|
|
Enabled: true,
|
|
Method: "rsync",
|
|
DestinationPath: target.NamespaceRoot,
|
|
Schedule: "daily",
|
|
LastRun: time.Now().Format(time.RFC3339),
|
|
LastStatus: "error",
|
|
LastError: cause.Error(),
|
|
})
|
|
}
|
|
|
|
func (m *Manager) recordTier2NoTarget(stackName, reason string) {
|
|
if m.settings == nil {
|
|
return
|
|
}
|
|
_ = m.settings.SetCrossDriveConfig(stackName, &settings.CrossDriveBackup{
|
|
Enabled: false,
|
|
Method: "rsync",
|
|
Schedule: "daily",
|
|
LastStatus: "no_target",
|
|
LastError: reason,
|
|
})
|
|
}
|
|
|
|
func tier2NoTargetReason(err error) string {
|
|
switch {
|
|
case errors.Is(err, errSSDNoHeadroom):
|
|
return "nincs elég hely a belső SSD-n — a nagy fájlok off-drive mentéséhez 2. meghajtó (vagy távoli tárhely) szükséges"
|
|
case errors.Is(err, errNoOffDiskTarget):
|
|
return "nincs másik fizikai meghajtó — a 2. mentéshez 2. meghajtó szükséges"
|
|
default:
|
|
return err.Error()
|
|
}
|
|
}
|
|
|
|
// --- helpers ---
|
|
|
|
// rsyncMirror makes dst an exact mirror of src using `rsync -a --delete`. The result is a
// plain, browsable copy on disk with no versioning: files that no longer exist in src are
// removed from dst.
//
// dst is created if needed. The rsync run is limited to 60 minutes. On failure the returned
// error wraps the underlying command error, so callers can inspect it with errors.Is and
// errors.As, and it carries rsync's combined output. A run killed by the time limit is reported
// as a timeout.
func rsyncMirror(src, dst string) error {
	const timeout = 60 * time.Minute

	if err := os.MkdirAll(dst, 0755); err != nil {
		return fmt.Errorf("mkdir %s: %w", dst, err)
	}

	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	// Trailing slashes: copy the CONTENTS of src into dst.
	cmd := exec.CommandContext(ctx, "rsync", "-a", "--delete",
		strings.TrimRight(src, "/")+"/", strings.TrimRight(dst, "/")+"/")
	out, err := cmd.CombinedOutput()
	if err == nil {
		return nil
	}

	detail := strings.TrimSpace(string(out))
	if ctx.Err() != nil {
		// The process was killed by the deadline; err alone would only say "signal: killed".
		return fmt.Errorf("rsync timed out after %s: %w: %s", timeout, err, detail)
	}
	return fmt.Errorf("%w: %s", err, detail)
}
|
|
|
|
// dirSizeBytes reports the total size of dir in bytes as printed by `du -sb`. The du run is
// limited to 60 seconds.
//
// It never fails: a directory that cannot be stat'ed, a du error or timeout, and output that
// cannot be parsed all yield 0.
func dirSizeBytes(dir string) int64 {
	if _, statErr := os.Stat(dir); statErr != nil {
		return 0
	}

	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	raw, runErr := exec.CommandContext(ctx, "du", "-sb", dir).Output()
	if runErr != nil {
		return 0
	}

	// du prints "<bytes>\t<path>"; the first whitespace-separated column is the size.
	var total int64
	if cols := strings.Fields(string(raw)); len(cols) > 0 {
		if _, scanErr := fmt.Sscanf(cols[0], "%d", &total); scanErr == nil {
			return total
		}
	}
	return 0
}
|