Files
admin c0cdd95e56 feat: infra backup retention + version picker
Hub: GFS retention (7d/4w/3m, ~14 versions) in new infra_backup_versions
table. Recovery endpoint supports ?version=ID. New /versions API endpoint.
Dashboard shows backup history.

Controller: local drive backups rotated into history/ (last 5 versions).
Setup wizard shows version picker for Hub restores when multiple versions
exist. Scan results enriched with app names, disk count, history badge.
Local restore supports historical versions.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 14:47:40 +01:00

318 lines
8.4 KiB
Go

package setup
import (
"bufio"
"context"
"encoding/json"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"gitea.dooplex.hu/admin/felhom-controller/internal/backup"
)
// DriveBackup describes one infra backup (current or historical) found on a
// drive partition during a setup scan. It is serialized to JSON using the
// field tags below.
type DriveBackup struct {
// Device is the partition's device path as reported by lsblk.
Device string `json:"device"`
// Label is the filesystem label from lsblk; empty when the partition has none.
Label string `json:"label"`
// MountPoint is where the partition was mounted while it was scanned. This
// may be a temporary directory created by the scan (see WasTempMounted).
MountPoint string `json:"mount_point"`
// CustomerID, Timestamp and CtrlVersion are copied from the backup metadata
// when that metadata could be read.
CustomerID string `json:"customer_id"`
Timestamp string `json:"timestamp"`
CtrlVersion string `json:"controller_version"`
// IntegrityOK is true when the backup was read without error.
IntegrityOK bool `json:"integrity_ok"`
// Error holds the read error message when the backup could not be read.
Error string `json:"error,omitempty"`
// StackCount, StackNames and DiskCount summarize the backup contents; for the
// current backup they are filled in by backup.ParseBackupCounts.
StackCount int `json:"stack_count"`
StackNames []string `json:"stack_names,omitempty"`
DiskCount int `json:"disk_count"`
// IsHistory is true for entries that come from the drive's history listing
// rather than the current backup.
IsHistory bool `json:"is_history"`
// HistoryFile is copied from the history entry for historical versions.
// NOTE(review): presumably identifies the file to restore from — confirm.
HistoryFile string `json:"history_file,omitempty"`
// WasTempMounted marks entries whose partition was mounted by the scan itself
// so that CleanupTempMounts can unmount it again. Not included in the JSON.
WasTempMounted bool `json:"-"`
}
// lsblkOutput is the top-level object of `lsblk -J` output: a single
// "blockdevices" array holding the device tree.
type lsblkOutput struct {
Blockdevices []lsblkDevice `json:"blockdevices"`
}
// lsblkDevice is one entry of the lsblk device tree, limited to the columns
// requested by the scan (NAME,PATH,FSTYPE,MOUNTPOINT,LABEL,SIZE,TYPE).
// FSType, MountPoint and Label are pointers so that a JSON null decodes to nil
// and can be told apart from an empty string.
type lsblkDevice struct {
Name string `json:"name"`
Path string `json:"path"`
FSType *string `json:"fstype"`
MountPoint *string `json:"mountpoint"`
Label *string `json:"label"`
Size interface{} `json:"size"` // string or int
Type string `json:"type"` // "disk", "part"
// Children holds nested devices, e.g. the partitions of a disk.
Children []lsblkDevice `json:"children,omitempty"`
}
// ScanDrivesForInfraBackups enumerates block devices with lsblk and inspects
// every eligible partition for a .felhom-infra-backup/ directory.
//
// Partitions without a filesystem, swap areas, LUKS containers and partitions
// listed as root devices are skipped. The returned slice contains whatever
// scanPartition reports for the remaining partitions, including entries that
// could not be read cleanly; it is nil when nothing was found. An error is
// returned only when lsblk cannot be run or its JSON output cannot be parsed.
//
// When debug is true, additional counters are written to logger.
func ScanDrivesForInfraBackups(logger *log.Logger, debug bool) ([]DriveBackup, error) {
	logger.Printf("[INFO] Setup: scanning drives for infra backups...")

	// Snapshot of what is mounted right now and of the devices hosting the system.
	mountedFS := readMountedFilesystems()
	rootDevices := getRootDevices()

	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	cmd := exec.CommandContext(ctx, "lsblk", "-J", "-o", "NAME,PATH,FSTYPE,MOUNTPOINT,LABEL,SIZE,TYPE")
	raw, err := cmd.Output()
	if err != nil {
		return nil, fmt.Errorf("lsblk failed: %w", err)
	}

	var tree lsblkOutput
	if err := json.Unmarshal(raw, &tree); err != nil {
		return nil, fmt.Errorf("parsing lsblk: %w", err)
	}
	if debug {
		logger.Printf("[DEBUG] Setup scan: lsblk returned %d block devices", len(tree.Blockdevices))
	}

	// Collect partitions from the top level and from one level of children.
	var candidates []lsblkDevice
	for i := range tree.Blockdevices {
		dev := tree.Blockdevices[i]
		if dev.Type == "part" {
			candidates = append(candidates, dev)
		}
		for j := range dev.Children {
			if dev.Children[j].Type == "part" {
				candidates = append(candidates, dev.Children[j])
			}
		}
	}
	if debug {
		logger.Printf("[DEBUG] Setup scan: found %d partitions to check, %d root devices to skip", len(candidates), len(rootDevices))
	}

	var found []DriveBackup
	for _, part := range candidates {
		switch {
		case part.FSType == nil || *part.FSType == "" || *part.FSType == "swap":
			// Nothing mountable here.
			continue
		case *part.FSType == "crypto_LUKS":
			logger.Printf("[DEBUG] Setup: skipping LUKS partition %s", part.Path)
			continue
		case part.Type == "lvm":
			// NOTE(review): only entries of type "part" are collected above, so
			// this case cannot match as written.
			logger.Printf("[DEBUG] Setup: skipping LVM volume %s", part.Path)
			continue
		case isRootPartition(part.Path, rootDevices):
			continue
		}
		found = append(found, scanPartition(part, mountedFS, logger)...)
	}

	logger.Printf("[INFO] Setup: drive scan complete — found %d backup(s)", countValid(found))
	return found, nil
}
// CleanupTempMounts unmounts every partition that the scan mounted itself
// (results with WasTempMounted set and a non-empty MountPoint) and removes the
// temporary mount directory.
//
// Each umount is limited to 10 seconds. A failed umount is logged as a warning
// instead of being reported as a successful unmount; cleanup then continues
// with the remaining entries.
func CleanupTempMounts(results []DriveBackup, logger *log.Logger) {
	for _, r := range results {
		if !r.WasTempMounted || r.MountPoint == "" {
			continue
		}

		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
		err := exec.CommandContext(ctx, "umount", r.MountPoint).Run()
		cancel()

		// Best effort: os.Remove only removes an empty directory, so this is
		// harmless if the filesystem is still mounted.
		rmErr := os.Remove(r.MountPoint)

		if err != nil {
			logger.Printf("[WARN] Setup: failed to unmount temp mount %s: %v", r.MountPoint, err)
			continue
		}
		if rmErr != nil && !os.IsNotExist(rmErr) {
			logger.Printf("[WARN] Setup: could not remove temp mount dir %s: %v", r.MountPoint, rmErr)
		}
		logger.Printf("[DEBUG] Setup: unmounted temp mount %s", r.MountPoint)
	}
}
// scanPartition inspects a single partition for an infra backup.
//
// The partition's existing mount point is used when lsblk or mountedFS
// (device path -> mount point) reports one. Otherwise the partition is mounted
// read-only under /mnt/.felhom-scan/<name>; if nothing is found there it is
// unmounted again right away, otherwise the mount is left in place and the
// current-backup entry is flagged WasTempMounted so that CleanupTempMounts can
// release it later.
//
// The result is nil when the partition cannot be mounted or has no infra
// backup directory. Otherwise the first entry describes the current backup
// (with Error set when it could not be read) and any following entries
// describe historical versions.
func scanPartition(part lsblkDevice, mountedFS map[string]string, logger *log.Logger) []DriveBackup {
	label := ""
	if part.Label != nil {
		label = *part.Label
	}

	// Check if already mounted
	var mountPoint string
	var tempMounted bool
	if part.MountPoint != nil && *part.MountPoint != "" {
		mountPoint = *part.MountPoint
	} else if mp, ok := mountedFS[part.Path]; ok {
		mountPoint = mp
	} else {
		// Try to mount temporarily
		tmpDir := filepath.Join("/mnt", ".felhom-scan", part.Name)
		if err := os.MkdirAll(tmpDir, 0700); err != nil {
			logger.Printf("[DEBUG] Setup: skip %s — cannot create temp dir: %v", part.Path, err)
			return nil
		}
		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
		defer cancel()
		// Try read-only mount
		err := exec.CommandContext(ctx, "mount", "-o", "ro", part.Path, tmpDir).Run()
		if err != nil {
			// Retry with noload for journal errors
			err = exec.CommandContext(ctx, "mount", "-o", "ro,noload", part.Path, tmpDir).Run()
		}
		if err != nil {
			os.Remove(tmpDir)
			logger.Printf("[DEBUG] Setup: skip %s — mount failed: %v", part.Path, err)
			return nil
		}
		mountPoint = tmpDir
		tempMounted = true
	}

	// Check for .felhom-infra-backup/
	infraDir := backup.InfraBackupDir(mountPoint)
	if _, err := os.Stat(infraDir); os.IsNotExist(err) {
		if tempMounted {
			// Bound the umount so a hung device cannot stall the whole scan, and
			// report a failure: this mount is not part of the returned results,
			// so nothing else will clean it up.
			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
			umountErr := exec.CommandContext(ctx, "umount", mountPoint).Run()
			cancel()
			if umountErr != nil {
				logger.Printf("[WARN] Setup: failed to unmount temp mount %s: %v", mountPoint, umountErr)
			}
			os.Remove(mountPoint)
		}
		return nil
	}

	var results []DriveBackup

	// Read current backup
	backupData, meta, err := backup.ReadLocalInfraBackup(mountPoint)
	current := DriveBackup{
		Device:         part.Path,
		Label:          label,
		MountPoint:     mountPoint,
		WasTempMounted: tempMounted,
	}
	switch {
	case err != nil:
		current.Error = err.Error()
		// Metadata may still be available even though the read failed.
		if meta != nil {
			current.CustomerID = meta.CustomerID
			current.Timestamp = meta.Timestamp
			current.CtrlVersion = meta.ControllerVersion
		}
	case meta == nil:
		// Defensive: never dereference missing metadata.
		current.Error = "backup metadata missing"
	default:
		current.IntegrityOK = true
		current.CustomerID = meta.CustomerID
		current.Timestamp = meta.Timestamp
		current.CtrlVersion = meta.ControllerVersion
		backup.ParseBackupCounts(backupData, &current.StackCount, &current.StackNames, &current.DiskCount)
	}
	results = append(results, current)
	logger.Printf("[INFO] Setup: found infra backup on %s (%s) — customer=%s, integrity=%v",
		part.Path, label, current.CustomerID, current.IntegrityOK)

	// Also scan history directory for older versions. These entries share the
	// mount point but are not flagged WasTempMounted, so the mount is released
	// only once.
	history := backup.ReadLocalInfraHistory(mountPoint)
	for _, hv := range history {
		hResult := DriveBackup{
			Device:      part.Path,
			Label:       label,
			MountPoint:  mountPoint,
			CustomerID:  hv.CustomerID,
			Timestamp:   hv.Timestamp,
			CtrlVersion: hv.ControllerVersion,
			IntegrityOK: hv.IntegrityOK,
			Error:       hv.Error,
			StackCount:  hv.StackCount,
			StackNames:  hv.StackNames,
			DiskCount:   hv.DiskCount,
			IsHistory:   true,
			HistoryFile: hv.HistoryFile,
		}
		results = append(results, hResult)
	}
	if len(history) > 0 {
		logger.Printf("[INFO] Setup: found %d historical backup version(s) on %s", len(history), part.Path)
	}
	return results
}
// readMountedFilesystems parses /proc/mounts and returns a map from device
// (first field) to mount point (second field). When a device appears on
// several lines, the last line wins. If /proc/mounts cannot be opened the
// returned map is empty, never nil.
//
// /proc/mounts encodes space, tab, newline and backslash inside a field as
// octal escapes (\040, \011, \012, \134); both fields are decoded so that the
// returned paths can be used directly with the filesystem.
func readMountedFilesystems() map[string]string {
	result := make(map[string]string)
	f, err := os.Open("/proc/mounts")
	if err != nil {
		return result
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		fields := strings.Fields(scanner.Text())
		if len(fields) >= 2 {
			result[unescapeMountField(fields[0])] = unescapeMountField(fields[1])
		}
	}
	return result
}

// unescapeMountField decodes the \NNN octal escapes used in /proc/mounts
// fields. Backslashes that are not followed by a three-digit octal byte value
// are kept unchanged.
func unescapeMountField(s string) string {
	if !strings.Contains(s, `\`) {
		return s
	}
	isOctal := func(c byte) bool { return c >= '0' && c <= '7' }

	var b strings.Builder
	b.Grow(len(s))
	for i := 0; i < len(s); i++ {
		// The first digit is limited to 0-3 so the value fits in one byte.
		if s[i] == '\\' && i+3 < len(s) &&
			s[i+1] >= '0' && s[i+1] <= '3' && isOctal(s[i+2]) && isOctal(s[i+3]) {
			b.WriteByte((s[i+1]-'0')<<6 | (s[i+2]-'0')<<3 | (s[i+3] - '0'))
			i += 3
			continue
		}
		b.WriteByte(s[i])
	}
	return b.String()
}
// getRootDevices returns the set of devices that /proc/mounts lists as mounted
// at "/", "/boot" or "/boot/efi". The returned map is never nil; it is empty
// when /proc/mounts cannot be opened.
//
// Every line of /proc/mounts is examined. A device can appear on several lines
// (bind mounts, btrfs subvolumes), so going through a device -> mount point
// map would keep only its last mount point and could miss the root mount.
func getRootDevices() map[string]bool {
	result := make(map[string]bool)
	f, err := os.Open("/proc/mounts")
	if err != nil {
		return result
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		fields := strings.Fields(scanner.Text())
		if len(fields) < 2 {
			continue
		}
		switch fields[1] {
		case "/", "/boot", "/boot/efi":
			result[fields[0]] = true
		}
	}
	return result
}
// isRootPartition reports whether devPath is marked true in rootDevices.
// Unknown devices and a nil map yield false.
func isRootPartition(devPath string, rootDevices map[string]bool) bool {
	marked, known := rootDevices[devPath]
	return known && marked
}
// countValid returns how many entries in results have IntegrityOK set.
func countValid(results []DriveBackup) int {
	valid := 0
	for i := range results {
		if !results[i].IntegrityOK {
			continue
		}
		valid++
	}
	return valid
}
// runDriveScan performs a drive scan, releases any temporary mounts the scan
// created and then records the outcome on the Server under scanMu: scanRunning
// is cleared, scanDone is set, and either scanError or scanResults is updated.
// NOTE(review): the function itself is synchronous; presumably the caller
// starts it in its own goroutine — confirm.
func (s *Server) runDriveScan() {
	found, scanErr := ScanDrivesForInfraBackups(s.logger, s.isDebug())

	// Temporary mounts are released before the results are published.
	if found != nil {
		CleanupTempMounts(found, s.logger)
	}

	s.scanMu.Lock()
	defer s.scanMu.Unlock()

	s.scanRunning, s.scanDone = false, true
	if scanErr == nil {
		s.scanResults = found
		return
	}
	s.scanError = scanErr.Error()
}