Files
deploy-felhom-compose/controller/internal/backup/crossdrive.go
T

339 lines
10 KiB
Go

package backup
import (
"context"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"time"
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
)
// CrossDriveRunner handles per-app backup to secondary storage.
type CrossDriveRunner struct {
sett *settings.Settings
stackProvider StackDataProvider
logger *log.Logger
mu sync.Mutex
running map[string]bool // per-app running state
}
// NewCrossDriveRunner creates a new CrossDriveRunner.
func NewCrossDriveRunner(sett *settings.Settings, provider StackDataProvider, logger *log.Logger) *CrossDriveRunner {
return &CrossDriveRunner{
sett: sett,
stackProvider: provider,
logger: logger,
running: make(map[string]bool),
}
}
// RunAppBackup runs cross-drive backup for a single app.
func (r *CrossDriveRunner) RunAppBackup(ctx context.Context, stackName string) error {
cfg := r.sett.GetCrossDriveConfig(stackName)
if cfg == nil || !cfg.Enabled {
return fmt.Errorf("cross-drive backup not configured or disabled for %s", stackName)
}
// Prevent concurrent runs for the same app
r.mu.Lock()
if r.running[stackName] {
r.mu.Unlock()
return fmt.Errorf("cross-drive backup already running for %s", stackName)
}
r.running[stackName] = true
r.mu.Unlock()
defer func() {
r.mu.Lock()
r.running[stackName] = false
r.mu.Unlock()
}()
// Mark as running in settings
_ = r.sett.UpdateCrossDriveStatus(stackName, func(c *settings.CrossDriveBackup) {
c.LastStatus = "running"
})
start := time.Now()
r.logger.Printf("[INFO] Cross-drive backup starting: %s → %s (method: %s)",
stackName, cfg.DestinationPath, cfg.Method)
if err := r.ValidateDestination(cfg.DestinationPath); err != nil {
r.updateStatus(stackName, "error", err.Error(), time.Since(start), "")
return fmt.Errorf("destination validation failed: %w", err)
}
// Resolve HDD mounts for this app
mounts := r.stackProvider.GetStackHDDMounts(stackName)
if len(mounts) == 0 {
r.updateStatus(stackName, "error", "no HDD data paths found for this app", time.Since(start), "")
return fmt.Errorf("no HDD data paths found for %s", stackName)
}
// Safety: destination must not overlap with any source
for _, m := range mounts {
if system.PathsOverlap(cfg.DestinationPath, m) {
msg := fmt.Sprintf("destination %s overlaps with source %s — aborted", cfg.DestinationPath, m)
r.updateStatus(stackName, "error", msg, time.Since(start), "")
return fmt.Errorf("%s", msg)
}
}
var runErr error
switch cfg.Method {
case "rsync":
runErr = r.runRsyncBackup(ctx, stackName, cfg.DestinationPath, mounts)
case "restic":
runErr = r.runResticBackup(ctx, stackName, cfg.DestinationPath, mounts)
default:
runErr = fmt.Errorf("unknown backup method: %s", cfg.Method)
}
duration := time.Since(start)
if runErr != nil {
r.logger.Printf("[ERROR] Cross-drive backup failed: %s: %v", stackName, runErr)
r.updateStatus(stackName, "error", runErr.Error(), duration, "")
return runErr
}
// Calculate backup size
var sizeHuman string
if cfg.Method == "rsync" {
destDir := filepath.Join(cfg.DestinationPath, "backups", "rsync", stackName)
if sz, err := dirSizeBytes(destDir); err == nil {
sizeHuman = humanizeBytes(sz)
}
}
r.logger.Printf("[INFO] Cross-drive backup completed: %s (%s)", stackName, duration.Round(time.Second))
r.updateStatus(stackName, "ok", "", duration, sizeHuman)
return nil
}
// RunAllScheduled runs cross-drive backups for all apps matching the schedule.
// Runs sequentially (disk I/O bound).
func (r *CrossDriveRunner) RunAllScheduled(ctx context.Context, schedule string) error {
configs := r.sett.GetAllCrossDriveConfigs()
if len(configs) == 0 {
return nil
}
var errs []string
for stackName, cfg := range configs {
if !cfg.Enabled {
continue
}
if cfg.Schedule != schedule {
continue
}
select {
case <-ctx.Done():
return ctx.Err()
default:
}
if err := r.RunAppBackup(ctx, stackName); err != nil {
errs = append(errs, fmt.Sprintf("%s: %v", stackName, err))
}
}
if len(errs) > 0 {
return fmt.Errorf("cross-drive backup errors: %s", strings.Join(errs, "; "))
}
return nil
}
// IsRunning returns true if the given app's backup is currently running.
func (r *CrossDriveRunner) IsRunning(stackName string) bool {
r.mu.Lock()
defer r.mu.Unlock()
return r.running[stackName]
}
// ValidateDestination checks that the destination path exists, is writable,
// and has sufficient free space (at least 100MB).
// A non-mount-point destination (e.g. a folder on the system drive) is allowed
// with a logged warning — consistent with the web UI's CheckBackupDestination.
func (r *CrossDriveRunner) ValidateDestination(path string) error {
if path == "" {
return fmt.Errorf("destination path is empty")
}
if _, err := os.Stat(path); os.IsNotExist(err) {
return fmt.Errorf("destination %s does not exist", path)
}
if !system.IsMountPoint(path) {
r.logger.Printf("[WARN] Destination %s is not a separate mount point (system drive) — backup will proceed but data is not protected against drive failure", path)
}
if !system.IsWritable(path) {
return fmt.Errorf("destination %s is not writable", path)
}
di := system.GetDiskUsage(path)
if di != nil && di.AvailGB < 0.1 {
return fmt.Errorf("destination %s has insufficient free space (%.1f GB)", path, di.AvailGB)
}
return nil
}
// --- rsync ---
func (r *CrossDriveRunner) runRsyncBackup(ctx context.Context, stackName, destBase string, mounts []string) error {
destDir := filepath.Join(destBase, "backups", "rsync", stackName)
if err := os.MkdirAll(destDir, 0755); err != nil {
return fmt.Errorf("creating rsync dest dir: %w", err)
}
for _, srcMount := range mounts {
// Preserve directory structure: strip the storage path prefix to get relative subpath
// e.g., /mnt/hdd_placeholder/storage/immich/ → storage/immich/
rel := srcMount
// Find the topmost non-root segment of the mount path (after the mount point itself)
// Use a simple approach: keep everything from the first significant segment after /mnt/...
parts := strings.SplitN(strings.TrimPrefix(srcMount, "/"), "/", 3)
if len(parts) >= 3 {
rel = parts[2] // e.g., "storage/immich"
} else {
rel = filepath.Base(srcMount)
}
dstPath := filepath.Join(destDir, rel)
if err := os.MkdirAll(dstPath, 0755); err != nil {
return fmt.Errorf("creating rsync destination: %w", err)
}
// Ensure trailing slash on source for rsync semantics (copy contents, not the dir itself)
src := strings.TrimRight(srcMount, "/") + "/"
dst := strings.TrimRight(dstPath, "/") + "/"
cmd := exec.CommandContext(ctx, "rsync", "-a", "--delete", src, dst)
r.logger.Printf("[DEBUG] rsync: %s → %s", src, dst)
if out, err := cmd.CombinedOutput(); err != nil {
return fmt.Errorf("rsync failed for %s: %v (%s)", srcMount, err, strings.TrimSpace(string(out)))
}
}
return nil
}
// --- restic ---
func (r *CrossDriveRunner) runResticBackup(ctx context.Context, stackName, destBase string, mounts []string) error {
repoPath := filepath.Join(destBase, "backups", "restic")
// Get or create the cross-drive restic password
password, err := r.sett.GetOrCreateCrossDrivePassword()
if err != nil {
return fmt.Errorf("getting restic password: %w", err)
}
// H6: Write password to temp file with safe cleanup order (close before deferred remove).
pwFile, err := os.CreateTemp("", "felhom-crossdrive-pw-*")
if err != nil {
return fmt.Errorf("creating password file: %w", err)
}
pwPath := pwFile.Name()
if _, err := pwFile.WriteString(password); err != nil {
pwFile.Close()
os.Remove(pwPath)
return fmt.Errorf("writing password file: %w", err)
}
pwFile.Close()
defer os.Remove(pwPath)
// Ensure repo is initialized
if err := r.ensureResticRepo(ctx, repoPath, pwPath); err != nil {
return err
}
// Run restic backup
args := []string{
"backup", "--repo", repoPath,
"--password-file", pwPath,
"--tag", stackName,
"--tag", "cross-drive",
}
args = append(args, mounts...)
cmd := exec.CommandContext(ctx, "restic", args...)
r.logger.Printf("[DEBUG] restic backup: %v", args)
if out, err := cmd.CombinedOutput(); err != nil {
return fmt.Errorf("restic backup failed: %v (%s)", err, strings.TrimSpace(string(out)))
}
// H5: Prune old snapshots to prevent unbounded accumulation.
return r.pruneResticRepo(ctx, repoPath, pwPath)
}
// pruneResticRepo forgets old snapshots in a cross-drive restic repo, keeping recent ones.
func (r *CrossDriveRunner) pruneResticRepo(ctx context.Context, repoPath, pwPath string) error {
args := []string{
"forget", "--repo", repoPath,
"--password-file", pwPath,
"--keep-daily", "7",
"--keep-weekly", "4",
"--prune",
}
cmd := exec.CommandContext(ctx, "restic", args...)
r.logger.Printf("[DEBUG] restic forget (prune): %s", repoPath)
if out, err := cmd.CombinedOutput(); err != nil {
// Non-fatal: log warning but don't fail the backup
r.logger.Printf("[WARN] restic forget failed for %s: %v (%s)", repoPath, err, strings.TrimSpace(string(out)))
}
return nil
}
func (r *CrossDriveRunner) ensureResticRepo(ctx context.Context, repoPath, pwFile string) error {
// Check if repo config exists
if _, err := os.Stat(filepath.Join(repoPath, "config")); err == nil {
return nil // already initialized
}
if err := os.MkdirAll(repoPath, 0755); err != nil {
return fmt.Errorf("creating restic repo dir: %w", err)
}
cmd := exec.CommandContext(ctx, "restic", "init", "--repo", repoPath, "--password-file", pwFile)
r.logger.Printf("[INFO] Initializing cross-drive restic repo at %s", repoPath)
if out, err := cmd.CombinedOutput(); err != nil {
return fmt.Errorf("restic init failed: %v (%s)", err, strings.TrimSpace(string(out)))
}
return nil
}
// --- helpers ---
func (r *CrossDriveRunner) updateStatus(stackName, status, errMsg string, duration time.Duration, sizeHuman string) {
_ = r.sett.UpdateCrossDriveStatus(stackName, func(c *settings.CrossDriveBackup) {
c.LastRun = time.Now().UTC().Format(time.RFC3339)
c.LastStatus = status
c.LastError = errMsg
c.LastDuration = duration.Round(time.Second).String()
if sizeHuman != "" {
c.LastSizeHuman = sizeHuman
}
})
}
// dirSizeBytes returns the total byte size of all files under path.
// H7: Walk errors are now propagated instead of silently swallowed.
func dirSizeBytes(path string) (int64, error) {
var total int64
err := filepath.Walk(path, func(_ string, info os.FileInfo, err error) error {
if err != nil {
return err // propagate permission/IO errors
}
if !info.IsDir() {
total += info.Size()
}
return nil
})
return total, err
}