Files
deploy-felhom-compose/controller/internal/backup/restic.go
T
admin c929948f27 feat: Docker volume backup, Tier 2 restore, restore dropdown fixes (v0.33.0)
- Add Docker named volume backup to Tier 1 (dump to tar, include in restic)
  and Tier 2 (copy tars to rsync mirror _volumes/ dir)
- Fix volume name resolution: use project-prefixed names (mealie_mealie_data)
- Fix double Tier 1 in restore dropdown: filter snapshots by app's home drive
- Add Tier 2 restore: RestoreAppFromTier2() restores from rsync mirror
- Show Tier 2 entry in restore dropdown when cross-drive backup succeeded
- Add .fab import link in restore section
- Volume-aware restore type banners and backup content labels

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 21:43:02 +01:00

498 lines
15 KiB
Go

package backup
import (
"context"
"crypto/rand"
"encoding/base64"
"encoding/json"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
)
// ResticManager handles restic backup operations.
// All methods accept repoPath as parameter to support per-drive repos.
type ResticManager struct {
passwordFile string
logger *log.Logger
customerID string
cacheDir string
debug bool
}
// SnapshotResult holds the outcome of a restic backup.
type SnapshotResult struct {
SnapshotID string
FilesNew int
FilesChanged int
DataAdded string
Duration time.Duration
}
// SnapshotInfo holds information about a restic snapshot.
type SnapshotInfo struct {
ID string `json:"short_id"`
Time time.Time `json:"time"`
Paths []string `json:"paths"`
Tags []string `json:"tags"`
RepoPath string `json:"-"` // set by caller for multi-repo aggregation
Tier int `json:"tier"` // 1 = primary, 2 = secondary
DriveLabel string `json:"drive_label"` // filled by caller from settings
Source string `json:"source"` // "restic" or "rsync"
}
// RepoStats holds repository statistics.
type RepoStats struct {
TotalSize string
TotalSizeBytes int64
SnapshotCount int
LatestSnapshot *SnapshotInfo
}
// NewResticManager creates a new restic manager.
func NewResticManager(cfg *config.Config, logger *log.Logger) *ResticManager {
return &ResticManager{
passwordFile: cfg.Backup.ResticPasswordFile,
logger: logger,
customerID: cfg.Customer.ID,
cacheDir: filepath.Join(cfg.Paths.DataDir, "restic-cache"),
}
}
// SetDebug enables or disables debug logging.
func (r *ResticManager) SetDebug(debug bool) {
r.debug = debug
}
// EnsureInitialized checks if the restic repo exists and initializes it if not.
// Also auto-generates the password file if missing.
func (r *ResticManager) EnsureInitialized(repoPath string) error {
if r.debug {
r.logger.Printf("[DEBUG] [restic] EnsureInitialized: repoPath=%s, passwordFile=%s", repoPath, r.passwordFile)
}
// Ensure password file exists
if _, err := os.Stat(r.passwordFile); os.IsNotExist(err) {
if err := r.generatePassword(); err != nil {
return fmt.Errorf("generating restic password: %w", err)
}
}
// Ensure cache dir exists
os.MkdirAll(r.cacheDir, 0700)
// Check if repo is already initialized
configPath := filepath.Join(repoPath, "config")
if _, err := os.Stat(configPath); err == nil {
r.logger.Printf("[INFO] [backup] Restic repo already initialized at %s", repoPath)
return nil
}
// Ensure repo directory exists
if err := os.MkdirAll(repoPath, 0700); err != nil {
return fmt.Errorf("creating repo dir: %w", err)
}
// Initialize repo
r.logger.Printf("[INFO] [backup] Initializing restic repository at %s", repoPath)
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
cmd := r.command(ctx, repoPath, "init")
out, err := cmd.CombinedOutput()
if err != nil {
return fmt.Errorf("restic init failed: %v — %s", err, truncate(string(out), 200))
}
r.logger.Printf("[INFO] [backup] Restic repository initialized successfully")
return nil
}
// Snapshot creates a new backup snapshot of the given paths.
func (r *ResticManager) Snapshot(repoPath string, paths []string, tags []string) (*SnapshotResult, error) {
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
defer cancel()
start := time.Now()
if r.debug {
r.logger.Printf("[DEBUG] [restic] Snapshot: repo=%s, paths=%v, tags=%v", repoPath, paths, tags)
}
args := []string{"backup", "--json"}
for _, tag := range tags {
args = append(args, "--tag", tag)
}
args = append(args, "--host", r.customerID)
// Only include paths that exist
var existingPaths []string
for _, p := range paths {
if _, err := os.Stat(p); err == nil {
existingPaths = append(existingPaths, p)
} else {
r.logger.Printf("[WARN] [backup] Backup path does not exist, skipping: %s", p)
}
}
if len(existingPaths) == 0 {
return nil, fmt.Errorf("no backup paths exist")
}
if r.debug {
r.logger.Printf("[DEBUG] [restic] Snapshot: %d/%d paths exist, backing up: %v", len(existingPaths), len(paths), existingPaths)
}
args = append(args, existingPaths...)
cmd := r.command(ctx, repoPath, args...)
out, err := cmd.Output()
if err != nil {
// Check for stale lock — restic writes lock errors to stderr, not stdout
errStr := string(out)
if exitErr, ok := err.(*exec.ExitError); ok {
errStr += string(exitErr.Stderr)
}
if strings.Contains(errStr, "lock") || strings.Contains(errStr, "locked") {
r.logger.Printf("[WARN] [backup] Restic repo locked — attempting unlock")
unlockCmd := r.command(ctx, repoPath, "unlock")
if unlockErr := unlockCmd.Run(); unlockErr != nil {
r.logger.Printf("[WARN] [backup] Restic unlock failed: %v", unlockErr)
}
// Retry once with a fresh context (H9 fix — original may be nearly expired).
retryCtx, retryCancel := context.WithTimeout(context.Background(), 30*time.Minute)
defer retryCancel()
cmd = r.command(retryCtx, repoPath, args...)
out, err = cmd.Output()
if err != nil {
return nil, fmt.Errorf("restic backup failed after unlock: %v", err)
}
} else {
return nil, fmt.Errorf("restic backup failed: %v", err)
}
}
result := &SnapshotResult{
Duration: time.Since(start),
}
// Parse JSON output — look for the summary line
for _, line := range strings.Split(string(out), "\n") {
line = strings.TrimSpace(line)
if line == "" {
continue
}
var msg struct {
MessageType string `json:"message_type"`
FilesNew int `json:"files_new"`
FilesChanged int `json:"files_changed"`
DataAdded int64 `json:"data_added"`
SnapshotID string `json:"snapshot_id"`
}
if err := json.Unmarshal([]byte(line), &msg); err != nil {
continue
}
if msg.MessageType == "summary" {
result.SnapshotID = msg.SnapshotID
result.FilesNew = msg.FilesNew
result.FilesChanged = msg.FilesChanged
result.DataAdded = humanizeBytes(msg.DataAdded)
}
}
if r.debug {
r.logger.Printf("[DEBUG] [restic] Snapshot: completed in %s, snapshotID=%s, filesNew=%d, filesChanged=%d, dataAdded=%s",
result.Duration, result.SnapshotID, result.FilesNew, result.FilesChanged, result.DataAdded)
}
r.logger.Printf("[INFO] [backup] Restic snapshot complete for %s", repoPath)
return result, nil
}
// Prune removes old snapshots according to retention policy.
func (r *ResticManager) Prune(repoPath string, retention config.RetentionConfig) error {
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
defer cancel()
if r.debug {
r.logger.Printf("[DEBUG] [restic] Prune: repo=%s, keepDaily=%d, keepWeekly=%d, keepMonthly=%d",
repoPath, retention.KeepDaily, retention.KeepWeekly, retention.KeepMonthly)
}
start := time.Now()
args := []string{
"forget",
"--keep-daily", fmt.Sprintf("%d", retention.KeepDaily),
"--keep-weekly", fmt.Sprintf("%d", retention.KeepWeekly),
"--keep-monthly", fmt.Sprintf("%d", retention.KeepMonthly),
"--prune",
}
cmd := r.command(ctx, repoPath, args...)
out, err := cmd.CombinedOutput()
if err != nil {
return fmt.Errorf("restic forget/prune failed: %v — %s", err, truncate(string(out), 200))
}
if r.debug {
r.logger.Printf("[DEBUG] [restic] Prune: completed in %s, output=%d bytes", time.Since(start), len(out))
}
r.logger.Printf("[INFO] [backup] Restic prune completed for %s", repoPath)
return nil
}
// Check verifies repository integrity.
func (r *ResticManager) Check(repoPath string) error {
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
defer cancel()
if r.debug {
r.logger.Printf("[DEBUG] [restic] Check: repo=%s", repoPath)
}
start := time.Now()
cmd := r.command(ctx, repoPath, "check")
out, err := cmd.CombinedOutput()
if err != nil {
if r.debug {
r.logger.Printf("[DEBUG] [restic] Check: failed after %s, output=%s", time.Since(start), truncate(string(out), 300))
}
return fmt.Errorf("restic check failed: %v — %s", err, truncate(string(out), 200))
}
if r.debug {
r.logger.Printf("[DEBUG] [restic] Check: repo=%s OK, completed in %s", repoPath, time.Since(start))
}
r.logger.Printf("[INFO] [backup] Restic check passed for repo %s", repoPath)
return nil
}
// ListSnapshots returns all snapshots, newest first, limited to N entries.
func (r *ResticManager) ListSnapshots(repoPath string, limit int) ([]SnapshotInfo, error) {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
if r.debug {
r.logger.Printf("[DEBUG] [restic] ListSnapshots: repo=%s, limit=%d", repoPath, limit)
}
cmd := r.command(ctx, repoPath, "snapshots", "--json")
out, err := cmd.Output()
if err != nil {
return nil, fmt.Errorf("restic snapshots failed: %v", err)
}
var snapshots []SnapshotInfo
if err := json.Unmarshal(out, &snapshots); err != nil {
return nil, fmt.Errorf("parsing snapshot JSON: %v", err)
}
// Reverse for newest first
for i, j := 0, len(snapshots)-1; i < j; i, j = i+1, j-1 {
snapshots[i], snapshots[j] = snapshots[j], snapshots[i]
}
if limit > 0 && len(snapshots) > limit {
snapshots = snapshots[:limit]
}
if r.debug {
r.logger.Printf("[DEBUG] [restic] ListSnapshots: repo=%s, found %d total snapshots, returning %d",
repoPath, len(snapshots), len(snapshots))
}
return snapshots, nil
}
// LatestSnapshot returns the most recent snapshot info.
func (r *ResticManager) LatestSnapshot(repoPath string) (*SnapshotInfo, error) {
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
defer cancel()
if r.debug {
r.logger.Printf("[DEBUG] [restic] LatestSnapshot: repo=%s", repoPath)
}
cmd := r.command(ctx, repoPath, "snapshots", "--latest", "1", "--json")
out, err := cmd.Output()
if err != nil {
return nil, fmt.Errorf("restic snapshots failed: %v", err)
}
var snapshots []SnapshotInfo
if err := json.Unmarshal(out, &snapshots); err != nil {
return nil, fmt.Errorf("parsing snapshot JSON: %v", err)
}
if len(snapshots) == 0 {
if r.debug {
r.logger.Printf("[DEBUG] [restic] LatestSnapshot: repo=%s, no snapshots found", repoPath)
}
return nil, nil
}
if r.debug {
r.logger.Printf("[DEBUG] [restic] LatestSnapshot: repo=%s, id=%s, time=%s, paths=%v",
repoPath, snapshots[0].ID, snapshots[0].Time.Format(time.RFC3339), snapshots[0].Paths)
}
return &snapshots[0], nil
}
// Stats returns repository statistics.
func (r *ResticManager) Stats(repoPath string) (*RepoStats, error) {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
if r.debug {
r.logger.Printf("[DEBUG] [restic] Stats: repo=%s", repoPath)
}
start := time.Now()
stats := &RepoStats{}
// Get repo size
cmd := r.command(ctx, repoPath, "stats", "--json")
out, err := cmd.Output()
if err == nil {
var raw struct {
TotalSize uint64 `json:"total_size"`
}
if json.Unmarshal(out, &raw) == nil {
stats.TotalSizeBytes = int64(raw.TotalSize)
stats.TotalSize = humanizeBytes(stats.TotalSizeBytes)
}
}
// Count snapshots
cmd = r.command(ctx, repoPath, "snapshots", "--json")
out, err = cmd.Output()
if err == nil {
var snapshots []SnapshotInfo
if json.Unmarshal(out, &snapshots) == nil {
stats.SnapshotCount = len(snapshots)
if len(snapshots) > 0 {
latest := snapshots[len(snapshots)-1]
stats.LatestSnapshot = &latest
}
}
}
if r.debug {
latestID := "none"
if stats.LatestSnapshot != nil {
latestID = stats.LatestSnapshot.ID
}
r.logger.Printf("[DEBUG] [restic] Stats: repo=%s, totalSize=%s, snapshots=%d, latest=%s, took %s",
repoPath, stats.TotalSize, stats.SnapshotCount, latestID, time.Since(start))
}
return stats, nil
}
// GetPassword reads and returns the restic repository password.
func (r *ResticManager) GetPassword() (string, error) {
data, err := os.ReadFile(r.passwordFile)
if err != nil {
return "", fmt.Errorf("reading restic password: %w", err)
}
return strings.TrimSpace(string(data)), nil
}
// RestoreAppData restores specific paths from a restic snapshot.
func (r *ResticManager) RestoreAppData(repoPath string, snapshotID string, paths []string) error {
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
defer cancel()
args := []string{
"restore", snapshotID,
"--target", "/",
}
for _, p := range paths {
args = append(args, "--include", p)
}
if r.debug {
r.logger.Printf("[DEBUG] [restic] RestoreAppData: repo=%s, snapshot=%s, %d include paths=%v",
repoPath, snapshotID, len(paths), paths)
}
start := time.Now()
r.logger.Printf("[INFO] [backup] Restore started: repo=%s, snapshot=%s, paths=%v", repoPath, snapshotID, paths)
cmd := r.command(ctx, repoPath, args...)
output, err := cmd.CombinedOutput()
if err != nil {
r.logger.Printf("[ERROR] [backup] Restore failed: %v, output: %s", err, truncate(string(output), 500))
return fmt.Errorf("restic restore failed: %w", err)
}
if r.debug {
r.logger.Printf("[DEBUG] [restic] RestoreAppData: completed in %s, output=%d bytes", time.Since(start), len(output))
}
r.logger.Printf("[INFO] [backup] Restore completed: snapshot=%s, paths=%v", snapshotID, paths)
return nil
}
// RepoExists checks if a restic repo is initialized at the given path.
func (r *ResticManager) RepoExists(repoPath string) bool {
exists := false
_, err := os.Stat(filepath.Join(repoPath, "config"))
exists = err == nil
if r.debug {
r.logger.Printf("[DEBUG] [restic] RepoExists: repo=%s, exists=%v", repoPath, exists)
}
return exists
}
// UnlockCommand returns an exec.Cmd that runs restic unlock on the given repo.
func (r *ResticManager) UnlockCommand(ctx context.Context, repoPath string) *exec.Cmd {
return r.command(ctx, repoPath, "unlock")
}
func (r *ResticManager) command(ctx context.Context, repoPath string, args ...string) *exec.Cmd {
if r.debug {
r.logger.Printf("[DEBUG] [restic] command: restic %s (repo=%s)", strings.Join(args, " "), repoPath)
}
cmd := exec.CommandContext(ctx, "restic", args...)
cmd.Env = append(os.Environ(),
"RESTIC_REPOSITORY="+repoPath,
"RESTIC_PASSWORD_FILE="+r.passwordFile,
"RESTIC_CACHE_DIR="+r.cacheDir,
)
return cmd
}
func (r *ResticManager) generatePassword() error {
// Ensure directory exists
dir := filepath.Dir(r.passwordFile)
if err := os.MkdirAll(dir, 0700); err != nil {
return fmt.Errorf("creating password dir: %w", err)
}
// Generate 32 random bytes, base64url-encode
b := make([]byte, 32)
if _, err := rand.Read(b); err != nil {
return fmt.Errorf("generating random bytes: %w", err)
}
password := base64.URLEncoding.EncodeToString(b)
if err := os.WriteFile(r.passwordFile, []byte(password), 0600); err != nil {
return fmt.Errorf("writing password file: %w", err)
}
r.logger.Printf("[INFO] [backup] Generated new restic repository password at %s", r.passwordFile)
r.logger.Printf("[WARN] [backup] Save this password externally — losing it means losing access to ALL backups")
return nil
}
func truncate(s string, maxLen int) string {
if len(s) <= maxLen {
return s
}
return s[:maxLen] + "..."
}