Files
deploy-felhom-compose/controller/internal/backup/backup.go
T
admin 37ff296a0d v0.4.5: Add dedicated Backup page (Biztonsági mentés)
New /backups page with full backup system visibility:
- Status overview cards (local/remote backup, DB count, repo size)
- Schedule section with next-run times and retention policy
- Database table with type, size, validation (table count), status
- Snapshot history table with per-snapshot stats
- Repository info card with paths, integrity status, remote placeholder
- "Mentés most" button with auto-refresh polling
- Empty state when backup not configured

Backend: SnapshotRecord history (ring buffer), DumpValidation,
ListDumpFiles, ListSnapshots, GetFullStatus, restic check tracking.
Server accepts scheduler for next-run time calculation.

Sidebar nav updated with 3rd item, dashboard backup card title clickable.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 07:43:24 +01:00

418 lines
11 KiB
Go

package backup
import (
"context"
"fmt"
"log"
"strings"
"sync"
"time"
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
"gitea.dooplex.hu/admin/felhom-controller/internal/monitor"
)
// Manager orchestrates database dumps and restic backups and remembers the
// outcome of the most recent runs so the status getters can report them.
// Construct it with NewManager.
type Manager struct {
// cfg supplies paths, schedules, retention, the customer ID and the
// healthcheck UUIDs used by the run methods.
cfg *config.Config
// restic wraps the restic repository operations used here (init, snapshot,
// prune, check, stats, snapshot listing).
restic *ResticManager
logger *log.Logger
// pinger reports run results to the monitoring healthchecks.
pinger *monitor.Pinger
// mu guards every field declared below it.
mu sync.Mutex
// lastDBDump is the result of the most recent RunDBDumps call; nil until
// the first recorded run.
lastDBDump *DBDumpStatus
// lastBackup is the result of the most recent RunBackup call; nil until
// the first recorded run.
lastBackup *BackupStatus
// running is true while RunFullBackup is executing.
running bool
// snapshotHistory keeps the most recent snapshot records, newest appended
// last; appendSnapshotRecord trims it to 20 entries.
snapshotHistory []SnapshotRecord // ring buffer, last 20 entries
// lastCheckTime is when RunBackup last finished a restic check; zero if no
// check has run in this process.
lastCheckTime time.Time
// lastCheckOK reports whether that check returned no error.
lastCheckOK bool
}
// SnapshotRecord combines restic snapshot metadata with our run stats.
//
// Records created by RunBackup carry the per-run statistics (HasStats is
// true); records created by LoadSnapshotHistory only carry the ID and time.
type SnapshotRecord struct {
// SnapshotID is the restic snapshot identifier.
SnapshotID string `json:"snapshot_id"`
// Time is when the record was created (RunBackup) or the snapshot time
// reported by restic (LoadSnapshotHistory).
Time time.Time `json:"time"`
// FilesNew and FilesChanged are copied from the snapshot result.
FilesNew int `json:"files_new"`
FilesChanged int `json:"files_changed"`
// DataAdded is the amount of added data as reported by the snapshot
// result, already formatted as a string.
DataAdded string `json:"data_added"`
// Duration is the wall-clock time of the whole RunBackup call. With
// encoding/json a time.Duration is serialized as integer nanoseconds.
Duration time.Duration `json:"duration"`
// Success is always true for the records created in this file.
Success bool `json:"success"`
HasStats bool `json:"has_stats"` // false for historical entries loaded from restic
}
// FullBackupStatus contains everything the backup page needs. It is built by
// Manager.GetFullStatus as a point-in-time snapshot of the manager state plus
// freshly queried repository and dump information.
type FullBackupStatus struct {
// Enabled mirrors cfg.Backup.Enabled.
Enabled bool
// Running is true while a full backup run is in progress.
Running bool
// DB Dumps
// LastDBDump is the last recorded dump run; nil if none was recorded.
LastDBDump *DBDumpStatus
// DumpFiles lists the dump files found in the dump directory; left nil
// when listing fails.
DumpFiles []DumpFileInfo
// DiscoveredDBs lists the currently discovered database containers; left
// nil when discovery fails.
DiscoveredDBs []DiscoveredDB
// Restic
// LastBackup is the last recorded restic run; nil if none was recorded.
LastBackup *BackupStatus
// SnapshotHistory is a copy of the manager's history, newest first.
SnapshotHistory []SnapshotRecord
// RepoStats holds current repository statistics; left nil when they
// cannot be read.
RepoStats *RepoStats
// Schedule
// The three schedule strings are copied verbatim from cfg.Backup.
DBDumpSchedule string
ResticSchedule string
PruneSchedule string
// NextDBDump and NextBackup are supplied by the caller of GetFullStatus.
NextDBDump time.Time
NextBackup time.Time
Retention config.RetentionConfig
// Repository health
// RepoPath mirrors cfg.Backup.ResticRepo.
RepoPath string
// BackupPaths are the paths included in each restic snapshot.
BackupPaths []string
// LastCheckTime and LastCheckOK describe the last restic check performed
// by RunBackup; LastCheckTime is zero if no check has run yet.
LastCheckTime time.Time
LastCheckOK bool
// Remote (placeholder)
// RemoteEnabled is not set by GetFullStatus and therefore stays false.
RemoteEnabled bool
}
// DBDumpStatus holds the last DB dump result as recorded by
// Manager.RunDBDumps.
type DBDumpStatus struct {
// LastRun is when the run finished.
LastRun time.Time
// Results holds one entry per dumped database; it is empty when no
// database containers were discovered.
Results []DumpResult
// Success is true when every dump succeeded or there was nothing to dump.
Success bool
// Duration is the wall-clock time of the run.
Duration time.Duration
}
// BackupStatus holds the last backup result as recorded by
// Manager.RunBackup.
type BackupStatus struct {
// LastRun is when the run finished (or failed).
LastRun time.Time
// Snapshot is the snapshot result of a successful run; it is not set for
// a failed run.
Snapshot *SnapshotResult
// Success is true when the snapshot was created.
Success bool
// Duration is the wall-clock time of the run.
Duration time.Duration
// RepoStats holds the repository statistics read after a successful run;
// it is not set for a failed run.
RepoStats *RepoStats
}
// NewManager builds a Manager around the given configuration, healthcheck
// pinger and logger. The restic wrapper is created from the same cfg and
// logger. No run state or snapshot history is loaded here.
func NewManager(cfg *config.Config, pinger *monitor.Pinger, logger *log.Logger) *Manager {
	mgr := new(Manager)
	mgr.cfg = cfg
	mgr.logger = logger
	mgr.pinger = pinger
	mgr.restic = NewResticManager(cfg, logger)
	return mgr
}
// RunDBDumps discovers the database containers and dumps each one into the
// configured dump directory (cfg.Paths.DBDumpDir).
//
// The outcome is stored as the last DB dump status and, except when there is
// nothing to dump, reported to the DB dump healthcheck:
//   - discovery failure: a failed status is recorded, a failure ping is sent
//     and the discovery error is returned;
//   - no database containers: a successful, empty status is recorded and no
//     ping is sent;
//   - otherwise: a success ping if every dump succeeded, else a failure ping
//     and a non-nil error.
func (m *Manager) RunDBDumps(ctx context.Context) error {
	start := time.Now()
	uuid := m.cfg.Monitoring.PingUUIDs.DBDump
	m.logger.Printf("[INFO] Starting database dump run")

	dbs, err := DiscoverDatabases(ctx, m.logger)
	if err != nil {
		m.logger.Printf("[ERROR] Database discovery failed: %v", err)
		// Record and report the failure. Without this the status keeps
		// showing the previous (possibly successful) run, and monitoring is
		// not told that this run failed.
		m.mu.Lock()
		m.lastDBDump = &DBDumpStatus{
			LastRun:  time.Now(),
			Success:  false,
			Duration: time.Since(start),
		}
		m.mu.Unlock()
		m.pinger.Fail(uuid, fmt.Sprintf("Database discovery failed: %v", err))
		return err
	}

	if len(dbs) == 0 {
		m.logger.Printf("[INFO] No database containers found")
		m.mu.Lock()
		m.lastDBDump = &DBDumpStatus{
			LastRun:  time.Now(),
			Success:  true,
			Duration: time.Since(start),
		}
		m.mu.Unlock()
		return nil
	}

	m.logger.Printf("[INFO] Discovered %d database(s): %s", len(dbs), dbNames(dbs))
	results := DumpAll(ctx, dbs, m.cfg.Paths.DBDumpDir, m.logger)

	// Collect a per-database summary for the healthcheck body and add up the
	// size of the successful dumps.
	allOK := true
	var totalSize int64
	summary := make([]string, 0, len(results))
	for _, r := range results {
		if r.Error != nil {
			allOK = false
			summary = append(summary, fmt.Sprintf("FAIL %s: %v", r.DB.ContainerName, r.Error))
			m.logger.Printf("[ERROR] DB dump failed for %s: %v", r.DB.ContainerName, r.Error)
			continue
		}
		totalSize += r.Size
		summary = append(summary, fmt.Sprintf("OK %s (%s)", r.DB.ContainerName, formatBytes(r.Size)))
	}

	duration := time.Since(start)
	m.mu.Lock()
	m.lastDBDump = &DBDumpStatus{
		LastRun:  time.Now(),
		Results:  results,
		Success:  allOK,
		Duration: duration,
	}
	m.mu.Unlock()

	// Ping healthcheck
	body := fmt.Sprintf("DB dump: %d databases, %s total\n%s",
		len(results), formatBytes(totalSize), strings.Join(summary, "\n"))
	if !allOK {
		m.pinger.Fail(uuid, body)
		return fmt.Errorf("some database dumps failed")
	}
	m.pinger.Ping(uuid, body)
	m.logger.Printf("[INFO] DB dump completed: %d databases, %s total (%s)",
		len(results), formatBytes(totalSize), duration.Round(time.Millisecond))
	return nil
}
// RunBackup creates one restic snapshot of the stacks directory, the DB dump
// directory and the controller configuration file.
//
// Steps:
//  1. Make sure the repository is initialized.
//  2. Take the snapshot, tagged with "felhom" and the customer ID.
//  3. If shouldPrune says so for the configured prune schedule, prune with
//     the configured retention and run a repository check. Failures of
//     either are logged as warnings and do not fail the backup; the check
//     result is remembered for status reporting.
//  4. Read repository stats, record the run and append it to the snapshot
//     history.
//
// A failure in step 1 or 2 is logged, reported to the backup healthcheck as
// a failure, recorded as a failed last backup and returned. On success a
// success ping with a short summary is sent and nil is returned.
//
// ctx is accepted for interface symmetry with RunDBDumps; the restic calls
// made here do not take a context.
func (m *Manager) RunBackup(ctx context.Context) error {
	start := time.Now()
	pingUUID := m.cfg.Monitoring.PingUUIDs.Backup
	m.logger.Printf("[INFO] Starting restic backup")

	// recordFailure stores a failed run so the status does not keep showing
	// an older, successful backup.
	recordFailure := func() {
		m.mu.Lock()
		m.lastBackup = &BackupStatus{
			LastRun:  time.Now(),
			Success:  false,
			Duration: time.Since(start),
		}
		m.mu.Unlock()
	}

	// Ensure repo is initialized
	if err := m.restic.EnsureInitialized(); err != nil {
		m.logger.Printf("[ERROR] Restic init failed: %v", err)
		m.pinger.Fail(pingUUID, fmt.Sprintf("Restic init failed: %v", err))
		recordFailure()
		return err
	}

	// Backup paths
	paths := []string{
		m.cfg.Paths.StacksDir,
		m.cfg.Paths.DBDumpDir,
		"/opt/docker/felhom-controller/controller.yaml",
	}
	tags := []string{"felhom", m.cfg.Customer.ID}

	result, err := m.restic.Snapshot(paths, tags)
	if err != nil {
		m.logger.Printf("[ERROR] Restic backup failed: %v", err)
		m.pinger.Fail(pingUUID, fmt.Sprintf("Backup failed: %v", err))
		recordFailure()
		return err
	}

	// Prune and verify the repository when the prune schedule matches today.
	// The check runs even if the prune itself failed.
	if shouldPrune(m.cfg.Backup.PruneSchedule) {
		m.logger.Printf("[INFO] Running weekly prune")
		if err := m.restic.Prune(m.cfg.Backup.Retention); err != nil {
			m.logger.Printf("[WARN] Restic prune failed: %v", err)
		}
		checkErr := m.restic.Check()
		if checkErr != nil {
			m.logger.Printf("[WARN] Restic check failed: %v", checkErr)
		}
		m.mu.Lock()
		m.lastCheckTime = time.Now()
		m.lastCheckOK = checkErr == nil
		m.mu.Unlock()
	}

	// Repository stats are informational only: a failure is logged and the
	// run is still recorded as successful, just without stats.
	stats, statsErr := m.restic.Stats()
	if statsErr != nil {
		m.logger.Printf("[WARN] Could not read repository stats: %v", statsErr)
		stats = nil
	}

	duration := time.Since(start)
	finished := time.Now()

	m.mu.Lock()
	m.lastBackup = &BackupStatus{
		LastRun:   finished,
		Snapshot:  result,
		Success:   true,
		Duration:  duration,
		RepoStats: stats,
	}
	// Append to snapshot history (requires m.mu, which is held here).
	m.appendSnapshotRecord(SnapshotRecord{
		SnapshotID:   result.SnapshotID,
		Time:         finished,
		FilesNew:     result.FilesNew,
		FilesChanged: result.FilesChanged,
		DataAdded:    result.DataAdded,
		Duration:     duration,
		Success:      true,
		HasStats:     true,
	})
	m.mu.Unlock()

	body := fmt.Sprintf("Backup OK\nSnapshot: %s\nNew files: %d, Changed: %d\nData added: %s\nDuration: %s",
		result.SnapshotID, result.FilesNew, result.FilesChanged, result.DataAdded,
		duration.Round(time.Second))
	m.pinger.Ping(pingUUID, body)
	m.logger.Printf("[INFO] Restic backup completed: snapshot %s, %d new, %d changed, %s added (%s)",
		result.SnapshotID, result.FilesNew, result.FilesChanged, result.DataAdded,
		duration.Round(time.Millisecond))
	return nil
}
// RunFullBackup runs the database dumps and then the restic backup, allowing
// only one such run at a time.
//
// If a run is already in progress it returns an error immediately. A DB dump
// error is logged as a warning and does not stop the restic backup; the
// returned error is the one from RunBackup.
func (m *Manager) RunFullBackup(ctx context.Context) error {
	// Claim the running flag; alreadyRunning tells us whether someone else
	// held it before us.
	m.mu.Lock()
	alreadyRunning := m.running
	m.running = true
	m.mu.Unlock()

	if alreadyRunning {
		return fmt.Errorf("backup already in progress")
	}

	// Release the flag when this run ends, whatever the outcome.
	release := func() {
		m.mu.Lock()
		m.running = false
		m.mu.Unlock()
	}
	defer release()

	// Step 1: DB dumps (best effort).
	dumpErr := m.RunDBDumps(ctx)
	if dumpErr != nil {
		m.logger.Printf("[WARN] DB dump had errors, continuing with backup anyway")
	}

	// Step 2: Restic backup.
	backupErr := m.RunBackup(ctx)
	return backupErr
}
// GetStatus returns the last recorded DB dump status and the last recorded
// backup status. Either value is nil when no such run has been recorded yet.
func (m *Manager) GetStatus() (*DBDumpStatus, *BackupStatus) {
	m.mu.Lock()
	dump, backup := m.lastDBDump, m.lastBackup
	m.mu.Unlock()

	return dump, backup
}
// GetRepoStats asks the restic wrapper for the current repository statistics
// and passes its result and error through unchanged.
func (m *Manager) GetRepoStats() (*RepoStats, error) {
	stats, err := m.restic.Stats()
	return stats, err
}
// IsRunning reports whether a full backup run (RunFullBackup) is currently in
// progress.
func (m *Manager) IsRunning() bool {
	m.mu.Lock()
	active := m.running
	m.mu.Unlock()

	return active
}
// shouldPrune reports whether a prune should run now for the given schedule
// name. "daily" (compared case-insensitively) always prunes. Every other
// value, including "weekly" and unknown strings, prunes only on Sundays.
//
// The weekday is evaluated in the Europe/Budapest time zone; if that zone
// cannot be loaded, UTC is used instead.
func shouldPrune(schedule string) bool {
	if strings.ToLower(schedule) == "daily" {
		return true
	}

	zone := time.UTC
	if budapest, err := time.LoadLocation("Europe/Budapest"); err == nil {
		zone = budapest
	}
	today := time.Now().In(zone).Weekday()
	return today == time.Sunday
}
// appendSnapshotRecord appends rec to the snapshot history and drops the
// oldest entries so that at most 20 remain. Caller must hold m.mu.
func (m *Manager) appendSnapshotRecord(rec SnapshotRecord) {
	const maxHistory = 20

	history := append(m.snapshotHistory, rec)
	if excess := len(history) - maxHistory; excess > 0 {
		history = history[excess:]
	}
	m.snapshotHistory = history
}
// LoadSnapshotHistory fills the snapshot history from the snapshots already
// present in the restic repository. It is meant to be called on startup.
//
// The listed snapshots are appended to whatever history exists and the
// result is trimmed to the last 20 entries. Loaded entries carry only ID and
// time: they are marked successful and have HasStats false. If the snapshots
// cannot be listed, a warning is logged and the history is left unchanged.
func (m *Manager) LoadSnapshotHistory() {
	// 20 is passed to ListSnapshots, presumably as the maximum number of
	// snapshots to return.
	snapshots, err := m.restic.ListSnapshots(20)
	if err != nil {
		m.logger.Printf("[WARN] Could not load snapshot history: %v", err)
		return
	}

	// Convert outside the lock; HasStats stays false because restic's
	// listing provides no per-run delta statistics.
	loaded := make([]SnapshotRecord, 0, len(snapshots))
	for _, snap := range snapshots {
		loaded = append(loaded, SnapshotRecord{
			SnapshotID: snap.ID,
			Time:       snap.Time,
			Success:    true,
		})
	}

	m.mu.Lock()
	history := append(m.snapshotHistory, loaded...)
	if excess := len(history) - 20; excess > 0 {
		history = history[excess:]
	}
	m.snapshotHistory = history
	total := len(history)
	m.mu.Unlock()

	m.logger.Printf("[INFO] Loaded %d historical snapshots", total)
}
// GetFullStatus assembles everything the backup page needs.
//
// nextDBDump and nextBackup are the next scheduled run times; they are
// copied into the result unchanged.
//
// The manager's in-memory state is read under the lock. After that the
// function queries restic for repository stats, lists the dump directory and
// discovers database containers (with a 10 second timeout). Each of these
// lookups is best effort: on error the corresponding field is left unset.
// Because of these external lookups the call may be slow.
func (m *Manager) GetFullStatus(nextDBDump, nextBackup time.Time) *FullBackupStatus {
	status := new(FullBackupStatus)
	status.NextDBDump = nextDBDump
	status.NextBackup = nextBackup

	m.mu.Lock()
	status.Enabled = m.cfg.Backup.Enabled
	status.Running = m.running
	status.LastDBDump = m.lastDBDump
	status.LastBackup = m.lastBackup
	status.DBDumpSchedule = m.cfg.Backup.DBDumpSchedule
	status.ResticSchedule = m.cfg.Backup.ResticSchedule
	status.PruneSchedule = m.cfg.Backup.PruneSchedule
	status.Retention = m.cfg.Backup.Retention
	status.RepoPath = m.cfg.Backup.ResticRepo
	status.LastCheckTime = m.lastCheckTime
	status.LastCheckOK = m.lastCheckOK

	// Copy the history in reverse so the result is newest first and does not
	// alias the manager's slice.
	count := len(m.snapshotHistory)
	reversed := make([]SnapshotRecord, count)
	for i, rec := range m.snapshotHistory {
		reversed[count-1-i] = rec
	}
	status.SnapshotHistory = reversed
	m.mu.Unlock()

	// Paths included in every snapshot.
	status.BackupPaths = []string{
		m.cfg.Paths.StacksDir,
		m.cfg.Paths.DBDumpDir,
		"/opt/docker/felhom-controller/controller.yaml",
	}

	// Repository stats, queried without holding the lock.
	stats, statsErr := m.restic.Stats()
	if statsErr == nil {
		status.RepoStats = stats
	}

	// Dump files currently on disk.
	files, listErr := ListDumpFiles(m.cfg.Paths.DBDumpDir)
	if listErr == nil {
		status.DumpFiles = files
	}

	// Database containers that are currently discoverable.
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	dbs, discoverErr := DiscoverDatabases(ctx, m.logger)
	if discoverErr == nil {
		status.DiscoveredDBs = dbs
	}

	return status
}
// dbNames renders the discovered databases as a comma-separated list of
// "container(type)" items for log output. An empty input yields "".
func dbNames(dbs []DiscoveredDB) string {
	var b strings.Builder
	for i, db := range dbs {
		if i > 0 {
			b.WriteString(", ")
		}
		b.WriteString(fmt.Sprintf("%s(%s)", db.ContainerName, db.DBType))
	}
	return b.String()
}