v0.15.5: Disaster recovery — Hub-based infra backup, auto-mount, restore UI
Complete DR implementation (TASK2.md Phases 1-4):

- Hub infra-backup push/pull endpoints (controller.yaml, disk layout, stacks)
- Fresh-deployment detection pulls config from Hub and auto-mounts drives by UUID
- Full-page restore UI with drive status, app table, and sequential restore
- docker-setup.sh shows DR instructions when customer_id is configured

New files: disk_layout.go, restore_scan.go, restore_app_linux.go, restore_drives_linux.go, infra_backup.go, infra_pull.go, handler_restore.go, restore.html

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2,6 +2,8 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
@@ -61,6 +63,76 @@ func main() {
|
||||
logger.Fatalf("[FATAL] Failed to load settings from %s: %v", settingsPath, err)
|
||||
}
|
||||
|
||||
// --- Detect fresh deployment (Phase 2+3: DR restore from Hub) ---
|
||||
var restorePlan *backup.RestorePlan
|
||||
isFreshDeployment := !fileExists(settingsPath)
|
||||
if isFreshDeployment && cfg.Hub.Enabled && cfg.Hub.URL != "" {
|
||||
logger.Println("[INFO] Fresh deployment detected — checking Hub for infra backup")
|
||||
|
||||
ib, pullErr := report.PullInfraBackup(cfg.Hub.URL, cfg.Hub.APIKey, cfg.Customer.ID)
|
||||
if pullErr != nil {
|
||||
logger.Printf("[WARN] Could not reach Hub for infra backup: %v", pullErr)
|
||||
} else if ib != nil {
|
||||
logger.Printf("[INFO] Found infra backup on Hub: %s (%s), %d stacks, synced %s",
|
||||
ib.Domain, ib.CustomerID, len(ib.DeployedStacks), ib.Timestamp)
|
||||
|
||||
// Restore restic passwords
|
||||
restorePasswordsFromHub(ib, cfg, sett, logger)
|
||||
|
||||
// Restore settings.json from Hub backup
|
||||
restoreSettingsFromHub(ib, cfg, logger)
|
||||
|
||||
// Re-load settings (now from restored file)
|
||||
if restoredSett, loadErr := settings.Load(settingsPath, logger); loadErr == nil {
|
||||
sett = restoredSett
|
||||
logger.Println("[INFO] Settings reloaded after Hub restore")
|
||||
}
|
||||
|
||||
// Mount drives using stored disk layout
|
||||
mountCtx, mountCancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||
mountedPaths, mountErr := backup.MountDrivesFromLayout(mountCtx, ib.DiskLayout, logger)
|
||||
mountCancel()
|
||||
if mountErr != nil {
|
||||
logger.Printf("[WARN] Drive mounting error: %v", mountErr)
|
||||
} else if len(mountedPaths) > 0 {
|
||||
logger.Printf("[INFO] Mounted %d drives from Hub disk layout: %v", len(mountedPaths), mountedPaths)
|
||||
} else {
|
||||
logger.Println("[INFO] No matching drives found to mount from Hub disk layout")
|
||||
}
|
||||
|
||||
// Phase 3: Scan mounted drives for backup data and build restore plan
|
||||
if len(ib.DeployedStacks) > 0 {
|
||||
// Collect mount paths from disk layout
|
||||
var drivePaths []string
|
||||
for _, dm := range ib.DiskLayout.Mounts {
|
||||
if dm.MountPoint != "" {
|
||||
drivePaths = append(drivePaths, dm.MountPoint)
|
||||
}
|
||||
}
|
||||
|
||||
// Convert report stacks to backup scan format
|
||||
var infraStacks []backup.InfraStackInfo
|
||||
for _, s := range ib.DeployedStacks {
|
||||
infraStacks = append(infraStacks, backup.InfraStackInfo{
|
||||
Name: s.Name,
|
||||
DisplayName: s.DisplayName,
|
||||
HDDPath: s.HDDPath,
|
||||
NeedsHDD: s.NeedsHDD,
|
||||
})
|
||||
}
|
||||
|
||||
restorePlan = backup.ScanDrivesForBackups(drivePaths, infraStacks, logger)
|
||||
restorePlan.CustomerID = ib.CustomerID
|
||||
restorePlan.Domain = ib.Domain
|
||||
restorePlan.Timestamp = ib.Timestamp
|
||||
|
||||
logger.Printf("[INFO] DR restore plan ready: %d apps to restore", len(restorePlan.Apps))
|
||||
}
|
||||
} else {
|
||||
logger.Println("[INFO] No infra backup found on Hub for this customer")
|
||||
}
|
||||
}
|
||||
|
||||
// --- Auto-discover storage paths from deployed apps ---
|
||||
discoveredPaths := discoverHDDPaths(cfg.Paths.StacksDir, logger)
|
||||
sett.AutoDiscoverStoragePaths(discoveredPaths, cfg.Paths.HDDPath, logger)
|
||||
@@ -183,6 +255,12 @@ func main() {
|
||||
return nil
|
||||
})
|
||||
|
||||
// --- Central hub pusher (declared early so backup closure can reference it) ---
|
||||
var hubPusher *report.Pusher
|
||||
if cfg.Hub.URL != "" && cfg.Hub.APIKey != "" {
|
||||
hubPusher = report.NewPusher(&cfg.Hub, logger)
|
||||
}
|
||||
|
||||
// Backup daily jobs
|
||||
if cfg.Backup.Enabled && backupMgr != nil {
|
||||
sched.Daily("db-dump", cfg.Backup.DBDumpSchedule, func(ctx context.Context) error {
|
||||
@@ -209,6 +287,10 @@ func main() {
|
||||
}
|
||||
}
|
||||
}
|
||||
// Push infra backup to Hub after all backup tiers complete
|
||||
if hubPusher != nil && cfg.Hub.Enabled {
|
||||
go pushInfraBackup(cfg, sett, stackProv, hubPusher, logger)
|
||||
}
|
||||
return err
|
||||
})
|
||||
|
||||
@@ -245,10 +327,8 @@ func main() {
|
||||
})
|
||||
}
|
||||
|
||||
// --- Central hub reporting ---
|
||||
var hubPusher *report.Pusher
|
||||
if cfg.Hub.URL != "" && cfg.Hub.APIKey != "" {
|
||||
hubPusher = report.NewPusher(&cfg.Hub, logger)
|
||||
// --- Central hub reporting schedule ---
|
||||
if hubPusher != nil {
|
||||
if cfg.Hub.Enabled {
|
||||
pushInterval, err := time.ParseDuration(cfg.Hub.PushInterval)
|
||||
if err != nil {
|
||||
@@ -305,6 +385,8 @@ func main() {
|
||||
if pushErr != nil {
|
||||
logger.Printf("[WARN] Startup hub report failed after 3 attempts — next scheduled push in %s", cfg.Hub.PushInterval)
|
||||
}
|
||||
// Also push infra backup on startup
|
||||
go pushInfraBackup(cfg, sett, stackProv, hubPusher, logger)
|
||||
} else {
|
||||
// Send a minimal "disabled" notification so hub knows reporting is intentionally off
|
||||
r := &report.Report{
|
||||
@@ -356,6 +438,12 @@ func main() {
|
||||
// --- Initialize web server ---
|
||||
webServer := web.NewServer(cfg, stackMgr, cpuCollector, backupMgr, crossDriveRunner, sched, sett, alertMgr, notifier, logger, Version)
|
||||
|
||||
// Phase 3: Set DR restore mode if a restore plan was built
|
||||
if restorePlan != nil && len(restorePlan.Apps) > 0 {
|
||||
webServer.SetRestoreState(restorePlan)
|
||||
logger.Println("[INFO] DR restore mode activated — all web routes redirect to /restore")
|
||||
}
|
||||
|
||||
// --- Build HTTP mux ---
|
||||
mux := http.NewServeMux()
|
||||
|
||||
@@ -491,6 +579,84 @@ func (a *stackAdapter) GetStackHDDPath(name string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// pushInfraBackup builds and sends the infrastructure snapshot to the Hub.
|
||||
func pushInfraBackup(cfg *config.Config, sett *settings.Settings,
|
||||
stackProv *stackAdapter, pusher *report.Pusher, logger *log.Logger) {
|
||||
|
||||
ib, err := report.BuildInfraBackup(
|
||||
cfg.Customer.ID, cfg.Customer.Domain, Version,
|
||||
"/opt/docker/felhom-controller/controller.yaml",
|
||||
filepath.Join(cfg.Paths.DataDir, "settings.json"),
|
||||
cfg.Backup.ResticPasswordFile,
|
||||
cfg.Paths.SystemDataPath,
|
||||
sett, stackProv,
|
||||
)
|
||||
if err != nil {
|
||||
logger.Printf("[WARN] Failed to build infra backup: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
data, err := json.Marshal(ib)
|
||||
if err != nil {
|
||||
logger.Printf("[WARN] Failed to marshal infra backup: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if err := pusher.PushInfraBackup(data); err != nil {
|
||||
logger.Printf("[WARN] Failed to push infra backup to Hub: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// fileExists reports whether os.Stat succeeds for path, so it is true for
// files and directories alike. Any stat error yields false.
func fileExists(path string) bool {
	if _, statErr := os.Stat(path); statErr != nil {
		return false
	}
	return true
}
|
||||
|
||||
// restorePasswordsFromHub restores restic passwords from a Hub infra backup.
|
||||
func restorePasswordsFromHub(ib *report.InfraBackup, cfg *config.Config,
|
||||
sett *settings.Settings, logger *log.Logger) {
|
||||
|
||||
if ib.ResticPassword != "" {
|
||||
decoded, err := base64.StdEncoding.DecodeString(ib.ResticPassword)
|
||||
if err == nil && len(decoded) > 0 {
|
||||
dir := filepath.Dir(cfg.Backup.ResticPasswordFile)
|
||||
os.MkdirAll(dir, 0700)
|
||||
if err := os.WriteFile(cfg.Backup.ResticPasswordFile, decoded, 0600); err == nil {
|
||||
logger.Println("[INFO] Primary restic password restored from Hub")
|
||||
} else {
|
||||
logger.Printf("[WARN] Failed to write restic password file: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ib.CrossDrivePassword != "" {
|
||||
if err := sett.SetCrossDriveResticPassword(ib.CrossDrivePassword); err == nil {
|
||||
logger.Println("[INFO] Cross-drive restic password restored from Hub")
|
||||
} else {
|
||||
logger.Printf("[WARN] Failed to set cross-drive password: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// restoreSettingsFromHub restores settings.json from a Hub infra backup.
|
||||
func restoreSettingsFromHub(ib *report.InfraBackup, cfg *config.Config, logger *log.Logger) {
|
||||
if ib.SettingsJSONB64 == "" {
|
||||
return
|
||||
}
|
||||
decoded, err := base64.StdEncoding.DecodeString(ib.SettingsJSONB64)
|
||||
if err != nil {
|
||||
logger.Printf("[WARN] Failed to decode settings from Hub: %v", err)
|
||||
return
|
||||
}
|
||||
settingsPath := filepath.Join(cfg.Paths.DataDir, "settings.json")
|
||||
if err := os.WriteFile(settingsPath, decoded, 0600); err != nil {
|
||||
logger.Printf("[WARN] Failed to write restored settings.json: %v", err)
|
||||
} else {
|
||||
logger.Println("[INFO] Settings restored from Hub backup")
|
||||
}
|
||||
}
|
||||
|
||||
// discoverHDDPaths scans deployed apps' app.yaml for HDD_PATH env values.
|
||||
func discoverHDDPaths(stacksDir string, logger *log.Logger) []string {
|
||||
entries, err := os.ReadDir(stacksDir)
|
||||
|
||||
Reference in New Issue
Block a user