Files
deploy-felhom-compose/controller/internal/backup/local_infra.go
T
admin 8e61cd7ec4 feat: comprehensive INFO/WARN/ERROR logging across all controller modules
Add structured operational logging at INFO, WARN, and ERROR levels to
every controller module. Standardize custom prefixes ([GEO], [SCHED],
[SYNC]) to use [INFO/WARN/ERROR] [module] format. Fix misleveled logs
(WARN->ERROR for data loss scenarios, WARN->INFO for routine operations).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 19:58:27 +01:00

369 lines
11 KiB
Go

package backup
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"log"
"os"
"path/filepath"
"sort"
"strings"
"time"
)
// Limits for the on-disk infra backup format and its local retention.
const (
	// MaxSchemaVersion is the newest infra backup schema version this
	// controller is able to read.
	MaxSchemaVersion = 1

	// maxLocalHistory caps how many previous backup versions are retained
	// in each drive's history directory.
	maxLocalHistory = 5
)
// InfraMetadata is the lightweight metadata file written alongside backup.json.
// It is serialized as metadata.json and read back first when a backup is
// loaded, so that the schema version and checksum can be validated before the
// backup payload is trusted.
type InfraMetadata struct {
// SchemaVersion is compared against MaxSchemaVersion on read; a larger value is rejected.
SchemaVersion int `json:"schema_version"`
// Timestamp is parsed as RFC3339 when the backup is rotated into history.
Timestamp string `json:"timestamp"`
// CustomerID is stored verbatim as supplied by the writer.
CustomerID string `json:"customer_id"`
// ControllerVersion is stored verbatim as supplied by the writer.
ControllerVersion string `json:"controller_version"`
// Checksum is verified against the backup payload on every read.
Checksum string `json:"checksum"` // SHA256 hex of backup.json
}
// WriteLocalInfraBackup stores backupJSON, together with a freshly built
// metadata.json (schema version, timestamp, customer ID, controller version and
// the SHA256 of the payload), in the infra backup directory of every drive.
//
// The operation is best-effort: a failure on one drive is logged as a warning
// and the remaining drives are still attempted. Nothing is returned; the final
// log line reports how many drives were written successfully. When debug is
// true, additional per-drive [DEBUG] lines are emitted.
func WriteLocalInfraBackup(backupJSON []byte, customerID, controllerVersion, timestamp string, drives []string, logger *log.Logger, debug bool) {
	total := len(drives)
	if total == 0 {
		logger.Printf("[DEBUG] No drives configured for local infra backup")
		return
	}

	// trace forwards to the logger only when debug output was requested.
	trace := func(format string, args ...interface{}) {
		if debug {
			logger.Printf(format, args...)
		}
	}
	trace("[DEBUG] WriteLocalInfraBackup: payload size=%d bytes, %d target drive(s): %v", len(backupJSON), total, drives)

	// The metadata carries the payload digest so readers can verify integrity.
	digest := sha256.Sum256(backupJSON)
	metaJSON, err := json.MarshalIndent(InfraMetadata{
		SchemaVersion:     1,
		Timestamp:         timestamp,
		CustomerID:        customerID,
		ControllerVersion: controllerVersion,
		Checksum:          hex.EncodeToString(digest[:]),
	}, "", " ")
	if err != nil {
		logger.Printf("[ERROR] Local infra backup: failed to marshal metadata: %v", err)
		return
	}

	succeeded := 0
	for _, drive := range drives {
		target := InfraBackupDir(drive)
		trace("[DEBUG] WriteLocalInfraBackup: writing to drive=%s, dir=%s", drive, target)

		writeErr := writeInfraToDir(target, backupJSON, metaJSON, logger)
		if writeErr != nil {
			logger.Printf("[WARN] Local infra backup: failed to write to %s: %v", drive, writeErr)
			continue
		}

		trace("[DEBUG] WriteLocalInfraBackup: write OK to %s", drive)
		succeeded++
	}

	logger.Printf("[INFO] Local infra backup written to %d/%d drive(s)", succeeded, total)
}
// writeInfraToDir ensures dir exists, hands the backup currently stored there
// to rotateToHistory, and then writes the new backup.json followed by the new
// metadata.json via atomicWrite.
//
// Rotation is best-effort and never fails the write. The returned error names
// the step (directory creation or one of the two files) that failed.
func writeInfraToDir(dir string, backupData, metaData []byte, logger *log.Logger) error {
	mkErr := os.MkdirAll(dir, 0700)
	if mkErr != nil {
		return fmt.Errorf("creating dir: %w", mkErr)
	}

	// Preserve the previous version before it gets overwritten.
	rotateToHistory(dir, logger)

	// Payload first, metadata second.
	backupErr := atomicWrite(filepath.Join(dir, "backup.json"), backupData, 0600)
	if backupErr != nil {
		return fmt.Errorf("writing backup.json: %w", backupErr)
	}

	metaErr := atomicWrite(filepath.Join(dir, "metadata.json"), metaData, 0600)
	if metaErr != nil {
		return fmt.Errorf("writing metadata.json: %w", metaErr)
	}

	return nil
}
// rotateToHistory copies the current backup.json + metadata.json of dir into
// history/{timestamp}-backup.json and history/{timestamp}-metadata.json, then
// prunes the history down to maxLocalHistory versions.
//
// The current files are copied, not moved: they stay in place and are expected
// to be overwritten by the caller afterwards. The {timestamp} prefix is derived
// from the timestamp recorded in metadata.json; if that cannot be parsed, the
// modification time of metadata.json is used, and as a last resort the current
// time.
//
// Rotation is best-effort. Nothing is returned; failures are reported as
// warnings when logger is non-nil and are otherwise ignored.
func rotateToHistory(dir string, logger *log.Logger) {
	const stampLayout = "20060102T150405Z"

	// warnf tolerates a nil logger.
	warnf := func(format string, args ...interface{}) {
		if logger != nil {
			logger.Printf(format, args...)
		}
	}

	metaPath := filepath.Join(dir, "metadata.json")
	backupPath := filepath.Join(dir, "backup.json")

	// Read current metadata to get the timestamp.
	metaData, err := os.ReadFile(metaPath)
	if err != nil {
		// A missing file simply means there is nothing to rotate yet.
		if !os.IsNotExist(err) {
			warnf("[WARN] Local infra history: cannot read current metadata.json, skipping rotation: %v", err)
		}
		return
	}
	var meta InfraMetadata
	if err := json.Unmarshal(metaData, &meta); err != nil {
		warnf("[WARN] Local infra history: cannot parse current metadata.json, skipping rotation: %v", err)
		return
	}

	// Parse timestamp, fall back to file mtime, then to "now".
	ts := sanitizeTimestamp(meta.Timestamp)
	if ts == "" {
		if fi, err := os.Stat(metaPath); err == nil {
			ts = fi.ModTime().UTC().Format(stampLayout)
		} else {
			ts = time.Now().UTC().Format(stampLayout)
		}
	}

	histDir := filepath.Join(dir, "history")
	if err := os.MkdirAll(histDir, 0700); err != nil {
		warnf("[WARN] Local infra history: cannot create history dir: %v", err)
		return
	}

	histBackup := filepath.Join(histDir, ts+"-backup.json")
	histMeta := filepath.Join(histDir, ts+"-metadata.json")

	// Copy rather than rename to avoid cross-device issues. Failures are
	// logged but do not stop the metadata copy or the pruning below.
	data, err := os.ReadFile(backupPath)
	if err != nil {
		warnf("[WARN] Local infra history: cannot read current backup.json: %v", err)
	} else if err := os.WriteFile(histBackup, data, 0600); err != nil {
		warnf("[WARN] Local infra history: cannot write %s: %v", histBackup, err)
	}
	if err := os.WriteFile(histMeta, metaData, 0600); err != nil {
		warnf("[WARN] Local infra history: cannot write %s: %v", histMeta, err)
	}

	// Prune old history entries.
	pruneLocalHistory(histDir, maxLocalHistory, logger)
}
// pruneLocalHistory keeps at most maxKeep metadata+backup pairs in histDir,
// deleting the oldest ones.
//
// A version is identified by the prefix of its "<prefix>-metadata.json" file.
// Prefixes are compared as strings, so the oldest version is the one that sorts
// first. A negative maxKeep is treated as 0 (remove everything). Files that are
// already gone are not treated as errors; other removal failures are logged as
// warnings and the affected version is not counted as removed. logger may be
// nil, in which case nothing is logged. An unreadable histDir is a no-op.
func pruneLocalHistory(histDir string, maxKeep int, logger *log.Logger) {
	const metaSuffix = "-metadata.json"

	// Guard against a negative limit, which would otherwise push the delete
	// count past the number of known versions.
	if maxKeep < 0 {
		maxKeep = 0
	}

	entries, err := os.ReadDir(histDir)
	if err != nil {
		return
	}

	// Collect version prefixes; file names within a directory are unique, so
	// each prefix appears once.
	var sorted []string
	for _, e := range entries {
		if name := e.Name(); strings.HasSuffix(name, metaSuffix) {
			sorted = append(sorted, strings.TrimSuffix(name, metaSuffix))
		}
	}
	if len(sorted) <= maxKeep {
		return
	}

	// Ascending order puts the oldest versions first.
	sort.Strings(sorted)

	toDelete := len(sorted) - maxKeep
	removed := 0
	for _, ts := range sorted[:toDelete] {
		failed := false
		for _, suffix := range []string{"-backup.json", metaSuffix} {
			err := os.Remove(filepath.Join(histDir, ts+suffix))
			if err != nil && !os.IsNotExist(err) {
				failed = true
				if logger != nil {
					logger.Printf("[WARN] Local infra history: cannot remove %s%s: %v", ts, suffix, err)
				}
			}
		}
		if failed {
			continue
		}
		removed++
		if logger != nil {
			logger.Printf("[DEBUG] Local infra history: pruned old version %s", ts)
		}
	}

	if logger != nil && removed > 0 {
		logger.Printf("[INFO] [backup] Pruning old backup versions: kept %d, removed %d", len(sorted)-removed, removed)
	}
}
// sanitizeTimestamp turns an RFC3339 (or RFC3339Nano) timestamp into the
// compact, filename-safe UTC form "20060102T150405Z". It returns the empty
// string when ts matches neither layout.
func sanitizeTimestamp(ts string) string {
	// Try the layouts in order; the first one that parses wins.
	for _, layout := range [...]string{time.RFC3339, time.RFC3339Nano} {
		if parsed, err := time.Parse(layout, ts); err == nil {
			return parsed.UTC().Format("20060102T150405Z")
		}
	}
	return ""
}
// atomicWrite writes data to "<path>.tmp", flushes it to stable storage, and
// then renames it over path, so readers see either the old or the new content.
//
// perm is applied when the temporary file is created (subject to umask). The
// file is synced before the rename; without that, a crash or power loss shortly
// after the rename could leave path pointing at an empty or truncated file. On
// any failure the temporary file is removed and the first error is returned;
// an existing file at path is left untouched.
func atomicWrite(path string, data []byte, perm os.FileMode) error {
	tmp := path + ".tmp"

	err := writeAndSync(tmp, data, perm)
	if err == nil {
		err = os.Rename(tmp, path)
	}
	if err != nil {
		os.Remove(tmp) // best-effort cleanup; the original error is what matters
		return err
	}
	return nil
}

// writeAndSync creates (or truncates) name, writes data, fsyncs and closes it,
// returning the first error encountered.
func writeAndSync(name string, data []byte, perm os.FileMode) error {
	f, err := os.OpenFile(name, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, perm)
	if err != nil {
		return err
	}
	_, err = f.Write(data)
	if err == nil {
		err = f.Sync()
	}
	// Always close; report the close error only if nothing failed earlier.
	if closeErr := f.Close(); err == nil {
		err = closeErr
	}
	return err
}
// ReadLocalInfraBackup loads the current infra backup stored under mountPath.
// It resolves the backup directory with InfraBackupDir and delegates reading
// and validation to readInfraBackupFromDir, returning the raw backup JSON, the
// parsed metadata, and any error from that call.
func ReadLocalInfraBackup(mountPath string) ([]byte, *InfraMetadata, error) {
	return readInfraBackupFromDir(InfraBackupDir(mountPath))
}
// ReadLocalInfraBackupFromHistory reads a specific historical version by its
// timestamp prefix, i.e. the files "<historyPrefix>-backup.json" and
// "<historyPrefix>-metadata.json" inside the history directory of mountPath.
//
// historyPrefix must be a plain file-name fragment: an empty prefix or one
// containing a path separator is rejected with an error, so the lookup cannot
// be redirected outside the history directory. Otherwise the result is that of
// readInfraBackupFromFiles: raw backup JSON, metadata, and any read/validation
// error.
func ReadLocalInfraBackupFromHistory(mountPath, historyPrefix string) ([]byte, *InfraMetadata, error) {
	// Both separator styles are refused regardless of the host OS.
	if historyPrefix == "" || strings.ContainsAny(historyPrefix, `/\`) {
		return nil, nil, fmt.Errorf("invalid history prefix %q", historyPrefix)
	}

	histDir := InfraBackupHistoryDir(mountPath)
	metaPath := filepath.Join(histDir, historyPrefix+"-metadata.json")
	backupPath := filepath.Join(histDir, historyPrefix+"-backup.json")
	return readInfraBackupFromFiles(backupPath, metaPath)
}
// LocalBackupVersion holds summary info for a historical backup version found on a drive.
// Timestamp, CustomerID and ControllerVersion are copied from the version's
// metadata when it could be parsed; the counts are extracted from the backup
// payload only when the version passed validation.
type LocalBackupVersion struct {
Timestamp string `json:"timestamp"`
CustomerID string `json:"customer_id"`
ControllerVersion string `json:"controller_version"`
// IntegrityOK is true when the version was read and validated without error.
IntegrityOK bool `json:"integrity_ok"`
// Error carries the read/validation error text when IntegrityOK is false.
Error string `json:"error,omitempty"`
// StackCount, StackNames and DiskCount are filled by ParseBackupCounts.
StackCount int `json:"stack_count"`
StackNames []string `json:"stack_names,omitempty"`
DiskCount int `json:"disk_count"`
HistoryFile string `json:"history_file,omitempty"` // empty = current, timestamp prefix for history
}
// ReadLocalInfraHistory summarizes every historical backup version found in the
// history/ directory of mountPath, newest first. The current backup is NOT part
// of the result (use ReadLocalInfraBackup for that).
//
// A version is discovered through its "<prefix>-metadata.json" file. Versions
// that fail to load are still listed, with IntegrityOK=false and Error set;
// metadata fields are filled whenever the metadata itself could be parsed.
// Returns nil when the history directory cannot be read or holds no versions.
func ReadLocalInfraHistory(mountPath string) []LocalBackupVersion {
	const metaSuffix = "-metadata.json"

	histDir := InfraBackupHistoryDir(mountPath)
	listing, err := os.ReadDir(histDir)
	if err != nil {
		return nil
	}

	// Gather each version prefix once.
	known := make(map[string]struct{})
	var prefixes []string
	for _, entry := range listing {
		fileName := entry.Name()
		if !strings.HasSuffix(fileName, metaSuffix) {
			continue
		}
		prefix := strings.TrimSuffix(fileName, metaSuffix)
		if _, dup := known[prefix]; dup {
			continue
		}
		known[prefix] = struct{}{}
		prefixes = append(prefixes, prefix)
	}

	// Sort ascending, then walk backwards to emit newest first.
	sort.Strings(prefixes)

	var versions []LocalBackupVersion
	for i := len(prefixes) - 1; i >= 0; i-- {
		prefix := prefixes[i]
		payload, meta, readErr := readInfraBackupFromFiles(
			filepath.Join(histDir, prefix+"-backup.json"),
			filepath.Join(histDir, prefix+metaSuffix),
		)

		summary := LocalBackupVersion{
			HistoryFile: prefix,
			IntegrityOK: readErr == nil,
		}
		if meta != nil {
			summary.Timestamp = meta.Timestamp
			summary.CustomerID = meta.CustomerID
			summary.ControllerVersion = meta.ControllerVersion
		}
		if readErr == nil {
			ParseBackupCounts(payload, &summary.StackCount, &summary.StackNames, &summary.DiskCount)
		} else {
			summary.Error = readErr.Error()
		}
		versions = append(versions, summary)
	}
	return versions
}
// ParseBackupCounts extracts stack/disk counts from backup JSON (for display purposes).
//
// It reads the "deployed_stacks" array and the "disk_layout.mounts" array of
// backupJSON and reports through the output pointers:
//   - stackCount: number of deployed stacks
//   - stackNames: one name per stack appended to the slice, using display_name
//     and falling back to name when display_name is empty
//   - diskCount: number of mounts
//
// Any of the three pointers may be nil, in which case that value is skipped.
// If backupJSON cannot be parsed, all outputs are left untouched.
func ParseBackupCounts(backupJSON []byte, stackCount *int, stackNames *[]string, diskCount *int) {
	var parsed struct {
		DeployedStacks []struct {
			Name        string `json:"name"`
			DisplayName string `json:"display_name"`
		} `json:"deployed_stacks"`
		DiskLayout struct {
			Mounts []json.RawMessage `json:"mounts"`
		} `json:"disk_layout"`
	}
	if err := json.Unmarshal(backupJSON, &parsed); err != nil {
		return
	}

	if stackCount != nil {
		*stackCount = len(parsed.DeployedStacks)
	}
	if diskCount != nil {
		*diskCount = len(parsed.DiskLayout.Mounts)
	}
	if stackNames != nil {
		for _, s := range parsed.DeployedStacks {
			name := s.DisplayName
			if name == "" {
				name = s.Name
			}
			*stackNames = append(*stackNames, name)
		}
	}
}
// readInfraBackupFromDir reads and validates the backup.json / metadata.json
// pair located directly inside dir.
func readInfraBackupFromDir(dir string) ([]byte, *InfraMetadata, error) {
	return readInfraBackupFromFiles(
		filepath.Join(dir, "backup.json"),
		filepath.Join(dir, "metadata.json"),
	)
}
// readInfraBackupFromFiles loads the metadata at metaPath and the backup
// payload at backupPath and validates them against each other.
//
// Steps, in order: read and parse the metadata, reject schema versions newer
// than MaxSchemaVersion, read the payload, and compare its SHA256 hex digest
// with the checksum recorded in the metadata.
//
// The payload is returned only on success. The metadata is returned (non-nil)
// alongside the error for every failure that occurs after it was parsed, so
// callers can still display it; it is nil when the metadata itself could not
// be read or parsed.
func readInfraBackupFromFiles(backupPath, metaPath string) ([]byte, *InfraMetadata, error) {
	rawMeta, err := os.ReadFile(metaPath)
	if err != nil {
		return nil, nil, fmt.Errorf("reading metadata.json: %w", err)
	}

	info := new(InfraMetadata)
	if err = json.Unmarshal(rawMeta, info); err != nil {
		return nil, nil, fmt.Errorf("parsing metadata.json: %w", err)
	}

	// Refuse formats written by a newer controller.
	if info.SchemaVersion > MaxSchemaVersion {
		return nil, info, fmt.Errorf("backup schema version %d is newer than supported version %d — upgrade the controller", info.SchemaVersion, MaxSchemaVersion)
	}

	payload, err := os.ReadFile(backupPath)
	if err != nil {
		return nil, info, fmt.Errorf("reading backup.json: %w", err)
	}

	// Integrity check: the payload digest must match the recorded checksum.
	digest := sha256.Sum256(payload)
	if got := hex.EncodeToString(digest[:]); got != info.Checksum {
		return nil, info, fmt.Errorf("checksum mismatch: expected %s, got %s", info.Checksum, got)
	}

	return payload, info, nil
}