hub v0.7.3: ingest agent backups + restore_tests (slice 6 Phase A)

Accept + persist the now-populated host-report backups/restore_tests. Mirror structs in
hostReportPayload; persisted via report_json (no schema change); a FAILED restore-test is
logged prominently (loudest DR signal). Shared golden updated byte-identical with
felhom-agent; bidirectional key-set tests added. Build/deploy deferred (backward-compatible).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-09 13:56:18 +02:00
parent 0c6ec27054
commit 41f2d2b5da
5 changed files with 169 additions and 33 deletions
+49 -2
View File
@@ -258,11 +258,43 @@ type hostReportPayload struct {
ControllerVersion string `json:"controller_version"`
} `json:"guests"`
StorageTargets []hostStorageTarget `json:"storage_targets"`
Backups []hostBackup `json:"backups"` // slice 6
RestoreTests []hostRestoreTest `json:"restore_tests"` // slice 6
Cloudflared struct {
Status string `json:"status"`
} `json:"cloudflared"`
}
// hostBackup / hostRestoreTest mirror the agent's hub.Backup / hub.RestoreTest wire
// contract field-for-field (slice 6, doc 03 §8). DUPLICATED contract — the golden stays
// byte-identical with felhom-agent's copy and the key-set tests guard drift. The hub
// persists these via report_json (no new columns this slice) and surfaces a FAILED
// restore-test prominently (the loudest DR signal). The rich backup policy is slice 10.
//
// hostBackup is one element of the host report's "backups" array. Every field is a plain
// JSON scalar or string slice; timestamps are carried as strings, so decoding into this
// struct does not validate their format.
type hostBackup struct {
TargetID string `json:"target_id"`
VMID int `json:"vmid"`
Archive string `json:"archive"`
Mode string `json:"mode"`
CrashConsistent bool `json:"crash_consistent"`
SizeBytes int64 `json:"size_bytes"`
// Success is false for a failed backup; the host-report handler logs a warning
// carrying TargetID, VMID and Error for such entries.
Success bool `json:"success"`
// Error is dropped from the marshalled JSON when empty (omitempty), so a key-set
// comparison against a successful golden entry does not exercise this key.
Error string `json:"error,omitempty"`
// StartedAt is kept as the raw wire string (not time.Time).
// NOTE(review): presumably an RFC 3339 UTC timestamp — confirm against the agent.
StartedAt string `json:"started_at"`
DurationSeconds float64 `json:"duration_seconds"`
// UncoveredVolumes has no omitempty, so a nil slice marshals as JSON null.
// NOTE(review): presumably volumes the backup did not include — confirm against the agent.
UncoveredVolumes []string `json:"uncovered_volumes"`
}
// hostRestoreTest is one element of the host report's "restore_tests" array and mirrors
// the agent's hub.RestoreTest wire contract. Timestamps are carried as plain strings, so
// decoding into this struct does not validate their format.
type hostRestoreTest struct {
SourceArchive string `json:"source_archive"`
SourceTier string `json:"source_tier"`
ScratchVMID int `json:"scratch_vmid"`
// Pass is false for a failed restore-test; the host-report handler logs a warning
// carrying SourceArchive, SourceTier, ScratchVMID and Error for such entries.
Pass bool `json:"pass"`
// NOTE(review): presumably a short description of what was verified — confirm
// the allowed values against the agent.
Verified string `json:"verified"`
// Error is dropped from the marshalled JSON when empty (omitempty), so a key-set
// comparison against a passing golden entry does not exercise this key.
Error string `json:"error,omitempty"`
// TestedAt is kept as the raw wire string (not time.Time).
TestedAt string `json:"tested_at"`
DurationSeconds float64 `json:"duration_seconds"`
}
// hostStorageTarget mirrors the agent's hub.StorageTarget wire contract field-for-field.
// It is a DUPLICATED contract (no shared types module yet); testdata/host-report.golden.json
// must stay byte-identical with felhom-agent's copy and the key-set test guards drift.
@@ -398,8 +430,23 @@ func (h *Handler) handleHostReport(w http.ResponseWriter, r *http.Request) {
hostID, disconnected, len(rep.StorageTargets))
}
h.logger.Printf("[INFO] host-report from %s (%d guests, %d storage targets, %d bytes)",
hostID, len(rep.Guests), len(rep.StorageTargets), len(body))
// restore_tests (slice 6): a FAILED self-restore-test is the loudest DR signal there is
// — surface it prominently. A backup whose vzdump failed is also worth a warning.
for _, rt := range rep.RestoreTests {
if !rt.Pass {
h.logger.Printf("[WARN] host %s restore-test FAILED: archive=%s tier=%s scratch=%d err=%q",
hostID, rt.SourceArchive, rt.SourceTier, rt.ScratchVMID, rt.Error)
}
}
for _, bk := range rep.Backups {
if !bk.Success {
h.logger.Printf("[WARN] host %s backup FAILED: target=%s vmid=%d err=%q",
hostID, bk.TargetID, bk.VMID, bk.Error)
}
}
h.logger.Printf("[INFO] host-report from %s (%d guests, %d storage targets, %d backups, %d restore-tests, %d bytes)",
hostID, len(rep.Guests), len(rep.StorageTargets), len(rep.Backups), len(rep.RestoreTests), len(body))
blocked := false
if cc, err := h.store.GetCustomerConfig(custID); err == nil && cc != nil && cc.Status == "blocked" {
+52
View File
@@ -286,6 +286,58 @@ func TestHostStorageTarget_GoldenContract(t *testing.T) {
assertSameStorageKeys(t, "storage_targets[0].thin_pool", goldenKeys["thin_pool"], mirrorKeys["thin_pool"])
}
// TestHostBackup_GoldenContract guards the duplicated "backups" wire contract against
// drift. It decodes the first backup of testdata/host-report.golden.json twice — once
// into a generic key map and once into the hostBackup mirror — re-marshals the mirror,
// and compares the two key sets with assertSameStorageKeys.
//
// Every JSON step is error-checked so that a malformed golden entry or an unmarshalable
// mirror fails with the real cause rather than as a confusing key-set mismatch against
// an empty map. Fields tagged omitempty that the golden entry leaves out are not
// exercised by this comparison.
func TestHostBackup_GoldenContract(t *testing.T) {
	raw, err := os.ReadFile("testdata/host-report.golden.json")
	if err != nil {
		t.Fatal(err)
	}
	var golden struct {
		Backups []json.RawMessage `json:"backups"`
	}
	if err := json.Unmarshal(raw, &golden); err != nil {
		t.Fatal(err)
	}
	if len(golden.Backups) == 0 {
		t.Fatal("golden has no backups to check")
	}

	// Key set as written in the golden file.
	var goldenKeys map[string]any
	if err := json.Unmarshal(golden.Backups[0], &goldenKeys); err != nil {
		t.Fatalf("golden backup is not a JSON object: %v", err)
	}

	// Key set the mirror struct produces after a decode/encode round trip.
	var mirror hostBackup
	if err := json.Unmarshal(golden.Backups[0], &mirror); err != nil {
		t.Fatalf("golden backup does not parse into the mirror: %v", err)
	}
	b, err := json.Marshal(mirror)
	if err != nil {
		t.Fatalf("marshal backup mirror: %v", err)
	}
	var mirrorKeys map[string]any
	if err := json.Unmarshal(b, &mirrorKeys); err != nil {
		t.Fatalf("re-parse marshalled backup mirror: %v", err)
	}

	assertSameStorageKeys(t, "backups[0]", goldenKeys, mirrorKeys)
}
// TestHostRestoreTest_GoldenContract guards the duplicated "restore_tests" wire contract
// against drift. It decodes the first restore-test of testdata/host-report.golden.json
// twice — once into a generic key map and once into the hostRestoreTest mirror —
// re-marshals the mirror, and compares the two key sets with assertSameStorageKeys.
//
// Every JSON step is error-checked so that a malformed golden entry or an unmarshalable
// mirror fails with the real cause rather than as a confusing key-set mismatch against
// an empty map. Fields tagged omitempty that the golden entry leaves out are not
// exercised by this comparison.
func TestHostRestoreTest_GoldenContract(t *testing.T) {
	raw, err := os.ReadFile("testdata/host-report.golden.json")
	if err != nil {
		t.Fatal(err)
	}
	var golden struct {
		RestoreTests []json.RawMessage `json:"restore_tests"`
	}
	if err := json.Unmarshal(raw, &golden); err != nil {
		t.Fatal(err)
	}
	if len(golden.RestoreTests) == 0 {
		t.Fatal("golden has no restore_tests to check")
	}

	// Key set as written in the golden file.
	var goldenKeys map[string]any
	if err := json.Unmarshal(golden.RestoreTests[0], &goldenKeys); err != nil {
		t.Fatalf("golden restore-test is not a JSON object: %v", err)
	}

	// Key set the mirror struct produces after a decode/encode round trip.
	var mirror hostRestoreTest
	if err := json.Unmarshal(golden.RestoreTests[0], &mirror); err != nil {
		t.Fatalf("golden restore-test does not parse into the mirror: %v", err)
	}
	b, err := json.Marshal(mirror)
	if err != nil {
		t.Fatalf("marshal restore-test mirror: %v", err)
	}
	var mirrorKeys map[string]any
	if err := json.Unmarshal(b, &mirrorKeys); err != nil {
		t.Fatalf("re-parse marshalled restore-test mirror: %v", err)
	}

	assertSameStorageKeys(t, "restore_tests[0]", goldenKeys, mirrorKeys)
}
func assertSameStorageKeys(t *testing.T, where string, a, b any) {
t.Helper()
ka, kb := sortedKeys(a), sortedKeys(b)
+25 -2
View File
@@ -86,8 +86,31 @@
}
}
],
"backups": [],
"restore_tests": [],
"backups": [
{
"target_id": "local",
"vmid": 9001,
"archive": "local:backup/vzdump-lxc-9001-2026_06_09-11_00_00.tar.zst",
"mode": "snapshot",
"crash_consistent": true,
"size_bytes": 524288000,
"success": true,
"started_at": "2026-06-09T11:00:00Z",
"duration_seconds": 42.5,
"uncovered_volumes": ["/mnt/bulk"]
}
],
"restore_tests": [
{
"source_archive": "local:backup/vzdump-lxc-9001-2026_06_09-11_00_00.tar.zst",
"source_tier": "local",
"scratch_vmid": 990000,
"pass": true,
"verified": "boot+running",
"tested_at": "2026-06-09T11:05:00Z",
"duration_seconds": 38.2
}
],
"pbs_snapshots": [],
"cloudflared": { "status": "active" },
"audit_tail": []