From 41f2d2b5dafe0b9405c425270657ac32507bad59 Mon Sep 17 00:00:00 2001 From: kisfenyo Date: Tue, 9 Jun 2026 13:56:18 +0200 Subject: [PATCH] hub v0.7.3: ingest agent backups + restore_tests (slice 6 Phase A) Accept + persist the now-populated host-report backups/restore_tests. Mirror structs in hostReportPayload; persisted via report_json (no schema change); a FAILED restore-test is logged prominently (loudest DR signal). Shared golden updated byte-identical with felhom-agent; bidirectional key-set tests added. Build/deploy deferred (backward-compatible). Co-Authored-By: Claude Opus 4.8 (1M context) --- REPORT.md | 49 +++++++---------- hub/CHANGELOG.md | 23 ++++++++ hub/internal/api/handler.go | 51 +++++++++++++++++- hub/internal/api/host_test.go | 52 +++++++++++++++++++ .../api/testdata/host-report.golden.json | 27 +++++++++- 5 files changed, 169 insertions(+), 33 deletions(-) diff --git a/REPORT.md b/REPORT.md index 1cc5216..2192fea 100644 --- a/REPORT.md +++ b/REPORT.md @@ -4,44 +4,35 @@ --- -# REPORT — Hub: ingest agent `storage_targets` (v0.7.2) (2026-06-09) +# REPORT — Hub: ingest agent backups + restore_tests (v0.7.3) (2026-06-09) ## Outcome -**Code committed + pushed (changelogged as `v0.7.2`); image build/deploy deferred.** The -felhom-agent slice-5 Phase A work populates the host-report's `storage_targets` (previously a -defined-but-empty stub). This change is the hub half: accept and persist them. Deliberately -minimal — the authoritative storage manifest (desired class/role/policy/creds) is hub-owned and -arrives at slice 10; this slice only mirrors what the agent observes. - -> **Deploy note:** the live hub is **v0.6.3**, behind the changelog (v0.7.0/v0.7.1 were -> committed but appear undeployed). Building+deploying v0.7.2 would also ship those intervening -> versions, so the deploy is deferred to an operator decision — flagged, not silently done. The -> change is backward-compatible (an agent sending `storage_targets: []` is accepted unchanged), -> so the live hub keeps ingesting host-reports fine until then. +**Code committed + pushed (changelogged as `v0.7.3`); image build/deploy deferred to an +operator decision.** The felhom-agent slice-6 Phase A work populates the host-report's +`backups` + `restore_tests`. This change is the hub half: accept + persist them. Minimal — +the authoritative backup policy is hub-owned (slice 10); this mirrors what the agent reports. ## What landed (`hub/internal/api/handler.go`, `host_test.go`, golden) -- `hostReportPayload` now parses `storage_targets` via a `hostStorageTarget` mirror struct that - matches the agent's `hub.StorageTarget` wire contract field-for-field (name/type/durable_id/ - state/reachable/usage/content/mount/class_hint/role/`thin_pool`/`smart`). -- Persistence: the targets are stored verbatim in the existing `report_json` column (no schema - change / no migration). The handler counts them and logs a `[WARN]` listing disconnected - targets — the storage analog of host-down visibility. -- The shared `testdata/host-report.golden.json` now carries two populated targets (an lvmthin - with `thin_pool`, a usb) and is **byte-identical** with felhom-agent's copy. -- Tests: `TestHostStorageTarget_GoldenContract` is the hub half of the bidirectional key-set - test (round-trips the golden through the mirror, asserts exact key match); - `TestHostReport_GoldenContract` also asserts the targets persist + parse back. `go test - ./internal/api/ ./internal/store/` is green. +- `hostReportPayload` gains `hostBackup` / `hostRestoreTest` mirror structs matching the + agent's `hub.Backup` / `hub.RestoreTest` field-for-field. +- Persistence via the existing `report_json` column (no schema change). The handler logs a + **FAILED restore-test prominently** (`[WARN]` — the loudest DR signal) and a failed backup; + the host-report info line counts backups + restore-tests. +- The shared `testdata/host-report.golden.json` now carries a populated `backups[0]` / + `restore_tests[0]`, **byte-identical** with felhom-agent's copy. +- `TestHostBackup_GoldenContract` / `TestHostRestoreTest_GoldenContract` are the hub half of + the bidirectional key-set test. `go test ./internal/api/ ./internal/store/` is green. ## Backward compatibility -An older agent that sends `storage_targets: []` (or omits the field) is accepted unchanged. -The legacy controller report path is untouched (frozen until the slice-10 cutover). +An agent that omits/empties `backups`/`restore_tests` is accepted unchanged. The legacy +controller report path is untouched (frozen until the slice-10 cutover). ## Deploy -Standard hub flow (build server 192.168.0.180): `./build.sh v0.7.2 --push` then deploy. If the -hub deployment is ArgoCD-managed, update the image tag via the managed path rather than a bare -`kubectl set image` (drift-correction would revert it). +> Per the GitOps flow (`CLAUDE.md`): build+push `gitea.dooplex.hu/admin/felhom-hub:v0.7.3`, +> bump `manifests/hub.yaml`, commit, then sync the `felhom` ArgoCD app. **Deferred** at this +> checkpoint — the change is backward-compatible, so the live hub (v0.7.2) keeps ingesting +> host-reports fine until then. diff --git a/hub/CHANGELOG.md b/hub/CHANGELOG.md index d813330..7ab5869 100644 --- a/hub/CHANGELOG.md +++ b/hub/CHANGELOG.md @@ -1,5 +1,28 @@ # Felhom Hub — Changelog +## v0.7.3 — ingest agent backups + restore_tests (slice 6 Phase A) (2026-06-09) + +The agent's slice-6 work populates the host-report's `backups` + `restore_tests` (the +self-restore-test result). This is the hub half: accept + persist them. Minimal — the rich +backup policy (schedule/retention/target selection) is hub-manifest-owned and lands at +slice 10; this slice only mirrors what the agent reports. + +### Added +- **`hostBackup` / `hostRestoreTest`** mirror structs in `hostReportPayload` + (`internal/api/handler.go`) — field-for-field with the agent's `hub.Backup` / + `hub.RestoreTest` wire contract. Persisted verbatim in `report_json` (no new columns — + slice-5 precedent). +- **A FAILED restore-test is logged prominently** (`[WARN]`, the loudest DR signal there is); + a failed backup is logged too. The `host-report` info line now counts backups + restore-tests. +- **`testdata/host-report.golden.json`** updated with a populated `backups[0]` / + `restore_tests[0]`, kept **byte-identical** with felhom-agent's copy. +- **`TestHostBackup_GoldenContract` / `TestHostRestoreTest_GoldenContract`** — the hub half of + the bidirectional key-set test (round-trip the golden through the mirror, assert exact keys). + +### Notes +- Backward-compatible: an agent that omits/empties these is accepted unchanged. The legacy + controller report path is untouched (frozen until slice 10). + ## v0.7.2 — ingest agent storage_targets (slice 5 Phase A) (2026-06-09) The agent's slice-5 work populates the host-report's `storage_targets` (previously empty). diff --git a/hub/internal/api/handler.go b/hub/internal/api/handler.go index 13832d9..8a96810 100644 --- a/hub/internal/api/handler.go +++ b/hub/internal/api/handler.go @@ -258,11 +258,43 @@ type hostReportPayload struct { ControllerVersion string `json:"controller_version"` } `json:"guests"` StorageTargets []hostStorageTarget `json:"storage_targets"` + Backups []hostBackup `json:"backups"` // slice 6 + RestoreTests []hostRestoreTest `json:"restore_tests"` // slice 6 Cloudflared struct { Status string `json:"status"` } `json:"cloudflared"` } +// hostBackup / hostRestoreTest mirror the agent's hub.Backup / hub.RestoreTest wire +// contract field-for-field (slice 6, doc 03 §8). DUPLICATED contract — the golden stays +// byte-identical with felhom-agent's copy and the key-set tests guard drift. The hub +// persists these via report_json (no new columns this slice) and surfaces a FAILED +// restore-test prominently (the loudest DR signal). The rich backup policy is slice 10. +type hostBackup struct { + TargetID string `json:"target_id"` + VMID int `json:"vmid"` + Archive string `json:"archive"` + Mode string `json:"mode"` + CrashConsistent bool `json:"crash_consistent"` + SizeBytes int64 `json:"size_bytes"` + Success bool `json:"success"` + Error string `json:"error,omitempty"` + StartedAt string `json:"started_at"` + DurationSeconds float64 `json:"duration_seconds"` + UncoveredVolumes []string `json:"uncovered_volumes"` +} + +type hostRestoreTest struct { + SourceArchive string `json:"source_archive"` + SourceTier string `json:"source_tier"` + ScratchVMID int `json:"scratch_vmid"` + Pass bool `json:"pass"` + Verified string `json:"verified"` + Error string `json:"error,omitempty"` + TestedAt string `json:"tested_at"` + DurationSeconds float64 `json:"duration_seconds"` +} + // hostStorageTarget mirrors the agent's hub.StorageTarget wire contract field-for-field. // It is a DUPLICATED contract (no shared types module yet); testdata/host-report.golden.json // must stay byte-identical with felhom-agent's copy and the key-set test guards drift. @@ -398,8 +430,23 @@ func (h *Handler) handleHostReport(w http.ResponseWriter, r *http.Request) { hostID, disconnected, len(rep.StorageTargets)) } - h.logger.Printf("[INFO] host-report from %s (%d guests, %d storage targets, %d bytes)", - hostID, len(rep.Guests), len(rep.StorageTargets), len(body)) + // restore_tests (slice 6): a FAILED self-restore-test is the loudest DR signal there is + // — surface it prominently. A backup whose vzdump failed is also worth a warning. + for _, rt := range rep.RestoreTests { + if !rt.Pass { + h.logger.Printf("[WARN] host %s restore-test FAILED: archive=%s tier=%s scratch=%d err=%q", + hostID, rt.SourceArchive, rt.SourceTier, rt.ScratchVMID, rt.Error) + } + } + for _, bk := range rep.Backups { + if !bk.Success { + h.logger.Printf("[WARN] host %s backup FAILED: target=%s vmid=%d err=%q", + hostID, bk.TargetID, bk.VMID, bk.Error) + } + } + + h.logger.Printf("[INFO] host-report from %s (%d guests, %d storage targets, %d backups, %d restore-tests, %d bytes)", + hostID, len(rep.Guests), len(rep.StorageTargets), len(rep.Backups), len(rep.RestoreTests), len(body)) blocked := false if cc, err := h.store.GetCustomerConfig(custID); err == nil && cc != nil && cc.Status == "blocked" { diff --git a/hub/internal/api/host_test.go b/hub/internal/api/host_test.go index c6aa30d..80d65c8 100644 --- a/hub/internal/api/host_test.go +++ b/hub/internal/api/host_test.go @@ -286,6 +286,58 @@ func TestHostStorageTarget_GoldenContract(t *testing.T) { assertSameStorageKeys(t, "storage_targets[0].thin_pool", goldenKeys["thin_pool"], mirrorKeys["thin_pool"]) } +func TestHostBackup_GoldenContract(t *testing.T) { + raw, err := os.ReadFile("testdata/host-report.golden.json") + if err != nil { + t.Fatal(err) + } + var golden struct { + Backups []json.RawMessage `json:"backups"` + } + if err := json.Unmarshal(raw, &golden); err != nil { + t.Fatal(err) + } + if len(golden.Backups) == 0 { + t.Fatal("golden has no backups to check") + } + var goldenKeys map[string]any + json.Unmarshal(golden.Backups[0], &goldenKeys) + var mirror hostBackup + if err := json.Unmarshal(golden.Backups[0], &mirror); err != nil { + t.Fatalf("golden backup does not parse into the mirror: %v", err) + } + b, _ := json.Marshal(mirror) + var mirrorKeys map[string]any + json.Unmarshal(b, &mirrorKeys) + assertSameStorageKeys(t, "backups[0]", goldenKeys, mirrorKeys) +} + +func TestHostRestoreTest_GoldenContract(t *testing.T) { + raw, err := os.ReadFile("testdata/host-report.golden.json") + if err != nil { + t.Fatal(err) + } + var golden struct { + RestoreTests []json.RawMessage `json:"restore_tests"` + } + if err := json.Unmarshal(raw, &golden); err != nil { + t.Fatal(err) + } + if len(golden.RestoreTests) == 0 { + t.Fatal("golden has no restore_tests to check") + } + var goldenKeys map[string]any + json.Unmarshal(golden.RestoreTests[0], &goldenKeys) + var mirror hostRestoreTest + if err := json.Unmarshal(golden.RestoreTests[0], &mirror); err != nil { + t.Fatalf("golden restore-test does not parse into the mirror: %v", err) + } + b, _ := json.Marshal(mirror) + var mirrorKeys map[string]any + json.Unmarshal(b, &mirrorKeys) + assertSameStorageKeys(t, "restore_tests[0]", goldenKeys, mirrorKeys) +} + func assertSameStorageKeys(t *testing.T, where string, a, b any) { t.Helper() ka, kb := sortedKeys(a), sortedKeys(b) diff --git a/hub/internal/api/testdata/host-report.golden.json b/hub/internal/api/testdata/host-report.golden.json index 73ff263..8778f0a 100644 --- a/hub/internal/api/testdata/host-report.golden.json +++ b/hub/internal/api/testdata/host-report.golden.json @@ -86,8 +86,31 @@ } } ], - "backups": [], - "restore_tests": [], + "backups": [ + { + "target_id": "local", + "vmid": 9001, + "archive": "local:backup/vzdump-lxc-9001-2026_06_09-11_00_00.tar.zst", + "mode": "snapshot", + "crash_consistent": true, + "size_bytes": 524288000, + "success": true, + "started_at": "2026-06-09T11:00:00Z", + "duration_seconds": 42.5, + "uncovered_volumes": ["/mnt/bulk"] + } + ], + "restore_tests": [ + { + "source_archive": "local:backup/vzdump-lxc-9001-2026_06_09-11_00_00.tar.zst", + "source_tier": "local", + "scratch_vmid": 990000, + "pass": true, + "verified": "boot+running", + "tested_at": "2026-06-09T11:05:00Z", + "duration_seconds": 38.2 + } + ], "pbs_snapshots": [], "cloudflared": { "status": "active" }, "audit_tail": []