hub: restore-test "passed with warnings" visibility (v0.7.5)
Phase B (hub half) of the restore-test warning fix. As of agent v0.7.0, a restore-test that emitted a benign start advisory (systemd-nesting) now passes, and the agent carries the warning text on the wire.

- hostRestoreTest gains warnings + warnings_recognized mirror fields (omitempty; an absent warnings_recognized => false => the louder unrecognized path)
- ingest logs [INFO] "passed WITH WARNINGS (recognized)" and [WARN] for unrecognized warnings; FAILED is still [WARN]
- golden restore_tests[0] gains the keys, byte-identical with felhom-agent (sha256 e6999d77...); the bidirectional key-set contract test round-trips them
- no dashboard widget: no host-domain dashboard surface exists yet (log+persist only, as with pbs_snapshots) -- deferred to slice 10

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -310,6 +310,13 @@ type hostRestoreTest struct {
|
||||
Error string `json:"error,omitempty"`
|
||||
TestedAt string `json:"tested_at"`
|
||||
DurationSeconds float64 `json:"duration_seconds"`
|
||||
// Warnings are the guest-start task's warning line(s) on a PASS (e.g. the systemd-nesting
|
||||
// advisory). The verdict is liveness-only, so a passed restore-test can carry warnings.
|
||||
Warnings []string `json:"warnings,omitempty"`
|
||||
// WarningsRecognized is true iff every warning is the known-benign anchor. Absent ⇒ false,
|
||||
// which is the SAFE default: the hub then treats it as an unrecognized warning (the louder
|
||||
// path), so a missing flag can only over-notice, never hide a real warning.
|
||||
WarningsRecognized bool `json:"warnings_recognized,omitempty"`
|
||||
}
|
||||
|
||||
// hostStorageTarget mirrors the agent's hub.StorageTarget wire contract field-for-field.
|
||||
@@ -448,11 +455,24 @@ func (h *Handler) handleHostReport(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
// restore_tests (slice 6): a FAILED self-restore-test is the loudest DR signal there is
|
||||
// — surface it prominently. A backup whose vzdump failed is also worth a warning.
|
||||
// — surface it prominently. A PASS that carried start warnings (e.g. the systemd-nesting
|
||||
// advisory) is surfaced too: INFO when every warning is recognized-benign, escalated to
|
||||
// WARN when an UNRECOGNIZED warning stood out (as loud as a failed PBS verify is for
|
||||
// backups), so a real restore warning can't hide behind a green pass. A backup whose
|
||||
// vzdump failed is also worth a warning.
|
||||
for _, rt := range rep.RestoreTests {
|
||||
if !rt.Pass {
|
||||
switch {
|
||||
case !rt.Pass:
|
||||
h.logger.Printf("[WARN] host %s restore-test FAILED: archive=%s tier=%s scratch=%d err=%q",
|
||||
hostID, rt.SourceArchive, rt.SourceTier, rt.ScratchVMID, rt.Error)
|
||||
case len(rt.Warnings) == 0:
|
||||
// clean pass — nothing to surface here (counted in the summary line below).
|
||||
case rt.WarningsRecognized:
|
||||
h.logger.Printf("[INFO] host %s restore-test passed WITH WARNINGS (recognized): archive=%s tier=%s warnings=%v",
|
||||
hostID, rt.SourceArchive, rt.SourceTier, rt.Warnings)
|
||||
default:
|
||||
h.logger.Printf("[WARN] host %s restore-test passed WITH UNRECOGNIZED WARNINGS: archive=%s tier=%s warnings=%v",
|
||||
hostID, rt.SourceArchive, rt.SourceTier, rt.Warnings)
|
||||
}
|
||||
}
|
||||
for _, bk := range rep.Backups {
|
||||
|
||||
+5
-1
@@ -108,7 +108,11 @@
|
||||
"pass": true,
|
||||
"verified": "boot+running",
|
||||
"tested_at": "2026-06-09T11:05:00Z",
|
||||
"duration_seconds": 38.2
|
||||
"duration_seconds": 38.2,
|
||||
"warnings": [
|
||||
"WARN: Systemd 257 detected. You may need to enable nesting."
|
||||
],
|
||||
"warnings_recognized": true
|
||||
}
|
||||
],
|
||||
"pbs_snapshots": [
|
||||
|
||||
Reference in New Issue
Block a user