feat: Hub monitoring takeover — event system, dead man's switch, notifications (v0.3.0)

Replace external Healthchecks.io with Hub-native monitoring. Add an events
table and a /api/v1/event endpoint for structured events from controllers.
Staleness checker (60s) detects unresponsive nodes. Backup deadline
checker (daily 05:00) catches missed backups. Notification dispatcher
sends operator (English) + customer (Hungarian) emails via Resend with
per-event cooldowns. Add an event timeline on the customer page and badges on the dashboard.
Config form deprecates Monitoring UUIDs section.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-20 18:53:24 +01:00
parent b4cb92e09f
commit 3217cb4751
16 changed files with 1319 additions and 64 deletions
+15
View File
@@ -40,6 +40,13 @@ func New(store *store.Store, passwordHash, apiKey string, staleThreshold time.Du
b, _ := json.Marshal(v)
return template.JS(b)
},
"add": func(a, b int) int { return a + b },
"mapGet": func(m map[string]int, key string) int {
if m == nil {
return 0
}
return m[key]
},
}
tmpl := template.Must(template.New("").Funcs(funcMap).ParseFS(templateFS, "templates/*.html"))
@@ -232,6 +239,8 @@ func (s *Server) handleDashboard(w http.ResponseWriter, r *http.Request) {
store.CustomerSummary
OverallStatus string // "ok", "warn", "down", "pending"
BackupAge string
EventErrors int
EventWarnings int
}
// Build map of report customers keyed by ID
@@ -266,6 +275,12 @@ func (s *Server) handleDashboard(w http.ResponseWriter, r *http.Request) {
dc.BackupAge = ""
}
// Event counts (last 24h)
if counts, err := s.store.CountEventsBySeverity(c.CustomerID, time.Now().Add(-24*time.Hour)); err == nil {
dc.EventErrors = counts["error"]
dc.EventWarnings = counts["warning"]
}
data = append(data, dc)
}