feat: Hub monitoring takeover — event system, dead man's switch, notifications (v0.3.0)
Replace external Healthchecks.io with Hub-native monitoring. New events table + /api/v1/event endpoint for structured events from controllers. Staleness checker (60s) detects unresponsive nodes. Backup deadline checker (daily 05:00) catches missed backups. Notification dispatcher sends operator (English) + customer (Hungarian) emails via Resend with per-event cooldowns. Event timeline on customer page, dashboard badges. Config form deprecates Monitoring UUIDs section. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+149
-11
@@ -12,6 +12,7 @@ import (
|
||||
"time"
|
||||
|
||||
"gitea.dooplex.hu/admin/felhom-hub/internal/configgen"
|
||||
"gitea.dooplex.hu/admin/felhom-hub/internal/notify"
|
||||
"gitea.dooplex.hu/admin/felhom-hub/internal/store"
|
||||
)
|
||||
|
||||
@@ -29,6 +30,7 @@ type Handler struct {
|
||||
logger *log.Logger
|
||||
httpClient *http.Client
|
||||
templateProvider ConfigTemplateProvider
|
||||
dispatcher *notify.Dispatcher
|
||||
}
|
||||
|
||||
// New creates a new API handler.
|
||||
@@ -44,23 +46,40 @@ func New(store *store.Store, apiKey, resendAPIKey, fromEmail string, templatePro
|
||||
}
|
||||
}
|
||||
|
||||
// SetDispatcher sets the notification dispatcher for event-triggered emails.
|
||||
func (h *Handler) SetDispatcher(d *notify.Dispatcher) {
|
||||
h.dispatcher = d
|
||||
}
|
||||
|
||||
// checkAuth verifies the Bearer token against the global API key or a per-customer API key.
|
||||
// Returns true if authorized.
|
||||
func (h *Handler) checkAuth(r *http.Request) bool {
|
||||
_, _, ok := h.checkAuthCustomer(r)
|
||||
return ok
|
||||
}
|
||||
|
||||
// checkAuthCustomer verifies the Bearer token and returns the authenticated customer identity.
|
||||
// For per-customer keys: returns (customerID, false, true).
|
||||
// For global key: returns ("", true, true) — caller must allow any customer_id.
|
||||
// On failure: returns ("", false, false).
|
||||
func (h *Handler) checkAuthCustomer(r *http.Request) (customerID string, isGlobal bool, ok bool) {
|
||||
auth := r.Header.Get("Authorization")
|
||||
if !strings.HasPrefix(auth, "Bearer ") {
|
||||
return false
|
||||
return "", false, false
|
||||
}
|
||||
token := strings.TrimPrefix(auth, "Bearer ")
|
||||
|
||||
// Check global key first
|
||||
if h.apiKey != "" && subtle.ConstantTimeCompare([]byte(token), []byte(h.apiKey)) == 1 {
|
||||
return true
|
||||
return "", true, true
|
||||
}
|
||||
|
||||
// Check per-customer key
|
||||
cfg, err := h.store.GetCustomerConfigByAPIKey(token)
|
||||
return err == nil && cfg != nil
|
||||
if err != nil || cfg == nil {
|
||||
return "", false, false
|
||||
}
|
||||
return cfg.CustomerID, false, true
|
||||
}
|
||||
|
||||
// ServeHTTP routes API requests.
|
||||
@@ -70,6 +89,8 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case r.Method == http.MethodPost && path == "/report":
|
||||
h.handleReport(w, r)
|
||||
case r.Method == http.MethodPost && path == "/event":
|
||||
h.handleEvent(w, r)
|
||||
case r.Method == http.MethodPost && path == "/notify":
|
||||
h.handleNotify(w, r)
|
||||
case r.Method == http.MethodPost && path == "/infra-backup":
|
||||
@@ -97,7 +118,8 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
func (h *Handler) handleReport(w http.ResponseWriter, r *http.Request) {
|
||||
if !h.checkAuth(r) {
|
||||
authCustomerID, isGlobal, ok := h.checkAuthCustomer(r)
|
||||
if !ok {
|
||||
http.Error(w, "Unauthorized", http.StatusUnauthorized)
|
||||
return
|
||||
}
|
||||
@@ -117,6 +139,12 @@ func (h *Handler) handleReport(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
// Validate customer_id matches authenticated customer (unless global key)
|
||||
if !isGlobal && authCustomerID != payload.CustomerID {
|
||||
http.Error(w, "Forbidden: customer_id mismatch", http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
|
||||
if err := h.store.SaveReport(payload.CustomerID, body); err != nil {
|
||||
h.logger.Printf("[ERROR] Failed to save report from %s: %v", payload.CustomerID, err)
|
||||
http.Error(w, "Internal error", http.StatusInternalServerError)
|
||||
@@ -128,6 +156,114 @@ func (h *Handler) handleReport(w http.ResponseWriter, r *http.Request) {
|
||||
w.Write([]byte(`{"status":"ok"}`))
|
||||
}
|
||||
|
||||
// allowedEventTypes lists all valid event_type values the Hub accepts.
//
// handleEvent rejects any event_type that is not a key of this map with a
// 400 response. The map is only read, never written, after initialization
// in the code visible here.
var allowedEventTypes = map[string]bool{
	// Controller-pushed events
	"controller_started":          true,
	"controller_updated":          true,
	"backup_completed":            true,
	"backup_failed":               true,
	"db_dump_completed":           true,
	"db_dump_failed":              true,
	"backup_integrity_ok":         true,
	"backup_integrity_failed":     true,
	"crossdrive_completed":        true,
	"crossdrive_failed":           true,
	"storage_disconnected":        true,
	"storage_reconnected":         true,
	"disk_warning":                true,
	"disk_critical":               true,
	"health_degraded":             true,
	"health_critical":             true,
	"health_recovered":            true,
	"app_deployed":                true,
	"app_removed":                 true,
	"disaster_recovery_started":   true,
	"disaster_recovery_completed": true,
	// Hub-generated events
	"node_stale":             true,
	"node_down":              true,
	"node_recovered":         true,
	"expected_backup_missed": true,
	"expected_dbdump_missed": true,
	// Special
	"test": true,
}
|
||||
|
||||
// handleEvent processes structured events from controllers (new endpoint, replaces /notify for updated controllers).
|
||||
func (h *Handler) handleEvent(w http.ResponseWriter, r *http.Request) {
|
||||
authCustomerID, isGlobal, ok := h.checkAuthCustomer(r)
|
||||
if !ok {
|
||||
http.Error(w, "Unauthorized", http.StatusUnauthorized)
|
||||
return
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(io.LimitReader(r.Body, 1<<20))
|
||||
if err != nil {
|
||||
http.Error(w, "Bad request", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
var payload struct {
|
||||
CustomerID string `json:"customer_id"`
|
||||
EventType string `json:"event_type"`
|
||||
Severity string `json:"severity"`
|
||||
Message string `json:"message"`
|
||||
Details json.RawMessage `json:"details"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &payload); err != nil {
|
||||
http.Error(w, "Invalid JSON", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
if payload.CustomerID == "" || payload.EventType == "" {
|
||||
http.Error(w, "customer_id and event_type are required", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
// Validate customer_id matches authenticated customer (unless global key)
|
||||
if !isGlobal && authCustomerID != payload.CustomerID {
|
||||
http.Error(w, "Forbidden: customer_id mismatch", http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
|
||||
// Validate event_type
|
||||
if !allowedEventTypes[payload.EventType] {
|
||||
http.Error(w, fmt.Sprintf("Invalid event_type: %s", payload.EventType), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
// Validate/default severity
|
||||
switch payload.Severity {
|
||||
case "info", "warning", "error":
|
||||
default:
|
||||
payload.Severity = "info"
|
||||
}
|
||||
|
||||
// Store details as JSON string
|
||||
detailsStr := "{}"
|
||||
if len(payload.Details) > 0 && string(payload.Details) != "null" {
|
||||
detailsStr = string(payload.Details)
|
||||
}
|
||||
|
||||
_, err = h.store.SaveEvent(payload.CustomerID, payload.EventType, payload.Severity, payload.Message, detailsStr, "controller")
|
||||
if err != nil {
|
||||
h.logger.Printf("[ERROR] Failed to save event from %s: %v", payload.CustomerID, err)
|
||||
http.Error(w, "Internal error", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
h.logger.Printf("[INFO] Event from %s: %s (%s) — %s", payload.CustomerID, payload.EventType, payload.Severity, payload.Message)
|
||||
|
||||
// Dispatch notifications (non-blocking)
|
||||
if h.dispatcher != nil {
|
||||
go h.dispatcher.ProcessEvent(payload.CustomerID, payload.EventType, payload.Severity, payload.Message, detailsStr, "controller")
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(`{"ok":true}`))
|
||||
}
|
||||
|
||||
func (h *Handler) handleCustomers(w http.ResponseWriter, r *http.Request) {
|
||||
customers, err := h.store.GetCustomers()
|
||||
if err != nil {
|
||||
@@ -258,7 +394,7 @@ func (h *Handler) handleNotify(w http.ResponseWriter, r *http.Request) {
|
||||
// Check if customer is blocked
|
||||
if h.store.IsCustomerBlocked(payload.CustomerID) {
|
||||
h.logger.Printf("[INFO] Notification suppressed for blocked customer %s", payload.CustomerID)
|
||||
h.store.LogNotification(payload.CustomerID, payload.EventType, payload.Severity, payload.Message, "skipped", "customer blocked")
|
||||
h.store.LogNotification(payload.CustomerID, payload.EventType, payload.Severity, payload.Message, "skipped", "customer blocked", "customer")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(`{"status":"ok","sent":false,"reason":"blocked"}`))
|
||||
return
|
||||
@@ -275,7 +411,7 @@ func (h *Handler) handleNotify(w http.ResponseWriter, r *http.Request) {
|
||||
// Check if customer has email configured and event type is enabled
|
||||
if prefs == nil || prefs.Email == "" {
|
||||
h.logger.Printf("[INFO] No email configured for %s, skipping notification", payload.CustomerID)
|
||||
h.store.LogNotification(payload.CustomerID, payload.EventType, payload.Severity, payload.Message, "skipped", "no email configured")
|
||||
h.store.LogNotification(payload.CustomerID, payload.EventType, payload.Severity, payload.Message, "skipped", "no email configured", "customer")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(`{"status":"ok","sent":false,"reason":"no_email"}`))
|
||||
return
|
||||
@@ -291,7 +427,7 @@ func (h *Handler) handleNotify(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
if !eventEnabled {
|
||||
h.logger.Printf("[INFO] Event %s not enabled for %s, skipping", payload.EventType, payload.CustomerID)
|
||||
h.store.LogNotification(payload.CustomerID, payload.EventType, payload.Severity, payload.Message, "skipped", "event not enabled")
|
||||
h.store.LogNotification(payload.CustomerID, payload.EventType, payload.Severity, payload.Message, "skipped", "event not enabled", "customer")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(`{"status":"ok","sent":false,"reason":"event_disabled"}`))
|
||||
return
|
||||
@@ -300,7 +436,7 @@ func (h *Handler) handleNotify(w http.ResponseWriter, r *http.Request) {
|
||||
// Send email via Resend API
|
||||
if h.resendAPIKey == "" {
|
||||
h.logger.Printf("[WARN] Resend API key not configured, cannot send notification email")
|
||||
h.store.LogNotification(payload.CustomerID, payload.EventType, payload.Severity, payload.Message, "skipped", "resend api key not configured")
|
||||
h.store.LogNotification(payload.CustomerID, payload.EventType, payload.Severity, payload.Message, "skipped", "resend api key not configured", "customer")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(`{"status":"ok","sent":false,"reason":"no_api_key"}`))
|
||||
return
|
||||
@@ -310,13 +446,13 @@ func (h *Handler) handleNotify(w http.ResponseWriter, r *http.Request) {
|
||||
sendErr := h.sendResendEmail(prefs.Email, subject, emailBody)
|
||||
if sendErr != nil {
|
||||
h.logger.Printf("[ERROR] Failed to send notification email to %s: %v", prefs.Email, sendErr)
|
||||
h.store.LogNotification(payload.CustomerID, payload.EventType, payload.Severity, payload.Message, "failed", sendErr.Error())
|
||||
h.store.LogNotification(payload.CustomerID, payload.EventType, payload.Severity, payload.Message, "failed", sendErr.Error(), "customer")
|
||||
http.Error(w, "Failed to send email", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
h.logger.Printf("[INFO] Notification email sent to %s for %s/%s", prefs.Email, payload.CustomerID, payload.EventType)
|
||||
h.store.LogNotification(payload.CustomerID, payload.EventType, payload.Severity, payload.Message, "sent", "")
|
||||
h.store.LogNotification(payload.CustomerID, payload.EventType, payload.Severity, payload.Message, "sent", "", "customer")
|
||||
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(`{"status":"ok","sent":true}`))
|
||||
@@ -339,13 +475,14 @@ func (h *Handler) handleSavePreferences(w http.ResponseWriter, r *http.Request)
|
||||
CustomerID string `json:"customer_id"`
|
||||
Email string `json:"email"`
|
||||
EnabledEvents []string `json:"enabled_events"`
|
||||
CooldownHours int `json:"cooldown_hours"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &payload); err != nil || payload.CustomerID == "" {
|
||||
http.Error(w, "Invalid payload: customer_id required", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
if err := h.store.SaveNotificationPrefs(payload.CustomerID, payload.Email, payload.EnabledEvents); err != nil {
|
||||
if err := h.store.SaveNotificationPrefs(payload.CustomerID, payload.Email, payload.EnabledEvents, payload.CooldownHours); err != nil {
|
||||
h.logger.Printf("[ERROR] Failed to save notification prefs for %s: %v", payload.CustomerID, err)
|
||||
http.Error(w, "Internal error", http.StatusInternalServerError)
|
||||
return
|
||||
@@ -503,6 +640,7 @@ func formatNotificationEmail(customerID, eventType, severity, message, details s
|
||||
severityLabel := map[string]string{
|
||||
"info": "Információ",
|
||||
"warning": "Figyelmeztetés",
|
||||
"error": "Hiba",
|
||||
"critical": "Kritikus",
|
||||
}
|
||||
label := severityLabel[severity]
|
||||
|
||||
Reference in New Issue
Block a user