feat: Hub monitoring takeover — event system, dead man's switch, notifications (v0.3.0)
Replace external Healthchecks.io with Hub-native monitoring. New events table + /api/v1/event endpoint for structured events from controllers. Staleness checker (60s) detects unresponsive nodes. Backup deadline checker (daily 05:00) catches missed backups. Notification dispatcher sends operator (English) + customer (Hungarian) emails via Resend with per-event cooldowns. Event timeline on customer page, dashboard badges. Config form deprecates Monitoring UUIDs section. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+225
-12
@@ -121,6 +121,33 @@ func (s *Store) migrate() error {
|
||||
// v0.2.1: add status column to customer_configs (idempotent)
|
||||
s.db.Exec("ALTER TABLE customer_configs ADD COLUMN status TEXT NOT NULL DEFAULT 'active'")
|
||||
|
||||
// v0.3.0: events table for hub-native monitoring
|
||||
_, err = s.db.Exec(`
|
||||
CREATE TABLE IF NOT EXISTS events (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
customer_id TEXT NOT NULL,
|
||||
event_type TEXT NOT NULL,
|
||||
severity TEXT NOT NULL DEFAULT 'info',
|
||||
message TEXT NOT NULL DEFAULT '',
|
||||
details_json TEXT NOT NULL DEFAULT '{}',
|
||||
source TEXT NOT NULL DEFAULT 'controller',
|
||||
created_at DATETIME NOT NULL DEFAULT (datetime('now'))
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_events_customer_created
|
||||
ON events(customer_id, created_at DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_events_type
|
||||
ON events(event_type, created_at DESC);
|
||||
`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// v0.3.0: add cooldown_hours to customer_notifications (idempotent)
|
||||
s.db.Exec("ALTER TABLE customer_notifications ADD COLUMN cooldown_hours INTEGER DEFAULT 6")
|
||||
|
||||
// v0.3.0: add channel column to notification_log (idempotent)
|
||||
s.db.Exec("ALTER TABLE notification_log ADD COLUMN channel TEXT NOT NULL DEFAULT 'customer'")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -129,15 +156,17 @@ type NotificationPrefs struct {
|
||||
CustomerID string
|
||||
Email string
|
||||
EnabledEvents []string
|
||||
CooldownHours int
|
||||
}
|
||||
|
||||
// GetNotificationPrefs returns notification preferences for a customer.
|
||||
func (s *Store) GetNotificationPrefs(customerID string) (*NotificationPrefs, error) {
|
||||
var email, eventsJSON string
|
||||
var cooldownHours int
|
||||
err := s.db.QueryRow(
|
||||
"SELECT email, enabled_events FROM customer_notifications WHERE customer_id = ?",
|
||||
"SELECT email, enabled_events, COALESCE(cooldown_hours, 6) FROM customer_notifications WHERE customer_id = ?",
|
||||
customerID,
|
||||
).Scan(&email, &eventsJSON)
|
||||
).Scan(&email, &eventsJSON, &cooldownHours)
|
||||
if err != nil {
|
||||
if err == sql.ErrNoRows {
|
||||
return nil, nil
|
||||
@@ -150,34 +179,46 @@ func (s *Store) GetNotificationPrefs(customerID string) (*NotificationPrefs, err
|
||||
s.logger.Printf("[WARN] Corrupt enabled_events JSON for %s: %v", customerID, err)
|
||||
}
|
||||
|
||||
if cooldownHours <= 0 {
|
||||
cooldownHours = 6
|
||||
}
|
||||
|
||||
return &NotificationPrefs{
|
||||
CustomerID: customerID,
|
||||
Email: email,
|
||||
EnabledEvents: events,
|
||||
CooldownHours: cooldownHours,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// SaveNotificationPrefs creates or updates notification preferences for a customer.
|
||||
func (s *Store) SaveNotificationPrefs(customerID, email string, enabledEvents []string) error {
|
||||
func (s *Store) SaveNotificationPrefs(customerID, email string, enabledEvents []string, cooldownHours int) error {
|
||||
eventsJSON, _ := json.Marshal(enabledEvents)
|
||||
if cooldownHours <= 0 {
|
||||
cooldownHours = 6
|
||||
}
|
||||
_, err := s.db.Exec(`
|
||||
INSERT INTO customer_notifications (customer_id, email, enabled_events, updated_at)
|
||||
VALUES (?, ?, ?, datetime('now'))
|
||||
INSERT INTO customer_notifications (customer_id, email, enabled_events, cooldown_hours, updated_at)
|
||||
VALUES (?, ?, ?, ?, datetime('now'))
|
||||
ON CONFLICT(customer_id) DO UPDATE SET
|
||||
email = excluded.email,
|
||||
enabled_events = excluded.enabled_events,
|
||||
cooldown_hours = excluded.cooldown_hours,
|
||||
updated_at = datetime('now')`,
|
||||
customerID, email, string(eventsJSON),
|
||||
customerID, email, string(eventsJSON), cooldownHours,
|
||||
)
|
||||
return err
|
||||
}
|
||||
|
||||
// LogNotification records a notification attempt.
|
||||
func (s *Store) LogNotification(customerID, eventType, severity, message, status, errorMsg string) error {
|
||||
func (s *Store) LogNotification(customerID, eventType, severity, message, status, errorMsg, channel string) error {
|
||||
if channel == "" {
|
||||
channel = "customer"
|
||||
}
|
||||
_, err := s.db.Exec(`
|
||||
INSERT INTO notification_log (customer_id, event_type, severity, message, status, error_message)
|
||||
VALUES (?, ?, ?, ?, ?, ?)`,
|
||||
customerID, eventType, severity, message, status, errorMsg,
|
||||
INSERT INTO notification_log (customer_id, event_type, severity, message, status, error_message, channel)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)`,
|
||||
customerID, eventType, severity, message, status, errorMsg, channel,
|
||||
)
|
||||
return err
|
||||
}
|
||||
@@ -189,13 +230,14 @@ type NotificationLogEntry struct {
|
||||
Message string
|
||||
Status string // "sent", "skipped", "failed"
|
||||
ErrorMessage string
|
||||
Channel string // "operator" or "customer"
|
||||
CreatedAt time.Time
|
||||
}
|
||||
|
||||
// GetRecentNotifications returns the most recent notification log entries for a customer.
|
||||
func (s *Store) GetRecentNotifications(customerID string, limit int) ([]NotificationLogEntry, error) {
|
||||
rows, err := s.db.Query(`
|
||||
SELECT event_type, severity, message, status, COALESCE(error_message, ''), created_at
|
||||
SELECT event_type, severity, message, status, COALESCE(error_message, ''), COALESCE(channel, 'customer'), created_at
|
||||
FROM notification_log
|
||||
WHERE customer_id = ?
|
||||
ORDER BY created_at DESC
|
||||
@@ -209,7 +251,7 @@ func (s *Store) GetRecentNotifications(customerID string, limit int) ([]Notifica
|
||||
for rows.Next() {
|
||||
var e NotificationLogEntry
|
||||
var createdAt, errorMsg string
|
||||
if err := rows.Scan(&e.EventType, &e.Severity, &e.Message, &e.Status, &errorMsg, &createdAt); err != nil {
|
||||
if err := rows.Scan(&e.EventType, &e.Severity, &e.Message, &e.Status, &errorMsg, &e.Channel, &createdAt); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
e.CreatedAt = parseSQLiteTime(createdAt)
|
||||
@@ -658,6 +700,177 @@ func (s *Store) UpdateRetrievalPassword(customerID, newPassword string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// --- Event system ---
|
||||
|
||||
// Event is a single record of the events table.
type Event struct {
	// ID is the record's numeric identifier.
	ID int64
	// CustomerID names the customer the event belongs to.
	CustomerID string
	// EventType is the event's type name.
	EventType string
	// Severity is one of "info", "warning", or "error".
	Severity string
	// Message is the event's message text.
	Message string
	// DetailsJSON carries the event details as raw JSON.
	DetailsJSON string
	// Source is "controller" or "hub".
	Source string
	// CreatedAt is the event's creation timestamp.
	CreatedAt time.Time
}
|
||||
|
||||
// SaveEvent inserts a new event and returns its ID.
|
||||
func (s *Store) SaveEvent(customerID, eventType, severity, message, detailsJSON, source string) (int64, error) {
|
||||
if detailsJSON == "" {
|
||||
detailsJSON = "{}"
|
||||
}
|
||||
if source == "" {
|
||||
source = "controller"
|
||||
}
|
||||
res, err := s.db.Exec(`
|
||||
INSERT INTO events (customer_id, event_type, severity, message, details_json, source)
|
||||
VALUES (?, ?, ?, ?, ?, ?)`,
|
||||
customerID, eventType, severity, message, detailsJSON, source,
|
||||
)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return res.LastInsertId()
|
||||
}
|
||||
|
||||
// GetRecentEvents returns the most recent events for a customer, newest first.
|
||||
func (s *Store) GetRecentEvents(customerID string, limit int) ([]Event, error) {
|
||||
rows, err := s.db.Query(`
|
||||
SELECT id, customer_id, event_type, severity, message, details_json, source, created_at
|
||||
FROM events
|
||||
WHERE customer_id = ?
|
||||
ORDER BY created_at DESC
|
||||
LIMIT ?`, customerID, limit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
return scanEvents(rows)
|
||||
}
|
||||
|
||||
// GetEventsByType returns events of a specific type for a customer since a given time.
|
||||
func (s *Store) GetEventsByType(customerID, eventType string, since time.Time) ([]Event, error) {
|
||||
rows, err := s.db.Query(`
|
||||
SELECT id, customer_id, event_type, severity, message, details_json, source, created_at
|
||||
FROM events
|
||||
WHERE customer_id = ? AND event_type = ? AND created_at >= ?
|
||||
ORDER BY created_at DESC`,
|
||||
customerID, eventType, since.UTC().Format("2006-01-02 15:04:05"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
return scanEvents(rows)
|
||||
}
|
||||
|
||||
// GetLatestEventByType returns the most recent event of a given type for a customer.
|
||||
func (s *Store) GetLatestEventByType(customerID, eventType string) (*Event, error) {
|
||||
var e Event
|
||||
var createdAt string
|
||||
err := s.db.QueryRow(`
|
||||
SELECT id, customer_id, event_type, severity, message, details_json, source, created_at
|
||||
FROM events
|
||||
WHERE customer_id = ? AND event_type = ?
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 1`, customerID, eventType,
|
||||
).Scan(&e.ID, &e.CustomerID, &e.EventType, &e.Severity, &e.Message, &e.DetailsJSON, &e.Source, &createdAt)
|
||||
if err == sql.ErrNoRows {
|
||||
return nil, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
e.CreatedAt = parseSQLiteTime(createdAt)
|
||||
return &e, nil
|
||||
}
|
||||
|
||||
// GetAllRecentEvents returns the most recent events across all customers.
|
||||
func (s *Store) GetAllRecentEvents(limit int) ([]Event, error) {
|
||||
rows, err := s.db.Query(`
|
||||
SELECT id, customer_id, event_type, severity, message, details_json, source, created_at
|
||||
FROM events
|
||||
ORDER BY created_at DESC
|
||||
LIMIT ?`, limit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
return scanEvents(rows)
|
||||
}
|
||||
|
||||
// CountEventsBySeverity returns a count of events per severity for a customer since a given time.
|
||||
func (s *Store) CountEventsBySeverity(customerID string, since time.Time) (map[string]int, error) {
|
||||
rows, err := s.db.Query(`
|
||||
SELECT severity, COUNT(*) FROM events
|
||||
WHERE customer_id = ? AND created_at >= ?
|
||||
GROUP BY severity`,
|
||||
customerID, since.UTC().Format("2006-01-02 15:04:05"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
counts := make(map[string]int)
|
||||
for rows.Next() {
|
||||
var sev string
|
||||
var count int
|
||||
if err := rows.Scan(&sev, &count); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
counts[sev] = count
|
||||
}
|
||||
return counts, rows.Err()
|
||||
}
|
||||
|
||||
// PruneEvents deletes events older than the given number of days.
|
||||
func (s *Store) PruneEvents(maxDays int) (int64, error) {
|
||||
cutoff := time.Now().AddDate(0, 0, -maxDays).UTC().Format("2006-01-02 15:04:05")
|
||||
res, err := s.db.Exec("DELETE FROM events WHERE created_at < ?", cutoff)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return res.RowsAffected()
|
||||
}
|
||||
|
||||
// GetActiveCustomerIDs returns customer IDs from customer_configs where status is 'active'.
|
||||
func (s *Store) GetActiveCustomerIDs() ([]string, error) {
|
||||
rows, err := s.db.Query("SELECT customer_id FROM customer_configs WHERE status = 'active'")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var ids []string
|
||||
for rows.Next() {
|
||||
var id string
|
||||
if err := rows.Scan(&id); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ids = append(ids, id)
|
||||
}
|
||||
return ids, rows.Err()
|
||||
}
|
||||
|
||||
// Ping verifies the database is accessible.
|
||||
func (s *Store) Ping() error {
|
||||
var n int
|
||||
return s.db.QueryRow("SELECT 1").Scan(&n)
|
||||
}
|
||||
|
||||
func scanEvents(rows *sql.Rows) ([]Event, error) {
|
||||
var events []Event
|
||||
for rows.Next() {
|
||||
var e Event
|
||||
var createdAt string
|
||||
if err := rows.Scan(&e.ID, &e.CustomerID, &e.EventType, &e.Severity, &e.Message, &e.DetailsJSON, &e.Source, &createdAt); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
e.CreatedAt = parseSQLiteTime(createdAt)
|
||||
events = append(events, e)
|
||||
}
|
||||
return events, rows.Err()
|
||||
}
|
||||
|
||||
// parseSQLiteTime tries multiple formats that modernc.org/sqlite may return.
|
||||
func parseSQLiteTime(s string) time.Time {
|
||||
formats := []string{
|
||||
|
||||
Reference in New Issue
Block a user