v0.4.0: monitoring & backup — scheduler, CPU/temp metrics, healthchecks, restic backups
Phase 2 (Monitoring & Health):
- Central job scheduler replacing ad-hoc goroutines (internal/scheduler)
- CPU usage collector via /proc/stat background sampling (internal/system/cpu_linux.go)
- Temperature reading from /sys/class/thermal + /host/sys (Docker mount)
- Load average from /proc/loadavg
- Healthchecks.io-compatible HTTP pinger (internal/monitor/pinger.go)
- System health checks: disk, memory, CPU, temp, Docker, protected containers (internal/monitor/healthcheck.go)

Phase 3 (Backups):
- Database auto-discovery via docker ps + docker inspect (internal/backup/dbdump.go)
- Database dumping via docker exec (pg_dump / mariadb-dump) with atomic writes
- Restic backup integration with auto-password generation (internal/backup/restic.go)
- Backup orchestrator: DB dumps + restic snapshots + weekly prune (internal/backup/backup.go)
- Manual backup trigger via dashboard button and POST /api/backup/run

Dashboard UI:
- CPU usage bar with load average display
- Temperature with colored indicator dot
- Backup status card with last run time, DB count, repo stats
- "Mentés most" button for manual backup trigger

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -12,9 +12,13 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"gitea.dooplex.hu/admin/felhom-controller/internal/api"
|
"gitea.dooplex.hu/admin/felhom-controller/internal/api"
|
||||||
|
"gitea.dooplex.hu/admin/felhom-controller/internal/backup"
|
||||||
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
|
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
|
||||||
|
"gitea.dooplex.hu/admin/felhom-controller/internal/monitor"
|
||||||
|
"gitea.dooplex.hu/admin/felhom-controller/internal/scheduler"
|
||||||
"gitea.dooplex.hu/admin/felhom-controller/internal/stacks"
|
"gitea.dooplex.hu/admin/felhom-controller/internal/stacks"
|
||||||
catalogsync "gitea.dooplex.hu/admin/felhom-controller/internal/sync"
|
catalogsync "gitea.dooplex.hu/admin/felhom-controller/internal/sync"
|
||||||
|
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
|
||||||
"gitea.dooplex.hu/admin/felhom-controller/internal/web"
|
"gitea.dooplex.hu/admin/felhom-controller/internal/web"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -61,11 +65,70 @@ func main() {
|
|||||||
syncer.Start()
|
syncer.Start()
|
||||||
defer syncer.Stop()
|
defer syncer.Stop()
|
||||||
|
|
||||||
|
// --- Graceful shutdown context ---
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
// --- Start CPU collector ---
|
||||||
|
cpuCollector := system.NewCPUCollector(5 * time.Second)
|
||||||
|
cpuCollector.Start(ctx)
|
||||||
|
defer cpuCollector.Stop()
|
||||||
|
|
||||||
|
// --- Initialize health pinger ---
|
||||||
|
pinger := monitor.NewPinger(&cfg.Monitoring, logger)
|
||||||
|
|
||||||
|
// --- Initialize backup manager ---
|
||||||
|
var backupMgr *backup.Manager
|
||||||
|
if cfg.Backup.Enabled {
|
||||||
|
backupMgr = backup.NewManager(cfg, pinger, logger)
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Initialize scheduler ---
|
||||||
|
sched := scheduler.New(logger)
|
||||||
|
|
||||||
|
// Existing periodic tasks (migrated from ad-hoc goroutines)
|
||||||
|
sched.Every("status-refresh", 30*time.Second, func(ctx context.Context) error {
|
||||||
|
return stackMgr.RefreshStatus()
|
||||||
|
})
|
||||||
|
sched.Every("stack-scan", 2*time.Minute, func(ctx context.Context) error {
|
||||||
|
return stackMgr.ScanStacks()
|
||||||
|
})
|
||||||
|
|
||||||
|
// System health ping
|
||||||
|
healthInterval, err := time.ParseDuration(cfg.Monitoring.SystemHealthInterval)
|
||||||
|
if err != nil {
|
||||||
|
healthInterval = 5 * time.Minute
|
||||||
|
}
|
||||||
|
sched.Every("system-health", healthInterval, func(ctx context.Context) error {
|
||||||
|
report := monitor.RunHealthCheck(cfg, cpuCollector)
|
||||||
|
body := report.FormatMessage()
|
||||||
|
healthUUID := cfg.Monitoring.PingUUIDs.SystemHealth
|
||||||
|
if report.Status == "fail" {
|
||||||
|
pinger.Fail(healthUUID, body)
|
||||||
|
} else {
|
||||||
|
pinger.Ping(healthUUID, body)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
|
||||||
|
// Backup daily jobs
|
||||||
|
if cfg.Backup.Enabled && backupMgr != nil {
|
||||||
|
sched.Daily("db-dump", cfg.Backup.DBDumpSchedule, func(ctx context.Context) error {
|
||||||
|
return backupMgr.RunDBDumps(ctx)
|
||||||
|
})
|
||||||
|
sched.Daily("backup", cfg.Backup.ResticSchedule, func(ctx context.Context) error {
|
||||||
|
return backupMgr.RunBackup(ctx)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
sched.Start(ctx)
|
||||||
|
defer sched.Stop()
|
||||||
|
|
||||||
// --- Initialize API router ---
|
// --- Initialize API router ---
|
||||||
apiRouter := api.NewRouter(cfg, stackMgr, syncer, logger)
|
apiRouter := api.NewRouter(cfg, stackMgr, syncer, cpuCollector, backupMgr, logger)
|
||||||
|
|
||||||
// --- Initialize web server ---
|
// --- Initialize web server ---
|
||||||
webServer := web.NewServer(cfg, stackMgr, logger, Version)
|
webServer := web.NewServer(cfg, stackMgr, cpuCollector, backupMgr, logger, Version)
|
||||||
|
|
||||||
// --- Build HTTP mux ---
|
// --- Build HTTP mux ---
|
||||||
mux := http.NewServeMux()
|
mux := http.NewServeMux()
|
||||||
@@ -86,10 +149,6 @@ func main() {
|
|||||||
IdleTimeout: 120 * time.Second,
|
IdleTimeout: 120 * time.Second,
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Graceful shutdown ---
|
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
sigCh := make(chan os.Signal, 1)
|
sigCh := make(chan os.Signal, 1)
|
||||||
signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
|
signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
|
||||||
|
|
||||||
@@ -106,44 +165,6 @@ func main() {
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// --- Start background tasks ---
|
|
||||||
|
|
||||||
// Periodic container status refresh (lightweight — just runs docker ps)
|
|
||||||
go func() {
|
|
||||||
ticker := time.NewTicker(30 * time.Second)
|
|
||||||
defer ticker.Stop()
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
return
|
|
||||||
case <-ticker.C:
|
|
||||||
if err := stackMgr.RefreshStatus(); err != nil {
|
|
||||||
logger.Printf("[WARN] Status refresh failed: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
// Periodic stack scan (discovers new/removed stacks from disk)
|
|
||||||
// Runs less frequently since it reads the filesystem.
|
|
||||||
// This allows adding new stacks without restarting the controller.
|
|
||||||
go func() {
|
|
||||||
ticker := time.NewTicker(2 * time.Minute)
|
|
||||||
defer ticker.Stop()
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
return
|
|
||||||
case <-ticker.C:
|
|
||||||
if err := stackMgr.ScanStacks(); err != nil {
|
|
||||||
logger.Printf("[WARN] Periodic stack scan failed: %v", err)
|
|
||||||
} else {
|
|
||||||
logger.Printf("[DEBUG] Periodic stack scan completed")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
logger.Printf("[INFO] Web UI listening on %s", cfg.Web.Listen)
|
logger.Printf("[INFO] Web UI listening on %s", cfg.Web.Listen)
|
||||||
if err := server.ListenAndServe(); err != http.ErrServerClosed {
|
if err := server.ListenAndServe(); err != http.ErrServerClosed {
|
||||||
logger.Fatalf("[FATAL] HTTP server error: %v", err)
|
logger.Fatalf("[FATAL] HTTP server error: %v", err)
|
||||||
@@ -161,4 +182,4 @@ func setupLogger(cfg *config.Config) *log.Logger {
|
|||||||
}
|
}
|
||||||
|
|
||||||
return logger
|
return logger
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -31,6 +31,11 @@ paths:
|
|||||||
data_dir: "/opt/docker/felhom-controller/data"
|
data_dir: "/opt/docker/felhom-controller/data"
|
||||||
backup_dir: "/srv/backups"
|
backup_dir: "/srv/backups"
|
||||||
db_dump_dir: "/srv/backups/db-dumps"
|
db_dump_dir: "/srv/backups/db-dumps"
|
||||||
|
hdd_path: "" # Optional: HDD mount path (e.g., /mnt/hdd)
|
||||||
|
|
||||||
|
# --- System ---
|
||||||
|
system:
|
||||||
|
reserved_memory_mb: 384 # Memory reserved for OS (excluded from app budget)
|
||||||
|
|
||||||
# --- Web UI ---
|
# --- Web UI ---
|
||||||
web:
|
web:
|
||||||
@@ -61,7 +66,7 @@ stacks:
|
|||||||
backup:
|
backup:
|
||||||
enabled: true
|
enabled: true
|
||||||
restic_repo: "/srv/backups/restic-repo"
|
restic_repo: "/srv/backups/restic-repo"
|
||||||
restic_password_file: "/opt/docker/felhom-controller/restic-password"
|
restic_password_file: "/opt/docker/felhom-controller/data/restic-password"
|
||||||
db_dump_schedule: "02:30"
|
db_dump_schedule: "02:30"
|
||||||
restic_schedule: "03:00"
|
restic_schedule: "03:00"
|
||||||
retention:
|
retention:
|
||||||
@@ -78,6 +83,7 @@ monitoring:
|
|||||||
db_dump: "CHANGEME-uuid-for-db-dump"
|
db_dump: "CHANGEME-uuid-for-db-dump"
|
||||||
backup: "CHANGEME-uuid-for-backup"
|
backup: "CHANGEME-uuid-for-backup"
|
||||||
system_health: "CHANGEME-uuid-for-system-health"
|
system_health: "CHANGEME-uuid-for-system-health"
|
||||||
|
system_health_interval: "5m"
|
||||||
health_check_schedule: "06:00"
|
health_check_schedule: "06:00"
|
||||||
thresholds:
|
thresholds:
|
||||||
disk_warn_percent: 80
|
disk_warn_percent: 80
|
||||||
|
|||||||
@@ -11,20 +11,20 @@ services:
|
|||||||
ports:
|
ports:
|
||||||
- "8080:8080"
|
- "8080:8080"
|
||||||
volumes:
|
volumes:
|
||||||
# Docker socket — required for compose operations
|
# Docker socket — required for compose operations + DB dumps (docker exec)
|
||||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||||
# Controller config
|
# Controller config
|
||||||
- /opt/docker/felhom-controller/controller.yaml:/opt/docker/felhom-controller/controller.yaml:ro
|
- /opt/docker/felhom-controller/controller.yaml:/opt/docker/felhom-controller/controller.yaml:ro
|
||||||
# Controller persistent data (sessions, state)
|
# Controller persistent data (sessions, restic cache, restic password)
|
||||||
- controller-data:/opt/docker/felhom-controller/data
|
- controller-data:/opt/docker/felhom-controller/data
|
||||||
# Stack compose files (read + write for git sync)
|
# Stack compose files (read + write for git sync)
|
||||||
- /opt/docker/stacks:/opt/docker/stacks
|
- /opt/docker/stacks:/opt/docker/stacks
|
||||||
# Backup directories
|
# Backup directories (restic repo + db dumps)
|
||||||
- /srv/backups:/srv/backups
|
- /srv/backups:/srv/backups
|
||||||
# Restic password file
|
# HDD mount (if available, for monitoring disk usage)
|
||||||
- /opt/docker/felhom-controller/restic-password:/opt/docker/felhom-controller/restic-password:ro
|
|
||||||
# HDD mount (if available, for backup paths)
|
|
||||||
- ${HDD_PATH:-/mnt/hdd_placeholder}:${HDD_PATH:-/mnt/hdd_placeholder}:ro
|
- ${HDD_PATH:-/mnt/hdd_placeholder}:${HDD_PATH:-/mnt/hdd_placeholder}:ro
|
||||||
|
# Host /sys — for CPU temperature reading (read-only)
|
||||||
|
- /sys:/host/sys:ro
|
||||||
environment:
|
environment:
|
||||||
- TZ=Europe/Budapest
|
- TZ=Europe/Budapest
|
||||||
labels:
|
labels:
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package api
|
package api
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
@@ -8,6 +9,7 @@ import (
|
|||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"gitea.dooplex.hu/admin/felhom-controller/internal/backup"
|
||||||
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
|
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
|
||||||
"gitea.dooplex.hu/admin/felhom-controller/internal/stacks"
|
"gitea.dooplex.hu/admin/felhom-controller/internal/stacks"
|
||||||
catalogsync "gitea.dooplex.hu/admin/felhom-controller/internal/sync"
|
catalogsync "gitea.dooplex.hu/admin/felhom-controller/internal/sync"
|
||||||
@@ -16,14 +18,16 @@ import (
|
|||||||
|
|
||||||
// Router handles all /api/* requests.
|
// Router handles all /api/* requests.
|
||||||
type Router struct {
|
type Router struct {
|
||||||
cfg *config.Config
|
cfg *config.Config
|
||||||
stackMgr *stacks.Manager
|
stackMgr *stacks.Manager
|
||||||
syncer *catalogsync.Syncer
|
syncer *catalogsync.Syncer
|
||||||
logger *log.Logger
|
cpuCollector *system.CPUCollector
|
||||||
|
backupMgr *backup.Manager
|
||||||
|
logger *log.Logger
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewRouter(cfg *config.Config, stackMgr *stacks.Manager, syncer *catalogsync.Syncer, logger *log.Logger) *Router {
|
func NewRouter(cfg *config.Config, stackMgr *stacks.Manager, syncer *catalogsync.Syncer, cpuCollector *system.CPUCollector, backupMgr *backup.Manager, logger *log.Logger) *Router {
|
||||||
return &Router{cfg: cfg, stackMgr: stackMgr, syncer: syncer, logger: logger}
|
return &Router{cfg: cfg, stackMgr: stackMgr, syncer: syncer, cpuCollector: cpuCollector, backupMgr: backupMgr, logger: logger}
|
||||||
}
|
}
|
||||||
|
|
||||||
type apiResponse struct {
|
type apiResponse struct {
|
||||||
@@ -99,6 +103,14 @@ func (r *Router) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
|||||||
case path == "/system/info" && req.Method == http.MethodGet:
|
case path == "/system/info" && req.Method == http.MethodGet:
|
||||||
r.systemInfo(w, req)
|
r.systemInfo(w, req)
|
||||||
|
|
||||||
|
// GET /api/backup/status
|
||||||
|
case path == "/backup/status" && req.Method == http.MethodGet:
|
||||||
|
r.backupStatus(w, req)
|
||||||
|
|
||||||
|
// POST /api/backup/run
|
||||||
|
case path == "/backup/run" && req.Method == http.MethodPost:
|
||||||
|
r.triggerBackup(w, req)
|
||||||
|
|
||||||
default:
|
default:
|
||||||
writeJSON(w, http.StatusNotFound, apiResponse{OK: false, Error: "endpoint not found"})
|
writeJSON(w, http.StatusNotFound, apiResponse{OK: false, Error: "endpoint not found"})
|
||||||
}
|
}
|
||||||
@@ -309,7 +321,7 @@ func (r *Router) triggerSync(w http.ResponseWriter, _ *http.Request) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (r *Router) systemInfo(w http.ResponseWriter, _ *http.Request) {
|
func (r *Router) systemInfo(w http.ResponseWriter, _ *http.Request) {
|
||||||
info := system.GetInfo(r.cfg.Paths.HDDPath)
|
info := system.GetInfo(r.cfg.Paths.HDDPath, r.cpuCollector)
|
||||||
syncStatus := r.syncer.Status()
|
syncStatus := r.syncer.Status()
|
||||||
data := map[string]interface{}{
|
data := map[string]interface{}{
|
||||||
"system": info,
|
"system": info,
|
||||||
@@ -318,6 +330,69 @@ func (r *Router) systemInfo(w http.ResponseWriter, _ *http.Request) {
|
|||||||
writeJSON(w, http.StatusOK, apiResponse{OK: true, Data: data})
|
writeJSON(w, http.StatusOK, apiResponse{OK: true, Data: data})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- Backup handlers ---
|
||||||
|
|
||||||
|
func (r *Router) backupStatus(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
if r.backupMgr == nil {
|
||||||
|
writeJSON(w, http.StatusOK, apiResponse{OK: true, Data: map[string]interface{}{
|
||||||
|
"enabled": false,
|
||||||
|
}})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
dbDump, backupSt := r.backupMgr.GetStatus()
|
||||||
|
data := map[string]interface{}{
|
||||||
|
"enabled": true,
|
||||||
|
"running": r.backupMgr.IsRunning(),
|
||||||
|
}
|
||||||
|
|
||||||
|
if dbDump != nil {
|
||||||
|
data["db_dump"] = map[string]interface{}{
|
||||||
|
"last_run": dbDump.LastRun,
|
||||||
|
"success": dbDump.Success,
|
||||||
|
"duration": dbDump.Duration.String(),
|
||||||
|
"count": len(dbDump.Results),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if backupSt != nil {
|
||||||
|
backupData := map[string]interface{}{
|
||||||
|
"last_run": backupSt.LastRun,
|
||||||
|
"success": backupSt.Success,
|
||||||
|
"duration": backupSt.Duration.String(),
|
||||||
|
}
|
||||||
|
if backupSt.Snapshot != nil {
|
||||||
|
backupData["snapshot_id"] = backupSt.Snapshot.SnapshotID
|
||||||
|
backupData["files_new"] = backupSt.Snapshot.FilesNew
|
||||||
|
backupData["data_added"] = backupSt.Snapshot.DataAdded
|
||||||
|
}
|
||||||
|
if backupSt.RepoStats != nil {
|
||||||
|
backupData["repo_size"] = backupSt.RepoStats.TotalSize
|
||||||
|
backupData["snapshot_count"] = backupSt.RepoStats.SnapshotCount
|
||||||
|
}
|
||||||
|
data["backup"] = backupData
|
||||||
|
}
|
||||||
|
|
||||||
|
writeJSON(w, http.StatusOK, apiResponse{OK: true, Data: data})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *Router) triggerBackup(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
if r.backupMgr == nil {
|
||||||
|
writeJSON(w, http.StatusBadRequest, apiResponse{OK: false, Error: "Backup not configured"})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.backupMgr.IsRunning() {
|
||||||
|
writeJSON(w, http.StatusConflict, apiResponse{OK: false, Error: "Mentés már folyamatban"})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
r.logger.Println("[API] Manual backup triggered")
|
||||||
|
go r.backupMgr.RunFullBackup(context.Background())
|
||||||
|
|
||||||
|
writeJSON(w, http.StatusOK, apiResponse{OK: true, Message: "Mentés elindítva"})
|
||||||
|
}
|
||||||
|
|
||||||
// --- Helpers ---
|
// --- Helpers ---
|
||||||
|
|
||||||
// hasSuffix reports whether path ends with suffix.
func hasSuffix(path, suffix string) bool {
	matched := strings.HasSuffix(path, suffix)
	return matched
}
|
||||||
@@ -342,4 +417,4 @@ func writeJSON(w http.ResponseWriter, status int, v interface{}) {
|
|||||||
if err := json.NewEncoder(w).Encode(v); err != nil {
|
if err := json.NewEncoder(w).Encode(v); err != nil {
|
||||||
log.Printf("[ERROR] Failed to write JSON response: %v", err)
|
log.Printf("[ERROR] Failed to write JSON response: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,263 @@
|
|||||||
|
package backup
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
|
||||||
|
"gitea.dooplex.hu/admin/felhom-controller/internal/monitor"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Manager orchestrates database dumps and restic backups.
|
||||||
|
type Manager struct {
|
||||||
|
cfg *config.Config
|
||||||
|
restic *ResticManager
|
||||||
|
logger *log.Logger
|
||||||
|
pinger *monitor.Pinger
|
||||||
|
|
||||||
|
mu sync.Mutex
|
||||||
|
lastDBDump *DBDumpStatus
|
||||||
|
lastBackup *BackupStatus
|
||||||
|
running bool
|
||||||
|
}
|
||||||
|
|
||||||
|
// DBDumpStatus holds the last DB dump result.
|
||||||
|
type DBDumpStatus struct {
|
||||||
|
LastRun time.Time
|
||||||
|
Results []DumpResult
|
||||||
|
Success bool
|
||||||
|
Duration time.Duration
|
||||||
|
}
|
||||||
|
|
||||||
|
// BackupStatus holds the last backup result.
|
||||||
|
type BackupStatus struct {
|
||||||
|
LastRun time.Time
|
||||||
|
Snapshot *SnapshotResult
|
||||||
|
Success bool
|
||||||
|
Duration time.Duration
|
||||||
|
RepoStats *RepoStats
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewManager creates a new backup manager.
|
||||||
|
func NewManager(cfg *config.Config, pinger *monitor.Pinger, logger *log.Logger) *Manager {
|
||||||
|
return &Manager{
|
||||||
|
cfg: cfg,
|
||||||
|
restic: NewResticManager(cfg, logger),
|
||||||
|
logger: logger,
|
||||||
|
pinger: pinger,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RunDBDumps discovers and dumps all databases.
|
||||||
|
func (m *Manager) RunDBDumps(ctx context.Context) error {
|
||||||
|
start := time.Now()
|
||||||
|
m.logger.Printf("[INFO] Starting database dump run")
|
||||||
|
|
||||||
|
dbs, err := DiscoverDatabases(ctx, m.logger)
|
||||||
|
if err != nil {
|
||||||
|
m.logger.Printf("[ERROR] Database discovery failed: %v", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(dbs) == 0 {
|
||||||
|
m.logger.Printf("[INFO] No database containers found")
|
||||||
|
m.mu.Lock()
|
||||||
|
m.lastDBDump = &DBDumpStatus{
|
||||||
|
LastRun: time.Now(),
|
||||||
|
Success: true,
|
||||||
|
Duration: time.Since(start),
|
||||||
|
}
|
||||||
|
m.mu.Unlock()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
m.logger.Printf("[INFO] Discovered %d database(s): %s", len(dbs), dbNames(dbs))
|
||||||
|
|
||||||
|
results := DumpAll(ctx, dbs, m.cfg.Paths.DBDumpDir, m.logger)
|
||||||
|
|
||||||
|
// Check results
|
||||||
|
allOK := true
|
||||||
|
var summary []string
|
||||||
|
var totalSize int64
|
||||||
|
for _, r := range results {
|
||||||
|
if r.Error != nil {
|
||||||
|
allOK = false
|
||||||
|
summary = append(summary, fmt.Sprintf("FAIL %s: %v", r.DB.ContainerName, r.Error))
|
||||||
|
m.logger.Printf("[ERROR] DB dump failed for %s: %v", r.DB.ContainerName, r.Error)
|
||||||
|
} else {
|
||||||
|
totalSize += r.Size
|
||||||
|
summary = append(summary, fmt.Sprintf("OK %s (%s)", r.DB.ContainerName, formatBytes(r.Size)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
duration := time.Since(start)
|
||||||
|
m.mu.Lock()
|
||||||
|
m.lastDBDump = &DBDumpStatus{
|
||||||
|
LastRun: time.Now(),
|
||||||
|
Results: results,
|
||||||
|
Success: allOK,
|
||||||
|
Duration: duration,
|
||||||
|
}
|
||||||
|
m.mu.Unlock()
|
||||||
|
|
||||||
|
// Ping healthcheck
|
||||||
|
uuid := m.cfg.Monitoring.PingUUIDs.DBDump
|
||||||
|
body := fmt.Sprintf("DB dump: %d databases, %s total\n%s",
|
||||||
|
len(results), formatBytes(totalSize), strings.Join(summary, "\n"))
|
||||||
|
|
||||||
|
if allOK {
|
||||||
|
m.pinger.Ping(uuid, body)
|
||||||
|
m.logger.Printf("[INFO] DB dump completed: %d databases, %s total (%s)",
|
||||||
|
len(results), formatBytes(totalSize), duration.Round(time.Millisecond))
|
||||||
|
} else {
|
||||||
|
m.pinger.Fail(uuid, body)
|
||||||
|
return fmt.Errorf("some database dumps failed")
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// RunBackup runs a restic backup snapshot.
|
||||||
|
func (m *Manager) RunBackup(ctx context.Context) error {
|
||||||
|
start := time.Now()
|
||||||
|
m.logger.Printf("[INFO] Starting restic backup")
|
||||||
|
|
||||||
|
// Ensure repo is initialized
|
||||||
|
if err := m.restic.EnsureInitialized(); err != nil {
|
||||||
|
m.logger.Printf("[ERROR] Restic init failed: %v", err)
|
||||||
|
m.pinger.Fail(m.cfg.Monitoring.PingUUIDs.Backup, fmt.Sprintf("Restic init failed: %v", err))
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Backup paths
|
||||||
|
paths := []string{
|
||||||
|
m.cfg.Paths.StacksDir,
|
||||||
|
m.cfg.Paths.DBDumpDir,
|
||||||
|
"/opt/docker/felhom-controller/controller.yaml",
|
||||||
|
}
|
||||||
|
tags := []string{"felhom", m.cfg.Customer.ID}
|
||||||
|
|
||||||
|
result, err := m.restic.Snapshot(paths, tags)
|
||||||
|
if err != nil {
|
||||||
|
m.logger.Printf("[ERROR] Restic backup failed: %v", err)
|
||||||
|
m.pinger.Fail(m.cfg.Monitoring.PingUUIDs.Backup, fmt.Sprintf("Backup failed: %v", err))
|
||||||
|
|
||||||
|
m.mu.Lock()
|
||||||
|
m.lastBackup = &BackupStatus{
|
||||||
|
LastRun: time.Now(),
|
||||||
|
Success: false,
|
||||||
|
Duration: time.Since(start),
|
||||||
|
}
|
||||||
|
m.mu.Unlock()
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prune check (weekly — Sunday)
|
||||||
|
if shouldPrune(m.cfg.Backup.PruneSchedule) {
|
||||||
|
m.logger.Printf("[INFO] Running weekly prune")
|
||||||
|
if err := m.restic.Prune(m.cfg.Backup.Retention); err != nil {
|
||||||
|
m.logger.Printf("[WARN] Restic prune failed: %v", err)
|
||||||
|
}
|
||||||
|
if err := m.restic.Check(); err != nil {
|
||||||
|
m.logger.Printf("[WARN] Restic check failed: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get stats
|
||||||
|
stats, _ := m.restic.Stats()
|
||||||
|
|
||||||
|
duration := time.Since(start)
|
||||||
|
m.mu.Lock()
|
||||||
|
m.lastBackup = &BackupStatus{
|
||||||
|
LastRun: time.Now(),
|
||||||
|
Snapshot: result,
|
||||||
|
Success: true,
|
||||||
|
Duration: duration,
|
||||||
|
RepoStats: stats,
|
||||||
|
}
|
||||||
|
m.mu.Unlock()
|
||||||
|
|
||||||
|
body := fmt.Sprintf("Backup OK\nSnapshot: %s\nNew files: %d, Changed: %d\nData added: %s\nDuration: %s",
|
||||||
|
result.SnapshotID, result.FilesNew, result.FilesChanged, result.DataAdded,
|
||||||
|
duration.Round(time.Second))
|
||||||
|
m.pinger.Ping(m.cfg.Monitoring.PingUUIDs.Backup, body)
|
||||||
|
|
||||||
|
m.logger.Printf("[INFO] Restic backup completed: snapshot %s, %d new, %d changed, %s added (%s)",
|
||||||
|
result.SnapshotID, result.FilesNew, result.FilesChanged, result.DataAdded,
|
||||||
|
duration.Round(time.Millisecond))
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// RunFullBackup runs DB dumps followed by restic backup.
|
||||||
|
func (m *Manager) RunFullBackup(ctx context.Context) error {
|
||||||
|
m.mu.Lock()
|
||||||
|
if m.running {
|
||||||
|
m.mu.Unlock()
|
||||||
|
return fmt.Errorf("backup already in progress")
|
||||||
|
}
|
||||||
|
m.running = true
|
||||||
|
m.mu.Unlock()
|
||||||
|
|
||||||
|
defer func() {
|
||||||
|
m.mu.Lock()
|
||||||
|
m.running = false
|
||||||
|
m.mu.Unlock()
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Step 1: DB dumps
|
||||||
|
if err := m.RunDBDumps(ctx); err != nil {
|
||||||
|
m.logger.Printf("[WARN] DB dump had errors, continuing with backup anyway")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 2: Restic backup
|
||||||
|
return m.RunBackup(ctx)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetStatus returns the current backup status.
|
||||||
|
func (m *Manager) GetStatus() (*DBDumpStatus, *BackupStatus) {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
return m.lastDBDump, m.lastBackup
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetRepoStats returns repository statistics.
|
||||||
|
func (m *Manager) GetRepoStats() (*RepoStats, error) {
|
||||||
|
return m.restic.Stats()
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsRunning returns whether a backup is currently in progress.
|
||||||
|
func (m *Manager) IsRunning() bool {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
return m.running
|
||||||
|
}
|
||||||
|
|
||||||
|
// shouldPrune reports whether repository maintenance (prune + check) is due
// for the given schedule setting, evaluated against the current time in the
// Europe/Budapest zone. If that zone cannot be loaded, UTC is used instead.
func shouldPrune(schedule string) bool {
	loc, err := time.LoadLocation("Europe/Budapest")
	if err != nil {
		loc = time.UTC
	}
	return pruneDueAt(schedule, time.Now().In(loc))
}

// pruneDueAt holds the schedule decision without touching the clock: "daily"
// (case-insensitive, surrounding whitespace ignored) is always due; "weekly"
// and any other or empty value are due on Sundays only.
func pruneDueAt(schedule string, now time.Time) bool {
	if strings.EqualFold(strings.TrimSpace(schedule), "daily") {
		return true
	}
	return now.Weekday() == time.Sunday
}
|
||||||
|
|
||||||
|
func dbNames(dbs []DiscoveredDB) string {
|
||||||
|
var names []string
|
||||||
|
for _, db := range dbs {
|
||||||
|
names = append(names, fmt.Sprintf("%s(%s)", db.ContainerName, db.DBType))
|
||||||
|
}
|
||||||
|
return strings.Join(names, ", ")
|
||||||
|
}
|
||||||
@@ -0,0 +1,324 @@
|
|||||||
|
package backup
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// DBType names a database engine the dumper knows how to handle.
type DBType string

// Engines recognised during container discovery.
const (
	// DBTypePostgres is assigned to containers whose image name contains
	// "postgres".
	DBTypePostgres DBType = "postgres"
	// DBTypeMariaDB is assigned to containers whose image name contains
	// "mariadb" or "mysql".
	DBTypeMariaDB DBType = "mariadb"
)
|
||||||
|
|
||||||
|
// DiscoveredDB holds metadata about a running database container.
|
||||||
|
type DiscoveredDB struct {
|
||||||
|
ContainerName string
|
||||||
|
ContainerID string
|
||||||
|
DBType DBType
|
||||||
|
DBUser string
|
||||||
|
DBName string
|
||||||
|
StackName string
|
||||||
|
}
|
||||||
|
|
||||||
|
// DumpResult holds the outcome of a single database dump.
|
||||||
|
type DumpResult struct {
|
||||||
|
DB DiscoveredDB
|
||||||
|
FilePath string
|
||||||
|
Size int64
|
||||||
|
Duration time.Duration
|
||||||
|
Error error
|
||||||
|
}
|
||||||
|
|
||||||
|
// DiscoverDatabases finds running database containers via docker ps.
|
||||||
|
func DiscoverDatabases(ctx context.Context, logger *log.Logger) ([]DiscoveredDB, error) {
|
||||||
|
cmd := exec.CommandContext(ctx, "docker", "ps", "--format", "{{.ID}}\t{{.Names}}\t{{.Image}}", "--filter", "status=running")
|
||||||
|
out, err := cmd.Output()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("docker ps failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var dbs []DiscoveredDB
|
||||||
|
|
||||||
|
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
|
||||||
|
if line == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
parts := strings.SplitN(line, "\t", 3)
|
||||||
|
if len(parts) < 3 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
id, name, image := parts[0], parts[1], strings.ToLower(parts[2])
|
||||||
|
|
||||||
|
var dbType DBType
|
||||||
|
if strings.Contains(image, "postgres") {
|
||||||
|
dbType = DBTypePostgres
|
||||||
|
} else if strings.Contains(image, "mariadb") || strings.Contains(image, "mysql") {
|
||||||
|
dbType = DBTypeMariaDB
|
||||||
|
} else {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
db := DiscoveredDB{
|
||||||
|
ContainerID: id,
|
||||||
|
ContainerName: name,
|
||||||
|
DBType: dbType,
|
||||||
|
StackName: deriveStackName(name),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get env vars from container
|
||||||
|
if err := populateDBEnv(ctx, &db); err != nil {
|
||||||
|
logger.Printf("[WARN] Could not read env vars for %s: %v", name, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
dbs = append(dbs, db)
|
||||||
|
}
|
||||||
|
|
||||||
|
return dbs, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// DumpAll dumps all discovered databases.
|
||||||
|
func DumpAll(ctx context.Context, dbs []DiscoveredDB, dumpDir string, logger *log.Logger) []DumpResult {
|
||||||
|
// Clean up old .tmp files (older than 1 hour)
|
||||||
|
cleanupTmpFiles(dumpDir, logger)
|
||||||
|
|
||||||
|
var results []DumpResult
|
||||||
|
for _, db := range dbs {
|
||||||
|
result := DumpOne(ctx, db, dumpDir, logger)
|
||||||
|
results = append(results, result)
|
||||||
|
}
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
// DumpOne dumps a single database.
|
||||||
|
func DumpOne(ctx context.Context, db DiscoveredDB, dumpDir string, logger *log.Logger) DumpResult {
|
||||||
|
start := time.Now()
|
||||||
|
result := DumpResult{DB: db}
|
||||||
|
|
||||||
|
// Ensure dump directory exists
|
||||||
|
if err := os.MkdirAll(dumpDir, 0755); err != nil {
|
||||||
|
result.Error = fmt.Errorf("creating dump dir: %w", err)
|
||||||
|
result.Duration = time.Since(start)
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
filename := fmt.Sprintf("%s-%s.sql", db.StackName, db.DBType)
|
||||||
|
tmpPath := filepath.Join(dumpDir, filename+".tmp")
|
||||||
|
finalPath := filepath.Join(dumpDir, filename)
|
||||||
|
|
||||||
|
// 5-minute timeout per dump
|
||||||
|
dumpCtx, cancel := context.WithTimeout(ctx, 5*time.Minute)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
// Verify container is still running
|
||||||
|
checkCmd := exec.CommandContext(dumpCtx, "docker", "inspect", "--format", "{{.State.Running}}", db.ContainerID)
|
||||||
|
checkOut, err := checkCmd.Output()
|
||||||
|
if err != nil || strings.TrimSpace(string(checkOut)) != "true" {
|
||||||
|
result.Error = fmt.Errorf("container %s no longer running", db.ContainerName)
|
||||||
|
result.Duration = time.Since(start)
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build dump command
|
||||||
|
var cmd *exec.Cmd
|
||||||
|
switch db.DBType {
|
||||||
|
case DBTypePostgres:
|
||||||
|
cmd = exec.CommandContext(dumpCtx, "docker", "exec", db.ContainerID,
|
||||||
|
"pg_dump", "-U", db.DBUser, "-d", db.DBName,
|
||||||
|
"--clean", "--if-exists", "--no-owner", "--no-privileges")
|
||||||
|
case DBTypeMariaDB:
|
||||||
|
// Get root password from container env
|
||||||
|
password := getMariaDBPassword(dumpCtx, db.ContainerID)
|
||||||
|
if password == "" {
|
||||||
|
result.Error = fmt.Errorf("could not determine MariaDB root password for %s", db.ContainerName)
|
||||||
|
result.Duration = time.Since(start)
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
cmd = exec.CommandContext(dumpCtx, "docker", "exec", db.ContainerID,
|
||||||
|
"mariadb-dump", "-u", "root", "-p"+password,
|
||||||
|
"--single-transaction", "--routines", "--triggers", db.DBName)
|
||||||
|
default:
|
||||||
|
result.Error = fmt.Errorf("unsupported DB type: %s", db.DBType)
|
||||||
|
result.Duration = time.Since(start)
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write output to tmp file
|
||||||
|
tmpFile, err := os.Create(tmpPath)
|
||||||
|
if err != nil {
|
||||||
|
result.Error = fmt.Errorf("creating tmp file: %w", err)
|
||||||
|
result.Duration = time.Since(start)
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd.Stdout = tmpFile
|
||||||
|
var stderr strings.Builder
|
||||||
|
cmd.Stderr = &stderr
|
||||||
|
|
||||||
|
err = cmd.Run()
|
||||||
|
tmpFile.Close()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
os.Remove(tmpPath)
|
||||||
|
errMsg := stderr.String()
|
||||||
|
if len(errMsg) > 200 {
|
||||||
|
errMsg = errMsg[:200]
|
||||||
|
}
|
||||||
|
result.Error = fmt.Errorf("dump failed: %v — %s", err, errMsg)
|
||||||
|
result.Duration = time.Since(start)
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check file size
|
||||||
|
stat, err := os.Stat(tmpPath)
|
||||||
|
if err != nil || stat.Size() == 0 {
|
||||||
|
os.Remove(tmpPath)
|
||||||
|
result.Error = fmt.Errorf("dump produced empty file for %s", db.ContainerName)
|
||||||
|
result.Duration = time.Since(start)
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rename tmp to final
|
||||||
|
if err := os.Rename(tmpPath, finalPath); err != nil {
|
||||||
|
os.Remove(tmpPath)
|
||||||
|
result.Error = fmt.Errorf("renaming dump file: %w", err)
|
||||||
|
result.Duration = time.Since(start)
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
result.FilePath = finalPath
|
||||||
|
result.Size = stat.Size()
|
||||||
|
result.Duration = time.Since(start)
|
||||||
|
|
||||||
|
logger.Printf("[INFO] DB dump: %s → %s (%s, %s)", db.ContainerName, filename,
|
||||||
|
formatBytes(stat.Size()), result.Duration.Round(time.Millisecond))
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
func populateDBEnv(ctx context.Context, db *DiscoveredDB) error {
|
||||||
|
cmd := exec.CommandContext(ctx, "docker", "inspect", db.ContainerID,
|
||||||
|
"--format", "{{range .Config.Env}}{{println .}}{{end}}")
|
||||||
|
out, err := cmd.Output()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
env := make(map[string]string)
|
||||||
|
for _, line := range strings.Split(string(out), "\n") {
|
||||||
|
if idx := strings.IndexByte(line, '='); idx > 0 {
|
||||||
|
env[line[:idx]] = line[idx+1:]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch db.DBType {
|
||||||
|
case DBTypePostgres:
|
||||||
|
db.DBUser = env["POSTGRES_USER"]
|
||||||
|
if db.DBUser == "" {
|
||||||
|
db.DBUser = "postgres"
|
||||||
|
}
|
||||||
|
db.DBName = env["POSTGRES_DB"]
|
||||||
|
if db.DBName == "" {
|
||||||
|
db.DBName = db.DBUser
|
||||||
|
}
|
||||||
|
case DBTypeMariaDB:
|
||||||
|
db.DBName = env["MYSQL_DATABASE"]
|
||||||
|
if db.DBName == "" {
|
||||||
|
db.DBName = env["MARIADB_DATABASE"]
|
||||||
|
}
|
||||||
|
if db.DBName == "" {
|
||||||
|
db.DBName = "mysql" // fallback to dump all
|
||||||
|
}
|
||||||
|
db.DBUser = "root"
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// getMariaDBPassword reads the root password of a MariaDB/MySQL container from
// its environment, as listed by `docker inspect`.
//
// The first line starting with MYSQL_ROOT_PASSWORD= or MARIADB_ROOT_PASSWORD=
// wins and its value is returned as-is. An empty string is returned when
// `docker inspect` fails or neither variable is present.
func getMariaDBPassword(ctx context.Context, containerID string) string {
	inspect := exec.CommandContext(ctx, "docker", "inspect", containerID,
		"--format", "{{range .Config.Env}}{{println .}}{{end}}")
	raw, err := inspect.Output()
	if err != nil {
		return ""
	}

	prefixes := [...]string{"MYSQL_ROOT_PASSWORD=", "MARIADB_ROOT_PASSWORD="}
	for _, entry := range strings.Split(string(raw), "\n") {
		for _, prefix := range prefixes {
			if strings.HasPrefix(entry, prefix) {
				return entry[len(prefix):]
			}
		}
	}
	return ""
}
|
||||||
|
|
||||||
|
// deriveStackName strips a trailing "-<suffix>" from containerName when the
// last dash-separated segment (compared case-insensitively) is one of the
// known service suffixes: postgres, db, mariadb, mysql, database, redis, cache.
// Names without a dash, or whose last segment is not a known suffix, are
// returned unchanged.
func deriveStackName(containerName string) string {
	dash := strings.LastIndexByte(containerName, '-')
	if dash < 0 {
		return containerName
	}

	switch strings.ToLower(containerName[dash+1:]) {
	case "postgres", "db", "mariadb", "mysql", "database", "redis", "cache":
		return containerName[:dash]
	}
	return containerName
}
|
||||||
|
|
||||||
|
// cleanupTmpFiles removes stale "*.tmp" entries from dumpDir, i.e. entries
// whose modification time is more than one hour in the past.
//
// The function is best-effort: an unreadable directory makes it return
// silently and an entry whose metadata cannot be read is skipped. A removal
// that fails is logged as a warning instead of being reported as cleaned up.
func cleanupTmpFiles(dumpDir string, logger *log.Logger) {
	entries, err := os.ReadDir(dumpDir)
	if err != nil {
		return
	}

	cutoff := time.Now().Add(-1 * time.Hour)
	for _, e := range entries {
		if !strings.HasSuffix(e.Name(), ".tmp") {
			continue
		}
		info, err := e.Info()
		if err != nil {
			continue
		}
		if !info.ModTime().Before(cutoff) {
			continue
		}

		if err := os.Remove(filepath.Join(dumpDir, e.Name())); err != nil {
			logger.Printf("[WARN] Could not remove stale tmp file %s: %v", e.Name(), err)
			continue
		}
		logger.Printf("[INFO] Cleaned up stale tmp file: %s", e.Name())
	}
}
|
||||||
|
|
||||||
|
// formatBytes renders a byte count using binary (1024-based) units.
// Values of at least 1 KB are shown with one decimal in the largest fitting
// unit among KB, MB and GB; anything smaller (including negative values) is
// printed as a plain integer followed by "B".
func formatBytes(b int64) string {
	units := [...]struct {
		size  int64
		label string
	}{
		{1 << 30, "GB"},
		{1 << 20, "MB"},
		{1 << 10, "KB"},
	}

	for _, u := range units {
		if b >= u.size {
			return fmt.Sprintf("%.1f %s", float64(b)/float64(u.size), u.label)
		}
	}
	return fmt.Sprintf("%d B", b)
}
|
||||||
@@ -0,0 +1,316 @@
|
|||||||
|
package backup
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"crypto/rand"
|
||||||
|
"encoding/base64"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ResticManager runs restic CLI operations against one repository.
type ResticManager struct {
	// Passed to every restic invocation through the environment.
	repoPath     string // RESTIC_REPOSITORY
	passwordFile string // RESTIC_PASSWORD_FILE

	logger *log.Logger

	customerID string // used as the restic --host value for snapshots
	cacheDir   string // RESTIC_CACHE_DIR
}
|
||||||
|
|
||||||
|
// SnapshotResult describes a completed restic backup run.
type SnapshotResult struct {
	SnapshotID string // snapshot_id from restic's JSON summary

	// File counters taken from restic's JSON summary.
	FilesNew, FilesChanged int

	DataAdded string        // human-readable size of the data added
	Duration  time.Duration // wall-clock time of the backup command
}
|
||||||
|
|
||||||
|
// SnapshotInfo is one entry of restic's JSON snapshot listing.
// Only the fields tagged below are decoded.
type SnapshotInfo struct {
	ID    string    `json:"short_id"` // abbreviated snapshot ID
	Time  time.Time `json:"time"`     // snapshot timestamp
	Paths []string  `json:"paths"`    // paths contained in the snapshot
	Tags  []string  `json:"tags"`     // tags attached to the snapshot
}
|
||||||
|
|
||||||
|
// RepoStats holds repository statistics.
|
||||||
|
type RepoStats struct {
|
||||||
|
TotalSize string
|
||||||
|
SnapshotCount int
|
||||||
|
LatestSnapshot *SnapshotInfo
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewResticManager creates a new restic manager.
|
||||||
|
func NewResticManager(cfg *config.Config, logger *log.Logger) *ResticManager {
|
||||||
|
return &ResticManager{
|
||||||
|
repoPath: cfg.Backup.ResticRepo,
|
||||||
|
passwordFile: cfg.Backup.ResticPasswordFile,
|
||||||
|
logger: logger,
|
||||||
|
customerID: cfg.Customer.ID,
|
||||||
|
cacheDir: filepath.Join(cfg.Paths.DataDir, "restic-cache"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// EnsureInitialized checks if the restic repo exists and initializes it if not.
|
||||||
|
// Also auto-generates the password file if missing.
|
||||||
|
func (r *ResticManager) EnsureInitialized() error {
|
||||||
|
// Ensure password file exists
|
||||||
|
if _, err := os.Stat(r.passwordFile); os.IsNotExist(err) {
|
||||||
|
if err := r.generatePassword(); err != nil {
|
||||||
|
return fmt.Errorf("generating restic password: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure cache dir exists
|
||||||
|
os.MkdirAll(r.cacheDir, 0700)
|
||||||
|
|
||||||
|
// Check if repo is already initialized
|
||||||
|
configPath := filepath.Join(r.repoPath, "config")
|
||||||
|
if _, err := os.Stat(configPath); err == nil {
|
||||||
|
r.logger.Printf("[INFO] Restic repo already initialized at %s", r.repoPath)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure repo directory exists
|
||||||
|
if err := os.MkdirAll(r.repoPath, 0700); err != nil {
|
||||||
|
return fmt.Errorf("creating repo dir: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize repo
|
||||||
|
r.logger.Printf("[INFO] Initializing restic repository at %s", r.repoPath)
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
cmd := r.command(ctx, "init")
|
||||||
|
out, err := cmd.CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("restic init failed: %v — %s", err, truncate(string(out), 200))
|
||||||
|
}
|
||||||
|
|
||||||
|
r.logger.Printf("[INFO] Restic repository initialized successfully")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Snapshot creates a new backup snapshot of the given paths.
|
||||||
|
func (r *ResticManager) Snapshot(paths []string, tags []string) (*SnapshotResult, error) {
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
start := time.Now()
|
||||||
|
|
||||||
|
args := []string{"backup", "--json"}
|
||||||
|
for _, tag := range tags {
|
||||||
|
args = append(args, "--tag", tag)
|
||||||
|
}
|
||||||
|
args = append(args, "--host", r.customerID)
|
||||||
|
|
||||||
|
// Only include paths that exist
|
||||||
|
var existingPaths []string
|
||||||
|
for _, p := range paths {
|
||||||
|
if _, err := os.Stat(p); err == nil {
|
||||||
|
existingPaths = append(existingPaths, p)
|
||||||
|
} else {
|
||||||
|
r.logger.Printf("[WARN] Backup path does not exist, skipping: %s", p)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(existingPaths) == 0 {
|
||||||
|
return nil, fmt.Errorf("no backup paths exist")
|
||||||
|
}
|
||||||
|
args = append(args, existingPaths...)
|
||||||
|
|
||||||
|
cmd := r.command(ctx, args...)
|
||||||
|
out, err := cmd.Output()
|
||||||
|
if err != nil {
|
||||||
|
// Check for stale lock
|
||||||
|
errStr := string(out)
|
||||||
|
if strings.Contains(errStr, "lock") || strings.Contains(errStr, "locked") {
|
||||||
|
r.logger.Printf("[WARN] Restic repo locked — attempting unlock")
|
||||||
|
unlockCmd := r.command(ctx, "unlock")
|
||||||
|
unlockCmd.Run()
|
||||||
|
// Retry once
|
||||||
|
cmd = r.command(ctx, args...)
|
||||||
|
out, err = cmd.Output()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("restic backup failed after unlock: %v", err)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return nil, fmt.Errorf("restic backup failed: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result := &SnapshotResult{
|
||||||
|
Duration: time.Since(start),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse JSON output — look for the summary line
|
||||||
|
for _, line := range strings.Split(string(out), "\n") {
|
||||||
|
line = strings.TrimSpace(line)
|
||||||
|
if line == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
var msg struct {
|
||||||
|
MessageType string `json:"message_type"`
|
||||||
|
FilesNew int `json:"files_new"`
|
||||||
|
FilesChanged int `json:"files_changed"`
|
||||||
|
DataAdded int64 `json:"data_added"`
|
||||||
|
SnapshotID string `json:"snapshot_id"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal([]byte(line), &msg); err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if msg.MessageType == "summary" {
|
||||||
|
result.SnapshotID = msg.SnapshotID
|
||||||
|
result.FilesNew = msg.FilesNew
|
||||||
|
result.FilesChanged = msg.FilesChanged
|
||||||
|
result.DataAdded = formatBytes(msg.DataAdded)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prune removes old snapshots according to retention policy.
|
||||||
|
func (r *ResticManager) Prune(retention config.RetentionConfig) error {
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
args := []string{
|
||||||
|
"forget",
|
||||||
|
"--keep-daily", fmt.Sprintf("%d", retention.KeepDaily),
|
||||||
|
"--keep-weekly", fmt.Sprintf("%d", retention.KeepWeekly),
|
||||||
|
"--keep-monthly", fmt.Sprintf("%d", retention.KeepMonthly),
|
||||||
|
"--prune",
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd := r.command(ctx, args...)
|
||||||
|
out, err := cmd.CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("restic forget/prune failed: %v — %s", err, truncate(string(out), 200))
|
||||||
|
}
|
||||||
|
|
||||||
|
r.logger.Printf("[INFO] Restic prune completed")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check verifies repository integrity.
|
||||||
|
func (r *ResticManager) Check() error {
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
cmd := r.command(ctx, "check")
|
||||||
|
out, err := cmd.CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("restic check failed: %v — %s", err, truncate(string(out), 200))
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// LatestSnapshot returns the most recent snapshot info.
|
||||||
|
func (r *ResticManager) LatestSnapshot() (*SnapshotInfo, error) {
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
cmd := r.command(ctx, "snapshots", "--latest", "1", "--json")
|
||||||
|
out, err := cmd.Output()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("restic snapshots failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var snapshots []SnapshotInfo
|
||||||
|
if err := json.Unmarshal(out, &snapshots); err != nil {
|
||||||
|
return nil, fmt.Errorf("parsing snapshot JSON: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(snapshots) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return &snapshots[0], nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stats returns repository statistics.
|
||||||
|
func (r *ResticManager) Stats() (*RepoStats, error) {
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
stats := &RepoStats{}
|
||||||
|
|
||||||
|
// Get repo size
|
||||||
|
cmd := r.command(ctx, "stats", "--json")
|
||||||
|
out, err := cmd.Output()
|
||||||
|
if err == nil {
|
||||||
|
var raw struct {
|
||||||
|
TotalSize uint64 `json:"total_size"`
|
||||||
|
}
|
||||||
|
if json.Unmarshal(out, &raw) == nil {
|
||||||
|
stats.TotalSize = formatBytes(int64(raw.TotalSize))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count snapshots
|
||||||
|
cmd = r.command(ctx, "snapshots", "--json")
|
||||||
|
out, err = cmd.Output()
|
||||||
|
if err == nil {
|
||||||
|
var snapshots []SnapshotInfo
|
||||||
|
if json.Unmarshal(out, &snapshots) == nil {
|
||||||
|
stats.SnapshotCount = len(snapshots)
|
||||||
|
if len(snapshots) > 0 {
|
||||||
|
latest := snapshots[len(snapshots)-1]
|
||||||
|
stats.LatestSnapshot = &latest
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return stats, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *ResticManager) command(ctx context.Context, args ...string) *exec.Cmd {
|
||||||
|
cmd := exec.CommandContext(ctx, "restic", args...)
|
||||||
|
cmd.Env = append(os.Environ(),
|
||||||
|
"RESTIC_REPOSITORY="+r.repoPath,
|
||||||
|
"RESTIC_PASSWORD_FILE="+r.passwordFile,
|
||||||
|
"RESTIC_CACHE_DIR="+r.cacheDir,
|
||||||
|
)
|
||||||
|
return cmd
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *ResticManager) generatePassword() error {
|
||||||
|
// Ensure directory exists
|
||||||
|
dir := filepath.Dir(r.passwordFile)
|
||||||
|
if err := os.MkdirAll(dir, 0700); err != nil {
|
||||||
|
return fmt.Errorf("creating password dir: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate 32 random bytes, base64url-encode
|
||||||
|
b := make([]byte, 32)
|
||||||
|
if _, err := rand.Read(b); err != nil {
|
||||||
|
return fmt.Errorf("generating random bytes: %w", err)
|
||||||
|
}
|
||||||
|
password := base64.URLEncoding.EncodeToString(b)
|
||||||
|
|
||||||
|
if err := os.WriteFile(r.passwordFile, []byte(password), 0600); err != nil {
|
||||||
|
return fmt.Errorf("writing password file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
r.logger.Printf("[INFO] Generated new restic repository password at %s", r.passwordFile)
|
||||||
|
r.logger.Printf("[WARN] Save this password externally — losing it means losing access to ALL backups")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// truncate shortens s to at most maxLen bytes and appends "..." when anything
// was cut off; strings that already fit are returned unchanged.
//
// The cut never splits a multi-byte UTF-8 sequence: it backs up to the start
// of the rune it would otherwise land inside. A negative maxLen is treated
// as 0.
func truncate(s string, maxLen int) string {
	if len(s) <= maxLen {
		return s
	}
	if maxLen < 0 {
		maxLen = 0
	}
	// UTF-8 continuation bytes have the bit pattern 10xxxxxx; step back while
	// the first dropped byte is one, so the kept prefix ends on a rune boundary.
	for maxLen > 0 && s[maxLen]&0xC0 == 0x80 {
		maxLen--
	}
	return s[:maxLen] + "..."
}
|
||||||
@@ -89,11 +89,12 @@ type RetentionConfig struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type MonitoringConfig struct {
|
type MonitoringConfig struct {
|
||||||
Enabled bool `yaml:"enabled"`
|
Enabled bool `yaml:"enabled"`
|
||||||
HealthchecksBase string `yaml:"healthchecks_base"`
|
HealthchecksBase string `yaml:"healthchecks_base"`
|
||||||
PingUUIDs PingUUIDsConfig `yaml:"ping_uuids"`
|
PingUUIDs PingUUIDsConfig `yaml:"ping_uuids"`
|
||||||
HealthCheckSchedule string `yaml:"health_check_schedule"`
|
HealthCheckSchedule string `yaml:"health_check_schedule"`
|
||||||
Thresholds ThresholdsConfig `yaml:"thresholds"`
|
SystemHealthInterval string `yaml:"system_health_interval"`
|
||||||
|
Thresholds ThresholdsConfig `yaml:"thresholds"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type PingUUIDsConfig struct {
|
type PingUUIDsConfig struct {
|
||||||
@@ -187,8 +188,10 @@ func applyDefaults(cfg *Config) {
|
|||||||
di(&cfg.Backup.Retention.KeepDaily, 7)
|
di(&cfg.Backup.Retention.KeepDaily, 7)
|
||||||
di(&cfg.Backup.Retention.KeepWeekly, 4)
|
di(&cfg.Backup.Retention.KeepWeekly, 4)
|
||||||
di(&cfg.Backup.Retention.KeepMonthly, 6)
|
di(&cfg.Backup.Retention.KeepMonthly, 6)
|
||||||
|
d(&cfg.Backup.ResticPasswordFile, "/opt/docker/felhom-controller/data/restic-password")
|
||||||
d(&cfg.Monitoring.HealthchecksBase, "https://status.felhom.eu")
|
d(&cfg.Monitoring.HealthchecksBase, "https://status.felhom.eu")
|
||||||
d(&cfg.Monitoring.HealthCheckSchedule, "06:00")
|
d(&cfg.Monitoring.HealthCheckSchedule, "06:00")
|
||||||
|
d(&cfg.Monitoring.SystemHealthInterval, "5m")
|
||||||
di(&cfg.Monitoring.Thresholds.DiskWarnPercent, 80)
|
di(&cfg.Monitoring.Thresholds.DiskWarnPercent, 80)
|
||||||
di(&cfg.Monitoring.Thresholds.DiskCritPercent, 90)
|
di(&cfg.Monitoring.Thresholds.DiskCritPercent, 90)
|
||||||
di(&cfg.Monitoring.Thresholds.BackupMaxAgeHours, 36)
|
di(&cfg.Monitoring.Thresholds.BackupMaxAgeHours, 36)
|
||||||
@@ -217,6 +220,7 @@ func applyEnvOverrides(cfg *Config) {
|
|||||||
envStr("FELHOM_PATHS_STACKS_DIR", &cfg.Paths.StacksDir)
|
envStr("FELHOM_PATHS_STACKS_DIR", &cfg.Paths.StacksDir)
|
||||||
envStr("FELHOM_PATHS_HDD_PATH", &cfg.Paths.HDDPath)
|
envStr("FELHOM_PATHS_HDD_PATH", &cfg.Paths.HDDPath)
|
||||||
envStr("FELHOM_LOGGING_LEVEL", &cfg.Logging.Level)
|
envStr("FELHOM_LOGGING_LEVEL", &cfg.Logging.Level)
|
||||||
|
envStr("FELHOM_MONITORING_SYSTEM_HEALTH_INTERVAL", &cfg.Monitoring.SystemHealthInterval)
|
||||||
}
|
}
|
||||||
|
|
||||||
func validate(cfg *Config) error {
|
func validate(cfg *Config) error {
|
||||||
|
|||||||
@@ -0,0 +1,160 @@
|
|||||||
|
package monitor
|
||||||
|
|
||||||
|
import (
	"context"
	"fmt"
	"os/exec"
	"strings"
	"time"

	"gitea.dooplex.hu/admin/felhom-controller/internal/config"
	"gitea.dooplex.hu/admin/felhom-controller/internal/system"
)
|
||||||
|
|
||||||
|
// HealthReport is the outcome of one system health check run.
type HealthReport struct {
	// Status is "ok", "warn" or "fail".
	Status string

	// Findings, grouped by severity.
	Issues   []string // critical problems
	Warnings []string // non-critical warnings
	Info     []string // informational items

	// Timestamp is when the report was created.
	Timestamp time.Time
}
|
||||||
|
|
||||||
|
// RunHealthCheck runs system checks and returns a diagnostic report.
|
||||||
|
func RunHealthCheck(cfg *config.Config, cpuCollector *system.CPUCollector) *HealthReport {
|
||||||
|
report := &HealthReport{
|
||||||
|
Status: "ok",
|
||||||
|
Timestamp: time.Now(),
|
||||||
|
}
|
||||||
|
|
||||||
|
sysInfo := system.GetInfo(cfg.Paths.HDDPath, cpuCollector)
|
||||||
|
|
||||||
|
// 1. Disk usage (SSD)
|
||||||
|
if sysInfo.DiskPercent > 0 {
|
||||||
|
if sysInfo.DiskPercent >= float64(cfg.Monitoring.Thresholds.DiskCritPercent) {
|
||||||
|
report.Issues = append(report.Issues, fmt.Sprintf("SSD disk usage critical: %.0f%%", sysInfo.DiskPercent))
|
||||||
|
} else if sysInfo.DiskPercent >= float64(cfg.Monitoring.Thresholds.DiskWarnPercent) {
|
||||||
|
report.Warnings = append(report.Warnings, fmt.Sprintf("SSD disk usage high: %.0f%%", sysInfo.DiskPercent))
|
||||||
|
} else {
|
||||||
|
report.Info = append(report.Info, fmt.Sprintf("SSD: %.0f%% used", sysInfo.DiskPercent))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// HDD disk usage
|
||||||
|
if sysInfo.HDDConfigured && sysInfo.HDDPercent > 0 {
|
||||||
|
if sysInfo.HDDPercent >= float64(cfg.Monitoring.Thresholds.DiskCritPercent) {
|
||||||
|
report.Issues = append(report.Issues, fmt.Sprintf("HDD disk usage critical: %.0f%%", sysInfo.HDDPercent))
|
||||||
|
} else if sysInfo.HDDPercent >= float64(cfg.Monitoring.Thresholds.DiskWarnPercent) {
|
||||||
|
report.Warnings = append(report.Warnings, fmt.Sprintf("HDD disk usage high: %.0f%%", sysInfo.HDDPercent))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. Memory usage
|
||||||
|
if sysInfo.MemPercent > 0 {
|
||||||
|
if sysInfo.MemPercent >= float64(cfg.Monitoring.Thresholds.MemoryWarnPercent) {
|
||||||
|
report.Warnings = append(report.Warnings, fmt.Sprintf("Memory usage high: %.0f%%", sysInfo.MemPercent))
|
||||||
|
} else {
|
||||||
|
report.Info = append(report.Info, fmt.Sprintf("Memory: %.0f%% used", sysInfo.MemPercent))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. CPU usage
|
||||||
|
if sysInfo.CPUPercent > 0 {
|
||||||
|
if sysInfo.CPUPercent >= float64(cfg.Monitoring.Thresholds.CPUWarnPercent) {
|
||||||
|
report.Warnings = append(report.Warnings, fmt.Sprintf("CPU usage high: %.0f%%", sysInfo.CPUPercent))
|
||||||
|
} else {
|
||||||
|
report.Info = append(report.Info, fmt.Sprintf("CPU: %.0f%%", sysInfo.CPUPercent))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. Temperature
|
||||||
|
if sysInfo.TemperatureCelsius > 0 {
|
||||||
|
if sysInfo.TemperatureCelsius >= float64(cfg.Monitoring.Thresholds.TemperatureWarnCelsius) {
|
||||||
|
report.Warnings = append(report.Warnings, fmt.Sprintf("Temperature high: %.0f°C (%s)", sysInfo.TemperatureCelsius, sysInfo.TemperatureSource))
|
||||||
|
} else {
|
||||||
|
report.Info = append(report.Info, fmt.Sprintf("Temperature: %.0f°C", sysInfo.TemperatureCelsius))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 5. Docker health
|
||||||
|
if err := checkDocker(); err != nil {
|
||||||
|
report.Issues = append(report.Issues, fmt.Sprintf("Docker: %v", err))
|
||||||
|
} else {
|
||||||
|
report.Info = append(report.Info, "Docker: reachable")
|
||||||
|
}
|
||||||
|
|
||||||
|
// 6. Protected containers
|
||||||
|
missingProtected := checkProtectedContainers(cfg.Stacks.Protected)
|
||||||
|
for _, name := range missingProtected {
|
||||||
|
report.Issues = append(report.Issues, fmt.Sprintf("Protected container not running: %s", name))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Determine status
|
||||||
|
if len(report.Issues) > 0 {
|
||||||
|
report.Status = "fail"
|
||||||
|
} else if len(report.Warnings) > 0 {
|
||||||
|
report.Status = "warn"
|
||||||
|
}
|
||||||
|
|
||||||
|
return report
|
||||||
|
}
|
||||||
|
|
||||||
|
// FormatMessage returns a human-readable summary for healthcheck ping body.
|
||||||
|
func (r *HealthReport) FormatMessage() string {
|
||||||
|
var sb strings.Builder
|
||||||
|
|
||||||
|
sb.WriteString(fmt.Sprintf("Status: %s\n", strings.ToUpper(r.Status)))
|
||||||
|
sb.WriteString(fmt.Sprintf("Time: %s\n\n", r.Timestamp.Format("2006-01-02 15:04:05")))
|
||||||
|
|
||||||
|
if len(r.Issues) > 0 {
|
||||||
|
sb.WriteString("ISSUES:\n")
|
||||||
|
for _, issue := range r.Issues {
|
||||||
|
sb.WriteString(" - " + issue + "\n")
|
||||||
|
}
|
||||||
|
sb.WriteString("\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(r.Warnings) > 0 {
|
||||||
|
sb.WriteString("WARNINGS:\n")
|
||||||
|
for _, w := range r.Warnings {
|
||||||
|
sb.WriteString(" - " + w + "\n")
|
||||||
|
}
|
||||||
|
sb.WriteString("\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(r.Info) > 0 {
|
||||||
|
sb.WriteString("INFO:\n")
|
||||||
|
for _, info := range r.Info {
|
||||||
|
sb.WriteString(" - " + info + "\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return sb.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// checkDocker verifies that the Docker daemon answers `docker info`.
//
// The call is bounded by a 15-second timeout so that a hung daemon is reported
// as a failure instead of blocking the health check indefinitely. It returns
// nil when a non-empty server version was reported.
func checkDocker() error {
	const timeout = 15 * time.Second

	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	out, err := exec.CommandContext(ctx, "docker", "info", "--format", "{{.ServerVersion}}").Output()
	if err != nil {
		if ctx.Err() != nil {
			return fmt.Errorf("docker not reachable: timed out after %s", timeout)
		}
		return fmt.Errorf("docker not reachable: %w", err)
	}
	if strings.TrimSpace(string(out)) == "" {
		return fmt.Errorf("docker returned empty version")
	}
	return nil
}
|
||||||
|
|
||||||
|
// checkProtectedContainers returns the names from protected that are not
// running, in input order. A container counts as not running when
// `docker inspect` fails for it or reports a State.Running value other than
// "true". The result is nil when every container is running or the list is
// empty.
func checkProtectedContainers(protected []string) []string {
	var missing []string
	for _, name := range protected {
		if !containerRunning(name) {
			missing = append(missing, name)
		}
	}
	return missing
}

// containerRunning reports whether `docker inspect` says the named container
// is running. Each lookup has its own 15-second timeout so a hung daemon
// cannot stall the health check.
func containerRunning(name string) bool {
	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()

	out, err := exec.CommandContext(ctx, "docker", "inspect", "--format", "{{.State.Running}}", name).Output()
	return err == nil && strings.TrimSpace(string(out)) == "true"
}
|
||||||
@@ -0,0 +1,92 @@
|
|||||||
|
package monitor
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Pinger sends health check pings to a Healthchecks.io-compatible server.
type Pinger struct {
	baseURL    string       // server base URL without trailing slash
	httpClient *http.Client // client used for every ping
	logger     *log.Logger  // receives a warning when a ping ultimately fails
	enabled    bool         // when false, every ping is a no-op
}
|
||||||
|
|
||||||
|
// NewPinger creates a new Pinger from monitoring config.
|
||||||
|
func NewPinger(cfg *config.MonitoringConfig, logger *log.Logger) *Pinger {
|
||||||
|
return &Pinger{
|
||||||
|
baseURL: strings.TrimRight(cfg.HealthchecksBase, "/"),
|
||||||
|
httpClient: &http.Client{
|
||||||
|
Timeout: 10 * time.Second,
|
||||||
|
},
|
||||||
|
logger: logger,
|
||||||
|
enabled: cfg.Enabled,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ping sends a success signal with optional diagnostic body.
|
||||||
|
func (p *Pinger) Ping(uuid string, body string) error {
|
||||||
|
return p.send(uuid, "", body)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fail sends a failure signal with diagnostic body.
|
||||||
|
func (p *Pinger) Fail(uuid string, body string) error {
|
||||||
|
return p.send(uuid, "/fail", body)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start sends a "job started" signal (for duration tracking).
|
||||||
|
func (p *Pinger) Start(uuid string) error {
|
||||||
|
return p.send(uuid, "/start", "")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Pinger) send(uuid, suffix, body string) error {
|
||||||
|
if !p.enabled {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if uuid == "" || strings.HasPrefix(uuid, "CHANGEME") {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
url := fmt.Sprintf("%s/ping/%s%s", p.baseURL, uuid, suffix)
|
||||||
|
|
||||||
|
var lastErr error
|
||||||
|
for attempt := 0; attempt < 3; attempt++ {
|
||||||
|
if attempt > 0 {
|
||||||
|
time.Sleep(2 * time.Second)
|
||||||
|
}
|
||||||
|
|
||||||
|
var bodyReader io.Reader
|
||||||
|
if body != "" {
|
||||||
|
bodyReader = strings.NewReader(body)
|
||||||
|
}
|
||||||
|
|
||||||
|
req, err := http.NewRequest(http.MethodPost, url, bodyReader)
|
||||||
|
if err != nil {
|
||||||
|
lastErr = err
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := p.httpClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
lastErr = err
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
lastErr = fmt.Errorf("HTTP %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
p.logger.Printf("[WARN] Health ping failed after 3 attempts (%s): %v", uuid, lastErr)
|
||||||
|
return nil // Never let ping failures affect the caller
|
||||||
|
}
|
||||||
@@ -0,0 +1,251 @@
|
|||||||
|
package scheduler
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// JobFunc is the function signature for scheduler jobs. It receives a context
// and returns an error describing why the run failed, or nil on success.
type JobFunc func(ctx context.Context) error
|
||||||
|
|
||||||
|
// Job represents a scheduled task.
|
||||||
|
type Job struct {
|
||||||
|
Name string
|
||||||
|
Fn JobFunc
|
||||||
|
Interval time.Duration // for periodic jobs (every N)
|
||||||
|
Schedule string // for daily jobs ("02:30", "03:00") — mutually exclusive with Interval
|
||||||
|
LastRun time.Time
|
||||||
|
LastErr error
|
||||||
|
Running bool
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scheduler manages periodic and daily jobs.
|
||||||
|
type Scheduler struct {
|
||||||
|
mu sync.Mutex
|
||||||
|
jobs []*Job
|
||||||
|
logger *log.Logger
|
||||||
|
ctx context.Context
|
||||||
|
cancel context.CancelFunc
|
||||||
|
wg sync.WaitGroup
|
||||||
|
}
|
||||||
|
|
||||||
|
// New creates a new Scheduler.
|
||||||
|
func New(logger *log.Logger) *Scheduler {
|
||||||
|
return &Scheduler{
|
||||||
|
logger: logger,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Every registers a periodic job that runs every interval.
|
||||||
|
func (s *Scheduler) Every(name string, interval time.Duration, fn JobFunc) {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
|
s.jobs = append(s.jobs, &Job{
|
||||||
|
Name: name,
|
||||||
|
Fn: fn,
|
||||||
|
Interval: interval,
|
||||||
|
})
|
||||||
|
s.logger.Printf("[SCHED] Registered periodic job: %s (every %s)", name, interval)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Daily registers a job that runs once per day at the specified time (HH:MM) in Europe/Budapest timezone.
|
||||||
|
func (s *Scheduler) Daily(name string, timeStr string, fn JobFunc) {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
|
// Validate time format
|
||||||
|
if _, _, err := parseDailyTime(timeStr); err != nil {
|
||||||
|
s.logger.Printf("[ERROR] Daily job %s has invalid schedule %q: %v — job not started", name, timeStr, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
s.jobs = append(s.jobs, &Job{
|
||||||
|
Name: name,
|
||||||
|
Fn: fn,
|
||||||
|
Schedule: timeStr,
|
||||||
|
})
|
||||||
|
|
||||||
|
nextRun := nextDailyRun(timeStr)
|
||||||
|
s.logger.Printf("[SCHED] Daily job %s scheduled for %s", name, nextRun.Format("2006-01-02 15:04 MST"))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start begins running all registered jobs.
|
||||||
|
func (s *Scheduler) Start(ctx context.Context) {
|
||||||
|
s.ctx, s.cancel = context.WithCancel(ctx)
|
||||||
|
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
|
for _, job := range s.jobs {
|
||||||
|
if job.Interval > 0 {
|
||||||
|
s.wg.Add(1)
|
||||||
|
go s.runPeriodicJob(job)
|
||||||
|
} else if job.Schedule != "" {
|
||||||
|
s.wg.Add(1)
|
||||||
|
go s.runDailyJob(job)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
s.logger.Printf("[SCHED] Scheduler started with %d jobs", len(s.jobs))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stop cancels all jobs and waits for them to finish (30s timeout).
|
||||||
|
func (s *Scheduler) Stop() {
|
||||||
|
if s.cancel != nil {
|
||||||
|
s.cancel()
|
||||||
|
}
|
||||||
|
|
||||||
|
done := make(chan struct{})
|
||||||
|
go func() {
|
||||||
|
s.wg.Wait()
|
||||||
|
close(done)
|
||||||
|
}()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-done:
|
||||||
|
s.logger.Println("[SCHED] All jobs stopped")
|
||||||
|
case <-time.After(30 * time.Second):
|
||||||
|
s.logger.Println("[WARN] Scheduler stop timed out after 30s — some jobs may still be running")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetJobs returns a snapshot of all jobs (copies, not pointers).
|
||||||
|
func (s *Scheduler) GetJobs() []Job {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
|
result := make([]Job, len(s.jobs))
|
||||||
|
for i, j := range s.jobs {
|
||||||
|
result[i] = *j
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Scheduler) runPeriodicJob(job *Job) {
|
||||||
|
defer s.wg.Done()
|
||||||
|
|
||||||
|
// Quiet mode: jobs with interval <= 30s only log failures
|
||||||
|
quiet := job.Interval <= 30*time.Second
|
||||||
|
|
||||||
|
ticker := time.NewTicker(job.Interval)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-s.ctx.Done():
|
||||||
|
return
|
||||||
|
case <-ticker.C:
|
||||||
|
s.executeJob(job, quiet)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Scheduler) runDailyJob(job *Job) {
|
||||||
|
defer s.wg.Done()
|
||||||
|
|
||||||
|
for {
|
||||||
|
nextRun := nextDailyRun(job.Schedule)
|
||||||
|
waitDuration := time.Until(nextRun)
|
||||||
|
|
||||||
|
if waitDuration < 0 {
|
||||||
|
waitDuration = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
timer := time.NewTimer(waitDuration)
|
||||||
|
select {
|
||||||
|
case <-s.ctx.Done():
|
||||||
|
timer.Stop()
|
||||||
|
return
|
||||||
|
case <-timer.C:
|
||||||
|
s.executeJob(job, false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Scheduler) executeJob(job *Job, quiet bool) {
|
||||||
|
s.mu.Lock()
|
||||||
|
if job.Running {
|
||||||
|
s.mu.Unlock()
|
||||||
|
s.logger.Printf("[WARN] Job %s still running, skipping", job.Name)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
job.Running = true
|
||||||
|
s.mu.Unlock()
|
||||||
|
|
||||||
|
defer func() {
|
||||||
|
s.mu.Lock()
|
||||||
|
job.Running = false
|
||||||
|
s.mu.Unlock()
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Panic recovery
|
||||||
|
defer func() {
|
||||||
|
if r := recover(); r != nil {
|
||||||
|
s.mu.Lock()
|
||||||
|
job.LastErr = fmt.Errorf("panic: %v", r)
|
||||||
|
s.mu.Unlock()
|
||||||
|
s.logger.Printf("[ERROR] Job %s panicked: %v", job.Name, r)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
if !quiet {
|
||||||
|
s.logger.Printf("[SCHED] Running job: %s", job.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
start := time.Now()
|
||||||
|
err := job.Fn(s.ctx)
|
||||||
|
elapsed := time.Since(start)
|
||||||
|
|
||||||
|
s.mu.Lock()
|
||||||
|
job.LastRun = time.Now()
|
||||||
|
job.LastErr = err
|
||||||
|
s.mu.Unlock()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
s.logger.Printf("[WARN] Job %s failed: %v (took %s)", job.Name, err, elapsed.Round(time.Millisecond))
|
||||||
|
} else if !quiet {
|
||||||
|
s.logger.Printf("[SCHED] Job %s completed (took %s)", job.Name, elapsed.Round(time.Millisecond))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseDailyTime parses a 24-hour "HH:MM" string and returns its hour (0-23)
// and minute (0-59).
//
// Parsing is delegated to time.Parse so that the whole string has to match:
// trailing text ("02:30pm", "12:30:45"), signs, and out-of-range values are
// rejected instead of being silently truncated. The hour may be written with
// one or two digits; the minute must have two.
func parseDailyTime(timeStr string) (int, int, error) {
	parsed, err := time.Parse("15:04", timeStr)
	if err != nil {
		return 0, 0, fmt.Errorf("expected HH:MM format (hour 0-23, minute 00-59), got %q: %w", timeStr, err)
	}
	return parsed.Hour(), parsed.Minute(), nil
}
|
||||||
|
|
||||||
|
// nextDailyRun calculates the next occurrence of the daily schedule in Europe/Budapest timezone.
|
||||||
|
func nextDailyRun(timeStr string) time.Time {
|
||||||
|
hour, min, err := parseDailyTime(timeStr)
|
||||||
|
if err != nil {
|
||||||
|
// Should not happen — validated at registration
|
||||||
|
return time.Now().Add(24 * time.Hour)
|
||||||
|
}
|
||||||
|
|
||||||
|
loc, err := time.LoadLocation("Europe/Budapest")
|
||||||
|
if err != nil {
|
||||||
|
// Fallback to UTC if timezone not available
|
||||||
|
loc = time.UTC
|
||||||
|
}
|
||||||
|
|
||||||
|
now := time.Now().In(loc)
|
||||||
|
next := time.Date(now.Year(), now.Month(), now.Day(), hour, min, 0, 0, loc)
|
||||||
|
|
||||||
|
// If the time has already passed today, schedule for tomorrow
|
||||||
|
if !next.After(now) {
|
||||||
|
next = next.Add(24 * time.Hour)
|
||||||
|
}
|
||||||
|
|
||||||
|
return next
|
||||||
|
}
|
||||||
@@ -0,0 +1,134 @@
|
|||||||
|
//go:build linux
|
||||||
|
|
||||||
|
package system
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// CPUCollector holds the most recent CPU usage figure, which a background
// goroutine refreshes by reading /proc/stat.
type CPUCollector struct {
	mu         sync.RWMutex // held while reading or writing cpuPercent
	cpuPercent float64      // latest computed usage, in percent

	sampleRate time.Duration      // pause between the two reads of one sample
	cancel     context.CancelFunc // assigned by Start; invoked by Stop
}
|
||||||
|
|
||||||
|
// NewCPUCollector creates a new CPU collector with the given sample rate.
|
||||||
|
func NewCPUCollector(sampleRate time.Duration) *CPUCollector {
|
||||||
|
return &CPUCollector{
|
||||||
|
sampleRate: sampleRate,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start begins background CPU sampling.
|
||||||
|
func (c *CPUCollector) Start(ctx context.Context) {
|
||||||
|
ctx, c.cancel = context.WithCancel(ctx)
|
||||||
|
go c.loop(ctx)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stop stops the background CPU sampling.
|
||||||
|
func (c *CPUCollector) Stop() {
|
||||||
|
if c.cancel != nil {
|
||||||
|
c.cancel()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// CPUPercent returns the latest CPU usage percentage (0-100).
|
||||||
|
func (c *CPUCollector) CPUPercent() float64 {
|
||||||
|
c.mu.RLock()
|
||||||
|
defer c.mu.RUnlock()
|
||||||
|
return c.cpuPercent
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *CPUCollector) loop(ctx context.Context) {
|
||||||
|
for {
|
||||||
|
// Read first sample
|
||||||
|
idle1, total1, err := readCPUStat()
|
||||||
|
if err != nil {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case <-time.After(c.sampleRate):
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for sample interval
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case <-time.After(c.sampleRate):
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read second sample
|
||||||
|
idle2, total2, err := readCPUStat()
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
totalDelta := total2 - total1
|
||||||
|
idleDelta := idle2 - idle1
|
||||||
|
|
||||||
|
if totalDelta > 0 {
|
||||||
|
busyDelta := totalDelta - idleDelta
|
||||||
|
percent := float64(busyDelta) / float64(totalDelta) * 100
|
||||||
|
c.mu.Lock()
|
||||||
|
c.cpuPercent = percent
|
||||||
|
c.mu.Unlock()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// readCPUStat reads the aggregate "cpu" line of /proc/stat and returns the
// idle and total CPU jiffies.
//
// First line format: cpu <user> <nice> <system> <idle> <iowait> <irq> <softirq> <steal>
//
// idle is the sum of the idle and iowait columns; total is the sum of all
// eight columns. An error is returned when the file cannot be opened or
// read, when the first line is not the aggregate "cpu" line, when it has
// fewer than eight counters, or when a counter contains a non-digit
// character.
func readCPUStat() (idle, total uint64, err error) {
	f, err := os.Open("/proc/stat")
	if err != nil {
		return 0, 0, err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	if !scanner.Scan() {
		// Scan returns false both at EOF and on a read error; tell them apart.
		if scanErr := scanner.Err(); scanErr != nil {
			return 0, 0, fmt.Errorf("reading /proc/stat: %w", scanErr)
		}
		return 0, 0, fmt.Errorf("empty /proc/stat")
	}

	line := scanner.Text()
	if !strings.HasPrefix(line, "cpu ") {
		return 0, 0, fmt.Errorf("unexpected /proc/stat first line: %s", line)
	}

	fields := strings.Fields(line)
	if len(fields) < 9 {
		return 0, 0, fmt.Errorf("/proc/stat has too few fields: %d", len(fields))
	}

	// Fields: cpu user(1) nice(2) system(3) idle(4) iowait(5) irq(6) softirq(7) steal(8)
	var values [8]uint64
	for i := range values {
		field := fields[i+1]
		var v uint64
		for _, ch := range field {
			if ch < '0' || ch > '9' {
				// Refuse garbage instead of skipping the character, which
				// would silently yield a wrong counter.
				return 0, 0, fmt.Errorf("/proc/stat field %d is not a number: %q", i+1, field)
			}
			v = v*10 + uint64(ch-'0')
		}
		values[i] = v
	}

	// idle_total = idle + iowait
	idleTotal := values[3] + values[4]

	// total = sum of all
	var totalVal uint64
	for _, v := range values {
		totalVal += v
	}

	return idleTotal, totalVal, nil
}
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
//go:build !linux
|
||||||
|
|
||||||
|
package system
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// CPUCollector is the placeholder used on non-Linux platforms; it carries no
// state and never samples anything.
type CPUCollector struct{}

// NewCPUCollector returns a placeholder collector on non-Linux platforms;
// the sample rate is ignored.
func NewCPUCollector(_ time.Duration) *CPUCollector {
	return new(CPUCollector)
}

// Start does nothing on non-Linux platforms.
func (c *CPUCollector) Start(_ context.Context) {
}

// Stop does nothing on non-Linux platforms.
func (c *CPUCollector) Stop() {
}

// CPUPercent reports 0 on non-Linux platforms.
func (c *CPUCollector) CPUPercent() float64 {
	return 0
}
|
||||||
@@ -17,4 +17,11 @@ type SystemInfo struct {
|
|||||||
HDDAvailGB float64 `json:"hdd_avail_gb,omitempty"`
|
HDDAvailGB float64 `json:"hdd_avail_gb,omitempty"`
|
||||||
HDDPercent float64 `json:"hdd_percent,omitempty"`
|
HDDPercent float64 `json:"hdd_percent,omitempty"`
|
||||||
HDDConfigured bool `json:"hdd_configured"`
|
HDDConfigured bool `json:"hdd_configured"`
|
||||||
|
|
||||||
|
CPUPercent float64 `json:"cpu_percent"`
|
||||||
|
LoadAvg1 float64 `json:"load_avg_1"`
|
||||||
|
LoadAvg5 float64 `json:"load_avg_5"`
|
||||||
|
LoadAvg15 float64 `json:"load_avg_15"`
|
||||||
|
TemperatureCelsius float64 `json:"temperature_celsius"`
|
||||||
|
TemperatureSource string `json:"temperature_source,omitempty"`
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,13 +6,16 @@ import (
|
|||||||
"bufio"
|
"bufio"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
)
|
)
|
||||||
|
|
||||||
// GetInfo reads system memory and disk usage.
|
// GetInfo reads system memory, disk, CPU, load, and temperature info.
|
||||||
// hddPath is the mount path for external HDD; if empty, HDD info is skipped.
|
// hddPath is the mount path for external HDD; if empty, HDD info is skipped.
|
||||||
func GetInfo(hddPath string) SystemInfo {
|
// cpuCollector provides the latest CPU usage sample; may be nil.
|
||||||
|
func GetInfo(hddPath string, cpuCollector *CPUCollector) SystemInfo {
|
||||||
info := SystemInfo{}
|
info := SystemInfo{}
|
||||||
|
|
||||||
// --- Memory from /proc/meminfo ---
|
// --- Memory from /proc/meminfo ---
|
||||||
@@ -27,6 +30,17 @@ func GetInfo(hddPath string) SystemInfo {
|
|||||||
readDiskUsage(hddPath, &info.HDDTotalGB, &info.HDDUsedGB, &info.HDDAvailGB, &info.HDDPercent)
|
readDiskUsage(hddPath, &info.HDDTotalGB, &info.HDDUsedGB, &info.HDDAvailGB, &info.HDDPercent)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- Load average ---
|
||||||
|
readLoadAvg(&info)
|
||||||
|
|
||||||
|
// --- Temperature ---
|
||||||
|
readTemperature(&info)
|
||||||
|
|
||||||
|
// --- CPU from collector ---
|
||||||
|
if cpuCollector != nil {
|
||||||
|
info.CPUPercent = cpuCollector.CPUPercent()
|
||||||
|
}
|
||||||
|
|
||||||
return info
|
return info
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -72,7 +86,6 @@ func readMemInfo(info *SystemInfo) {
|
|||||||
|
|
||||||
// parseMemLine extracts the kB value from a /proc/meminfo line like "MemTotal: 16384000 kB"
|
// parseMemLine extracts the kB value from a /proc/meminfo line like "MemTotal: 16384000 kB"
|
||||||
func parseMemLine(line string) uint64 {
|
func parseMemLine(line string) uint64 {
|
||||||
// Remove label prefix up to ':'
|
|
||||||
parts := strings.SplitN(line, ":", 2)
|
parts := strings.SplitN(line, ":", 2)
|
||||||
if len(parts) < 2 {
|
if len(parts) < 2 {
|
||||||
return 0
|
return 0
|
||||||
@@ -99,7 +112,7 @@ func readDiskUsage(path string, totalGB, usedGB, availGB *float64, percent *floa
|
|||||||
bsize := uint64(stat.Bsize)
|
bsize := uint64(stat.Bsize)
|
||||||
total := stat.Blocks * bsize
|
total := stat.Blocks * bsize
|
||||||
avail := stat.Bavail * bsize
|
avail := stat.Bavail * bsize
|
||||||
used := total - (stat.Bfree * bsize) // Bfree includes reserved blocks
|
used := total - (stat.Bfree * bsize)
|
||||||
|
|
||||||
const gb = 1024 * 1024 * 1024
|
const gb = 1024 * 1024 * 1024
|
||||||
*totalGB = float64(total) / gb
|
*totalGB = float64(total) / gb
|
||||||
@@ -109,3 +122,117 @@ func readDiskUsage(path string, totalGB, usedGB, availGB *float64, percent *floa
|
|||||||
*percent = float64(used) / float64(total) * 100
|
*percent = float64(used) / float64(total) * 100
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// readLoadAvg reads 1/5/15 minute load averages from /proc/loadavg.
|
||||||
|
func readLoadAvg(info *SystemInfo) {
|
||||||
|
data, err := os.ReadFile("/proc/loadavg")
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
fmt.Sscanf(string(data), "%f %f %f", &info.LoadAvg1, &info.LoadAvg5, &info.LoadAvg15)
|
||||||
|
}
|
||||||
|
|
||||||
|
// readTemperature reads CPU/SoC temperature from thermal zones.
|
||||||
|
// Tries /host/sys first (Docker mount), then /sys (native).
|
||||||
|
func readTemperature(info *SystemInfo) {
|
||||||
|
prefixes := []string{"/host/sys", "/sys"}
|
||||||
|
|
||||||
|
for _, prefix := range prefixes {
|
||||||
|
if readThermalZones(prefix, info) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback: try hwmon
|
||||||
|
for _, prefix := range prefixes {
|
||||||
|
if readHwmon(prefix, info) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func readThermalZones(sysPrefix string, info *SystemInfo) bool {
|
||||||
|
pattern := filepath.Join(sysPrefix, "class", "thermal", "thermal_zone*", "temp")
|
||||||
|
matches, err := filepath.Glob(pattern)
|
||||||
|
if err != nil || len(matches) == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
sort.Strings(matches)
|
||||||
|
|
||||||
|
var maxTemp float64
|
||||||
|
var maxSource string
|
||||||
|
|
||||||
|
for _, tempPath := range matches {
|
||||||
|
data, err := os.ReadFile(tempPath)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
var milliDeg int64
|
||||||
|
if _, err := fmt.Sscanf(strings.TrimSpace(string(data)), "%d", &milliDeg); err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
temp := float64(milliDeg) / 1000.0
|
||||||
|
|
||||||
|
// Read the type file for the label
|
||||||
|
zoneDir := filepath.Dir(tempPath)
|
||||||
|
typePath := filepath.Join(zoneDir, "type")
|
||||||
|
typeData, err := os.ReadFile(typePath)
|
||||||
|
source := strings.TrimSpace(string(typeData))
|
||||||
|
if err != nil || source == "" {
|
||||||
|
source = filepath.Base(zoneDir)
|
||||||
|
}
|
||||||
|
|
||||||
|
if temp > maxTemp {
|
||||||
|
maxTemp = temp
|
||||||
|
maxSource = source
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if maxTemp > 0 {
|
||||||
|
info.TemperatureCelsius = maxTemp
|
||||||
|
info.TemperatureSource = maxSource
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func readHwmon(sysPrefix string, info *SystemInfo) bool {
|
||||||
|
pattern := filepath.Join(sysPrefix, "class", "hwmon", "hwmon*", "temp1_input")
|
||||||
|
matches, err := filepath.Glob(pattern)
|
||||||
|
if err != nil || len(matches) == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
var maxTemp float64
|
||||||
|
var maxSource string
|
||||||
|
|
||||||
|
for _, tempPath := range matches {
|
||||||
|
data, err := os.ReadFile(tempPath)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
var milliDeg int64
|
||||||
|
if _, err := fmt.Sscanf(strings.TrimSpace(string(data)), "%d", &milliDeg); err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
temp := float64(milliDeg) / 1000.0
|
||||||
|
source := filepath.Base(filepath.Dir(tempPath))
|
||||||
|
|
||||||
|
if temp > maxTemp {
|
||||||
|
maxTemp = temp
|
||||||
|
maxSource = source
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if maxTemp > 0 {
|
||||||
|
info.TemperatureCelsius = maxTemp
|
||||||
|
info.TemperatureSource = maxSource
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ package system
|
|||||||
import "fmt"
|
import "fmt"
|
||||||
|
|
||||||
// GetInfo returns empty system info on non-Linux platforms.
|
// GetInfo returns empty system info on non-Linux platforms.
|
||||||
func GetInfo(_ string) SystemInfo {
|
func GetInfo(_ string, _ *CPUCollector) SystemInfo {
|
||||||
return SystemInfo{}
|
return SystemInfo{}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -129,5 +129,20 @@ func (s *Server) templateFuncMap() template.FuncMap {
|
|||||||
}
|
}
|
||||||
return result
|
return result
|
||||||
},
|
},
|
||||||
|
"tempColor": func(celsius float64) string {
|
||||||
|
if celsius > 75 {
|
||||||
|
return "red"
|
||||||
|
}
|
||||||
|
if celsius >= 60 {
|
||||||
|
return "yellow"
|
||||||
|
}
|
||||||
|
return "green"
|
||||||
|
},
|
||||||
|
"fmtTemp": func(celsius float64) string {
|
||||||
|
return fmt.Sprintf("%.0f°C", celsius)
|
||||||
|
},
|
||||||
|
"fmtLoad": func(load float64) string {
|
||||||
|
return fmt.Sprintf("%.2f", load)
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ func (s *Server) dashboardHandler(w http.ResponseWriter, _ *http.Request) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sysInfo := system.GetInfo(s.cfg.Paths.HDDPath)
|
sysInfo := system.GetInfo(s.cfg.Paths.HDDPath, s.cpuCollector)
|
||||||
|
|
||||||
data := s.baseData("dashboard", "Vezérlőpult")
|
data := s.baseData("dashboard", "Vezérlőpult")
|
||||||
data["Stacks"] = stackList
|
data["Stacks"] = stackList
|
||||||
@@ -44,6 +44,16 @@ func (s *Server) dashboardHandler(w http.ResponseWriter, _ *http.Request) {
|
|||||||
data["TotalCount"] = len(stackList)
|
data["TotalCount"] = len(stackList)
|
||||||
data["SystemInfo"] = sysInfo
|
data["SystemInfo"] = sysInfo
|
||||||
|
|
||||||
|
// Backup status
|
||||||
|
data["BackupEnabled"] = s.cfg.Backup.Enabled
|
||||||
|
if s.backupMgr != nil {
|
||||||
|
dbDump, backupSt := s.backupMgr.GetStatus()
|
||||||
|
data["DBDumpStatus"] = dbDump
|
||||||
|
data["BackupStatus"] = backupSt
|
||||||
|
data["BackupRunning"] = s.backupMgr.IsRunning()
|
||||||
|
data["BackupMaxAgeHours"] = s.cfg.Monitoring.Thresholds.BackupMaxAgeHours
|
||||||
|
}
|
||||||
|
|
||||||
s.render(w, "dashboard", data)
|
s.render(w, "dashboard", data)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -10,28 +10,34 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
"gitea.dooplex.hu/admin/felhom-controller/internal/backup"
|
||||||
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
|
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
|
||||||
"gitea.dooplex.hu/admin/felhom-controller/internal/stacks"
|
"gitea.dooplex.hu/admin/felhom-controller/internal/stacks"
|
||||||
|
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Server struct {
|
type Server struct {
|
||||||
cfg *config.Config
|
cfg *config.Config
|
||||||
stackMgr *stacks.Manager
|
stackMgr *stacks.Manager
|
||||||
logger *log.Logger
|
cpuCollector *system.CPUCollector
|
||||||
version string
|
backupMgr *backup.Manager
|
||||||
tmpl *template.Template
|
logger *log.Logger
|
||||||
|
version string
|
||||||
|
tmpl *template.Template
|
||||||
|
|
||||||
sessions map[string]*session
|
sessions map[string]*session
|
||||||
sessionsMu sync.RWMutex
|
sessionsMu sync.RWMutex
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewServer(cfg *config.Config, stackMgr *stacks.Manager, logger *log.Logger, version string) *Server {
|
func NewServer(cfg *config.Config, stackMgr *stacks.Manager, cpuCollector *system.CPUCollector, backupMgr *backup.Manager, logger *log.Logger, version string) *Server {
|
||||||
s := &Server{
|
s := &Server{
|
||||||
cfg: cfg,
|
cfg: cfg,
|
||||||
stackMgr: stackMgr,
|
stackMgr: stackMgr,
|
||||||
logger: logger,
|
cpuCollector: cpuCollector,
|
||||||
version: version,
|
backupMgr: backupMgr,
|
||||||
sessions: make(map[string]*session),
|
logger: logger,
|
||||||
|
version: version,
|
||||||
|
sessions: make(map[string]*session),
|
||||||
}
|
}
|
||||||
s.loadTemplates()
|
s.loadTemplates()
|
||||||
go s.cleanupSessions()
|
go s.cleanupSessions()
|
||||||
|
|||||||
@@ -33,6 +33,29 @@
|
|||||||
<div class="system-bar-fill system-bar-{{usageColor .SystemInfo.MemPercent}}" style="width:{{printf "%.0f" .SystemInfo.MemPercent}}%"></div>
|
<div class="system-bar-fill system-bar-{{usageColor .SystemInfo.MemPercent}}" style="width:{{printf "%.0f" .SystemInfo.MemPercent}}%"></div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="system-info-item">
|
||||||
|
<div class="system-info-header">
|
||||||
|
<span class="system-info-label">CPU</span>
|
||||||
|
<span class="system-info-value">{{printf "%.0f" .SystemInfo.CPUPercent}}%</span>
|
||||||
|
</div>
|
||||||
|
<div class="system-bar">
|
||||||
|
<div class="system-bar-fill system-bar-{{usageColor .SystemInfo.CPUPercent}}" style="width:{{printf "%.0f" .SystemInfo.CPUPercent}}%"></div>
|
||||||
|
</div>
|
||||||
|
<div class="system-load-avg">Load: {{fmtLoad .SystemInfo.LoadAvg1}} / {{fmtLoad .SystemInfo.LoadAvg5}} / {{fmtLoad .SystemInfo.LoadAvg15}}</div>
|
||||||
|
</div>
|
||||||
|
{{if .SystemInfo.TemperatureCelsius}}
|
||||||
|
<div class="system-info-item system-info-item-compact">
|
||||||
|
<div class="system-info-header">
|
||||||
|
<span class="system-info-label">Hőmérséklet</span>
|
||||||
|
<span class="system-info-value">
|
||||||
|
<span class="temp-dot temp-dot-{{tempColor .SystemInfo.TemperatureCelsius}}"></span>
|
||||||
|
{{fmtTemp .SystemInfo.TemperatureCelsius}}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{{end}}
|
||||||
|
</div>
|
||||||
|
<div class="system-info-items" style="margin-top: 1rem;">
|
||||||
<div class="system-info-item">
|
<div class="system-info-item">
|
||||||
<div class="system-info-header">
|
<div class="system-info-header">
|
||||||
<span class="system-info-label">SSD tárhely</span>
|
<span class="system-info-label">SSD tárhely</span>
|
||||||
@@ -57,6 +80,46 @@
|
|||||||
</div>
|
</div>
|
||||||
{{end}}
|
{{end}}
|
||||||
|
|
||||||
|
{{if .BackupEnabled}}
|
||||||
|
<div class="backup-status-card">
|
||||||
|
<h3>Biztonsági mentés</h3>
|
||||||
|
{{if .BackupStatus}}
|
||||||
|
<div class="backup-info-row">
|
||||||
|
<span class="backup-label">Utolsó mentés:</span>
|
||||||
|
<span class="backup-value">
|
||||||
|
{{if .BackupStatus.Success}}
|
||||||
|
<span class="backup-status-ok">{{.BackupStatus.LastRun.Format "2006-01-02 15:04"}}</span>
|
||||||
|
{{else}}
|
||||||
|
<span class="backup-status-fail">Sikertelen</span>
|
||||||
|
{{end}}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
{{else}}
|
||||||
|
<div class="backup-info-row">
|
||||||
|
<span class="backup-label">Utolsó mentés:</span>
|
||||||
|
<span class="backup-value backup-status-none">Még nem futott</span>
|
||||||
|
</div>
|
||||||
|
{{end}}
|
||||||
|
{{if .DBDumpStatus}}
|
||||||
|
<div class="backup-info-row">
|
||||||
|
<span class="backup-label">Adatbázisok:</span>
|
||||||
|
<span class="backup-value">{{len .DBDumpStatus.Results}} mentve</span>
|
||||||
|
</div>
|
||||||
|
{{end}}
|
||||||
|
{{if .BackupStatus}}{{if .BackupStatus.RepoStats}}
|
||||||
|
<div class="backup-info-row">
|
||||||
|
<span class="backup-label">Tároló méret:</span>
|
||||||
|
<span class="backup-value">{{.BackupStatus.RepoStats.TotalSize}} ({{.BackupStatus.RepoStats.SnapshotCount}} pillanatkép)</span>
|
||||||
|
</div>
|
||||||
|
{{end}}{{end}}
|
||||||
|
<div class="backup-actions" style="margin-top: .75rem;">
|
||||||
|
<button class="btn btn-sm btn-primary" onclick="triggerBackup()" id="backup-btn">
|
||||||
|
{{if .BackupRunning}}Mentés folyamatban...{{else}}Mentés most{{end}}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{{end}}
|
||||||
|
|
||||||
<h3>Alkalmazások állapota</h3>
|
<h3>Alkalmazások állapota</h3>
|
||||||
|
|
||||||
<div class="stack-list">
|
<div class="stack-list">
|
||||||
@@ -97,5 +160,28 @@
|
|||||||
{{end}}
|
{{end}}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
// Click handler for the backup button: disables the button, POSTs to
// /api/backup/run, and reflects the JSON result in the button label. On
// failure the button is re-enabled and shows the server's error text, or
// 'Hiba' when none is given or the request itself fails.
function triggerBackup() {
    const button = document.getElementById('backup-btn');
    const fail = (message) => {
        button.textContent = message;
        button.disabled = false;
    };

    button.disabled = true;
    button.textContent = 'Mentés indítása...';

    fetch('/api/backup/run', { method: 'POST' })
        .then((response) => response.json())
        .then((result) => {
            if (!result.ok) {
                fail(result.error || 'Hiba');
                return;
            }
            button.textContent = 'Mentés folyamatban...';
            button.classList.add('loading');
        })
        .catch(() => fail('Hiba'));
}
|
||||||
|
</script>
|
||||||
|
|
||||||
{{template "layout_end" .}}
|
{{template "layout_end" .}}
|
||||||
{{end}}
|
{{end}}
|
||||||
|
|||||||
@@ -1122,6 +1122,62 @@ select.form-control option { background: var(--bg-secondary); color: var(--text-
|
|||||||
justify-content: flex-end;
|
justify-content: flex-end;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Load average text */
|
||||||
|
.system-load-avg {
|
||||||
|
font-size: .7rem;
|
||||||
|
color: var(--text-muted);
|
||||||
|
font-family: 'JetBrains Mono', monospace;
|
||||||
|
margin-top: .25rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Temperature dot */
|
||||||
|
.temp-dot {
|
||||||
|
display: inline-block;
|
||||||
|
width: 8px;
|
||||||
|
height: 8px;
|
||||||
|
border-radius: 50%;
|
||||||
|
margin-right: .25rem;
|
||||||
|
vertical-align: middle;
|
||||||
|
}
|
||||||
|
.temp-dot-green { background: var(--green); box-shadow: 0 0 4px rgba(35, 134, 54, 0.5); }
|
||||||
|
.temp-dot-yellow { background: var(--yellow); box-shadow: 0 0 4px rgba(210, 153, 34, 0.5); }
|
||||||
|
.temp-dot-red { background: var(--red); box-shadow: 0 0 4px rgba(218, 54, 51, 0.5); }
|
||||||
|
|
||||||
|
.system-info-item-compact {
|
||||||
|
flex: 0 1 auto;
|
||||||
|
min-width: 120px;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Backup status card */
|
||||||
|
.backup-status-card {
|
||||||
|
background: var(--bg-card);
|
||||||
|
border-radius: var(--radius);
|
||||||
|
padding: 1rem 1.25rem;
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
margin-bottom: 2rem;
|
||||||
|
}
|
||||||
|
.backup-status-card h3 {
|
||||||
|
margin-bottom: .75rem;
|
||||||
|
}
|
||||||
|
.backup-info-row {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: center;
|
||||||
|
padding: .25rem 0;
|
||||||
|
font-size: .85rem;
|
||||||
|
}
|
||||||
|
.backup-label {
|
||||||
|
color: var(--text-secondary);
|
||||||
|
}
|
||||||
|
.backup-value {
|
||||||
|
color: var(--text-primary);
|
||||||
|
font-family: 'JetBrains Mono', monospace;
|
||||||
|
font-size: .8rem;
|
||||||
|
}
|
||||||
|
.backup-status-ok { color: var(--green); }
|
||||||
|
.backup-status-fail { color: var(--red); }
|
||||||
|
.backup-status-none { color: var(--text-muted); }
|
||||||
|
|
||||||
/* Responsive */
|
/* Responsive */
|
||||||
@media(max-width: 768px) {
|
@media(max-width: 768px) {
|
||||||
.sidebar { width: 100%; height: auto; position: relative; border-right: none; border-bottom: 1px solid var(--border-color); }
|
.sidebar { width: 100%; height: auto; position: relative; border-right: none; border-bottom: 1px solid var(--border-color); }
|
||||||
|
|||||||
Reference in New Issue
Block a user