v0.5.0: Backup bugfixes + monitoring page with metrics store

- Fix "Helyi mentés" showing "–" after controller restart by synthesizing
  LastBackup from snapshot history and LastDBDump from dump files on disk
- New monitoring page (/monitoring) with system info, metrics charts, and
  container resource overview
- SQLite metrics store (modernc.org/sqlite, pure Go, no CGO) with 60s
  collection interval and 30-day auto-prune
- REST API endpoints: /api/metrics/system, /api/metrics/containers/summary,
  /api/metrics/containers/{name}, /api/metrics/sysinfo
- Chart.js 4.4.7 embedded locally for offline environments
- System info provider reads hostname, OS, kernel, CPU, uptime from /proc
- Docker compose updated with /etc/os-release host mount

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-16 10:14:46 +01:00
parent 87e79548b0
commit 3e8baebfa5
19 changed files with 1691 additions and 5 deletions
+249
View File
@@ -0,0 +1,249 @@
package metrics
import (
"context"
"fmt"
"log"
"os/exec"
"strconv"
"strings"
"time"
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
)
// MetricsCollector periodically samples system and container metrics and
// writes them to a MetricsStore. Create one with NewMetricsCollector, then
// call Start to launch the background loop and Stop to end it.
type MetricsCollector struct {
// store receives every sample (InsertSystemMetrics / InsertContainerMetrics).
store *MetricsStore
// cpuCollector is passed to system.GetInfo when a system sample is taken.
cpuCollector *system.CPUCollector
// hddPath is passed to system.GetInfo; presumably the mount point whose usage
// ends up in HDDUsedGB/HDDTotalGB — TODO confirm against the system package.
hddPath string
// logger receives a warning whenever sampling or storing fails.
logger *log.Logger
// cancel stops the background loop; it stays nil until Start has been called.
cancel context.CancelFunc
}
// NewMetricsCollector wires a collector to its dependencies without starting
// it. The returned collector is idle until Start is called.
func NewMetricsCollector(store *MetricsStore, cpuCollector *system.CPUCollector, hddPath string, logger *log.Logger) *MetricsCollector {
	c := new(MetricsCollector)
	c.store = store
	c.cpuCollector = cpuCollector
	c.hddPath = hddPath
	c.logger = logger
	return c
}
// Start launches the background collection goroutine. The goroutine runs
// until either the supplied ctx is cancelled or Stop is called. Calling Start
// again replaces the stored cancel function, so only the most recent loop can
// be stopped through Stop.
func (c *MetricsCollector) Start(ctx context.Context) {
	loopCtx, stop := context.WithCancel(ctx)
	c.cancel = stop
	go c.loop(loopCtx)
}
// Stop cancels the collection loop. It is a no-op when Start was never called.
// It does not wait for the loop goroutine to exit.
func (c *MetricsCollector) Stop() {
	if c.cancel == nil {
		return
	}
	c.cancel()
}
// loop takes one sample right away and then one every 60 seconds until ctx is
// cancelled.
func (c *MetricsCollector) loop(ctx context.Context) {
	const interval = 60 * time.Second

	tick := time.NewTicker(interval)
	defer tick.Stop()

	// Do not make the first data point wait for a full interval.
	c.sample()

	for {
		select {
		case <-tick.C:
			c.sample()
		case <-ctx.Done():
			return
		}
	}
}
// sample takes one system snapshot and one container snapshot and stores both.
// Storage failures are logged as warnings and do not stop the collector.
func (c *MetricsCollector) sample() {
	if err := c.store.InsertSystemMetrics(c.sampleSystem()); err != nil {
		c.logger.Printf("[WARN] Failed to store system metrics: %v", err)
	}

	if err := c.store.InsertContainerMetrics(c.sampleContainers()); err != nil {
		c.logger.Printf("[WARN] Failed to store container metrics: %v", err)
	}
}
// sampleSystem converts the current system.GetInfo reading into a
// SystemSample stamped with the current Unix time (seconds).
func (c *MetricsCollector) sampleSystem() SystemSample {
	info := system.GetInfo(c.hddPath, c.cpuCollector)

	var s SystemSample
	s.Timestamp = time.Now().Unix()
	s.CPUPercent = info.CPUPercent
	s.MemUsedMB = int(info.UsedMemMB)
	s.MemTotalMB = int(info.TotalMemMB)
	s.TempCelsius = info.TemperatureCelsius
	s.LoadAvg1 = info.LoadAvg1
	s.LoadAvg5 = info.LoadAvg5
	s.LoadAvg15 = info.LoadAvg15
	s.DiskUsedGB = info.DiskUsedGB
	s.DiskTotalGB = info.DiskTotalGB
	s.HDDUsedGB = info.HDDUsedGB
	s.HDDTotalGB = info.HDDTotalGB
	return s
}
// sampleContainers runs `docker stats --no-stream` with a 30 second timeout
// and turns each tab-separated output row into a ContainerSample. All samples
// of one invocation share the same timestamp. On command failure a warning is
// logged and nil is returned; rows with fewer than five columns are skipped.
func (c *MetricsCollector) sampleContainers() []ContainerSample {
	const statsFormat = "{{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.NetIO}}\t{{.BlockIO}}"

	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	raw, err := exec.CommandContext(ctx, "docker", "stats", "--no-stream", "--format", statsFormat).Output()
	if err != nil {
		c.logger.Printf("[WARN] docker stats failed: %v", err)
		return nil
	}

	ts := time.Now().Unix()
	var samples []ContainerSample
	for _, row := range strings.Split(strings.TrimSpace(string(raw)), "\n") {
		if row == "" {
			continue
		}
		cols := strings.Split(row, "\t")
		if len(cols) < 5 {
			continue
		}

		var s ContainerSample
		s.Timestamp = ts
		s.ContainerName = cols[0]
		s.CPUPercent = parsePercent(cols[1])
		s.MemUsageMB, s.MemLimitMB = parseMemUsage(cols[2])
		s.NetRxBytes, s.NetTxBytes = parseIOPair(cols[3])
		s.BlockReadBytes, s.BlockWriteBytes = parseIOPair(cols[4])
		samples = append(samples, s)
	}
	return samples
}
// parsePercent converts a percentage string such as "2.50%" into 2.50.
// Surrounding whitespace and one trailing '%' are removed before parsing.
// Input that is not a number (for example "--") yields 0.
func parsePercent(s string) float64 {
	number := strings.TrimSuffix(strings.TrimSpace(s), "%")
	// The parse error is ignored on purpose: callers treat unparsable input as 0.
	pct, _ := strconv.ParseFloat(number, 64)
	return pct
}
// parseMemUsage splits a "usage / limit" string such as "150.5MiB / 512MiB"
// into its two sizes expressed in MB, e.g. (150.5, 512.0). Input that does not
// contain exactly one '/' yields (0, 0).
func parseMemUsage(s string) (usage, limit float64) {
	left, right, found := strings.Cut(s, "/")
	if !found || strings.Contains(right, "/") {
		return 0, 0
	}
	return parseSizeToMB(strings.TrimSpace(left)), parseSizeToMB(strings.TrimSpace(right))
}
// parseIOPair splits an "a / b" I/O string such as "1.5MB / 2.3MB" into two
// byte counts, e.g. (1500000, 2300000). Input that does not contain exactly
// one '/' yields (0, 0).
func parseIOPair(s string) (int64, int64) {
	left, right, found := strings.Cut(s, "/")
	if !found || strings.Contains(right, "/") {
		return 0, 0
	}
	first := parseSizeToBytes(strings.TrimSpace(left))
	second := parseSizeToBytes(strings.TrimSpace(right))
	return first, second
}
// parseSizeToMB parses a human-readable size such as "150.5MiB", "1.5GiB",
// "2TB" or "500kB" and returns the value in MB.
//
// Binary suffixes (KiB, MiB, GiB, TiB, PiB) scale by powers of 1024 relative
// to MiB; decimal suffixes (kB/KB, MB, GB, TB, PB) scale by powers of 1000
// relative to MB. A bare "B" is divided by 1024*1024. A string without a
// known suffix is parsed as a plain number and returned unchanged. Anything
// that cannot be parsed yields 0.
func parseSizeToMB(s string) float64 {
	s = strings.TrimSpace(s)

	// Order matters: every unit ends in "B", so the bare "B" entry must be
	// tried last. Otherwise "1.5TiB" would match "B" and fail to parse.
	multipliers := []struct {
		suffix string
		factor float64
	}{
		{"PiB", 1024 * 1024 * 1024},
		{"TiB", 1024 * 1024},
		{"GiB", 1024},
		{"MiB", 1},
		{"KiB", 1.0 / 1024},
		{"PB", 1000 * 1000 * 1000},
		{"TB", 1000 * 1000},
		{"GB", 1000},
		{"MB", 1},
		{"KB", 1.0 / 1000},
		{"kB", 1.0 / 1000},
		{"B", 1.0 / (1024 * 1024)},
	}
	for _, m := range multipliers {
		if !strings.HasSuffix(s, m.suffix) {
			continue
		}
		numStr := strings.TrimSpace(strings.TrimSuffix(s, m.suffix))
		val, err := strconv.ParseFloat(numStr, 64)
		if err != nil {
			return 0
		}
		return val * m.factor
	}

	// Fallback: no recognised unit, so try to parse as a plain number.
	// The error is ignored deliberately; unparsable input becomes 0.
	val, _ := strconv.ParseFloat(s, 64)
	return val
}
// parseSizeToBytes parses a human-readable size such as "1.5MB", "500kB",
// "2.3GB" or "1TiB" and returns the number of bytes.
//
// Decimal suffixes (kB/KB, MB, GB, TB, PB) are powers of 1000 and binary
// suffixes (KiB, MiB, GiB, TiB, PiB) are powers of 1024. A string without a
// known suffix is parsed as a plain number of bytes. The result is rounded to
// the nearest integer so floating-point error in the multiplication cannot
// lose a byte. Anything that cannot be parsed yields 0.
func parseSizeToBytes(s string) int64 {
	s = strings.TrimSpace(s)

	// Order matters: every unit ends in "B", so the bare "B" entry must be
	// tried last. Otherwise "2TB" would match "B" and fail to parse.
	multipliers := []struct {
		suffix string
		factor float64
	}{
		{"PiB", 1024 * 1024 * 1024 * 1024 * 1024},
		{"TiB", 1024 * 1024 * 1024 * 1024},
		{"GiB", 1024 * 1024 * 1024},
		{"MiB", 1024 * 1024},
		{"KiB", 1024},
		{"PB", 1e15},
		{"TB", 1e12},
		{"GB", 1e9},
		{"MB", 1e6},
		{"KB", 1e3},
		{"kB", 1e3},
		{"B", 1},
	}
	for _, m := range multipliers {
		if !strings.HasSuffix(s, m.suffix) {
			continue
		}
		numStr := strings.TrimSpace(strings.TrimSuffix(s, m.suffix))
		val, err := strconv.ParseFloat(numStr, 64)
		if err != nil {
			return 0
		}
		return roundSizeToInt64(val * m.factor)
	}

	// Fallback: no recognised unit, so treat the input as a plain byte count.
	// The error is ignored deliberately; unparsable input becomes 0.
	val, _ := strconv.ParseFloat(s, 64)
	return roundSizeToInt64(val)
}

// roundSizeToInt64 rounds v to the nearest integer, halves away from zero.
func roundSizeToInt64(v float64) int64 {
	if v < 0 {
		return int64(v - 0.5)
	}
	return int64(v + 0.5)
}
// FormatUptime renders a duration given in seconds as a short Hungarian
// string. Only the two most significant units are shown: "X nap, Y óra" when
// at least one day has passed, "X óra, Y perc" when at least one hour has
// passed, and "X perc" otherwise.
func FormatUptime(seconds int64) string {
	const (
		secPerMinute = 60
		secPerHour   = 3600
		secPerDay    = 86400
	)

	d := seconds / secPerDay
	h := (seconds % secPerDay) / secPerHour
	m := (seconds % secPerHour) / secPerMinute

	switch {
	case d > 0:
		return fmt.Sprintf("%d nap, %d óra", d, h)
	case h > 0:
		return fmt.Sprintf("%d óra, %d perc", h, m)
	default:
		return fmt.Sprintf("%d perc", m)
	}
}
+322
View File
@@ -0,0 +1,322 @@
package metrics
import (
"database/sql"
"fmt"
"log"
"time"
_ "modernc.org/sqlite"
)
// MetricsStore manages SQLite storage for system and container metrics.
// Obtain one from NewMetricsStore and release it with Close.
type MetricsStore struct {
// db is the database handle opened by NewMetricsStore; every method goes through it.
db *sql.DB
// logger is stored by NewMetricsStore; none of the methods visible in this
// file write to it.
logger *log.Logger
}
// NewMetricsStore opens (or creates) a SQLite database at dbPath and
// initializes the schema.
//
// The connection pool is limited to a single connection. The PRAGMAs below
// (notably busy_timeout and synchronous) are per-connection settings, and
// database/sql would otherwise open extra pooled connections that never
// received them. A single connection also keeps an in-memory database
// consistent across calls. The consequence is that all store operations are
// serialized.
//
// On any failure after the handle is opened, the handle is closed and a
// wrapped error is returned.
func NewMetricsStore(dbPath string, logger *log.Logger) (*MetricsStore, error) {
	db, err := sql.Open("sqlite", dbPath)
	if err != nil {
		return nil, fmt.Errorf("open sqlite: %w", err)
	}

	// Must precede the first Exec so the PRAGMAs land on the only connection
	// the pool will ever use.
	db.SetMaxOpenConns(1)

	// Set pragmas for performance and concurrency.
	pragmas := []string{
		"PRAGMA journal_mode=WAL",
		"PRAGMA synchronous=NORMAL",
		"PRAGMA busy_timeout=5000",
	}
	for _, p := range pragmas {
		if _, err := db.Exec(p); err != nil {
			db.Close()
			return nil, fmt.Errorf("pragma %q: %w", p, err)
		}
	}

	// Create tables and indexes; every statement is idempotent.
	schema := []string{
		`CREATE TABLE IF NOT EXISTS system_metrics (
			ts INTEGER NOT NULL,
			cpu_percent REAL NOT NULL,
			mem_used_mb INTEGER NOT NULL,
			mem_total_mb INTEGER NOT NULL,
			temp_celsius REAL,
			load_avg_1 REAL,
			load_avg_5 REAL,
			load_avg_15 REAL,
			disk_used_gb REAL,
			disk_total_gb REAL,
			hdd_used_gb REAL,
			hdd_total_gb REAL
		)`,
		`CREATE INDEX IF NOT EXISTS idx_system_ts ON system_metrics(ts)`,
		`CREATE TABLE IF NOT EXISTS container_metrics (
			ts INTEGER NOT NULL,
			container_name TEXT NOT NULL,
			cpu_percent REAL NOT NULL,
			mem_usage_mb REAL NOT NULL,
			mem_limit_mb REAL,
			net_rx_bytes INTEGER,
			net_tx_bytes INTEGER,
			block_read_bytes INTEGER,
			block_write_bytes INTEGER
		)`,
		`CREATE INDEX IF NOT EXISTS idx_container_ts ON container_metrics(ts)`,
		`CREATE INDEX IF NOT EXISTS idx_container_name ON container_metrics(container_name, ts)`,
	}
	for _, s := range schema {
		if _, err := db.Exec(s); err != nil {
			db.Close()
			return nil, fmt.Errorf("schema: %w", err)
		}
	}

	return &MetricsStore{db: db, logger: logger}, nil
}
// Close releases the underlying database handle and reports any error from
// doing so.
func (s *MetricsStore) Close() error {
	if err := s.db.Close(); err != nil {
		return err
	}
	return nil
}
// InsertSystemMetrics writes one system-wide sample as a single row of the
// system_metrics table and returns any error from the database.
func (s *MetricsStore) InsertSystemMetrics(m SystemSample) error {
	const insertSQL = `INSERT INTO system_metrics (ts, cpu_percent, mem_used_mb, mem_total_mb, temp_celsius,
load_avg_1, load_avg_5, load_avg_15, disk_used_gb, disk_total_gb, hdd_used_gb, hdd_total_gb)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`

	if _, err := s.db.Exec(insertSQL,
		m.Timestamp, m.CPUPercent, m.MemUsedMB, m.MemTotalMB, m.TempCelsius,
		m.LoadAvg1, m.LoadAvg5, m.LoadAvg15,
		m.DiskUsedGB, m.DiskTotalGB, m.HDDUsedGB, m.HDDTotalGB,
	); err != nil {
		return err
	}
	return nil
}
// InsertContainerMetrics writes a batch of per-container samples inside one
// transaction, so either all rows are stored or none are. An empty batch is a
// no-op that returns nil.
func (s *MetricsStore) InsertContainerMetrics(samples []ContainerSample) error {
	if len(samples) == 0 {
		return nil
	}

	const insertSQL = `INSERT INTO container_metrics (ts, container_name, cpu_percent, mem_usage_mb, mem_limit_mb,
net_rx_bytes, net_tx_bytes, block_read_bytes, block_write_bytes)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`

	tx, err := s.db.Begin()
	if err != nil {
		return err
	}
	// Undoes the batch on any early return; after a successful Commit the
	// rollback does nothing and its error is ignored.
	defer tx.Rollback()

	insert, err := tx.Prepare(insertSQL)
	if err != nil {
		return err
	}
	defer insert.Close()

	for i := range samples {
		row := samples[i]
		_, execErr := insert.Exec(
			row.Timestamp, row.ContainerName, row.CPUPercent, row.MemUsageMB, row.MemLimitMB,
			row.NetRxBytes, row.NetTxBytes, row.BlockReadBytes, row.BlockWriteBytes,
		)
		if execErr != nil {
			return execErr
		}
	}

	return tx.Commit()
}
// QuerySystemMetrics returns downsampled system metrics between from and to
// (both inclusive, compared at one-second granularity).
//
// resolution controls the approximate number of data points returned; values
// <= 0 default to 200. The time range is divided into buckets of
// (to-from)/resolution seconds (at least one second), and every metric is
// averaged per bucket. Each returned sample's Timestamp is the start of its
// bucket. Buckets without rows are absent from the result.
//
// An empty or inverted range yields (nil, nil).
func (s *MetricsStore) QuerySystemMetrics(from, to time.Time, resolution int) ([]SystemSample, error) {
	fromTS := from.Unix()
	toTS := to.Unix()
	if resolution <= 0 {
		resolution = 200
	}
	rangeSeconds := toTS - fromTS
	if rangeSeconds <= 0 {
		return nil, nil
	}
	bucketSeconds := rangeSeconds / int64(resolution)
	if bucketSeconds < 1 {
		bucketSeconds = 1
	}

	rows, err := s.db.Query(`
		SELECT
			(ts / ?) * ? AS bucket_ts,
			AVG(cpu_percent),
			AVG(mem_used_mb),
			AVG(mem_total_mb),
			AVG(temp_celsius),
			AVG(load_avg_1),
			AVG(load_avg_5),
			AVG(load_avg_15),
			AVG(disk_used_gb),
			AVG(disk_total_gb),
			AVG(hdd_used_gb),
			AVG(hdd_total_gb)
		FROM system_metrics
		WHERE ts >= ? AND ts <= ?
		GROUP BY ts / ?
		ORDER BY bucket_ts ASC`,
		bucketSeconds, bucketSeconds, fromTS, toTS, bucketSeconds,
	)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var result []SystemSample
	for rows.Next() {
		var m SystemSample
		// AVG() always produces a floating-point value. Scanning a fractional
		// average such as 1536.5 straight into an int field makes
		// database/sql fail, so the memory columns are read as floats and
		// rounded here.
		var memUsed, memTotal float64
		var tempC, load1, load5, load15, diskUsed, diskTotal, hddUsed, hddTotal sql.NullFloat64
		if err := rows.Scan(&m.Timestamp, &m.CPUPercent, &memUsed, &memTotal,
			&tempC, &load1, &load5, &load15, &diskUsed, &diskTotal, &hddUsed, &hddTotal); err != nil {
			return nil, err
		}
		// Memory sizes are non-negative, so adding 0.5 before truncation
		// rounds to the nearest MB.
		m.MemUsedMB = int(memUsed + 0.5)
		m.MemTotalMB = int(memTotal + 0.5)

		// Nullable columns: a bucket whose rows are all NULL keeps the zero value.
		if tempC.Valid {
			m.TempCelsius = tempC.Float64
		}
		if load1.Valid {
			m.LoadAvg1 = load1.Float64
		}
		if load5.Valid {
			m.LoadAvg5 = load5.Float64
		}
		if load15.Valid {
			m.LoadAvg15 = load15.Float64
		}
		if diskUsed.Valid {
			m.DiskUsedGB = diskUsed.Float64
		}
		if diskTotal.Valid {
			m.DiskTotalGB = diskTotal.Float64
		}
		if hddUsed.Valid {
			m.HDDUsedGB = hddUsed.Float64
		}
		if hddTotal.Valid {
			m.HDDTotalGB = hddTotal.Float64
		}
		result = append(result, m)
	}
	return result, rows.Err()
}
// QueryContainerMetrics returns downsampled metrics for the container called
// name between from and to (inclusive). The range is cut into roughly
// resolution buckets (200 when resolution <= 0, bucket width at least one
// second) and every metric is averaged per bucket. Each sample's Timestamp is
// the start of its bucket. An empty or inverted range yields (nil, nil).
func (s *MetricsStore) QueryContainerMetrics(name string, from, to time.Time, resolution int) ([]ContainerSample, error) {
	if resolution <= 0 {
		resolution = 200
	}
	start, end := from.Unix(), to.Unix()
	span := end - start
	if span <= 0 {
		return nil, nil
	}
	bucket := span / int64(resolution)
	if bucket < 1 {
		bucket = 1
	}

	const query = `
SELECT
(ts / ?) * ? AS bucket_ts,
container_name,
AVG(cpu_percent),
AVG(mem_usage_mb),
AVG(mem_limit_mb),
AVG(net_rx_bytes),
AVG(net_tx_bytes),
AVG(block_read_bytes),
AVG(block_write_bytes)
FROM container_metrics
WHERE container_name = ? AND ts >= ? AND ts <= ?
GROUP BY ts / ?
ORDER BY bucket_ts ASC`

	rows, err := s.db.Query(query, bucket, bucket, name, start, end, bucket)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	// Averages of nullable columns may be NULL; those map to zero.
	toInt := func(v sql.NullFloat64) int64 {
		if !v.Valid {
			return 0
		}
		return int64(v.Float64)
	}

	var out []ContainerSample
	for rows.Next() {
		var (
			sample                                  ContainerSample
			limit, rx, txBytes, blkRead, blkWritten sql.NullFloat64
		)
		scanErr := rows.Scan(&sample.Timestamp, &sample.ContainerName, &sample.CPUPercent, &sample.MemUsageMB,
			&limit, &rx, &txBytes, &blkRead, &blkWritten)
		if scanErr != nil {
			return nil, scanErr
		}
		if limit.Valid {
			sample.MemLimitMB = limit.Float64
		}
		sample.NetRxBytes = toInt(rx)
		sample.NetTxBytes = toInt(txBytes)
		sample.BlockReadBytes = toInt(blkRead)
		sample.BlockWriteBytes = toInt(blkWritten)
		out = append(out, sample)
	}
	return out, rows.Err()
}
// QueryContainerSummary returns the rows that carry the newest timestamp in
// container_metrics, i.e. the most recent sample of each container, ordered
// by CPU usage with the busiest first. A NULL memory limit is reported as 0.
func (s *MetricsStore) QueryContainerSummary() ([]ContainerCurrentStats, error) {
	const query = `
SELECT container_name, cpu_percent, mem_usage_mb, COALESCE(mem_limit_mb, 0)
FROM container_metrics
WHERE ts = (SELECT MAX(ts) FROM container_metrics)
ORDER BY cpu_percent DESC`

	rows, err := s.db.Query(query)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var stats []ContainerCurrentStats
	for rows.Next() {
		var cur ContainerCurrentStats
		scanErr := rows.Scan(&cur.ContainerName, &cur.CPUPercent, &cur.MemUsageMB, &cur.MemLimitMB)
		if scanErr != nil {
			return nil, scanErr
		}
		stats = append(stats, cur)
	}
	return stats, rows.Err()
}
// Prune deletes rows older than the given duration from both metrics tables
// and returns how many rows were removed in total. The two deletes are not
// wrapped in a transaction: if the second one fails, the count from the first
// is returned together with the error.
func (s *MetricsStore) Prune(olderThan time.Duration) (int64, error) {
	cutoff := time.Now().Add(-olderThan).Unix()

	statements := [...]string{
		"DELETE FROM system_metrics WHERE ts < ?",
		"DELETE FROM container_metrics WHERE ts < ?",
	}

	var deleted int64
	for _, stmt := range statements {
		res, err := s.db.Exec(stmt, cutoff)
		if err != nil {
			return deleted, err
		}
		// RowsAffected errors are ignored; the count is informational only.
		affected, _ := res.RowsAffected()
		deleted += affected
	}
	return deleted, nil
}
+110
View File
@@ -0,0 +1,110 @@
//go:build linux
package metrics
import (
"bufio"
"fmt"
"os"
"runtime"
"strings"
"time"
)
// GetStaticInfo gathers slowly-changing system information on Linux.
//
// Sources, in the order they are read:
//   - hostname: os.Hostname
//   - OS name: PRETTY_NAME from /host/etc/os-release, else /etc/os-release
//   - kernel: /proc/sys/kernel/osrelease
//   - architecture: /proc/sys/kernel/arch, else runtime.GOARCH refined by
//     looking for "x86_64" / "aarch64" in /proc/version
//   - CPU model and core count: /proc/cpuinfo, core count falling back to
//     runtime.NumCPU
//   - uptime and boot time: /proc/uptime
//
// Every lookup is best-effort: a failed read leaves that field at its zero
// value (or fallback) and no error is reported.
func GetStaticInfo() StaticSystemInfo {
	var info StaticSystemInfo

	info.Hostname, _ = os.Hostname()

	// Prefer the host's os-release when it is mounted, else use our own.
	for _, candidate := range []string{"/host/etc/os-release", "/etc/os-release"} {
		if pretty := readOSRelease(candidate); pretty != "" {
			info.OS = pretty
			break
		}
	}

	if raw, err := os.ReadFile("/proc/sys/kernel/osrelease"); err == nil {
		info.Kernel = strings.TrimSpace(string(raw))
	}

	if raw, err := os.ReadFile("/proc/sys/kernel/arch"); err == nil {
		info.Architecture = strings.TrimSpace(string(raw))
	}
	if info.Architecture == "" {
		// Start from the build architecture, then refine it from the kernel
		// banner when that names a known architecture.
		info.Architecture = runtime.GOARCH
		if raw, err := os.ReadFile("/proc/version"); err == nil {
			switch banner := string(raw); {
			case strings.Contains(banner, "x86_64"):
				info.Architecture = "x86_64"
			case strings.Contains(banner, "aarch64"):
				info.Architecture = "aarch64"
			}
		}
	}

	model, cores := readCPUInfo()
	if cores == 0 {
		cores = runtime.NumCPU()
	}
	info.CPUModel = model
	info.CPUCores = cores

	// The first field of /proc/uptime is the uptime in (fractional) seconds.
	if raw, err := os.ReadFile("/proc/uptime"); err == nil {
		var up float64
		if _, scanErr := fmt.Sscanf(string(raw), "%f", &up); scanErr == nil {
			info.UptimeSeconds = int64(up)
			info.BootTime = time.Now().Add(-time.Duration(info.UptimeSeconds) * time.Second)
		}
	}

	return info
}
// readOSRelease returns the PRETTY_NAME value from the os-release file at
// path, without its surrounding quotes. It returns "" when the file cannot be
// opened or has no PRETTY_NAME line.
//
// os-release values may be wrapped in double or single quotes, so one
// matching pair of either kind is removed. Whitespace around the line and the
// value is ignored.
func readOSRelease(path string) string {
	f, err := os.Open(path)
	if err != nil {
		return ""
	}
	defer f.Close()

	const key = "PRETTY_NAME="

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if !strings.HasPrefix(line, key) {
			continue
		}
		val := strings.TrimSpace(strings.TrimPrefix(line, key))
		if n := len(val); n >= 2 && (val[0] == '"' || val[0] == '\'') && val[n-1] == val[0] {
			return val[1 : n-1]
		}
		// Unbalanced or missing quotes: strip stray double quotes only.
		return strings.Trim(val, `"`)
	}
	// A scan error is treated like a missing key; this lookup is best-effort.
	return ""
}
// readCPUInfo scans /proc/cpuinfo and returns the CPU model name together
// with the number of logical processors listed. model is the text after the
// colon on the last "model name" line; cores counts the lines that start with
// "processor". When the file cannot be opened it returns ("", 0).
func readCPUInfo() (model string, cores int) {
	f, err := os.Open("/proc/cpuinfo")
	if err != nil {
		return "", 0
	}
	defer f.Close()

	for sc := bufio.NewScanner(f); sc.Scan(); {
		entry := sc.Text()
		switch {
		case strings.HasPrefix(entry, "model name"):
			if _, value, ok := strings.Cut(entry, ":"); ok {
				model = strings.TrimSpace(value)
			}
		case strings.HasPrefix(entry, "processor"):
			cores++
		}
	}
	return model, cores
}
@@ -0,0 +1,19 @@
//go:build !linux
package metrics
import (
"os"
"runtime"
)
// GetStaticInfo returns placeholder system info on non-Linux platforms. Only
// the hostname, the Go OS name, the Go architecture and the CPU count are
// filled in; the remaining fields keep their zero values.
func GetStaticInfo() StaticSystemInfo {
	var info StaticSystemInfo
	// Best-effort: a failed lookup leaves the hostname empty.
	info.Hostname, _ = os.Hostname()
	info.OS = runtime.GOOS
	info.Architecture = runtime.GOARCH
	info.CPUCores = runtime.NumCPU()
	return info
}
+52
View File
@@ -0,0 +1,52 @@
package metrics
import "time"
// SystemSample holds one system-wide metrics snapshot. The same type is used
// for raw samples written by the collector and for the per-bucket averages
// returned by MetricsStore.QuerySystemMetrics.
type SystemSample struct {
// Timestamp is Unix time in seconds; for query results it is the start of the bucket.
Timestamp int64 `json:"ts"`
// CPUPercent is the CPU utilisation reported by system.GetInfo.
CPUPercent float64 `json:"cpu"`
// MemUsedMB and MemTotalMB are used and total memory in MB.
MemUsedMB int `json:"mem_used"`
MemTotalMB int `json:"mem_total"`
// TempCelsius is the temperature reported by system.GetInfo, in degrees Celsius.
TempCelsius float64 `json:"temp"`
// LoadAvg1, LoadAvg5 and LoadAvg15 are the load-average values reported by system.GetInfo.
LoadAvg1 float64 `json:"load1"`
LoadAvg5 float64 `json:"load5"`
LoadAvg15 float64 `json:"load15"`
// DiskUsedGB and DiskTotalGB are the disk usage figures reported by system.GetInfo, in GB.
DiskUsedGB float64 `json:"disk_used"`
DiskTotalGB float64 `json:"disk_total"`
// HDDUsedGB and HDDTotalGB are the HDD usage figures reported by system.GetInfo
// for the collector's hddPath, in GB.
HDDUsedGB float64 `json:"hdd_used"`
HDDTotalGB float64 `json:"hdd_total"`
}
// ContainerSample holds one per-container metrics snapshot. Raw samples are
// parsed from `docker stats` output; MetricsStore.QueryContainerMetrics
// returns per-bucket averages in the same shape.
type ContainerSample struct {
// Timestamp is Unix time in seconds; for query results it is the start of the bucket.
Timestamp int64 `json:"ts"`
// ContainerName is the container name as printed by docker stats.
ContainerName string `json:"name"`
// CPUPercent is the CPU percentage column with the '%' sign removed.
CPUPercent float64 `json:"cpu"`
// MemUsageMB and MemLimitMB are the two halves of the memory usage column, in MB.
MemUsageMB float64 `json:"mem_usage"`
MemLimitMB float64 `json:"mem_limit"`
// NetRxBytes and NetTxBytes are the two halves of the network I/O column, in bytes.
NetRxBytes int64 `json:"net_rx"`
NetTxBytes int64 `json:"net_tx"`
// BlockReadBytes and BlockWriteBytes are the two halves of the block I/O column, in bytes.
BlockReadBytes int64 `json:"blk_read"`
BlockWriteBytes int64 `json:"blk_write"`
}
// ContainerCurrentStats holds the latest snapshot for a single container, as
// returned by MetricsStore.QueryContainerSummary.
type ContainerCurrentStats struct {
// ContainerName is the container name stored with the sample.
ContainerName string `json:"name"`
// CPUPercent is the CPU percentage of the most recent sample.
CPUPercent float64 `json:"cpu_percent"`
// MemUsageMB is the memory usage of the most recent sample, in MB.
MemUsageMB float64 `json:"mem_usage_mb"`
// MemLimitMB is the memory limit in MB; 0 when no limit was stored.
MemLimitMB float64 `json:"mem_limit_mb"`
}
// StaticSystemInfo holds static (or slowly-changing) host information as
// produced by GetStaticInfo. On non-Linux builds only Hostname, OS,
// Architecture and CPUCores are populated.
type StaticSystemInfo struct {
// Hostname is the value returned by os.Hostname.
Hostname string `json:"hostname"`
// OS is the PRETTY_NAME from os-release on Linux, or runtime.GOOS elsewhere.
OS string `json:"os"`
// Kernel is the kernel release read from /proc/sys/kernel/osrelease (Linux only).
Kernel string `json:"kernel"`
// Architecture is the machine architecture, e.g. "x86_64" or "aarch64", or
// runtime.GOARCH when it cannot be determined.
Architecture string `json:"architecture"`
// CPUModel is the "model name" entry from /proc/cpuinfo (Linux only).
CPUModel string `json:"cpu_model"`
// CPUCores is the number of logical processors.
CPUCores int `json:"cpu_cores"`
// UptimeSeconds is the whole-second uptime read from /proc/uptime (Linux only).
UptimeSeconds int64 `json:"uptime_seconds"`
// BootTime is the time of the call minus UptimeSeconds (Linux only).
BootTime time.Time `json:"boot_time"`
}