v0.4.0: monitoring & backup — scheduler, CPU/temp metrics, healthchecks, restic backups

Phase 2 (Monitoring & Health):
- Central job scheduler replacing ad-hoc goroutines (internal/scheduler)
- CPU usage collector via /proc/stat background sampling (internal/system/cpu_linux.go)
- Temperature reading from thermal zones under /host/sys (Docker mount) or /sys, with an hwmon temp1_input fallback
- Load average from /proc/loadavg
- Healthchecks.io-compatible HTTP pinger (internal/monitor/pinger.go)
- System health checks: disk, memory, CPU, temp, Docker, protected containers (internal/monitor/healthcheck.go)

Phase 3 (Backups):
- Database auto-discovery via docker ps + docker inspect (internal/backup/dbdump.go)
- Database dumping via docker exec (pg_dump / mariadb-dump) with atomic writes
- Restic backup integration with auto-password generation (internal/backup/restic.go)
- Backup orchestrator: DB dumps + restic snapshots + weekly prune (internal/backup/backup.go)
- Manual backup trigger via dashboard button and POST /api/backup/run

Dashboard UI:
- CPU usage bar with load average display
- Temperature with colored indicator dot
- Backup status card with last run time, DB count, repo stats
- "Mentés most" button for manual backup trigger

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-15 11:17:10 +01:00
parent 8a988c5998
commit d32d9fb44b
21 changed files with 2060 additions and 82 deletions
+134
View File
@@ -0,0 +1,134 @@
//go:build linux
package system
import (
"bufio"
"context"
"fmt"
"os"
"strings"
"sync"
"time"
)
// CPUCollector keeps a background-sampled CPU usage figure derived from
// /proc/stat. Create one with NewCPUCollector, launch sampling with Start and
// read the most recent value with CPUPercent.
type CPUCollector struct {
	// sampleRate is the wait between two /proc/stat reads.
	sampleRate time.Duration
	// cancel stops the sampling goroutine; it is nil until Start is called.
	cancel context.CancelFunc

	// mu guards cpuPercent, which is written by the sampling goroutine and
	// read through CPUPercent.
	mu         sync.RWMutex
	cpuPercent float64
}
// NewCPUCollector returns a collector configured to wait sampleRate between
// /proc/stat reads. Sampling does not begin until Start is called.
func NewCPUCollector(sampleRate time.Duration) *CPUCollector {
	collector := new(CPUCollector)
	collector.sampleRate = sampleRate
	return collector
}
// Start begins background CPU sampling in a new goroutine. Sampling ends when
// ctx is cancelled or Stop is called.
//
// Calling Start on a collector that is already running first cancels the
// previous sampling goroutine, so repeated calls do not leak goroutines.
// Start and Stop do not synchronise access to the stored cancel function, so
// they should be called from a single goroutine.
func (c *CPUCollector) Start(ctx context.Context) {
	if c.cancel != nil {
		// Overwriting c.cancel without calling it would leave the earlier
		// loop running with no way to stop it except the parent context.
		c.cancel()
	}
	ctx, c.cancel = context.WithCancel(ctx)
	go c.loop(ctx)
}
// Stop cancels the sampling goroutine launched by Start. It does nothing when
// Start has never been called.
func (c *CPUCollector) Stop() {
	stop := c.cancel
	if stop == nil {
		return
	}
	stop()
}
// CPUPercent reports the most recent CPU usage percentage stored by the
// sampling loop. It is 0 until the first sample has been computed.
func (c *CPUCollector) CPUPercent() float64 {
	c.mu.RLock()
	latest := c.cpuPercent
	c.mu.RUnlock()
	return latest
}
// loop samples /proc/stat through readCPUStat once per sample interval and
// publishes, as cpuPercent, the busy share of the time elapsed between two
// consecutive successful samples. It returns when ctx is cancelled.
//
// A failed read discards the previous sample, so a percentage is only ever
// computed from two back-to-back successful reads; the last published value
// is kept in the meantime. A non-positive sampleRate falls back to one
// second.
func (c *CPUCollector) loop(ctx context.Context) {
	interval := c.sampleRate
	if interval <= 0 {
		// A non-positive interval would make the wait return immediately and
		// turn this loop into a busy spin on /proc/stat.
		interval = time.Second
	}
	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	var (
		prevIdle, prevTotal uint64
		havePrev            bool
	)
	for {
		idle, total, err := readCPUStat()
		if err != nil {
			havePrev = false
		} else {
			if havePrev {
				if percent, ok := cpuBusyPercent(prevIdle, prevTotal, idle, total); ok {
					c.mu.Lock()
					c.cpuPercent = percent
					c.mu.Unlock()
				}
			}
			// Reuse this reading as the start of the next interval instead of
			// reading /proc/stat twice per cycle.
			prevIdle, prevTotal, havePrev = idle, total, true
		}

		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
		}
	}
}

// cpuBusyPercent converts two (idle, total) jiffy readings into a busy
// percentage in the range 0-100. ok is false when total did not advance, in
// which case no percentage can be derived.
func cpuBusyPercent(prevIdle, prevTotal, idle, total uint64) (percent float64, ok bool) {
	if total <= prevTotal {
		return 0, false
	}
	totalDelta := total - prevTotal

	// The counters are unsigned, so a plain subtraction would wrap around if
	// the idle figure ever went backwards (proc(5) notes that iowait may
	// decrease). Clamp the idle delta into [0, totalDelta] instead.
	var idleDelta uint64
	if idle > prevIdle {
		idleDelta = idle - prevIdle
	}
	if idleDelta > totalDelta {
		idleDelta = totalDelta
	}
	return float64(totalDelta-idleDelta) / float64(totalDelta) * 100, true
}
// readCPUStat reads the first line of /proc/stat and returns the idle and
// total CPU time in jiffies.
//
// First line format: cpu <user> <nice> <system> <idle> <iowait> <irq> <softirq> <steal> ...
//
// idle is idle+iowait and total is the sum of the first eight counters; any
// further columns on the line are ignored. An error is returned when the file
// cannot be opened or read, is empty, or its first line is not a well-formed
// aggregate "cpu" line.
func readCPUStat() (idle, total uint64, err error) {
	f, err := os.Open("/proc/stat")
	if err != nil {
		return 0, 0, err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	if !scanner.Scan() {
		// Scan returns false both on EOF and on a read error; tell them apart.
		if scanErr := scanner.Err(); scanErr != nil {
			return 0, 0, fmt.Errorf("reading /proc/stat: %w", scanErr)
		}
		return 0, 0, fmt.Errorf("empty /proc/stat")
	}
	return parseCPUStatLine(scanner.Text())
}

// parseCPUStatLine extracts the idle and total jiffies from the aggregate
// "cpu" line of /proc/stat. It fails if the line does not start with "cpu "
// or if any of the first eight counters is missing or not an unsigned
// integer.
func parseCPUStatLine(line string) (idle, total uint64, err error) {
	// The trailing space rules out the per-core lines ("cpu0", "cpu1", ...).
	if !strings.HasPrefix(line, "cpu ") {
		return 0, 0, fmt.Errorf("unexpected /proc/stat first line: %s", line)
	}

	// Counters in order: user, nice, system, idle, iowait, irq, softirq, steal.
	var counters [8]uint64
	if _, scanErr := fmt.Sscanf(line, "cpu %d %d %d %d %d %d %d %d",
		&counters[0], &counters[1], &counters[2], &counters[3],
		&counters[4], &counters[5], &counters[6], &counters[7]); scanErr != nil {
		return 0, 0, fmt.Errorf("parsing /proc/stat cpu line %q: %w", line, scanErr)
	}

	for _, v := range counters {
		total += v
	}
	// Time spent waiting for I/O counts as idle.
	idle = counters[3] + counters[4]
	return idle, total, nil
}
+25
View File
@@ -0,0 +1,25 @@
//go:build !linux
package system
import (
"context"
"time"
)
// CPUCollector is the placeholder used on non-Linux platforms: it holds no
// state and never samples anything.
type CPUCollector struct{}

// NewCPUCollector returns a placeholder collector; the sample rate argument
// is ignored on non-Linux platforms.
func NewCPUCollector(time.Duration) *CPUCollector {
	return new(CPUCollector)
}

// Start does nothing on non-Linux platforms; the context is ignored.
func (*CPUCollector) Start(context.Context) {
}

// Stop does nothing on non-Linux platforms.
func (*CPUCollector) Stop() {
}

// CPUPercent reports 0 on non-Linux platforms, where no sampling takes place.
func (*CPUCollector) CPUPercent() float64 {
	var none float64
	return none
}
+7
View File
@@ -17,4 +17,11 @@ type SystemInfo struct {
HDDAvailGB float64 `json:"hdd_avail_gb,omitempty"`
HDDPercent float64 `json:"hdd_percent,omitempty"`
HDDConfigured bool `json:"hdd_configured"`
CPUPercent float64 `json:"cpu_percent"`
LoadAvg1 float64 `json:"load_avg_1"`
LoadAvg5 float64 `json:"load_avg_5"`
LoadAvg15 float64 `json:"load_avg_15"`
TemperatureCelsius float64 `json:"temperature_celsius"`
TemperatureSource string `json:"temperature_source,omitempty"`
}
+131 -4
View File
@@ -6,13 +6,16 @@ import (
"bufio"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"syscall"
)
// GetInfo reads system memory and disk usage.
// GetInfo reads system memory, disk, CPU, load, and temperature info.
// hddPath is the mount path for external HDD; if empty, HDD info is skipped.
func GetInfo(hddPath string) SystemInfo {
// cpuCollector provides the latest CPU usage sample; may be nil.
func GetInfo(hddPath string, cpuCollector *CPUCollector) SystemInfo {
info := SystemInfo{}
// --- Memory from /proc/meminfo ---
@@ -27,6 +30,17 @@ func GetInfo(hddPath string) SystemInfo {
readDiskUsage(hddPath, &info.HDDTotalGB, &info.HDDUsedGB, &info.HDDAvailGB, &info.HDDPercent)
}
// --- Load average ---
readLoadAvg(&info)
// --- Temperature ---
readTemperature(&info)
// --- CPU from collector ---
if cpuCollector != nil {
info.CPUPercent = cpuCollector.CPUPercent()
}
return info
}
@@ -72,7 +86,6 @@ func readMemInfo(info *SystemInfo) {
// parseMemLine extracts the kB value from a /proc/meminfo line like "MemTotal: 16384000 kB"
func parseMemLine(line string) uint64 {
// Remove label prefix up to ':'
parts := strings.SplitN(line, ":", 2)
if len(parts) < 2 {
return 0
@@ -99,7 +112,7 @@ func readDiskUsage(path string, totalGB, usedGB, availGB *float64, percent *floa
bsize := uint64(stat.Bsize)
total := stat.Blocks * bsize
avail := stat.Bavail * bsize
used := total - (stat.Bfree * bsize) // Bfree includes reserved blocks
used := total - (stat.Bfree * bsize)
const gb = 1024 * 1024 * 1024
*totalGB = float64(total) / gb
@@ -109,3 +122,117 @@ func readDiskUsage(path string, totalGB, usedGB, availGB *float64, percent *floa
*percent = float64(used) / float64(total) * 100
}
}
// readLoadAvg fills info with the 1, 5 and 15 minute load averages taken from
// the first three fields of /proc/loadavg.
//
// This is best effort: when the file cannot be read or the three values
// cannot all be parsed, info is left untouched rather than partially filled.
func readLoadAvg(info *SystemInfo) {
	data, err := os.ReadFile("/proc/loadavg")
	if err != nil {
		return
	}

	// Scan into locals first: Sscanf stores values as it goes, so scanning
	// straight into info could leave it half-updated on a malformed line.
	var avg1, avg5, avg15 float64
	if _, err := fmt.Sscanf(string(data), "%f %f %f", &avg1, &avg5, &avg15); err != nil {
		return
	}
	info.LoadAvg1, info.LoadAvg5, info.LoadAvg15 = avg1, avg5, avg15
}
// readTemperature fills info with a temperature reading and its source.
//
// Thermal zones are consulted first and hwmon is the fallback. For each of
// the two, the /host/sys tree (Docker mount) is tried before the native /sys
// tree. The first reader that reports success wins; if none does, info is
// left unchanged.
func readTemperature(info *SystemInfo) {
	readers := []func(string, *SystemInfo) bool{
		readThermalZones,
		readHwmon, // fallback
	}
	for _, read := range readers {
		for _, root := range []string{"/host/sys", "/sys"} {
			if read(root, info) {
				return
			}
		}
	}
}
// readThermalZones scans <sysPrefix>/class/thermal/thermal_zone*/temp and
// stores the highest reading, together with a label for its zone, in info.
//
// Each temp file is parsed as an integer and divided by 1000 to obtain the
// value stored in TemperatureCelsius. The label is the trimmed content of the
// zone's "type" file, or the zone directory name when that file is unreadable
// or empty. Zones whose temp file cannot be read or parsed are skipped.
//
// It returns true only when a reading above zero was found; otherwise info is
// left unchanged and false is returned.
func readThermalZones(sysPrefix string, info *SystemInfo) bool {
	zoneFiles, globErr := filepath.Glob(filepath.Join(sysPrefix, "class", "thermal", "thermal_zone*", "temp"))
	if globErr != nil || len(zoneFiles) == 0 {
		return false
	}
	sort.Strings(zoneFiles)

	hottest, label := 0.0, ""
	for _, zoneFile := range zoneFiles {
		raw, readErr := os.ReadFile(zoneFile)
		if readErr != nil {
			continue
		}

		var milli int64
		if _, scanErr := fmt.Sscanf(strings.TrimSpace(string(raw)), "%d", &milli); scanErr != nil {
			continue
		}

		// Default to the directory name; prefer the zone's type when present.
		dir := filepath.Dir(zoneFile)
		name := filepath.Base(dir)
		if kind, typeErr := os.ReadFile(filepath.Join(dir, "type")); typeErr == nil {
			if trimmed := strings.TrimSpace(string(kind)); trimmed != "" {
				name = trimmed
			}
		}

		if celsius := float64(milli) / 1000.0; celsius > hottest {
			hottest, label = celsius, name
		}
	}

	if hottest <= 0 {
		return false
	}
	info.TemperatureCelsius = hottest
	info.TemperatureSource = label
	return true
}
// readHwmon scans <sysPrefix>/class/hwmon/hwmon*/temp1_input and stores the
// highest reading in info, using the hwmon directory name (for example
// "hwmon0") as the source label.
//
// Each file is parsed as an integer and divided by 1000 to obtain the value
// stored in TemperatureCelsius. Files that cannot be read or parsed are
// skipped.
//
// It returns true only when a reading above zero was found; otherwise info is
// left unchanged and false is returned.
func readHwmon(sysPrefix string, info *SystemInfo) bool {
	inputs, globErr := filepath.Glob(filepath.Join(sysPrefix, "class", "hwmon", "hwmon*", "temp1_input"))
	if globErr != nil || len(inputs) == 0 {
		return false
	}

	var (
		hottest float64
		label   string
	)
	for _, input := range inputs {
		raw, readErr := os.ReadFile(input)
		if readErr != nil {
			continue
		}

		var milli int64
		if _, scanErr := fmt.Sscanf(strings.TrimSpace(string(raw)), "%d", &milli); scanErr != nil {
			continue
		}

		celsius := float64(milli) / 1000.0
		if celsius <= hottest {
			continue
		}
		hottest = celsius
		label = filepath.Base(filepath.Dir(input))
	}

	if hottest <= 0 {
		return false
	}
	info.TemperatureCelsius = hottest
	info.TemperatureSource = label
	return true
}
+1 -1
View File
@@ -5,7 +5,7 @@ package system
import "fmt"
// GetInfo returns empty system info on non-Linux platforms.
func GetInfo(_ string) SystemInfo {
func GetInfo(_ string, _ *CPUCollector) SystemInfo {
return SystemInfo{}
}