v0.4.0: monitoring & backup — scheduler, CPU/temp metrics, healthchecks, restic backups

Phase 2 (Monitoring & Health):
- Central job scheduler replacing ad-hoc goroutines (internal/scheduler)
- CPU usage collector via /proc/stat background sampling (internal/system/cpu_linux.go)
- Temperature reading from thermal zones under /host/sys/class/thermal (host sysfs mounted into Docker) or /sys/class/thermal, with hwmon as a fallback
- Load average from /proc/loadavg
- Healthchecks.io-compatible HTTP pinger (internal/monitor/pinger.go)
- System health checks: disk, memory, CPU, temp, Docker, protected containers (internal/monitor/healthcheck.go)

Phase 3 (Backups):
- Database auto-discovery via docker ps + docker inspect (internal/backup/dbdump.go)
- Database dumping via docker exec (pg_dump / mariadb-dump) with atomic writes
- Restic backup integration with auto-password generation (internal/backup/restic.go)
- Backup orchestrator: DB dumps + restic snapshots + weekly prune (internal/backup/backup.go)
- Manual backup trigger via dashboard button and POST /api/backup/run

Dashboard UI:
- CPU usage bar with load average display
- Temperature with colored indicator dot
- Backup status card with last run time, DB count, repo stats
- "Mentés most" button for manual backup trigger

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-15 11:17:10 +01:00
parent 8a988c5998
commit d32d9fb44b
21 changed files with 2060 additions and 82 deletions
+131 -4
View File
@@ -6,13 +6,16 @@ import (
"bufio"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"syscall"
)
// GetInfo reads system memory and disk usage.
// GetInfo reads system memory, disk, CPU, load, and temperature info.
// hddPath is the mount path for external HDD; if empty, HDD info is skipped.
func GetInfo(hddPath string) SystemInfo {
// cpuCollector provides the latest CPU usage sample; may be nil.
func GetInfo(hddPath string, cpuCollector *CPUCollector) SystemInfo {
info := SystemInfo{}
// --- Memory from /proc/meminfo ---
@@ -27,6 +30,17 @@ func GetInfo(hddPath string) SystemInfo {
readDiskUsage(hddPath, &info.HDDTotalGB, &info.HDDUsedGB, &info.HDDAvailGB, &info.HDDPercent)
}
// --- Load average ---
readLoadAvg(&info)
// --- Temperature ---
readTemperature(&info)
// --- CPU from collector ---
if cpuCollector != nil {
info.CPUPercent = cpuCollector.CPUPercent()
}
return info
}
@@ -72,7 +86,6 @@ func readMemInfo(info *SystemInfo) {
// parseMemLine extracts the kB value from a /proc/meminfo line like "MemTotal: 16384000 kB"
func parseMemLine(line string) uint64 {
// Remove label prefix up to ':'
parts := strings.SplitN(line, ":", 2)
if len(parts) < 2 {
return 0
@@ -99,7 +112,7 @@ func readDiskUsage(path string, totalGB, usedGB, availGB *float64, percent *floa
bsize := uint64(stat.Bsize)
total := stat.Blocks * bsize
avail := stat.Bavail * bsize
used := total - (stat.Bfree * bsize) // Bfree includes reserved blocks
used := total - (stat.Bfree * bsize)
const gb = 1024 * 1024 * 1024
*totalGB = float64(total) / gb
@@ -109,3 +122,117 @@ func readDiskUsage(path string, totalGB, usedGB, availGB *float64, percent *floa
*percent = float64(used) / float64(total) * 100
}
}
// readLoadAvg reads the 1/5/15 minute load averages from /proc/loadavg into
// info.LoadAvg1, info.LoadAvg5 and info.LoadAvg15.
//
// Collection is best-effort: if the file cannot be read, or its first three
// fields do not all parse, info keeps the values it had on entry instead of
// ending up with a partially populated set of load figures.
func readLoadAvg(info *SystemInfo) {
	data, err := os.ReadFile("/proc/loadavg")
	if err != nil {
		return
	}

	// Sscanf stores each field as soon as it is parsed, so remember the
	// previous values to be able to roll back a partial scan.
	prev1, prev5, prev15 := info.LoadAvg1, info.LoadAvg5, info.LoadAvg15
	if _, err := fmt.Sscanf(string(data), "%f %f %f", &info.LoadAvg1, &info.LoadAvg5, &info.LoadAvg15); err != nil {
		info.LoadAvg1, info.LoadAvg5, info.LoadAvg15 = prev1, prev5, prev15
	}
}
// readTemperature fills in the temperature fields of info from the first
// source that yields a reading.
//
// Thermal zones are probed first and hwmon is only a fallback. For each of
// the two, the /host/sys root (Docker mount) is tried before the native /sys
// root. If no source produces a reading, info is left unchanged.
func readTemperature(info *SystemInfo) {
	roots := [...]string{"/host/sys", "/sys"}
	readers := [...]func(string, *SystemInfo) bool{readThermalZones, readHwmon}

	for _, read := range readers {
		for _, root := range roots {
			if read(root, info) {
				return
			}
		}
	}
}
// readThermalZones scans <sysPrefix>/class/thermal/thermal_zone*/temp and
// records the highest reading in info.
//
// Each temp file is parsed as an integer and divided by 1000 to obtain the
// value stored in info.TemperatureCelsius. info.TemperatureSource is set to
// the trimmed contents of the zone's "type" file, or to the zone directory
// name when that file is unreadable or empty.
//
// It returns true only when a reading strictly greater than zero was found;
// otherwise info is not modified and false is returned.
func readThermalZones(sysPrefix string, info *SystemInfo) bool {
	zoneFiles, globErr := filepath.Glob(filepath.Join(sysPrefix, "class", "thermal", "thermal_zone*", "temp"))
	if globErr != nil || len(zoneFiles) == 0 {
		return false
	}
	sort.Strings(zoneFiles)

	hottest, hottestLabel := 0.0, ""
	for _, zoneFile := range zoneFiles {
		raw, readErr := os.ReadFile(zoneFile)
		if readErr != nil {
			continue
		}

		var milli int64
		if _, scanErr := fmt.Sscanf(strings.TrimSpace(string(raw)), "%d", &milli); scanErr != nil {
			continue
		}
		celsius := float64(milli) / 1000.0

		// Label the zone by its "type" file, defaulting to the directory name.
		zoneDir := filepath.Dir(zoneFile)
		label := filepath.Base(zoneDir)
		if typeRaw, typeErr := os.ReadFile(filepath.Join(zoneDir, "type")); typeErr == nil {
			if trimmed := strings.TrimSpace(string(typeRaw)); trimmed != "" {
				label = trimmed
			}
		}

		// Strict comparison: on ties the earlier zone (in sorted order) wins.
		if celsius <= hottest {
			continue
		}
		hottest, hottestLabel = celsius, label
	}

	if hottest <= 0 {
		return false
	}
	info.TemperatureCelsius = hottest
	info.TemperatureSource = hottestLabel
	return true
}
// readHwmon scans <sysPrefix>/class/hwmon/hwmon*/temp1_input and records the
// highest reading in info.
//
// Each temp1_input file is parsed as an integer and divided by 1000 to obtain
// the value stored in info.TemperatureCelsius. info.TemperatureSource is set
// to the trimmed contents of the chip's "name" file (mirroring how
// readThermalZones labels a zone by its "type" file), because the hwmonN
// directory index depends on driver probe order. The directory name is used
// only when "name" is unreadable or empty.
//
// It returns true only when a reading strictly greater than zero was found;
// otherwise info is not modified and false is returned.
func readHwmon(sysPrefix string, info *SystemInfo) bool {
	pattern := filepath.Join(sysPrefix, "class", "hwmon", "hwmon*", "temp1_input")
	matches, err := filepath.Glob(pattern)
	if err != nil || len(matches) == 0 {
		return false
	}

	var maxTemp float64
	var maxSource string
	for _, tempPath := range matches {
		data, err := os.ReadFile(tempPath)
		if err != nil {
			continue
		}
		var milliDeg int64
		if _, err := fmt.Sscanf(strings.TrimSpace(string(data)), "%d", &milliDeg); err != nil {
			continue
		}
		temp := float64(milliDeg) / 1000.0
		if temp <= maxTemp {
			continue
		}

		// Prefer the driver-provided chip name over the unstable hwmonN index.
		chipDir := filepath.Dir(tempPath)
		source := filepath.Base(chipDir)
		if nameData, err := os.ReadFile(filepath.Join(chipDir, "name")); err == nil {
			if name := strings.TrimSpace(string(nameData)); name != "" {
				source = name
			}
		}

		maxTemp = temp
		maxSource = source
	}

	if maxTemp <= 0 {
		return false
	}
	info.TemperatureCelsius = maxTemp
	info.TemperatureSource = maxSource
	return true
}