v0.4.0: monitoring & backup — scheduler, CPU/temp metrics, healthchecks, restic backups
Phase 2 (Monitoring & Health): - Central job scheduler replacing ad-hoc goroutines (internal/scheduler) - CPU usage collector via /proc/stat background sampling (internal/system/cpu_linux.go) - Temperature reading from /sys/class/thermal + /host/sys (Docker mount) - Load average from /proc/loadavg - Healthchecks.io-compatible HTTP pinger (internal/monitor/pinger.go) - System health checks: disk, memory, CPU, temp, Docker, protected containers (internal/monitor/healthcheck.go) Phase 3 (Backups): - Database auto-discovery via docker ps + docker inspect (internal/backup/dbdump.go) - Database dumping via docker exec (pg_dump / mariadb-dump) with atomic writes - Restic backup integration with auto-password generation (internal/backup/restic.go) - Backup orchestrator: DB dumps + restic snapshots + weekly prune (internal/backup/backup.go) - Manual backup trigger via dashboard button and POST /api/backup/run Dashboard UI: - CPU usage bar with load average display - Temperature with colored indicator dot - Backup status card with last run time, DB count, repo stats - "Mentés most" button for manual backup trigger Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,134 @@
|
||||
//go:build linux
|
||||
|
||||
package system
|
||||
|
||||
import (
	"bufio"
	"context"
	"fmt"
	"os"
	"strconv"
	"strings"
	"sync"
	"time"
)
|
||||
|
||||
// CPUCollector keeps a continuously refreshed CPU usage figure that is
// computed by a background goroutine from successive /proc/stat readings.
type CPUCollector struct {
	// sampleRate is the pause between two consecutive /proc/stat readings.
	sampleRate time.Duration
	// cancel stops the background goroutine; it is nil until Start is called.
	cancel context.CancelFunc

	// mu guards cpuPercent.
	mu sync.RWMutex
	// cpuPercent is the most recently computed usage percentage.
	cpuPercent float64
}
|
||||
|
||||
// NewCPUCollector creates a new CPU collector with the given sample rate.
|
||||
func NewCPUCollector(sampleRate time.Duration) *CPUCollector {
|
||||
return &CPUCollector{
|
||||
sampleRate: sampleRate,
|
||||
}
|
||||
}
|
||||
|
||||
// Start begins background CPU sampling.
|
||||
func (c *CPUCollector) Start(ctx context.Context) {
|
||||
ctx, c.cancel = context.WithCancel(ctx)
|
||||
go c.loop(ctx)
|
||||
}
|
||||
|
||||
// Stop stops the background CPU sampling.
|
||||
func (c *CPUCollector) Stop() {
|
||||
if c.cancel != nil {
|
||||
c.cancel()
|
||||
}
|
||||
}
|
||||
|
||||
// CPUPercent returns the latest CPU usage percentage (0-100).
|
||||
func (c *CPUCollector) CPUPercent() float64 {
|
||||
c.mu.RLock()
|
||||
defer c.mu.RUnlock()
|
||||
return c.cpuPercent
|
||||
}
|
||||
|
||||
func (c *CPUCollector) loop(ctx context.Context) {
|
||||
for {
|
||||
// Read first sample
|
||||
idle1, total1, err := readCPUStat()
|
||||
if err != nil {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-time.After(c.sampleRate):
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for sample interval
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-time.After(c.sampleRate):
|
||||
}
|
||||
|
||||
// Read second sample
|
||||
idle2, total2, err := readCPUStat()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
totalDelta := total2 - total1
|
||||
idleDelta := idle2 - idle1
|
||||
|
||||
if totalDelta > 0 {
|
||||
busyDelta := totalDelta - idleDelta
|
||||
percent := float64(busyDelta) / float64(totalDelta) * 100
|
||||
c.mu.Lock()
|
||||
c.cpuPercent = percent
|
||||
c.mu.Unlock()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// readCPUStat reads the first line of /proc/stat and returns the idle and
// total CPU time in jiffies.
//
// First line format: cpu <user> <nice> <system> <idle> <iowait> <irq> <softirq> <steal>
//
// An error is returned when the file cannot be opened or read, when the
// first line is not the aggregate "cpu" line, or when a counter is not a
// valid unsigned integer.
func readCPUStat() (idle, total uint64, err error) {
	f, err := os.Open("/proc/stat")
	if err != nil {
		return 0, 0, err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	if !scanner.Scan() {
		// Scan returns false both on EOF and on a read error; tell them apart.
		if scanErr := scanner.Err(); scanErr != nil {
			return 0, 0, fmt.Errorf("reading /proc/stat: %w", scanErr)
		}
		return 0, 0, fmt.Errorf("empty /proc/stat")
	}

	return parseCPUStatLine(scanner.Text())
}

// parseCPUStatLine extracts the idle and total jiffies from the aggregate
// "cpu" line of /proc/stat. idle is idle + iowait; total is the sum of the
// first eight counters (user through steal). Any further fields on the line
// are ignored.
func parseCPUStatLine(line string) (idle, total uint64, err error) {
	if !strings.HasPrefix(line, "cpu ") {
		return 0, 0, fmt.Errorf("unexpected /proc/stat first line: %s", line)
	}

	fields := strings.Fields(line)
	if len(fields) < 9 {
		return 0, 0, fmt.Errorf("/proc/stat has too few fields: %d", len(fields))
	}

	// Positions within fields[1:9]:
	// user(0) nice(1) system(2) idle(3) iowait(4) irq(5) softirq(6) steal(7)
	const (
		idleIdx   = 3
		iowaitIdx = 4
	)

	for i, field := range fields[1:9] {
		// ParseUint rejects stray characters and overflow instead of
		// silently producing a wrong counter value.
		v, parseErr := strconv.ParseUint(field, 10, 64)
		if parseErr != nil {
			return 0, 0, fmt.Errorf("parsing /proc/stat field %d %q: %w", i+1, field, parseErr)
		}
		total += v
		if i == idleIdx || i == iowaitIdx {
			idle += v
		}
	}

	return idle, total, nil
}
|
||||
Reference in New Issue
Block a user