v0.4.0: monitoring & backup — scheduler, CPU/temp metrics, healthchecks, restic backups
Phase 2 (Monitoring & Health):
- Central job scheduler replacing ad-hoc goroutines (internal/scheduler)
- CPU usage collector via /proc/stat background sampling (internal/system/cpu_linux.go)
- Temperature reading from /sys/class/thermal + /host/sys (Docker mount)
- Load average from /proc/loadavg
- Healthchecks.io-compatible HTTP pinger (internal/monitor/pinger.go)
- System health checks: disk, memory, CPU, temp, Docker, protected containers (internal/monitor/healthcheck.go)

Phase 3 (Backups):
- Database auto-discovery via docker ps + docker inspect (internal/backup/dbdump.go)
- Database dumping via docker exec (pg_dump / mariadb-dump) with atomic writes
- Restic backup integration with auto-password generation (internal/backup/restic.go)
- Backup orchestrator: DB dumps + restic snapshots + weekly prune (internal/backup/backup.go)
- Manual backup trigger via dashboard button and POST /api/backup/run

Dashboard UI:
- CPU usage bar with load average display
- Temperature with colored indicator dot
- Backup status card with last run time, DB count, repo stats
- "Mentés most" button for manual backup trigger

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,251 @@
|
||||
package scheduler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// JobFunc is the callback type the scheduler runs for a job. It is handed a
// context and reports a failed run by returning a non-nil error.
type JobFunc func(ctx context.Context) error
|
||||
|
||||
// Job represents a scheduled task.
|
||||
type Job struct {
|
||||
Name string
|
||||
Fn JobFunc
|
||||
Interval time.Duration // for periodic jobs (every N)
|
||||
Schedule string // for daily jobs ("02:30", "03:00") — mutually exclusive with Interval
|
||||
LastRun time.Time
|
||||
LastErr error
|
||||
Running bool
|
||||
}
|
||||
|
||||
// Scheduler manages periodic and daily jobs.
|
||||
type Scheduler struct {
|
||||
mu sync.Mutex
|
||||
jobs []*Job
|
||||
logger *log.Logger
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
wg sync.WaitGroup
|
||||
}
|
||||
|
||||
// New creates a new Scheduler.
|
||||
func New(logger *log.Logger) *Scheduler {
|
||||
return &Scheduler{
|
||||
logger: logger,
|
||||
}
|
||||
}
|
||||
|
||||
// Every registers a periodic job that runs every interval.
|
||||
func (s *Scheduler) Every(name string, interval time.Duration, fn JobFunc) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
s.jobs = append(s.jobs, &Job{
|
||||
Name: name,
|
||||
Fn: fn,
|
||||
Interval: interval,
|
||||
})
|
||||
s.logger.Printf("[SCHED] Registered periodic job: %s (every %s)", name, interval)
|
||||
}
|
||||
|
||||
// Daily registers a job that runs once per day at the specified time (HH:MM) in Europe/Budapest timezone.
|
||||
func (s *Scheduler) Daily(name string, timeStr string, fn JobFunc) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
// Validate time format
|
||||
if _, _, err := parseDailyTime(timeStr); err != nil {
|
||||
s.logger.Printf("[ERROR] Daily job %s has invalid schedule %q: %v — job not started", name, timeStr, err)
|
||||
return
|
||||
}
|
||||
|
||||
s.jobs = append(s.jobs, &Job{
|
||||
Name: name,
|
||||
Fn: fn,
|
||||
Schedule: timeStr,
|
||||
})
|
||||
|
||||
nextRun := nextDailyRun(timeStr)
|
||||
s.logger.Printf("[SCHED] Daily job %s scheduled for %s", name, nextRun.Format("2006-01-02 15:04 MST"))
|
||||
}
|
||||
|
||||
// Start begins running all registered jobs.
|
||||
func (s *Scheduler) Start(ctx context.Context) {
|
||||
s.ctx, s.cancel = context.WithCancel(ctx)
|
||||
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
for _, job := range s.jobs {
|
||||
if job.Interval > 0 {
|
||||
s.wg.Add(1)
|
||||
go s.runPeriodicJob(job)
|
||||
} else if job.Schedule != "" {
|
||||
s.wg.Add(1)
|
||||
go s.runDailyJob(job)
|
||||
}
|
||||
}
|
||||
|
||||
s.logger.Printf("[SCHED] Scheduler started with %d jobs", len(s.jobs))
|
||||
}
|
||||
|
||||
// Stop cancels all jobs and waits for them to finish (30s timeout).
|
||||
func (s *Scheduler) Stop() {
|
||||
if s.cancel != nil {
|
||||
s.cancel()
|
||||
}
|
||||
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
s.wg.Wait()
|
||||
close(done)
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-done:
|
||||
s.logger.Println("[SCHED] All jobs stopped")
|
||||
case <-time.After(30 * time.Second):
|
||||
s.logger.Println("[WARN] Scheduler stop timed out after 30s — some jobs may still be running")
|
||||
}
|
||||
}
|
||||
|
||||
// GetJobs returns a snapshot of all jobs (copies, not pointers).
|
||||
func (s *Scheduler) GetJobs() []Job {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
result := make([]Job, len(s.jobs))
|
||||
for i, j := range s.jobs {
|
||||
result[i] = *j
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func (s *Scheduler) runPeriodicJob(job *Job) {
|
||||
defer s.wg.Done()
|
||||
|
||||
// Quiet mode: jobs with interval <= 30s only log failures
|
||||
quiet := job.Interval <= 30*time.Second
|
||||
|
||||
ticker := time.NewTicker(job.Interval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-s.ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
s.executeJob(job, quiet)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scheduler) runDailyJob(job *Job) {
|
||||
defer s.wg.Done()
|
||||
|
||||
for {
|
||||
nextRun := nextDailyRun(job.Schedule)
|
||||
waitDuration := time.Until(nextRun)
|
||||
|
||||
if waitDuration < 0 {
|
||||
waitDuration = 0
|
||||
}
|
||||
|
||||
timer := time.NewTimer(waitDuration)
|
||||
select {
|
||||
case <-s.ctx.Done():
|
||||
timer.Stop()
|
||||
return
|
||||
case <-timer.C:
|
||||
s.executeJob(job, false)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scheduler) executeJob(job *Job, quiet bool) {
|
||||
s.mu.Lock()
|
||||
if job.Running {
|
||||
s.mu.Unlock()
|
||||
s.logger.Printf("[WARN] Job %s still running, skipping", job.Name)
|
||||
return
|
||||
}
|
||||
job.Running = true
|
||||
s.mu.Unlock()
|
||||
|
||||
defer func() {
|
||||
s.mu.Lock()
|
||||
job.Running = false
|
||||
s.mu.Unlock()
|
||||
}()
|
||||
|
||||
// Panic recovery
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
s.mu.Lock()
|
||||
job.LastErr = fmt.Errorf("panic: %v", r)
|
||||
s.mu.Unlock()
|
||||
s.logger.Printf("[ERROR] Job %s panicked: %v", job.Name, r)
|
||||
}
|
||||
}()
|
||||
|
||||
if !quiet {
|
||||
s.logger.Printf("[SCHED] Running job: %s", job.Name)
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
err := job.Fn(s.ctx)
|
||||
elapsed := time.Since(start)
|
||||
|
||||
s.mu.Lock()
|
||||
job.LastRun = time.Now()
|
||||
job.LastErr = err
|
||||
s.mu.Unlock()
|
||||
|
||||
if err != nil {
|
||||
s.logger.Printf("[WARN] Job %s failed: %v (took %s)", job.Name, err, elapsed.Round(time.Millisecond))
|
||||
} else if !quiet {
|
||||
s.logger.Printf("[SCHED] Job %s completed (took %s)", job.Name, elapsed.Round(time.Millisecond))
|
||||
}
|
||||
}
|
||||
|
||||
// parseDailyTime parses a 24-hour "HH:MM" time of day and returns its hour
// (0-23) and minute (0-59).
//
// The whole string must match the "15:04" layout of time.Parse. Anything left
// over after the minute — seconds, an am/pm suffix, stray characters — is an
// error rather than being silently ignored, so a value such as "2:30pm" is
// rejected instead of being scheduled at 02:30.
//
// On error the returned hour and minute are both 0.
func parseDailyTime(timeStr string) (int, int, error) {
	t, err := time.Parse("15:04", timeStr)
	if err != nil {
		return 0, 0, fmt.Errorf("expected HH:MM format (hour 0-23, minute 00-59), got %q: %w", timeStr, err)
	}
	return t.Hour(), t.Minute(), nil
}
|
||||
|
||||
// nextDailyRun calculates the next occurrence of the daily schedule in Europe/Budapest timezone.
|
||||
func nextDailyRun(timeStr string) time.Time {
|
||||
hour, min, err := parseDailyTime(timeStr)
|
||||
if err != nil {
|
||||
// Should not happen — validated at registration
|
||||
return time.Now().Add(24 * time.Hour)
|
||||
}
|
||||
|
||||
loc, err := time.LoadLocation("Europe/Budapest")
|
||||
if err != nil {
|
||||
// Fallback to UTC if timezone not available
|
||||
loc = time.UTC
|
||||
}
|
||||
|
||||
now := time.Now().In(loc)
|
||||
next := time.Date(now.Year(), now.Month(), now.Day(), hour, min, 0, 0, loc)
|
||||
|
||||
// If the time has already passed today, schedule for tomorrow
|
||||
if !next.After(now) {
|
||||
next = next.Add(24 * time.Hour)
|
||||
}
|
||||
|
||||
return next
|
||||
}
|
||||
Reference in New Issue
Block a user