8b8c04a487
Concurrency fixes: - Deep-copy stacks in GetStack/GetStacks to prevent shared state mutation (C04) - Add per-state mutex to watchdog pathProbeState (C05) - Guard MetricsCollector.Start() with sync.Once against double-start (C06) - Hold diskJobMu across entire raw mount operation (C07) - Add mutex to SetEncryptionKey (C08), MigrateEncryption write lock (H03) - Use sync.Once for sync.Stop() channel close (H08) - Set syncing=true before releasing lock in TriggerSync (H09) - Deep-copy lastDBDump/lastBackup in GetFullStatus (H11) - Add WaitGroup for stderr goroutine in MigrateDrive (H19) - Add mutex to SetBackupRunningCheck (M18) Security fixes: - Validate Bearer token against Hub API key in CSRF middleware (H16) - Validate backup paths start with expected prefix in RemoveStack (M12) - Guard uuid[:8] slice with length check (H20) - Parse fstab fields exactly for mount target matching (H21) Bug fixes: - Use decrypted env vars for compose deploy (C01) - Log decrypt failures in DecryptMap instead of swallowing (C02) - Move Deployed=false inside lock in runComposeDeploy (C03) - Fix activeDrives() to skip disconnected drives (H02) - Fix Snapshot() stderr extraction from exec.ExitError (H01) - Check unlockCmd.Run() error in restic (H01) - Buffer template rendering via bytes.Buffer (H07) - Thread context.Context through cloudflare client (H10) - Fix leaf-name collision detection in cross-drive backup (H15) - Add nil check for crossDriveRunner (H17) - Use strings.TrimSpace instead of slice on command output (H18) - Make SaveAppConfig atomic with write-to-tmp+rename (H04) - Pass encKey on deploy failure SaveAppConfig (H05) - Fix IPv6 address format in TCP health probe Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
258 lines
8.6 KiB
Go
258 lines
8.6 KiB
Go
package selftest
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log"
|
|
"net/http"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
"gitea.dooplex.hu/admin/felhom-controller/internal/backup"
|
|
"gitea.dooplex.hu/admin/felhom-controller/internal/config"
|
|
"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
|
|
"gitea.dooplex.hu/admin/felhom-controller/internal/system"
|
|
)
|
|
|
|
// CheckResult is the outcome of one self-test check.
type CheckResult struct {
	// Name is the human-readable label of the check.
	Name string `json:"name"`

	// Status is one of "pass", "warn" or "fail".
	Status string `json:"status"`

	// Message carries detail about the outcome.
	Message string `json:"message"`
}
|
|
|
|
// Summary contains all self-test results.
|
|
type Summary struct {
|
|
Results []CheckResult `json:"results"`
|
|
Pass int `json:"pass"`
|
|
Warn int `json:"warn"`
|
|
Fail int `json:"fail"`
|
|
}
|
|
|
|
// Run executes all startup self-test checks and returns a summary.
|
|
// It never blocks startup — each check has an individual timeout.
|
|
func Run(cfg *config.Config, sett *settings.Settings, logger *log.Logger) *Summary {
|
|
s := &Summary{}
|
|
|
|
checks := []func() CheckResult{
|
|
func() CheckResult { return checkDockerSocket() },
|
|
func() CheckResult { return checkStacksDir(cfg.Paths.StacksDir) },
|
|
func() CheckResult { return checkDataDir(cfg.Paths.DataDir) },
|
|
func() CheckResult { return checkSystemDataPath(cfg.Paths.SystemDataPath) },
|
|
func() CheckResult { return checkStoragePaths(sett) },
|
|
func() CheckResult { return checkGitCatalog(cfg.Paths.StacksDir) },
|
|
func() CheckResult { return checkHubConnectivity(cfg) },
|
|
func() CheckResult { return checkResticRepos(sett) },
|
|
func() CheckResult { return checkMetricsDB(cfg.Paths.DataDir) },
|
|
}
|
|
|
|
logger.Println("[INFO] ========== Startup Self-Test ==========")
|
|
for _, check := range checks {
|
|
result := runSafe(check)
|
|
s.Results = append(s.Results, result)
|
|
switch result.Status {
|
|
case "pass":
|
|
s.Pass++
|
|
case "warn":
|
|
s.Warn++
|
|
case "fail":
|
|
s.Fail++
|
|
}
|
|
tag := "[PASS]"
|
|
if result.Status == "warn" {
|
|
tag = "[WARN]"
|
|
} else if result.Status == "fail" {
|
|
tag = "[FAIL]"
|
|
}
|
|
logger.Printf("[INFO] %s %s: %s", tag, result.Name, result.Message)
|
|
}
|
|
logger.Println("[INFO] ========================================")
|
|
logger.Printf("[INFO] Self-test complete: %d passed, %d warnings, %d failed", s.Pass, s.Warn, s.Fail)
|
|
|
|
return s
|
|
}
|
|
|
|
// runSafe executes a check function, recovering from panics.
|
|
func runSafe(fn func() CheckResult) (result CheckResult) {
|
|
defer func() {
|
|
if r := recover(); r != nil {
|
|
result = CheckResult{
|
|
Name: "unknown",
|
|
Status: "fail",
|
|
Message: fmt.Sprintf("panic: %v", r),
|
|
}
|
|
}
|
|
}()
|
|
return fn()
|
|
}
|
|
|
|
func checkDockerSocket() CheckResult {
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
|
|
out, err := exec.CommandContext(ctx, "docker", "info", "--format", "{{.ServerVersion}}").Output()
|
|
if err != nil {
|
|
return CheckResult{Name: "Docker socket", Status: "fail", Message: fmt.Sprintf("docker info failed: %v", err)}
|
|
}
|
|
return CheckResult{Name: "Docker socket", Status: "pass", Message: fmt.Sprintf("reachable (v%s)", strings.TrimSpace(string(out)))}
|
|
}
|
|
|
|
func checkStacksDir(stacksDir string) CheckResult {
|
|
info, err := os.Stat(stacksDir)
|
|
if err != nil {
|
|
return CheckResult{Name: "Stacks directory", Status: "fail", Message: fmt.Sprintf("not accessible: %v", err)}
|
|
}
|
|
if !info.IsDir() {
|
|
return CheckResult{Name: "Stacks directory", Status: "fail", Message: "path exists but is not a directory"}
|
|
}
|
|
return CheckResult{Name: "Stacks directory", Status: "pass", Message: stacksDir}
|
|
}
|
|
|
|
func checkDataDir(dataDir string) CheckResult {
|
|
info, err := os.Stat(dataDir)
|
|
if err != nil {
|
|
return CheckResult{Name: "Data directory", Status: "fail", Message: fmt.Sprintf("not accessible: %v", err)}
|
|
}
|
|
if !info.IsDir() {
|
|
return CheckResult{Name: "Data directory", Status: "fail", Message: "path exists but is not a directory"}
|
|
}
|
|
|
|
// Write test: create and remove a temp file
|
|
tmpFile := filepath.Join(dataDir, ".selftest-"+fmt.Sprintf("%d", time.Now().UnixNano()))
|
|
if err := os.WriteFile(tmpFile, []byte("test"), 0644); err != nil {
|
|
return CheckResult{Name: "Data directory", Status: "warn", Message: fmt.Sprintf("not writable: %v", err)}
|
|
}
|
|
os.Remove(tmpFile)
|
|
|
|
return CheckResult{Name: "Data directory", Status: "pass", Message: fmt.Sprintf("%s (writable)", dataDir)}
|
|
}
|
|
|
|
func checkSystemDataPath(systemDataPath string) CheckResult {
|
|
if systemDataPath == "" {
|
|
return CheckResult{Name: "System data path", Status: "warn", Message: "not configured"}
|
|
}
|
|
info, err := os.Stat(systemDataPath)
|
|
if err != nil {
|
|
return CheckResult{Name: "System data path", Status: "warn", Message: fmt.Sprintf("not accessible: %v", err)}
|
|
}
|
|
if !info.IsDir() {
|
|
return CheckResult{Name: "System data path", Status: "fail", Message: "path exists but is not a directory"}
|
|
}
|
|
if !system.IsMountPoint(systemDataPath) {
|
|
return CheckResult{Name: "System data path", Status: "warn", Message: fmt.Sprintf("%s exists but is not a mount point", systemDataPath)}
|
|
}
|
|
return CheckResult{Name: "System data path", Status: "pass", Message: systemDataPath}
|
|
}
|
|
|
|
func checkStoragePaths(sett *settings.Settings) CheckResult {
|
|
paths := sett.GetStoragePaths()
|
|
if len(paths) == 0 {
|
|
return CheckResult{Name: "Storage paths", Status: "warn", Message: "no storage paths registered"}
|
|
}
|
|
|
|
connected := 0
|
|
disconnected := 0
|
|
for _, sp := range paths {
|
|
if sp.Decommissioned {
|
|
continue
|
|
}
|
|
if sp.Disconnected {
|
|
disconnected++
|
|
continue
|
|
}
|
|
if _, err := os.Stat(sp.Path); err == nil && system.IsMountPoint(sp.Path) {
|
|
connected++
|
|
} else {
|
|
disconnected++
|
|
}
|
|
}
|
|
|
|
msg := fmt.Sprintf("%d connected, %d disconnected", connected, disconnected)
|
|
if connected == 0 && disconnected > 0 {
|
|
return CheckResult{Name: "Storage paths", Status: "warn", Message: msg}
|
|
}
|
|
return CheckResult{Name: "Storage paths", Status: "pass", Message: msg}
|
|
}
|
|
|
|
func checkGitCatalog(stacksDir string) CheckResult {
|
|
matches, err := filepath.Glob(filepath.Join(stacksDir, "*", ".felhom.yml"))
|
|
if err != nil {
|
|
return CheckResult{Name: "Git catalog", Status: "warn", Message: fmt.Sprintf("glob error: %v", err)}
|
|
}
|
|
if len(matches) == 0 {
|
|
return CheckResult{Name: "Git catalog", Status: "warn", Message: "no .felhom.yml files found in stacks dir"}
|
|
}
|
|
return CheckResult{Name: "Git catalog", Status: "pass", Message: fmt.Sprintf("%d app definitions found", len(matches))}
|
|
}
|
|
|
|
func checkHubConnectivity(cfg *config.Config) CheckResult {
|
|
if !cfg.Hub.Enabled || cfg.Hub.URL == "" {
|
|
return CheckResult{Name: "Hub connectivity", Status: "pass", Message: "hub disabled, skipped"}
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
|
|
url := cfg.Hub.URL + "/healthz"
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
|
if err != nil {
|
|
return CheckResult{Name: "Hub connectivity", Status: "warn", Message: fmt.Sprintf("request error: %v", err)}
|
|
}
|
|
|
|
resp, err := http.DefaultClient.Do(req)
|
|
if err != nil {
|
|
return CheckResult{Name: "Hub connectivity", Status: "warn", Message: fmt.Sprintf("unreachable: %v", err)}
|
|
}
|
|
resp.Body.Close()
|
|
|
|
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
|
|
return CheckResult{Name: "Hub connectivity", Status: "pass", Message: fmt.Sprintf("%s reachable (HTTP %d)", cfg.Hub.URL, resp.StatusCode)}
|
|
}
|
|
return CheckResult{Name: "Hub connectivity", Status: "warn", Message: fmt.Sprintf("HTTP %d from %s", resp.StatusCode, url)}
|
|
}
|
|
|
|
func checkResticRepos(sett *settings.Settings) CheckResult {
|
|
paths := sett.GetStoragePaths()
|
|
if len(paths) == 0 {
|
|
return CheckResult{Name: "Restic repos", Status: "pass", Message: "no storage paths, skipped"}
|
|
}
|
|
|
|
found := 0
|
|
missing := 0
|
|
for _, sp := range paths {
|
|
if sp.Disconnected || sp.Decommissioned {
|
|
continue
|
|
}
|
|
repoPath := backup.PrimaryResticRepoPath(sp.Path)
|
|
if _, err := os.Stat(repoPath); err == nil {
|
|
found++
|
|
} else {
|
|
missing++
|
|
}
|
|
}
|
|
|
|
if found == 0 && missing > 0 {
|
|
return CheckResult{Name: "Restic repos", Status: "warn", Message: fmt.Sprintf("0 repos found, %d expected", missing)}
|
|
}
|
|
msg := fmt.Sprintf("%d repos found", found)
|
|
if missing > 0 {
|
|
msg += fmt.Sprintf(", %d missing", missing)
|
|
}
|
|
return CheckResult{Name: "Restic repos", Status: "pass", Message: msg}
|
|
}
|
|
|
|
func checkMetricsDB(dataDir string) CheckResult {
|
|
dbPath := filepath.Join(dataDir, "metrics.db")
|
|
info, err := os.Stat(dbPath)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
return CheckResult{Name: "Metrics DB", Status: "warn", Message: "metrics.db not found (will be created)"}
|
|
}
|
|
return CheckResult{Name: "Metrics DB", Status: "warn", Message: fmt.Sprintf("not accessible: %v", err)}
|
|
}
|
|
return CheckResult{Name: "Metrics DB", Status: "pass", Message: fmt.Sprintf("%.1f MB", float64(info.Size())/1024/1024)}
|
|
}
|