// Package selftest runs a series of quick startup checks (Docker, directories,
// storage paths, hub connectivity, backup repositories, metrics database) and
// reports the outcome of each one.
package selftest

import (
	"context"
	"fmt"
	"log"
	"net/http"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"time"

	"gitea.dooplex.hu/admin/felhom-controller/internal/backup"
	"gitea.dooplex.hu/admin/felhom-controller/internal/config"
	"gitea.dooplex.hu/admin/felhom-controller/internal/settings"
	"gitea.dooplex.hu/admin/felhom-controller/internal/system"
)

// CheckResult represents the outcome of a single self-test check.
type CheckResult struct {
	Name    string `json:"name"`
	Status  string `json:"status"` // "pass", "warn", "fail"
	Message string `json:"message"`
}

// Summary contains all self-test results together with a per-status tally.
type Summary struct {
	Results []CheckResult `json:"results"`
	Pass    int           `json:"pass"`
	Warn    int           `json:"warn"`
	Fail    int           `json:"fail"`
}

// Run executes all startup self-test checks in a fixed order, logs one line
// per check plus a final tally to logger, and returns the collected summary.
//
// A panicking check is recovered and recorded as a failure, so a broken check
// cannot abort the caller. Only the Docker and hub checks carry an explicit
// 5-second timeout; the remaining checks have no timeout of their own.
//
// A nil logger falls back to the standard library's default logger.
func Run(cfg *config.Config, sett *settings.Settings, logger *log.Logger) *Summary {
	if logger == nil {
		logger = log.Default()
	}

	// cfg and sett are dereferenced inside the closures, so a nil pointer
	// surfaces as a recovered panic in runSafe instead of crashing Run.
	checks := []func() CheckResult{
		func() CheckResult { return checkDockerSocket() },
		func() CheckResult { return checkStacksDir(cfg.Paths.StacksDir) },
		func() CheckResult { return checkDataDir(cfg.Paths.DataDir) },
		func() CheckResult { return checkSystemDataPath(cfg.Paths.SystemDataPath) },
		func() CheckResult { return checkStoragePaths(sett) },
		func() CheckResult { return checkGitCatalog(cfg.Paths.StacksDir) },
		func() CheckResult { return checkHubConnectivity(cfg) },
		func() CheckResult { return checkResticRepos(sett) },
		func() CheckResult { return checkMetricsDB(cfg.Paths.DataDir) },
	}

	s := &Summary{Results: make([]CheckResult, 0, len(checks))}

	logger.Println("[INFO] ========== Startup Self-Test ==========")
	for _, check := range checks {
		result := runSafe(check)
		s.Results = append(s.Results, result)

		// Any status other than "warn" or "fail" is logged with the PASS tag;
		// only the three known statuses are counted.
		tag := "[PASS]"
		switch result.Status {
		case "pass":
			s.Pass++
		case "warn":
			s.Warn++
			tag = "[WARN]"
		case "fail":
			s.Fail++
			tag = "[FAIL]"
		}
		logger.Printf("[INFO] %s %s: %s", tag, result.Name, result.Message)
	}
	logger.Println("[INFO] ========================================")
	logger.Printf("[INFO] Self-test complete: %d passed, %d warnings, %d failed", s.Pass, s.Warn, s.Fail)
	return s
}

// runSafe executes a check function, recovering from panics. A recovered
// panic is converted into a "fail" result named "unknown" whose message
// carries the panic value.
func runSafe(fn func() CheckResult) (result CheckResult) {
	defer func() {
		if r := recover(); r != nil {
			result = CheckResult{
				Name:    "unknown",
				Status:  "fail",
				Message: fmt.Sprintf("panic: %v", r),
			}
		}
	}()
	return fn()
}

// checkDockerSocket runs `docker info` with a 5-second timeout and reports
// the server version it prints. Any command error is a failure.
func checkDockerSocket() CheckResult {
	const name = "Docker socket"

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	out, err := exec.CommandContext(ctx, "docker", "info", "--format", "{{.ServerVersion}}").Output()
	if err != nil {
		return CheckResult{Name: name, Status: "fail", Message: fmt.Sprintf("docker info failed: %v", err)}
	}

	// TrimSpace instead of slicing off the last byte: slicing panics on empty
	// output and leaves a stray '\r' when the line ends in CRLF.
	version := strings.TrimSpace(string(out))
	return CheckResult{Name: name, Status: "pass", Message: fmt.Sprintf("reachable (v%s)", version)}
}

// checkStacksDir verifies that stacksDir exists and is a directory.
func checkStacksDir(stacksDir string) CheckResult {
	const name = "Stacks directory"

	info, err := os.Stat(stacksDir)
	if err != nil {
		return CheckResult{Name: name, Status: "fail", Message: fmt.Sprintf("not accessible: %v", err)}
	}
	if !info.IsDir() {
		return CheckResult{Name: name, Status: "fail", Message: "path exists but is not a directory"}
	}
	return CheckResult{Name: name, Status: "pass", Message: stacksDir}
}

// checkDataDir verifies that dataDir exists, is a directory, and is writable.
// A missing or non-directory path is a failure; a failed write is a warning.
func checkDataDir(dataDir string) CheckResult {
	const name = "Data directory"

	info, err := os.Stat(dataDir)
	if err != nil {
		return CheckResult{Name: name, Status: "fail", Message: fmt.Sprintf("not accessible: %v", err)}
	}
	if !info.IsDir() {
		return CheckResult{Name: name, Status: "fail", Message: "path exists but is not a directory"}
	}

	// Write test: create and remove a uniquely named temp file.
	tmpFile := filepath.Join(dataDir, fmt.Sprintf(".selftest-%d", time.Now().UnixNano()))
	if err := os.WriteFile(tmpFile, []byte("test"), 0644); err != nil {
		return CheckResult{Name: name, Status: "warn", Message: fmt.Sprintf("not writable: %v", err)}
	}
	// Best-effort cleanup: the write already proved the directory is writable,
	// so a failed removal does not change the verdict.
	_ = os.Remove(tmpFile)

	return CheckResult{Name: name, Status: "pass", Message: fmt.Sprintf("%s (writable)", dataDir)}
}

// checkSystemDataPath verifies that systemDataPath is configured, exists as a
// directory, and is reported as a mount point by system.IsMountPoint. Only a
// non-directory path is a failure; every other problem is a warning.
func checkSystemDataPath(systemDataPath string) CheckResult {
	const name = "System data path"

	if systemDataPath == "" {
		return CheckResult{Name: name, Status: "warn", Message: "not configured"}
	}

	info, err := os.Stat(systemDataPath)
	if err != nil {
		return CheckResult{Name: name, Status: "warn", Message: fmt.Sprintf("not accessible: %v", err)}
	}
	if !info.IsDir() {
		return CheckResult{Name: name, Status: "fail", Message: "path exists but is not a directory"}
	}
	if !system.IsMountPoint(systemDataPath) {
		return CheckResult{Name: name, Status: "warn", Message: fmt.Sprintf("%s exists but is not a mount point", systemDataPath)}
	}
	return CheckResult{Name: name, Status: "pass", Message: systemDataPath}
}

// checkStoragePaths counts the registered storage paths that are currently
// connected. Decommissioned paths are ignored. A path counts as connected
// when it is not flagged Disconnected, can be stat'ed, and is reported as a
// mount point by system.IsMountPoint. The check warns when no paths are
// registered or when none of the counted paths is connected.
func checkStoragePaths(sett *settings.Settings) CheckResult {
	const name = "Storage paths"

	paths := sett.GetStoragePaths()
	if len(paths) == 0 {
		return CheckResult{Name: name, Status: "warn", Message: "no storage paths registered"}
	}

	connected, disconnected := 0, 0
	for _, sp := range paths {
		if sp.Decommissioned {
			continue
		}
		if sp.Disconnected {
			disconnected++
			continue
		}
		if _, err := os.Stat(sp.Path); err == nil && system.IsMountPoint(sp.Path) {
			connected++
		} else {
			disconnected++
		}
	}

	msg := fmt.Sprintf("%d connected, %d disconnected", connected, disconnected)
	if connected == 0 && disconnected > 0 {
		return CheckResult{Name: name, Status: "warn", Message: msg}
	}
	return CheckResult{Name: name, Status: "pass", Message: msg}
}

// checkGitCatalog counts the .felhom.yml files found exactly one directory
// level below stacksDir and warns when there are none.
func checkGitCatalog(stacksDir string) CheckResult {
	const name = "Git catalog"

	matches, err := filepath.Glob(filepath.Join(stacksDir, "*", ".felhom.yml"))
	if err != nil {
		return CheckResult{Name: name, Status: "warn", Message: fmt.Sprintf("glob error: %v", err)}
	}
	if len(matches) == 0 {
		return CheckResult{Name: name, Status: "warn", Message: "no .felhom.yml files found in stacks dir"}
	}
	return CheckResult{Name: name, Status: "pass", Message: fmt.Sprintf("%d app definitions found", len(matches))}
}

// checkHubConnectivity issues a GET to <hub URL>/healthz with a 5-second
// timeout. The check is skipped (reported as pass) when the hub is disabled
// or has no URL. Any request error or non-2xx response is a warning.
func checkHubConnectivity(cfg *config.Config) CheckResult {
	const name = "Hub connectivity"

	if !cfg.Hub.Enabled || cfg.Hub.URL == "" {
		return CheckResult{Name: name, Status: "pass", Message: "hub disabled, skipped"}
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// Trim trailing slashes so a configured "https://hub/" does not become
	// "https://hub//healthz".
	healthURL := strings.TrimRight(cfg.Hub.URL, "/") + "/healthz"
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, healthURL, nil)
	if err != nil {
		return CheckResult{Name: name, Status: "warn", Message: fmt.Sprintf("request error: %v", err)}
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return CheckResult{Name: name, Status: "warn", Message: fmt.Sprintf("unreachable: %v", err)}
	}
	// Only the status code is inspected; the body is closed unread and the
	// close error carries no useful information here.
	defer func() { _ = resp.Body.Close() }()

	if resp.StatusCode >= 200 && resp.StatusCode < 300 {
		return CheckResult{Name: name, Status: "pass", Message: fmt.Sprintf("%s reachable (HTTP %d)", cfg.Hub.URL, resp.StatusCode)}
	}
	return CheckResult{Name: name, Status: "warn", Message: fmt.Sprintf("HTTP %d from %s", resp.StatusCode, healthURL)}
}

// checkResticRepos checks, for every storage path that is neither
// disconnected nor decommissioned, whether the path returned by
// backup.PrimaryResticRepoPath can be stat'ed. It warns only when repos were
// expected and none was found; partially missing repos still pass, with the
// missing count included in the message.
func checkResticRepos(sett *settings.Settings) CheckResult {
	const name = "Restic repos"

	paths := sett.GetStoragePaths()
	if len(paths) == 0 {
		return CheckResult{Name: name, Status: "pass", Message: "no storage paths, skipped"}
	}

	found, missing := 0, 0
	for _, sp := range paths {
		if sp.Disconnected || sp.Decommissioned {
			continue
		}
		repoPath := backup.PrimaryResticRepoPath(sp.Path)
		if _, err := os.Stat(repoPath); err == nil {
			found++
		} else {
			missing++
		}
	}

	if found == 0 && missing > 0 {
		return CheckResult{Name: name, Status: "warn", Message: fmt.Sprintf("0 repos found, %d expected", missing)}
	}

	msg := fmt.Sprintf("%d repos found", found)
	if missing > 0 {
		msg += fmt.Sprintf(", %d missing", missing)
	}
	return CheckResult{Name: name, Status: "pass", Message: msg}
}

// checkMetricsDB reports the size of metrics.db inside dataDir, or warns
// when the file is missing or cannot be stat'ed.
func checkMetricsDB(dataDir string) CheckResult {
	const name = "Metrics DB"

	dbPath := filepath.Join(dataDir, "metrics.db")
	info, err := os.Stat(dbPath)
	if err != nil {
		if os.IsNotExist(err) {
			return CheckResult{Name: name, Status: "warn", Message: "metrics.db not found (will be created)"}
		}
		return CheckResult{Name: name, Status: "warn", Message: fmt.Sprintf("not accessible: %v", err)}
	}
	return CheckResult{Name: name, Status: "pass", Message: fmt.Sprintf("%.1f MB", float64(info.Size())/1024/1024)}
}