ab77fa3544
internal/hub: the agent's first daemon — a periodic read-only host-report POSTed to the hub (the heartbeat; no separate ping). - HostReport wire contract (shared field-for-field with the hub ingest): host metrics, guests (vmid + spec), cloudflared status; storage/backups/restore-tests/pbs/audit collections DEFINED but emitted empty (slices 5/6 fill). - Collector over a read-only proxmoxReader (adapted to the real proxmox surface; no proxmox changes) + a CloudflaredProber. Partial-failure: NodeStatus fail = hard (skip POST); per-guest GuestConfig fail = status "unknown", still report. - Client: Bearer-auth POST, standard TLS (system roots / optional ca_file), typed TransportError/HTTPError, token never in errors. - Loop: immediate first report, adopt hub poll_interval (clamp [60,3600]), resilient to collect/report errors, clean ctx-cancel shutdown. - ControlEnvelope: only poll_interval_seconds acted on; blocked/desired_generation/has_signed_ops parsed-but-ignored (slice 4). - config: HubConfig + FELHOM_AGENT_HUB_* overlay + mode-aware HubConfig.Validate + WithDefaults + hub-key redaction; example config updated. - main: no-selftest mode is now the daemon; added --selftest=hub. Version -> 0.3.0. Tests: report serialization, client (incl. token-redaction), collector partial-failure, loop continuation+interval adoption, config. internal/proxmox + internal/authz untouched. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
119 lines
3.5 KiB
Go
119 lines
3.5 KiB
Go
package hub
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"crypto/tls"
|
|
"crypto/x509"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"log/slog"
|
|
"net/http"
|
|
"os"
|
|
"strings"
|
|
"time"
|
|
|
|
"gitea.dooplex.hu/admin/felhom-agent/internal/config"
|
|
)
|
|
|
|
// reportPath is the URL path of the hub's host-report endpoint; Report appends it
// to the client's base URL when building the POST request.
const reportPath = "/api/v1/host-report"
|
|
|
|
// Client delivers host-reports to the hub over HTTP, authenticating every request
// with this host's Bearer key. The transport is standard TLS with certificate
// verification always enabled, against either the system roots or a pool loaded
// from a CA file. There is deliberately no insecure mode: unlike the Proxmox
// self-signed path, the hub presents a real certificate.
type Client struct {
	baseURL string       // hub base URL that request paths are appended to
	apiKey  string       // per-host Bearer key sent in the Authorization header
	hc      *http.Client // HTTP client used to send requests
	logger  *slog.Logger // logger supplied at construction
}
|
|
|
|
// NewClient builds a hub client from config (defaults applied). It never logs the key.
|
|
func NewClient(cfg config.HubConfig, logger *slog.Logger) (*Client, error) {
|
|
cfg = cfg.WithDefaults()
|
|
if logger == nil {
|
|
logger = slog.Default()
|
|
}
|
|
tlsCfg := &tls.Config{} // system roots
|
|
if cfg.CAFile != "" {
|
|
pem, err := os.ReadFile(cfg.CAFile)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("hub: reading ca_file: %w", err)
|
|
}
|
|
pool := x509.NewCertPool()
|
|
if !pool.AppendCertsFromPEM(pem) {
|
|
return nil, fmt.Errorf("hub: ca_file %q contained no usable certificates", cfg.CAFile)
|
|
}
|
|
tlsCfg.RootCAs = pool
|
|
}
|
|
hc := &http.Client{
|
|
Timeout: time.Duration(cfg.TimeoutSeconds) * time.Second,
|
|
Transport: &http.Transport{TLSClientConfig: tlsCfg},
|
|
}
|
|
return newClient(cfg.URL, cfg.APIKey, hc, logger), nil
|
|
}
|
|
|
|
// newClient is the shared constructor (tests inject a mock-transport *http.Client).
|
|
func newClient(baseURL, apiKey string, hc *http.Client, logger *slog.Logger) *Client {
|
|
return &Client{baseURL: strings.TrimRight(baseURL, "/"), apiKey: apiKey, hc: hc, logger: logger}
|
|
}
|
|
|
|
// TransportError is a network/connection failure: the request produced no HTTP
// response. Err holds the underlying cause. It never contains the bearer token.
type TransportError struct{ Err error }

// Error formats the failure with the "hub: transport error" prefix. A nil Err is
// tolerated (only the prefix is returned) so that formatting a zero-value
// TransportError cannot panic.
func (e *TransportError) Error() string {
	if e.Err == nil {
		return "hub: transport error"
	}
	return "hub: transport error: " + e.Err.Error()
}

// Unwrap exposes the underlying cause to errors.Is and errors.As.
func (e *TransportError) Unwrap() error { return e.Err }
|
|
|
|
// HTTPError reports that the hub answered with a status outside the 2xx range.
// BodyTail is a short, token-free excerpt of the response body.
type HTTPError struct {
	StatusCode int    // HTTP status code of the response
	BodyTail   string // short excerpt of the response body
}

// Error renders the status code followed by the body excerpt.
func (e *HTTPError) Error() string {
	status := fmt.Sprintf("hub: HTTP %d", e.StatusCode)
	return status + ": " + e.BodyTail
}
|
|
|
|
// Report POSTs the host-report and returns the parsed control envelope. The report
|
|
// IS the heartbeat (locked decision 1). Errors are typed (transport vs HTTP) and
|
|
// never include the bearer token.
|
|
func (c *Client) Report(ctx context.Context, r *HostReport) (*ControlEnvelope, error) {
|
|
body, err := json.Marshal(r)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("hub: marshaling report: %w", err)
|
|
}
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+reportPath, bytes.NewReader(body))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("hub: building request: %w", err)
|
|
}
|
|
req.Header.Set("Authorization", "Bearer "+c.apiKey)
|
|
req.Header.Set("Content-Type", "application/json")
|
|
req.Header.Set("Accept", "application/json")
|
|
|
|
resp, err := c.hc.Do(req)
|
|
if err != nil {
|
|
return nil, &TransportError{Err: err} // token is in the request header, never the error
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
raw, _ := io.ReadAll(io.LimitReader(resp.Body, 64<<10))
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
return nil, &HTTPError{StatusCode: resp.StatusCode, BodyTail: tail(raw, 256)}
|
|
}
|
|
var env ControlEnvelope
|
|
if err := json.Unmarshal(raw, &env); err != nil {
|
|
return nil, fmt.Errorf("hub: decoding control envelope: %w", err)
|
|
}
|
|
return &env, nil
|
|
}
|
|
|
|
// tail converts b to a whitespace-trimmed string and shortens it to at most limit
// bytes, appending "…" when anything was cut off. Despite its name it keeps the
// START of the text.
//
// The cut point is moved back to a UTF-8 rune boundary so a multi-byte character
// is never split (which would put invalid UTF-8 into error messages). A negative
// limit is treated as zero.
func tail(b []byte, limit int) string {
	s := strings.TrimSpace(string(b))
	if limit < 0 {
		limit = 0
	}
	if len(s) <= limit {
		return s
	}

	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes. A rune has at most
	// three of them, so stepping back at most three bytes reaches the start of
	// any valid rune that straddles the limit.
	cut := limit
	for cut > 0 && limit-cut < 3 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + "…"
}
|