Files
felhom-controller/controller/internal/agentapi/client.go
T
admin d8d1e17758 slice 9: host-health view on the monitoring page (v0.39.0)
Add agentapi HostMetrics() + a thin /api/host-metrics proxy to the agent's
new GET /host/metrics, and a 'Szerver allapota (gazdagep)' card on the
monitoring page rendering host CPU%/load/mem/CPU-temp(n/a)/uptime + per-
storage capacity bars (thin-pool fill, disk temp/wear). Polls every 8s.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-10 16:16:15 +02:00

411 lines
14 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Package agentapi is the in-guest controller's client for the host agent's per-guest local
// API (doc 03 §6, slice 8A). It reaches the agent over the bridge, pinning the agent's
// self-signed leaf by SHA-256 (the same pin convention the agent uses for the Proxmox/PBS host
// certs), and authenticates with the per-guest bearer token. In 8A it exercises GET /storage
// (connectivity + the controller learning its mounts); the full surface (the /backup/due
// quiesce loop) lands in 8B.
package agentapi
import (
"bytes"
"context"
"crypto/sha256"
"crypto/tls"
"crypto/x509"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
)
// Client talks to one agent local-API endpoint with a pinned leaf + bearer token.
// Build one with New; every request method goes through the get/post helpers.
type Client struct {
// baseURL is the URL prefix ("https://" + endpoint) that request paths are appended to.
baseURL string
// token is the per-guest bearer token sent in the Authorization header.
token string
// hc is the HTTP client used for every request; New gives it the pinning TLS config.
hc *http.Client
}
// MountInfo mirrors the agent's GET /storage mount entry (doc 03 §6). It is the element type
// of StorageResponse.Mounts and is decoded from JSON via the field tags.
type MountInfo struct {
Key string `json:"key"`
Storage string `json:"storage"`
MountPoint string `json:"mount_point"`
Class string `json:"class"` // fast | slow | ""
Backup bool `json:"backup"`
}
// StorageResponse mirrors the agent's GET /storage data payload. It is what Client.Storage
// decodes the envelope's data into.
type StorageResponse struct {
VMID int `json:"vmid"`
// Mounts holds one MountInfo per mount reported by the agent.
Mounts []MountInfo `json:"mounts"`
}
// apiResponse is the agent's {ok,data,error} envelope. The get/post helpers decode every
// response body into it, fail with Error when OK is false, and otherwise hand Data back.
type apiResponse struct {
OK bool `json:"ok"`
// Data is kept raw so each caller can decode it into its own response type.
Data json.RawMessage `json:"data"`
Error string `json:"error"`
}
// New builds a pinned client for endpoint ("host:port") with the given per-guest token and the
// agent leaf-cert SHA-256 fingerprint (hex, ':'-separators tolerated). The pin is the trust
// anchor — the agent serves a self-signed cert, so chain verification is replaced by an exact
// leaf-DER SHA-256 match (fails closed on any mismatch).
//
// endpoint and token are trimmed of surrounding whitespace before validation. It returns an
// error when either is empty after trimming, or when the fingerprint is not a 64-hex-char
// SHA-256 (as judged by normalizeFingerprint).
func New(endpoint, token, fingerprintHex string) (*Client, error) {
	endpoint = strings.TrimSpace(endpoint)
	if endpoint == "" {
		return nil, fmt.Errorf("agentapi: endpoint required")
	}
	// Trim the token the same way as the endpoint and fingerprint: a whitespace-only token
	// must not pass the emptiness check, and a trailing newline (e.g. a token read from a
	// file) would otherwise end up inside the Authorization header value.
	token = strings.TrimSpace(token)
	if token == "" {
		return nil, fmt.Errorf("agentapi: token required")
	}
	want, err := normalizeFingerprint(fingerprintHex)
	if err != nil {
		return nil, err
	}
	tlsCfg := &tls.Config{
		InsecureSkipVerify: true, // self-signed leaf — the pin below is the real check
		VerifyPeerCertificate: func(rawCerts [][]byte, _ [][]*x509.Certificate) error {
			if len(rawCerts) == 0 {
				return fmt.Errorf("agentapi: TLS pin: peer presented no certificate")
			}
			got := sha256.Sum256(rawCerts[0]) // leaf DER
			if hex.EncodeToString(got[:]) != want {
				return fmt.Errorf("agentapi: TLS pin mismatch: agent leaf SHA-256 does not match the bootstrap fingerprint")
			}
			return nil
		},
		MinVersion: tls.VersionTLS12,
	}
	return &Client{
		baseURL: "https://" + endpoint,
		token:   token,
		hc: &http.Client{
			// Overall per-request deadline, in addition to whatever the caller's context imposes.
			Timeout:   15 * time.Second,
			Transport: &http.Transport{TLSClientConfig: tlsCfg},
		},
	}, nil
}
// Storage fetches GET /storage and decodes the envelope's data into this guest's mounts
// (connectivity + placement view). A request failure is returned as-is; a decode failure is
// wrapped with the endpoint name.
func (c *Client) Storage(ctx context.Context) (StorageResponse, error) {
	var mounts StorageResponse
	payload, err := c.get(ctx, "/storage")
	if err == nil {
		if decodeErr := json.Unmarshal(payload, &mounts); decodeErr != nil {
			err = fmt.Errorf("agentapi: decode /storage: %w", decodeErr)
		}
	}
	return mounts, err
}
// ---- slice 8B: app-consistent backup (quiesce loop) -------------------------------------
// DueResponse mirrors the agent's GET /backup/due payload. It is what Client.BackupDue
// decodes the envelope's data into.
type DueResponse struct {
VMID int `json:"vmid"`
// Due is the agent's answer to "is a backup due"; Reason is the accompanying text.
Due bool `json:"due"`
Reason string `json:"reason"`
}
// BackupResponse mirrors the agent's POST /backup payload. It is what Client.StartBackup
// decodes the envelope's data into.
type BackupResponse struct {
VMID int `json:"vmid"`
JobID string `json:"job_id"`
// Phase is presumably one of the Phase* constants — confirm against the agent.
Phase string `json:"phase"`
}
// StatusResponse mirrors the agent's GET /backup/status payload. It is what
// Client.BackupStatus decodes the envelope's data into.
type StatusResponse struct {
VMID int `json:"vmid"`
Phase string `json:"phase"` // idle | running | done | failed
JobID string `json:"job_id"`
// Error is the agent-supplied error text for the job, if any.
Error string `json:"error"`
}
// Backup status phases (mirror the agent's vocabulary). These are the four values listed on
// StatusResponse.Phase.
const (
PhaseIdle = "idle"
PhaseRunning = "running"
PhaseDone = "done"
PhaseFailed = "failed"
)
// BackupDue asks GET /backup/due whether a policy-scheduled backup is due for this guest (the
// quiesce trigger). A request failure is returned as-is; a decode failure is wrapped with the
// endpoint name.
func (c *Client) BackupDue(ctx context.Context) (DueResponse, error) {
	var due DueResponse
	payload, err := c.get(ctx, "/backup/due")
	if err == nil {
		if decodeErr := json.Unmarshal(payload, &due); decodeErr != nil {
			err = fmt.Errorf("agentapi: decode /backup/due: %w", decodeErr)
		}
	}
	return due, err
}
// StartBackup enqueues a backup of this guest (the agent vzdump) via POST /backup with an empty
// JSON object as the body, and returns the job to poll. A request failure is returned as-is; a
// decode failure is wrapped with the endpoint name.
func (c *Client) StartBackup(ctx context.Context) (BackupResponse, error) {
	var job BackupResponse
	payload, err := c.post(ctx, "/backup", struct{}{})
	if err == nil {
		if decodeErr := json.Unmarshal(payload, &job); decodeErr != nil {
			err = fmt.Errorf("agentapi: decode POST /backup: %w", decodeErr)
		}
	}
	return job, err
}
// BackupStatus fetches GET /backup/status: the current/last backup job phase for this guest.
// A request failure is returned as-is; a decode failure is wrapped with the endpoint name.
func (c *Client) BackupStatus(ctx context.Context) (StatusResponse, error) {
	var status StatusResponse
	payload, err := c.get(ctx, "/backup/status")
	if err == nil {
		if decodeErr := json.Unmarshal(payload, &status); decodeErr != nil {
			err = fmt.Errorf("agentapi: decode /backup/status: %w", decodeErr)
		}
	}
	return status, err
}
// ---- slice 8C: disk management (execution is the agent's) --------------------------------
// DiskInfo mirrors the agent's GET /disks entry. It is the element type of
// DisksResponse.Disks and is decoded from JSON via the field tags.
type DiskInfo struct {
Name string `json:"name"`
Type string `json:"type"`
State string `json:"state"`
BackingDevice string `json:"backing_device"`
MountPath string `json:"mount_path"`
Class string `json:"class"`
// DataBearing is the agent's per-drive data-bearing flag; DataReason is its accompanying text.
DataBearing bool `json:"data_bearing"`
DataReason string `json:"data_reason"`
}
// DisksResponse mirrors GET /disks. It is what Client.Disks decodes the envelope's data into.
type DisksResponse struct {
VMID int `json:"vmid"`
// Disks holds one DiskInfo per drive reported by the agent.
Disks []DiskInfo `json:"disks"`
}
// FormatResult mirrors POST /disks/format (the success/refusal payload). Client.FormatDisk
// decodes a successful response into it, and synthesizes one (Device set, Formatted false,
// DataBearing true) when the agent refuses with HTTP 403.
type FormatResult struct {
VMID int `json:"vmid"`
Device string `json:"device"`
Formatted bool `json:"formatted"`
DataBearing bool `json:"data_bearing"`
Reason string `json:"reason"`
}
// ErrFormatRefused is returned by FormatDisk when the agent refuses a data-bearing format
// (pending operator authorization — the 8C invariant). The UI surfaces this distinctly.
// FormatDisk returns this exact value, so callers can compare against it (==, errors.Is).
var ErrFormatRefused = fmt.Errorf("agentapi: format refused — device is data-bearing (operator authorization required)")
// Disks fetches GET /disks: the host drives the agent manages, with a data-bearing flag per
// drive. A request failure is returned as-is; a decode failure is wrapped with the endpoint name.
func (c *Client) Disks(ctx context.Context) (DisksResponse, error) {
	var drives DisksResponse
	payload, err := c.get(ctx, "/disks")
	if err == nil {
		if decodeErr := json.Unmarshal(payload, &drives); decodeErr != nil {
			err = fmt.Errorf("agentapi: decode /disks: %w", decodeErr)
		}
	}
	return drives, err
}
// AssignDisk attaches a drive (by fs-UUID) as a host mount (benign, self-serve) via
// POST /disks/assign. The four arguments are sent as a flat JSON object; the response payload
// is discarded and only the request error, if any, is returned.
func (c *Client) AssignDisk(ctx context.Context, uuid, where, fstype, options string) error {
	request := map[string]string{
		"uuid":    uuid,
		"where":   where,
		"fstype":  fstype,
		"options": options,
	}
	if _, err := c.post(ctx, "/disks/assign", request); err != nil {
		return err
	}
	return nil
}
// EjectResult mirrors POST /disks/eject (the dependent-guest warning). It is what
// Client.EjectDisk decodes the envelope's data into.
type EjectResult struct {
VMID int `json:"vmid"`
Ejected string `json:"ejected"`
// DependentGuests lists the dependent guests as integers (presumably VMIDs — confirm).
DependentGuests []int `json:"dependent_guests"`
}
// EjectDisk safe-unmounts a host mount (data preserved) via POST /disks/eject and returns the
// dependent guests. A request failure is returned as-is; a decode failure is wrapped with the
// endpoint name.
func (c *Client) EjectDisk(ctx context.Context, where string) (EjectResult, error) {
	var ejected EjectResult
	request := map[string]string{"where": where}
	payload, err := c.post(ctx, "/disks/eject", request)
	if err == nil {
		if decodeErr := json.Unmarshal(payload, &ejected); decodeErr != nil {
			err = fmt.Errorf("agentapi: decode /disks/eject: %w", decodeErr)
		}
	}
	return ejected, err
}
// FormatDisk asks the agent to format a device via POST /disks/format. The AGENT inspects the
// device and decides data-bearing-ness — a data-bearing device is refused (ErrFormatRefused),
// the controller's claim is irrelevant. Only a device the agent reads as blank is formatted.
//
// On refusal the returned FormatResult carries the device name with DataBearing set; on any
// other request failure the zero FormatResult is returned with the underlying error.
func (c *Client) FormatDisk(ctx context.Context, device, fstype string) (FormatResult, error) {
	request := map[string]string{"device": device, "fstype": fstype}
	payload, err := c.post(ctx, "/disks/format", request)
	switch {
	case err == nil:
		// Accepted: decode the payload below.
	case strings.Contains(err.Error(), "HTTP 403"):
		// A data-bearing refusal comes back as HTTP 403 (the post helper turns it into an error).
		return FormatResult{Device: device, DataBearing: true}, ErrFormatRefused
	default:
		return FormatResult{}, err
	}
	var result FormatResult
	if decodeErr := json.Unmarshal(payload, &result); decodeErr != nil {
		return result, fmt.Errorf("agentapi: decode /disks/format: %w", decodeErr)
	}
	return result, nil
}
// ---- slice 9: host metrics (the customer host-health view) -------------------------------
// HostMetrics mirrors the agent's GET /host/metrics `host` block (shared HostMetrics wire shape).
// CPUTempC is a pointer so a host with no temp sensor is null ("n/a"), distinct from a real 0.
// It is the type of HostMetricsResponse.Host.
type HostMetrics struct {
Node string `json:"node"`
CPUPercent float64 `json:"cpu_percent"` // percent; presumably 0-100 (original range comment was garbled)
MemoryTotalBytes int64 `json:"memory_total_bytes"`
MemoryUsedBytes int64 `json:"memory_used_bytes"`
MemoryPercent float64 `json:"memory_percent"`
DiskTotalBytes int64 `json:"disk_total_bytes"` // host root fs
DiskUsedBytes int64 `json:"disk_used_bytes"`
DiskPercent float64 `json:"disk_percent"`
// LoadAvg is decoded as a list of strings, not numbers.
LoadAvg []string `json:"loadavg"`
UptimeSeconds int64 `json:"uptime_seconds"`
CPUTempC *int `json:"cpu_temp_c"` // °C or null ("n/a")
}
// ThinPoolFill mirrors the agent's lvmthin pool fill (a full thin-pool corrupts every guest on it).
// It is the type behind the optional StorageTarget.ThinPool pointer.
type ThinPoolFill struct {
DataUsedFraction float64 `json:"data_used_fraction"`
// MetadataUsedFraction is a pointer, so a JSON null decodes to nil rather than 0.
MetadataUsedFraction *float64 `json:"metadata_used_fraction"`
}
// SmartSummary mirrors the agent's per-disk SMART health (only the fields the UI renders). Pointers
// are null when the device type does not expose that attribute. It is the type of
// StorageTarget.Smart.
type SmartSummary struct {
Health string `json:"health"` // PASSED | FAILING | UNKNOWN
TemperatureC *int `json:"temperature_c"`
PercentageUsed *int `json:"percentage_used"` // NVMe wear (%); null for SATA/USB
}
// StorageTarget mirrors the agent's GET /host/metrics storage_targets entry (the per-storage
// capacity + health the monitoring view renders). It is a SUBSET of the agent's wire shape — only
// the fields the UI reads; unknown JSON keys are ignored. It is the element type of
// HostMetricsResponse.StorageTargets.
type StorageTarget struct {
Name string `json:"name"`
Type string `json:"type"`
State string `json:"state"`
Reachable bool `json:"reachable"`
TotalBytes int64 `json:"total_bytes"`
UsedBytes int64 `json:"used_bytes"`
AvailBytes int64 `json:"avail_bytes"`
UsedFraction float64 `json:"used_fraction"`
Content string `json:"content"`
MountPath string `json:"mount_path"`
ClassHint string `json:"class_hint"`
// ThinPool is a pointer with omitempty: nil when the entry carries no thin_pool object.
ThinPool *ThinPoolFill `json:"thin_pool,omitempty"`
// Smart is a value, not a pointer, so it is the zero SmartSummary when the key is absent.
Smart SmartSummary `json:"smart"`
}
// HostMetricsResponse mirrors the agent's GET /host/metrics payload (host-wide health + per-storage
// capacity). Host-wide and token-authed (one-customer-per-host); a fresh collect, not a snapshot.
// It is what Client.HostMetrics decodes the envelope's data into.
type HostMetricsResponse struct {
VMID int `json:"vmid"`
Host HostMetrics `json:"host"`
StorageTargets []StorageTarget `json:"storage_targets"`
}
// HostMetrics fetches GET /host/metrics: the host's live health + per-storage capacity.
// A request failure is returned as-is; a decode failure is wrapped with the endpoint name.
func (c *Client) HostMetrics(ctx context.Context) (HostMetricsResponse, error) {
	var metrics HostMetricsResponse
	payload, err := c.get(ctx, "/host/metrics")
	if err == nil {
		if decodeErr := json.Unmarshal(payload, &metrics); decodeErr != nil {
			err = fmt.Errorf("agentapi: decode /host/metrics: %w", decodeErr)
		}
	}
	return metrics, err
}
// get issues an authenticated GET for path (appended to the client's base URL) and unwraps the
// {ok,data,error} envelope, returning the raw data payload for the caller to decode.
//
// It returns an error when the request cannot be built or sent, when the status is not 200
// (reported as "HTTP <code>"), when the body cannot be read, when the body is not a valid
// envelope, or when the envelope has ok=false (reported with the agent's error text).
func (c *Client) get(ctx context.Context, path string) (json.RawMessage, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.baseURL+path, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("Authorization", "Bearer "+c.token)
	resp, err := c.hc.Do(req)
	if err != nil {
		return nil, fmt.Errorf("agentapi: GET %s: %w", path, err)
	}
	defer resp.Body.Close()
	// Read at most 1 MiB of the body. The read happens before the status check so the body is
	// consumed on error responses too.
	raw, readErr := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
	if resp.StatusCode != http.StatusOK {
		// The status is the more useful diagnosis, so it wins over any read error.
		return nil, fmt.Errorf("agentapi: GET %s: HTTP %d", path, resp.StatusCode)
	}
	if readErr != nil {
		// Surface a failed/interrupted read as such instead of as a confusing JSON error.
		return nil, fmt.Errorf("agentapi: GET %s: read body: %w", path, readErr)
	}
	var env apiResponse
	if err := json.Unmarshal(raw, &env); err != nil {
		return nil, fmt.Errorf("agentapi: GET %s: bad envelope: %w", path, err)
	}
	if !env.OK {
		return nil, fmt.Errorf("agentapi: GET %s: %s", path, env.Error)
	}
	return env.Data, nil
}
// post issues an authenticated JSON POST of body to path (appended to the client's base URL)
// and unwraps the {ok,data,error} envelope, returning the raw data payload. The agent returns
// 200 or 202 for accepted requests.
//
// It returns an error when body cannot be marshalled, when the request cannot be built or sent,
// when the status is neither 200 nor 202 (reported as "HTTP <code>" — FormatDisk matches on
// this text, so keep the format stable), when the response body cannot be read, when it is not
// a valid envelope, or when the envelope has ok=false (reported with the agent's error text).
func (c *Client) post(ctx context.Context, path string, body any) (json.RawMessage, error) {
	buf, err := json.Marshal(body)
	if err != nil {
		return nil, err
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+path, bytes.NewReader(buf))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Authorization", "Bearer "+c.token)
	req.Header.Set("Content-Type", "application/json")
	resp, err := c.hc.Do(req)
	if err != nil {
		return nil, fmt.Errorf("agentapi: POST %s: %w", path, err)
	}
	defer resp.Body.Close()
	// Read at most 1 MiB of the body. The read happens before the status check so the body is
	// consumed on error responses too.
	raw, readErr := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
	if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusAccepted {
		// The status is the more useful diagnosis, so it wins over any read error.
		return nil, fmt.Errorf("agentapi: POST %s: HTTP %d", path, resp.StatusCode)
	}
	if readErr != nil {
		// Surface a failed/interrupted read as such instead of as a confusing JSON error.
		return nil, fmt.Errorf("agentapi: POST %s: read body: %w", path, readErr)
	}
	var env apiResponse
	if err := json.Unmarshal(raw, &env); err != nil {
		return nil, fmt.Errorf("agentapi: POST %s: bad envelope: %w", path, err)
	}
	if !env.OK {
		return nil, fmt.Errorf("agentapi: POST %s: %s", path, env.Error)
	}
	return env.Data, nil
}
// normalizeFingerprint canonicalizes a certificate fingerprint: it trims surrounding
// whitespace, drops every ':', ' ' and tab, and lowercases the rest. The result must be exactly
// 64 characters of valid hex (a SHA-256); otherwise an error describing the problem is returned.
func normalizeFingerprint(fp string) (string, error) {
	cleaned := strings.TrimSpace(fp)
	for _, sep := range []string{":", " ", "\t"} {
		cleaned = strings.ReplaceAll(cleaned, sep, "")
	}
	cleaned = strings.ToLower(cleaned)
	if n := len(cleaned); n != 64 {
		return "", fmt.Errorf("agentapi: fingerprint must be a SHA-256 (64 hex chars), got %d", n)
	}
	if _, decodeErr := hex.DecodeString(cleaned); decodeErr != nil {
		return "", fmt.Errorf("agentapi: fingerprint is not valid hex: %w", decodeErr)
	}
	return cleaned, nil
}