Files
felhom-controller/controller/internal/agentapi/client.go
T
admin 0294513906 slice 8C Phase B.1: agentapi disk client (Disks/AssignDisk/EjectDisk/FormatDisk)
ErrFormatRefused surfaces the agent's data-bearing refusal distinctly. Tests:
list, blank format OK, data-bearing refused, eject dependents.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-10 13:23:00 +02:00

339 lines
11 KiB
Go

// Package agentapi is the in-guest controller's client for the host agent's per-guest local
// API (doc 03 §6, slice 8A). It reaches the agent over the bridge, pinning the agent's
// self-signed leaf by SHA-256 (the same pin convention the agent uses for the Proxmox/PBS host
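// certs), and authenticates with the per-guest bearer token. Slice 8A introduced GET /storage
// (connectivity + the controller learning its mounts); slice 8B added the backup quiesce loop
// (/backup/due, /backup, /backup/status); slice 8C added disk management (/disks, /disks/assign,
// /disks/eject, /disks/format).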
package agentapi
import (
"bytes"
"context"
"crypto/sha256"
"crypto/tls"
"crypto/x509"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
)
// Client is a handle on a single agent local-API endpoint. Requests are sent through an HTTP
// client whose TLS layer pins the agent's leaf certificate, and each one carries the bearer token.
type Client struct {
	baseURL string       // scheme + endpoint; request paths are appended to it
	token   string       // bearer token placed in the Authorization header
	hc      *http.Client // HTTP client used for every request
}
// MountInfo is one mount entry as reported by the agent's GET /storage (doc 03 §6). The JSON
// tags are the wire names used by the agent.
type MountInfo struct {
	Key        string `json:"key"`
	Storage    string `json:"storage"`
	MountPoint string `json:"mount_point"`
	Class      string `json:"class"` // fast | slow | ""
	Backup     bool   `json:"backup"`
}
// StorageResponse is the data payload of the agent's GET /storage: the guest's VMID together
// with the list of its mounts.
type StorageResponse struct {
	VMID   int         `json:"vmid"`
	Mounts []MountInfo `json:"mounts"`
}
// apiResponse is the {ok,data,error} envelope wrapped around every agent reply. Data is kept
// raw so each endpoint method can decode it into its own payload type.
type apiResponse struct {
	OK    bool            `json:"ok"`
	Data  json.RawMessage `json:"data"`
	Error string          `json:"error"`
}
// New returns a Client for endpoint ("host:port") that authenticates with the per-guest token
// and trusts exactly one certificate: the agent leaf whose DER SHA-256 equals fingerprintHex
// (hex; ':' separators are tolerated).
//
// The agent serves a self-signed certificate, so regular chain verification is switched off and
// replaced by the exact-match pin; any mismatch (or a missing peer certificate) fails the
// handshake. An error is returned when endpoint or token is empty or the fingerprint is not a
// well-formed SHA-256.
func New(endpoint, token, fingerprintHex string) (*Client, error) {
	hostPort := strings.TrimSpace(endpoint)
	switch {
	case hostPort == "":
		return nil, fmt.Errorf("agentapi: endpoint required")
	case token == "":
		return nil, fmt.Errorf("agentapi: token required")
	}

	pin, err := normalizeFingerprint(fingerprintHex)
	if err != nil {
		return nil, err
	}

	// verifyLeaf is the actual trust decision: hash the first (leaf) certificate's DER bytes
	// and compare against the normalized pin.
	verifyLeaf := func(rawCerts [][]byte, _ [][]*x509.Certificate) error {
		if len(rawCerts) == 0 {
			return fmt.Errorf("agentapi: TLS pin: peer presented no certificate")
		}
		sum := sha256.Sum256(rawCerts[0])
		if hex.EncodeToString(sum[:]) == pin {
			return nil
		}
		return fmt.Errorf("agentapi: TLS pin mismatch: agent leaf SHA-256 does not match the bootstrap fingerprint")
	}

	transport := &http.Transport{
		TLSClientConfig: &tls.Config{
			InsecureSkipVerify:    true, // self-signed leaf — verifyLeaf is the real check
			VerifyPeerCertificate: verifyLeaf,
			MinVersion:            tls.VersionTLS12,
		},
	}

	client := &Client{
		baseURL: "https://" + hostPort,
		token:   token,
		hc: &http.Client{
			Timeout:   15 * time.Second,
			Transport: transport,
		},
	}
	return client, nil
}
// Storage performs GET /storage and decodes the reply into this guest's mount list. It doubles
// as the connectivity check and as the controller's view of its placement. A transport or
// envelope failure is returned as-is; a payload that does not decode is wrapped with context.
func (c *Client) Storage(ctx context.Context) (StorageResponse, error) {
	payload, getErr := c.get(ctx, "/storage")
	if getErr != nil {
		return StorageResponse{}, getErr
	}

	var storage StorageResponse
	if decodeErr := json.Unmarshal(payload, &storage); decodeErr != nil {
		return storage, fmt.Errorf("agentapi: decode /storage: %w", decodeErr)
	}
	return storage, nil
}
// ---- slice 8B: app-consistent backup (quiesce loop) -------------------------------------
// DueResponse is the data payload of the agent's GET /backup/due.
type DueResponse struct {
	VMID   int    `json:"vmid"`
	Due    bool   `json:"due"`
	Reason string `json:"reason"`
}
// BackupResponse is the data payload of the agent's POST /backup.
type BackupResponse struct {
	VMID  int    `json:"vmid"`
	JobID string `json:"job_id"`
	Phase string `json:"phase"`
}
// StatusResponse is the data payload of the agent's GET /backup/status.
type StatusResponse struct {
	VMID  int    `json:"vmid"`
	Phase string `json:"phase"` // idle | running | done | failed
	JobID string `json:"job_id"`
	Error string `json:"error"`
}
// Values of the backup status Phase field; the strings match the agent's own vocabulary.
const (
	PhaseIdle    = "idle"
	PhaseRunning = "running"
	PhaseDone    = "done"
	PhaseFailed  = "failed"
)
// BackupDue asks the agent (GET /backup/due) whether a policy-scheduled backup is due for this
// guest; a positive answer is the trigger for the quiesce loop. A transport or envelope failure
// is returned as-is; a payload that does not decode is wrapped with context.
func (c *Client) BackupDue(ctx context.Context) (DueResponse, error) {
	payload, getErr := c.get(ctx, "/backup/due")
	if getErr != nil {
		return DueResponse{}, getErr
	}

	var due DueResponse
	if decodeErr := json.Unmarshal(payload, &due); decodeErr != nil {
		return due, fmt.Errorf("agentapi: decode /backup/due: %w", decodeErr)
	}
	return due, nil
}
// StartBackup submits POST /backup with an empty JSON object, asking the agent to enqueue a
// backup of this guest (the agent vzdump). The returned BackupResponse identifies the job to
// poll. A transport or envelope failure is returned as-is; a payload that does not decode is
// wrapped with context.
func (c *Client) StartBackup(ctx context.Context) (BackupResponse, error) {
	payload, postErr := c.post(ctx, "/backup", struct{}{})
	if postErr != nil {
		return BackupResponse{}, postErr
	}

	var job BackupResponse
	if decodeErr := json.Unmarshal(payload, &job); decodeErr != nil {
		return job, fmt.Errorf("agentapi: decode POST /backup: %w", decodeErr)
	}
	return job, nil
}
// BackupStatus fetches GET /backup/status: the phase of this guest's current or most recent
// backup job. A transport or envelope failure is returned as-is; a payload that does not decode
// is wrapped with context.
func (c *Client) BackupStatus(ctx context.Context) (StatusResponse, error) {
	payload, getErr := c.get(ctx, "/backup/status")
	if getErr != nil {
		return StatusResponse{}, getErr
	}

	var status StatusResponse
	if decodeErr := json.Unmarshal(payload, &status); decodeErr != nil {
		return status, fmt.Errorf("agentapi: decode /backup/status: %w", decodeErr)
	}
	return status, nil
}
// ---- slice 8C: disk management (execution is the agent's) --------------------------------
// DiskInfo is one drive entry as reported by the agent's GET /disks. DataBearing is the agent's
// per-drive data-bearing flag.
type DiskInfo struct {
	Name          string `json:"name"`
	Type          string `json:"type"`
	State         string `json:"state"`
	BackingDevice string `json:"backing_device"`
	MountPath     string `json:"mount_path"`
	Class         string `json:"class"`
	DataBearing   bool   `json:"data_bearing"`
	DataReason    string `json:"data_reason"`
}
// DisksResponse is the data payload of the agent's GET /disks.
type DisksResponse struct {
	VMID  int        `json:"vmid"`
	Disks []DiskInfo `json:"disks"`
}
// FormatResult is the payload of the agent's POST /disks/format, used both for a successful
// format and for a refusal.
type FormatResult struct {
	VMID        int    `json:"vmid"`
	Device      string `json:"device"`
	Formatted   bool   `json:"formatted"`
	DataBearing bool   `json:"data_bearing"`
	Reason      string `json:"reason"`
}
// ErrFormatRefused is the sentinel FormatDisk returns when the agent declines to format a
// device because it is data-bearing (the 8C invariant: such a format needs operator
// authorization). It is a distinct value so the UI can present the refusal separately from
// other failures.
var ErrFormatRefused = fmt.Errorf(
	"agentapi: format refused — device is data-bearing (operator authorization required)",
)
// Disks fetches GET /disks: the host drives managed by the agent, each carrying a data-bearing
// flag. A transport or envelope failure is returned as-is; a payload that does not decode is
// wrapped with context.
func (c *Client) Disks(ctx context.Context) (DisksResponse, error) {
	payload, getErr := c.get(ctx, "/disks")
	if getErr != nil {
		return DisksResponse{}, getErr
	}

	var disks DisksResponse
	if decodeErr := json.Unmarshal(payload, &disks); decodeErr != nil {
		return disks, fmt.Errorf("agentapi: decode /disks: %w", decodeErr)
	}
	return disks, nil
}
// AssignDisk posts to /disks/assign to attach the drive identified by its filesystem UUID as a
// host mount at where, with the given fstype and mount options (a benign, self-serve action).
// The response payload is not inspected; only the error, if any, is returned.
func (c *Client) AssignDisk(ctx context.Context, uuid, where, fstype, options string) error {
	request := map[string]string{
		"uuid":    uuid,
		"where":   where,
		"fstype":  fstype,
		"options": options,
	}
	if _, err := c.post(ctx, "/disks/assign", request); err != nil {
		return err
	}
	return nil
}
// EjectResult is the data payload of the agent's POST /disks/eject. DependentGuests carries the
// dependent-guest warning.
type EjectResult struct {
	VMID            int    `json:"vmid"`
	Ejected         string `json:"ejected"`
	DependentGuests []int  `json:"dependent_guests"`
}
// EjectDisk posts to /disks/eject to safely unmount the host mount at where (data is preserved)
// and reports which guests depend on it. A transport or envelope failure is returned as-is; a
// payload that does not decode is wrapped with context.
func (c *Client) EjectDisk(ctx context.Context, where string) (EjectResult, error) {
	request := map[string]string{"where": where}
	payload, postErr := c.post(ctx, "/disks/eject", request)
	if postErr != nil {
		return EjectResult{}, postErr
	}

	var ejected EjectResult
	if decodeErr := json.Unmarshal(payload, &ejected); decodeErr != nil {
		return ejected, fmt.Errorf("agentapi: decode /disks/eject: %w", decodeErr)
	}
	return ejected, nil
}
// httpStatusError is what get/post return when the agent answers with an HTTP status outside
// the set the caller accepts. It exposes the status code so callers can branch on it without
// parsing message text; Error() renders "agentapi: METHOD PATH: HTTP CODE".
type httpStatusError struct {
	method string
	path   string
	code   int
}

// Error implements the error interface.
func (e *httpStatusError) Error() string {
	return fmt.Sprintf("agentapi: %s %s: HTTP %d", e.method, e.path, e.code)
}

// FormatDisk asks the agent to format a device. The AGENT inspects the device and decides
// data-bearing-ness — a data-bearing device is refused (ErrFormatRefused), the controller's claim
// is irrelevant. Only a device the agent reads as blank is formatted.
//
// On refusal the returned FormatResult carries the requested device with Formatted=false and
// DataBearing=true alongside ErrFormatRefused. Any other failure is returned unchanged with a
// zero FormatResult; a payload that does not decode is wrapped with context.
func (c *Client) FormatDisk(ctx context.Context, device, fstype string) (FormatResult, error) {
	var out FormatResult
	body, err := c.post(ctx, "/disks/format", map[string]string{"device": device, "fstype": fstype})
	if err != nil {
		// A data-bearing refusal comes back as HTTP 403. post returns *httpStatusError
		// unwrapped, so a direct type assertion on the status code is sufficient and cannot be
		// fooled by unrelated error text that merely mentions "HTTP 403".
		// NOTE(review): every 403 on this endpoint is treated as the refusal — confirm the
		// agent uses no other 403 here.
		if se, ok := err.(*httpStatusError); ok && se.code == http.StatusForbidden {
			return FormatResult{Device: device, Formatted: false, DataBearing: true}, ErrFormatRefused
		}
		return out, err
	}
	if err := json.Unmarshal(body, &out); err != nil {
		return out, fmt.Errorf("agentapi: decode /disks/format: %w", err)
	}
	return out, nil
}

// get issues an authenticated GET and unwraps the {ok,data,error} envelope. Only HTTP 200 is
// accepted; any other status yields an *httpStatusError.
func (c *Client) get(ctx context.Context, path string) (json.RawMessage, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.baseURL+path, nil)
	if err != nil {
		return nil, err
	}
	return c.doEnvelope(req, path, http.StatusOK)
}

// post issues an authenticated JSON POST and unwraps the {ok,data,error} envelope. The agent
// returns 200 or 202 for accepted requests; any other status yields an *httpStatusError.
func (c *Client) post(ctx context.Context, path string, body any) (json.RawMessage, error) {
	buf, err := json.Marshal(body)
	if err != nil {
		return nil, err
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+path, bytes.NewReader(buf))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/json")
	return c.doEnvelope(req, path, http.StatusOK, http.StatusAccepted)
}

// doEnvelope is the shared send/unwrap path behind get and post: it adds the bearer token,
// performs the request, checks the status against accepted, and returns the envelope's data
// when the envelope reports ok. At most 1 MiB of the response body is read.
func (c *Client) doEnvelope(req *http.Request, path string, accepted ...int) (json.RawMessage, error) {
	req.Header.Set("Authorization", "Bearer "+c.token)
	resp, err := c.hc.Do(req)
	if err != nil {
		return nil, fmt.Errorf("agentapi: %s %s: %w", req.Method, path, err)
	}
	defer resp.Body.Close()

	raw, readErr := io.ReadAll(io.LimitReader(resp.Body, 1<<20))

	// The status is checked before the read error so that a rejected request is always
	// reported by its HTTP status, even if its body could not be read in full.
	statusAccepted := false
	for _, code := range accepted {
		if resp.StatusCode == code {
			statusAccepted = true
			break
		}
	}
	if !statusAccepted {
		return nil, &httpStatusError{method: req.Method, path: path, code: resp.StatusCode}
	}
	if readErr != nil {
		// Surface the real cause instead of letting a truncated body show up as a
		// misleading "bad envelope" decode failure.
		return nil, fmt.Errorf("agentapi: %s %s: read response: %w", req.Method, path, readErr)
	}

	var env apiResponse
	if err := json.Unmarshal(raw, &env); err != nil {
		return nil, fmt.Errorf("agentapi: %s %s: bad envelope: %w", req.Method, path, err)
	}
	if !env.OK {
		return nil, fmt.Errorf("agentapi: %s %s: %s", req.Method, path, env.Error)
	}
	return env.Data, nil
}
// normalizeFingerprint canonicalizes a SHA-256 fingerprint: surrounding whitespace is trimmed,
// ':' / space / tab separators are removed, and the result is lowercased. It returns an error
// unless exactly 64 hexadecimal characters remain.
func normalizeFingerprint(fp string) (string, error) {
	digest := strings.TrimSpace(fp)
	for _, sep := range []string{":", " ", "\t"} {
		digest = strings.ReplaceAll(digest, sep, "")
	}
	digest = strings.ToLower(digest)

	if n := len(digest); n != 64 {
		return "", fmt.Errorf("agentapi: fingerprint must be a SHA-256 (64 hex chars), got %d", n)
	}
	_, err := hex.DecodeString(digest)
	if err != nil {
		return "", fmt.Errorf("agentapi: fingerprint is not valid hex: %w", err)
	}
	return digest, nil
}