Files
felhom-agent/internal/proxmox/privileged.go
T
admin a042316d6d feat(agent): scaffold + proxmox interaction layer (slice 1)
Stand up the felhom-agent project (module gitea.dooplex.hu/admin/felhom-agent,
binary felhom-agent) and the internal/proxmox package: the typed library every
other agent module calls to talk to Proxmox.

- API-first Client (hand-rolled REST over net/http, PVEAPIToken auth) with typed
  read ops (version/nodes/status/lxc/config/storage) and async mutating ops
  (restore/vzdump/snapshot/rollback/delete-snapshot/setconfig/start/stop), each
  returning a UPID. WaitTask polls task status until stopped and asserts
  exitstatus OK (authz can surface at task exec, not the POST — phase1-2 §1.3).
- Fenced Privileged (root-CLI) backend for the THREE proven exceptions only
  (keyctl pct create, USB mount/fstab, SMART/sensors); each cites why it can't be
  the API. Fence is structural (Client never shells out, Privileged never HTTPs)
  and asserted in routing_test.go.
- TLS: SHA-256 leaf-cert pinning or CA file; insecure mode explicit + off by
  default. No blanket verification disable.
- 403 -> privilege-named APIError; failed task -> privilege-named TaskError.
- JSON config + env overrides (token never logged); slog logging.
- cmd/felhom-agent --selftest (read-only health report) + gated --selftest=task
  (reversible snapshot/rollback/delete exercise of WaitTask). No daemon loop yet.
- Types grounded in the spike findings and exact JSON shapes captured live from
  demo-felhom (PVE 9.2.2). Unit tests use a mock transport + runner.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-08 14:34:32 +02:00

204 lines
6.8 KiB
Go

package proxmox
import (
"context"
"encoding/json"
"fmt"
"os/exec"
"strconv"
)
// The Privileged backend is fenced to the THREE proven OS-root exceptions only
// (phase3 §B3 boundary, doc.go routing table):
//
// (a) keyctl `pct create` for golden-image builds,
// (b) USB mount-by-UUID / fstab,
// (c) SMART / sensors reads.
//
// It runs host commands through a Runner (direct exec or sudo). It makes NO HTTP
// call — the fence between API ops and root ops is structural: Client owns the
// API, Privileged owns the shell. routing_test.go asserts neither crosses over.
//
// Everything else — the entire guest lifecycle including restore — goes through
// the API Client. Do NOT add non-exception methods here.
// Runner executes a host command and returns its stdout/stderr. *ExecRunner is the
// production implementation; tests inject a mock to assert which commands ran.
//
// It is the only seam through which Privileged touches the host: every method
// on Privileged that runs a command does so via Runner.Run.
type Runner interface {
// Run executes the command `name` with `args` under ctx. It returns whatever
// the command wrote to stdout and stderr together with the execution error,
// if any; implementations may return output alongside a non-nil error.
Run(ctx context.Context, name string, args ...string) (stdout, stderr []byte, err error)
}
// RunnerMode selects how privileged commands are executed.
//
// ExecRunner.Run only tests for RunnerSudo; any other value, including the
// empty string, takes the direct-exec path.
type RunnerMode string
const (
// RunnerDirect: exec the binary directly (agent already runs as root — not the
// recommended uid model, see README; useful in dev/CI).
RunnerDirect RunnerMode = "direct"
// RunnerSudo: prefix with sudo (the intended model — agent runs as a non-root
// service user with a narrow sudoers allowlist, 03 §3/§12).
RunnerSudo RunnerMode = "sudo"
)
// ExecRunner runs commands via os/exec, optionally through sudo.
//
// The zero value is usable: with Mode unset, Run executes the command directly.
type ExecRunner struct {
// Mode picks between direct execution and the sudo prefix; only RunnerSudo
// enables the prefix.
Mode RunnerMode
// SudoPath is the sudo binary to invoke. It is consulted only when
// Mode == RunnerSudo.
SudoPath string // defaults to "sudo" when Mode == RunnerSudo
}
// Run implements Runner.
//
// In RunnerSudo mode the command is executed as `<sudo> -n <name> <args...>`,
// where <sudo> is SudoPath or "sudo" when SudoPath is empty (-n makes sudo
// non-interactive: it fails instead of prompting for a password). In every
// other mode `name` is executed directly with `args`.
//
// stdout and stderr are captured separately into capped buffers and are
// returned even when the command fails, so callers can inspect the output of
// a command that exited nonzero.
func (r *ExecRunner) Run(ctx context.Context, name string, args ...string) ([]byte, []byte, error) {
	// Decide the binary and argv first, then build the command exactly once.
	bin, argv := name, args
	if r.Mode == RunnerSudo {
		bin = r.SudoPath
		if bin == "" {
			bin = "sudo"
		}
		argv = append([]string{"-n", name}, args...)
	}

	cmd := exec.CommandContext(ctx, bin, argv...)

	var outBuf, errBuf capBuf
	cmd.Stdout, cmd.Stderr = &outBuf, &errBuf

	runErr := cmd.Run()
	return outBuf.b, errBuf.b, runErr
}
// Privileged is the root-CLI backend. It holds the Runner through which every
// host command is issued and the node name it was constructed with.
type Privileged struct {
runner Runner // executes every host command issued by this backend
node string // as passed to NewPrivileged; not read by any method visible in this file
}
// NewPrivileged builds the fenced root backend.
//
// runner is the executor used for every host command; node is stored on the
// returned value as-is. Neither argument is validated here.
func NewPrivileged(runner Runner, node string) *Privileged {
	p := new(Privileged)
	p.runner = runner
	p.node = node
	return p
}
// GoldenLXCSpec describes a golden-base CT to build fresh.
//
// VMID, OSTemplate and Storage are required by CreateGoldenLXC; the remaining
// fields are optional and only influence the command when set.
type GoldenLXCSpec struct {
VMID int // required; CreateGoldenLXC rejects 0
OSTemplate string // CT template volid, e.g. "local:vztmpl/debian-13-standard_..._amd64.tar.zst"
Storage string // rootfs storage, e.g. "local-lvm"
RootFSGB int // when > 0, --rootfs becomes "<Storage>:<RootFSGB>"; otherwise just Storage
Cores int // when > 0, passed as --cores
MemoryMB int // when > 0, passed as --memory
Hostname string // when non-empty, passed as --hostname
// Features is forced to "nesting=1,keyctl=1" — keyctl is exactly why this is
// root-fenced. There is deliberately no field to override it.
}
// CreateGoldenLXC builds a Docker-capable golden base CT with keyctl=1.
//
// WHY THIS CANNOT BE THE API: setting feature flags other than `nesting` on
// create is `root@pam`-only — `changing feature flags (except nesting) is only
// allowed for root@pam`. No API token qualifies, not even a non-privsep root@pam
// token (same 403). This is the ONLY root-fenced create; the per-customer path
// provisions by restore, which preserves keyctl with no root (phase3 §B3).
//
// This is a one-time/maintenance op at enrollment (03 §9), off the per-customer path.
//
// spec.VMID must be positive and spec.OSTemplate / spec.Storage non-empty,
// otherwise an error is returned before any command runs. The container is
// always created unprivileged with features "nesting=1,keyctl=1"; RootFSGB,
// Cores, MemoryMB and Hostname are applied only when set (> 0 / non-empty).
// The command is `pct create ...`; a failure is returned as the wrapped error
// from the runner.
func (p *Privileged) CreateGoldenLXC(ctx context.Context, spec GoldenLXCSpec) error {
	// A negative VMID would be rendered as "-N" and land in argv exactly where
	// pct expects the positional id, so it is rejected together with zero.
	if spec.VMID <= 0 || spec.OSTemplate == "" || spec.Storage == "" {
		return fmt.Errorf("proxmox: CreateGoldenLXC needs vmid, ostemplate and storage")
	}

	// --rootfs is "<storage>" or "<storage>:<sizeGB>" when a size was given.
	rootfs := spec.Storage
	if spec.RootFSGB > 0 {
		rootfs = fmt.Sprintf("%s:%d", spec.Storage, spec.RootFSGB)
	}

	args := []string{
		"create", strconv.Itoa(spec.VMID), spec.OSTemplate,
		"--unprivileged", "1",
		"--features", "nesting=1,keyctl=1",
		"--rootfs", rootfs,
	}
	if spec.Cores > 0 {
		args = append(args, "--cores", strconv.Itoa(spec.Cores))
	}
	if spec.MemoryMB > 0 {
		args = append(args, "--memory", strconv.Itoa(spec.MemoryMB))
	}
	if spec.Hostname != "" {
		args = append(args, "--hostname", spec.Hostname)
	}
	return p.run(ctx, "pct", args...)
}
// MountUSBByUUID mounts a filesystem by UUID at target (creating the mountpoint).
//
// WHY THIS CANNOT BE THE API: a physical host mount is not a Proxmox API op; it is
// a host-level mount handled by OS root / a narrow sudoers entry (phase3 §B3).
// fstab persistence is a later-slice concern (03 §7 storage manifest).
//
// Both uuid and target must be non-empty. Two commands run in order —
// `mkdir -p <target>` then `mount UUID=<uuid> <target>` — and the first
// failure is returned without running the remaining step.
func (p *Privileged) MountUSBByUUID(ctx context.Context, uuid, target string) error {
	if uuid == "" || target == "" {
		return fmt.Errorf("proxmox: MountUSBByUUID needs uuid and target")
	}

	// Each entry is a full argv: the mountpoint must exist before mounting.
	steps := [][]string{
		{"mkdir", "-p", target},
		{"mount", "UUID=" + uuid, target},
	}
	for _, argv := range steps {
		if err := p.run(ctx, argv[0], argv[1:]...); err != nil {
			return err
		}
	}
	return nil
}
// SMART returns parsed `smartctl -a -j` JSON for a device.
//
// WHY THIS CANNOT BE THE API: disk SMART data is not exposed by the Proxmox API;
// it is read with OS root via smartctl (phase3 §B3).
//
// device is handed to smartctl verbatim and must be non-empty. A nonzero exit
// is tolerated as long as stdout parses as JSON. When the command failed and
// its stdout is empty or unparseable, the returned error wraps the command
// error and carries stderr; for unparseable output it wraps the JSON error too,
// so both remain reachable through errors.Is / errors.As.
func (p *Privileged) SMART(ctx context.Context, device string) (map[string]any, error) {
	if device == "" {
		return nil, fmt.Errorf("proxmox: SMART needs a device")
	}
	out, stderr, runErr := p.runner.Run(ctx, "smartctl", "-a", "-j", device)
	// smartctl uses nonzero exit codes as bitmask warnings even on success;
	// trust parseable JSON output over the exit code.
	if runErr != nil && len(out) == 0 {
		return nil, fmt.Errorf("proxmox: smartctl %s: %w: %s", device, runErr, stderr)
	}
	var m map[string]any
	if err := json.Unmarshal(out, &m); err != nil {
		if runErr != nil {
			// The command failed AND left unusable output (e.g. it was killed
			// mid-write). Report the real cause rather than only the JSON
			// symptom, keeping both errors in the chain.
			return nil, fmt.Errorf("proxmox: smartctl %s: %w: %s: parsing JSON: %w", device, runErr, stderr, err)
		}
		return nil, fmt.Errorf("proxmox: parsing smartctl JSON: %w", err)
	}
	return m, nil
}
// Sensors returns parsed `sensors -j` JSON (hardware temperatures/fans).
//
// WHY THIS CANNOT BE THE API: hardware sensors are not API-exposed (phase3 §B3).
//
// A nonzero exit is tolerated as long as stdout parses as JSON. When the
// command failed and its stdout is empty or unparseable, the returned error
// wraps the command error and carries stderr; for unparseable output it wraps
// the JSON error as well.
func (p *Privileged) Sensors(ctx context.Context) (map[string]any, error) {
	out, stderr, runErr := p.runner.Run(ctx, "sensors", "-j")
	if runErr != nil && len(out) == 0 {
		return nil, fmt.Errorf("proxmox: sensors: %w: %s", runErr, stderr)
	}
	var m map[string]any
	if err := json.Unmarshal(out, &m); err != nil {
		if runErr != nil {
			// Failed command plus unusable output: surface the command error
			// and stderr instead of only the downstream JSON parse failure.
			return nil, fmt.Errorf("proxmox: sensors: %w: %s: parsing JSON: %w", runErr, stderr, err)
		}
		return nil, fmt.Errorf("proxmox: parsing sensors JSON: %w", err)
	}
	return m, nil
}
// run executes a command through the configured Runner and discards stdout.
//
// On failure the returned error wraps the runner's error (so errors.Is/As see
// it) and includes the command name, its arguments and the stderr text after
// it has been passed through trimBody (a helper defined elsewhere in the
// package — presumably it shortens long output; verify there).
func (p *Privileged) run(ctx context.Context, name string, args ...string) error {
	_, errOut, runErr := p.runner.Run(ctx, name, args...)
	if runErr == nil {
		return nil
	}
	return fmt.Errorf("proxmox: %s %v: %w: %s", name, args, runErr, trimBody(string(errOut)))
}
// capBuf is a tiny capped buffer so a runaway command can't blow memory.
// It keeps the first 1 MiB written to it in b and silently drops the rest.
type capBuf struct{ b []byte }

// Write stores as much of p as still fits under the 1 MiB cap and discards
// the remainder. It always reports len(p) bytes written with a nil error, so
// the writing side never sees a short write once the cap is hit.
func (c *capBuf) Write(p []byte) (int, error) {
	const limit = 1 << 20 // 1 MiB

	written := len(p) // reported to the caller regardless of truncation
	if remaining := limit - len(c.b); remaining > 0 {
		if len(p) > remaining {
			p = p[:remaining]
		}
		c.b = append(c.b, p...)
	}
	return written, nil
}