a042316d6d
Stand up the felhom-agent project (module gitea.dooplex.hu/admin/felhom-agent, binary felhom-agent) and the internal/proxmox package: the typed library every other agent module calls to talk to Proxmox. - API-first Client (hand-rolled REST over net/http, PVEAPIToken auth) with typed read ops (version/nodes/status/lxc/config/storage) and async mutating ops (restore/vzdump/snapshot/rollback/delete-snapshot/setconfig/start/stop), each returning a UPID. WaitTask polls task status until stopped and asserts exitstatus OK (authz can surface at task exec, not the POST — phase1-2 §1.3). - Fenced Privileged (root-CLI) backend for the THREE proven exceptions only (keyctl pct create, USB mount/fstab, SMART/sensors); each cites why it can't be the API. Fence is structural (Client never shells out, Privileged never HTTPs) and asserted in routing_test.go. - TLS: SHA-256 leaf-cert pinning or CA file; insecure mode explicit + off by default. No blanket verification disable. - 403 -> privilege-named APIError; failed task -> privilege-named TaskError. - JSON config + env overrides (token never logged); slog logging. - cmd/felhom-agent --selftest (read-only health report) + gated --selftest=task (reversible snapshot/rollback/delete exercise of WaitTask). No daemon loop yet. - Types grounded in the spike findings and exact JSON shapes captured live from demo-felhom (PVE 9.2.2). Unit tests use a mock transport + runner. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
204 lines
6.8 KiB
Go
204 lines
6.8 KiB
Go
package proxmox
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os/exec"
|
|
"strconv"
|
|
)
|
|
|
|
// The Privileged backend is fenced to the THREE proven OS-root exceptions only
|
|
// (phase3 §B3 boundary, doc.go routing table):
|
|
//
|
|
// (a) keyctl `pct create` for golden-image builds,
|
|
// (b) USB mount-by-UUID / fstab,
|
|
// (c) SMART / sensors reads.
|
|
//
|
|
// It runs host commands through a Runner (direct exec or sudo). It makes NO HTTP
|
|
// call — the fence between API ops and root ops is structural: Client owns the
|
|
// API, Privileged owns the shell. routing_test.go asserts neither crosses over.
|
|
//
|
|
// Everything else — the entire guest lifecycle including restore — goes through
|
|
// the API Client. Do NOT add non-exception methods here.
|
|
|
|
// Runner is the seam through which the Privileged backend executes host
// commands. Run starts the program called name with args and hands back
// whatever it wrote to stdout and stderr, plus the execution error (if any).
//
// *ExecRunner is the production implementation; tests substitute a mock so
// they can assert exactly which commands were run.
type Runner interface {
	// Run executes name with args under ctx and returns, in order, the
	// captured stdout, the captured stderr and the execution error.
	Run(ctx context.Context, name string, args ...string) ([]byte, []byte, error)
}
|
|
|
|
// RunnerMode names the strategy ExecRunner uses to launch a privileged
// command.
type RunnerMode string

// Supported runner modes.
const (
	// RunnerDirect executes the binary as-is. It only works when the agent
	// itself already runs as root, which is not the recommended uid model
	// (see README) but is handy in dev/CI.
	RunnerDirect = RunnerMode("direct")

	// RunnerSudo executes the binary through sudo. This is the intended
	// model: the agent runs as a non-root service user with a narrow sudoers
	// allowlist (03 §3/§12).
	RunnerSudo = RunnerMode("sudo")
)
|
|
|
|
// ExecRunner runs commands via os/exec, optionally through sudo.
|
|
type ExecRunner struct {
|
|
Mode RunnerMode
|
|
SudoPath string // defaults to "sudo" when Mode == RunnerSudo
|
|
}
|
|
|
|
// Run implements Runner.
|
|
func (r *ExecRunner) Run(ctx context.Context, name string, args ...string) ([]byte, []byte, error) {
|
|
var cmd *exec.Cmd
|
|
if r.Mode == RunnerSudo {
|
|
sudo := r.SudoPath
|
|
if sudo == "" {
|
|
sudo = "sudo"
|
|
}
|
|
cmd = exec.CommandContext(ctx, sudo, append([]string{"-n", name}, args...)...)
|
|
} else {
|
|
cmd = exec.CommandContext(ctx, name, args...)
|
|
}
|
|
var stdout, stderr capBuf
|
|
cmd.Stdout = &stdout
|
|
cmd.Stderr = &stderr
|
|
err := cmd.Run()
|
|
return stdout.b, stderr.b, err
|
|
}
|
|
|
|
// Privileged is the root-CLI backend.
|
|
type Privileged struct {
|
|
runner Runner
|
|
node string
|
|
}
|
|
|
|
// NewPrivileged builds the fenced root backend.
|
|
func NewPrivileged(runner Runner, node string) *Privileged {
|
|
return &Privileged{runner: runner, node: node}
|
|
}
|
|
|
|
// GoldenLXCSpec is the input to CreateGoldenLXC: the parameters of a
// golden-base CT that is built fresh.
//
// There is deliberately no Features field. The feature string is forced to
// "nesting=1,keyctl=1" — keyctl is exactly why the create is root-fenced.
type GoldenLXCSpec struct {
	// VMID is the container ID to create.
	VMID int
	// OSTemplate is the CT template volid,
	// e.g. "local:vztmpl/debian-13-standard_..._amd64.tar.zst".
	OSTemplate string
	// Storage is the rootfs storage, e.g. "local-lvm".
	Storage string
	// RootFSGB is the root filesystem size; it is appended to Storage as
	// "<storage>:<size>" only when positive.
	RootFSGB int
	// Cores is passed as --cores only when positive.
	Cores int
	// MemoryMB is passed as --memory only when positive.
	MemoryMB int
	// Hostname is passed as --hostname only when non-empty.
	Hostname string
}
|
|
|
|
// CreateGoldenLXC builds a Docker-capable golden base CT with keyctl=1.
|
|
//
|
|
// WHY THIS CANNOT BE THE API: setting feature flags other than `nesting` on
|
|
// create is `root@pam`-only — `changing feature flags (except nesting) is only
|
|
// allowed for root@pam`. No API token qualifies, not even a non-privsep root@pam
|
|
// token (same 403). This is the ONLY root-fenced create; the per-customer path
|
|
// provisions by restore, which preserves keyctl with no root (phase3 §B3).
|
|
//
|
|
// This is a one-time/maintenance op at enrollment (03 §9), off the per-customer path.
|
|
func (p *Privileged) CreateGoldenLXC(ctx context.Context, spec GoldenLXCSpec) error {
|
|
if spec.VMID == 0 || spec.OSTemplate == "" || spec.Storage == "" {
|
|
return fmt.Errorf("proxmox: CreateGoldenLXC needs vmid, ostemplate and storage")
|
|
}
|
|
rootfs := spec.Storage
|
|
if spec.RootFSGB > 0 {
|
|
rootfs = fmt.Sprintf("%s:%d", spec.Storage, spec.RootFSGB)
|
|
}
|
|
args := []string{
|
|
"create", strconv.Itoa(spec.VMID), spec.OSTemplate,
|
|
"--unprivileged", "1",
|
|
"--features", "nesting=1,keyctl=1",
|
|
"--rootfs", rootfs,
|
|
}
|
|
if spec.Cores > 0 {
|
|
args = append(args, "--cores", strconv.Itoa(spec.Cores))
|
|
}
|
|
if spec.MemoryMB > 0 {
|
|
args = append(args, "--memory", strconv.Itoa(spec.MemoryMB))
|
|
}
|
|
if spec.Hostname != "" {
|
|
args = append(args, "--hostname", spec.Hostname)
|
|
}
|
|
return p.run(ctx, "pct", args...)
|
|
}
|
|
|
|
// MountUSBByUUID mounts a filesystem by UUID at target (creating the mountpoint).
|
|
//
|
|
// WHY THIS CANNOT BE THE API: a physical host mount is not a Proxmox API op; it is
|
|
// a host-level mount handled by OS root / a narrow sudoers entry (phase3 §B3).
|
|
// fstab persistence is a later-slice concern (03 §7 storage manifest).
|
|
func (p *Privileged) MountUSBByUUID(ctx context.Context, uuid, target string) error {
|
|
if uuid == "" || target == "" {
|
|
return fmt.Errorf("proxmox: MountUSBByUUID needs uuid and target")
|
|
}
|
|
if err := p.run(ctx, "mkdir", "-p", target); err != nil {
|
|
return err
|
|
}
|
|
return p.run(ctx, "mount", "UUID="+uuid, target)
|
|
}
|
|
|
|
// SMART returns parsed `smartctl -a -j` JSON for a device.
|
|
//
|
|
// WHY THIS CANNOT BE THE API: disk SMART data is not exposed by the Proxmox API;
|
|
// it is read with OS root via smartctl (phase3 §B3).
|
|
func (p *Privileged) SMART(ctx context.Context, device string) (map[string]any, error) {
|
|
if device == "" {
|
|
return nil, fmt.Errorf("proxmox: SMART needs a device")
|
|
}
|
|
out, stderr, err := p.runner.Run(ctx, "smartctl", "-a", "-j", device)
|
|
if err != nil {
|
|
// smartctl uses nonzero exit codes as bitmask warnings even on success;
|
|
// trust parseable JSON output over the exit code.
|
|
if len(out) == 0 {
|
|
return nil, fmt.Errorf("proxmox: smartctl %s: %w: %s", device, err, stderr)
|
|
}
|
|
}
|
|
var m map[string]any
|
|
if err := json.Unmarshal(out, &m); err != nil {
|
|
return nil, fmt.Errorf("proxmox: parsing smartctl JSON: %w", err)
|
|
}
|
|
return m, nil
|
|
}
|
|
|
|
// Sensors returns parsed `sensors -j` JSON (hardware temperatures/fans).
|
|
//
|
|
// WHY THIS CANNOT BE THE API: hardware sensors are not API-exposed (phase3 §B3).
|
|
func (p *Privileged) Sensors(ctx context.Context) (map[string]any, error) {
|
|
out, stderr, err := p.runner.Run(ctx, "sensors", "-j")
|
|
if err != nil && len(out) == 0 {
|
|
return nil, fmt.Errorf("proxmox: sensors: %w: %s", err, stderr)
|
|
}
|
|
var m map[string]any
|
|
if err := json.Unmarshal(out, &m); err != nil {
|
|
return nil, fmt.Errorf("proxmox: parsing sensors JSON: %w", err)
|
|
}
|
|
return m, nil
|
|
}
|
|
|
|
// run executes a command and wraps a nonzero exit with its stderr.
|
|
func (p *Privileged) run(ctx context.Context, name string, args ...string) error {
|
|
_, stderr, err := p.runner.Run(ctx, name, args...)
|
|
if err != nil {
|
|
return fmt.Errorf("proxmox: %s %v: %w: %s", name, args, err, trimBody(string(stderr)))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// capBuf is a minimal write sink that retains at most 1 MiB, so a runaway
// command cannot blow up memory. The retained bytes are available in b.
type capBuf struct{ b []byte }

// Write keeps as much of p as still fits under the 1 MiB limit and silently
// drops the rest. It always reports len(p) bytes written with a nil error, so
// the writer feeding it never sees a short write once the buffer is full.
func (c *capBuf) Write(p []byte) (int, error) {
	const limit = 1 << 20 // 1 MiB

	if free := limit - len(c.b); free > 0 {
		keep := p
		if len(keep) > free {
			keep = keep[:free]
		}
		c.b = append(c.b, keep...)
	}
	return len(p), nil // always report full consumption
}
|