Files
felhom-agent/internal/proxmox/mutate.go
T
admin a042316d6d feat(agent): scaffold + proxmox interaction layer (slice 1)
Stand up the felhom-agent project (module gitea.dooplex.hu/admin/felhom-agent,
binary felhom-agent) and the internal/proxmox package: the typed library every
other agent module calls to talk to Proxmox.

- API-first Client (hand-rolled REST over net/http, PVEAPIToken auth) with typed
  read ops (version/nodes/status/lxc/config/storage) and async mutating ops
  (restore/vzdump/snapshot/rollback/delete-snapshot/setconfig/start/stop), each
  returning a UPID. WaitTask polls task status until stopped and asserts
  exitstatus OK (authz can surface at task exec, not the POST — phase1-2 §1.3).
- Fenced Privileged (root-CLI) backend for the THREE proven exceptions only
  (keyctl pct create, USB mount/fstab, SMART/sensors); each cites why it can't be
  the API. Fence is structural (Client never shells out, Privileged never HTTPs)
  and asserted in routing_test.go.
- TLS: SHA-256 leaf-cert pinning or CA file; insecure mode explicit + off by
  default. No blanket verification disable.
- 403 -> privilege-named APIError; failed task -> privilege-named TaskError.
- JSON config + env overrides (token never logged); slog logging.
- cmd/felhom-agent --selftest (read-only health report) + gated --selftest=task
  (reversible snapshot/rollback/delete exercise of WaitTask). No daemon loop yet.
- Types grounded in the spike findings and exact JSON shapes captured live from
  demo-felhom (PVE 9.2.2). Unit tests use a mock transport + runner.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-08 14:34:32 +02:00

149 lines
6.1 KiB
Go

package proxmox
import (
"context"
"fmt"
"net/http"
"net/url"
"strconv"
)
// Async mutating operations. Each is API-token-covered (the FelhomAgent role) and
// returns a UPID string; the caller MUST WaitTask on it and assert exitstatus OK.
// The HTTP 200 here is not proof of success (phase1-2 §1.3).
// BackupMode is the vzdump mode.
type BackupMode string

// Backup modes accepted by Vzdump.
const (
	// ModeStop performs an orderly guest shutdown, then the backup, then a
	// restart. It gives the highest consistency. For an LXC the shutdown and
	// restart happen inside vzdump and need only VM.Backup (NOT VM.PowerMgmt)
	// — phase1-2 §1.4.
	ModeStop = BackupMode("stop")

	// ModeSnapshot gives the lowest downtime. For an LXC it is crash-consistent
	// only (no fsfreeze); app-consistency is the controller's job
	// (proxmox-platform.md §4.2).
	ModeSnapshot = BackupMode("snapshot")
)
// RestoreLXCOptions holds the inputs of RestoreLXC. Restore is the PRIMARY
// create path: a token-authorized restore preserves
// features=nesting=1,keyctl=1 from the archive, so it needs no root
// (phase3 §B3). A fresh `pct create` with keyctl is the only root-fenced
// create (see Privileged.CreateGoldenLXC).
type RestoreLXCOptions struct {
	// VMID is the target VMID (a fresh id).
	VMID int
	// Archive is the source archive volid, e.g.
	// "local:backup/vzdump-lxc-9001-...tar.zst".
	Archive string
	// Storage is the target storage for the rootfs, e.g. "local-lvm".
	Storage string
	// Force overwrites an existing VMID. This is destructive — the caller
	// must have the authority to do it.
	Force bool
}
// RestoreLXC restores an LXC from a vzdump/PBS archive by POSTing to
// /nodes/{node}/lxc with restore=1, and returns the UPID of the started task.
//
// VMID, Archive and Storage are mandatory; if any is missing an error is
// returned and no request is sent. Force is only transmitted when set.
//
// NOTE: pct restore preserves the source MAC + hostname — reset the network
// identity before starting the restored guest alongside the original
// (phase1-2 §2.2). That reset is a SetConfig call the caller makes afterwards.
func (c *Client) RestoreLXC(ctx context.Context, opts RestoreLXCOptions) (string, error) {
	switch {
	case opts.VMID == 0, opts.Archive == "", opts.Storage == "":
		return "", fmt.Errorf("proxmox: RestoreLXC needs vmid, archive and storage")
	}

	form := url.Values{
		"vmid":       {strconv.Itoa(opts.VMID)},
		"ostemplate": {opts.Archive}, // the archive is passed as the restore source
		"restore":    {"1"},
		"storage":    {opts.Storage},
	}
	if opts.Force {
		form.Set("force", "1")
	}

	endpoint := "/nodes/" + c.node + "/lxc"
	return c.dataString(ctx, http.MethodPost, endpoint, form)
}
// VzdumpOptions holds the inputs of Vzdump.
type VzdumpOptions struct {
	// VMID is the guest to back up.
	VMID int
	// Storage is a storage whose content includes "backup" (e.g. "local") —
	// NOT local-lvm.
	Storage string
	// Mode is ModeStop or ModeSnapshot.
	Mode BackupMode
	// Compress is the compression algorithm: "zstd", "lzo" or "gzip". An
	// empty value is replaced with "zstd" by Vzdump, so "" cannot be used to
	// request an uncompressed backup.
	// NOTE(review): the PVE API schema appears to use "0" for no compression —
	// confirm before relying on it.
	Compress string
}
// Vzdump starts a backup by POSTing to /nodes/{node}/vzdump and returns the
// UPID of the started task. An agent-initiated vzdump is crash-consistent only
// for an LXC (no fsfreeze).
//
// VMID, Storage and Mode are mandatory; if any is missing an error is returned
// and no request is sent. An empty Compress is sent as "zstd".
func (c *Client) Vzdump(ctx context.Context, opts VzdumpOptions) (string, error) {
	incomplete := opts.VMID == 0 || opts.Storage == "" || opts.Mode == ""
	if incomplete {
		return "", fmt.Errorf("proxmox: Vzdump needs vmid, storage and mode")
	}

	// Fall back to the default algorithm when the caller did not choose one.
	compress := opts.Compress
	if compress == "" {
		compress = "zstd"
	}

	form := url.Values{
		"vmid":     {strconv.Itoa(opts.VMID)},
		"storage":  {opts.Storage},
		"mode":     {string(opts.Mode)},
		"compress": {compress},
	}
	return c.dataString(ctx, http.MethodPost, "/nodes/"+c.node+"/vzdump", form)
}
// Snapshot creates an LXC snapshot by POSTing to
// /nodes/{node}/lxc/{vmid}/snapshot and returns the UPID of the started task.
// A running, unprivileged LXC can be snapshotted on LVM-thin with no stop
// (phase1-2 §1.6) — this is the snapshot-before-change primitive.
//
// vmid and snapname are mandatory; if either is missing an error is returned
// and no request is sent. description is optional and only transmitted when
// non-empty.
func (c *Client) Snapshot(ctx context.Context, vmid int, snapname, description string) (string, error) {
	switch {
	case vmid == 0, snapname == "":
		return "", fmt.Errorf("proxmox: Snapshot needs vmid and snapname")
	}

	form := url.Values{"snapname": {snapname}}
	if description != "" {
		form.Set("description", description)
	}

	endpoint := "/nodes/" + c.node + "/lxc/" + strconv.Itoa(vmid) + "/snapshot"
	return c.dataString(ctx, http.MethodPost, endpoint, form)
}
// Rollback rolls an LXC back to a snapshot by POSTing to
// /nodes/{node}/lxc/{vmid}/snapshot/{snap}/rollback and returns the UPID of
// the started task.
//
// vmid and snapname are mandatory; if either is missing an error is returned
// and no request is sent. snapname is path-escaped before it is placed in the
// URL.
func (c *Client) Rollback(ctx context.Context, vmid int, snapname string) (string, error) {
	if ok := vmid != 0 && snapname != ""; !ok {
		return "", fmt.Errorf("proxmox: Rollback needs vmid and snapname")
	}

	snap := url.PathEscape(snapname)
	endpoint := "/nodes/" + c.node + "/lxc/" + strconv.Itoa(vmid) + "/snapshot/" + snap + "/rollback"

	// The call takes no parameters; an empty (non-nil) form is sent.
	return c.dataString(ctx, http.MethodPost, endpoint, url.Values{})
}
// DeleteSnapshot removes an LXC snapshot by sending DELETE to
// /nodes/{node}/lxc/{vmid}/snapshot/{snap} and returns the UPID of the started
// task.
//
// vmid and snapname are mandatory; if either is missing an error is returned
// and no request is sent. snapname is path-escaped before it is placed in the
// URL.
func (c *Client) DeleteSnapshot(ctx context.Context, vmid int, snapname string) (string, error) {
	if ok := vmid != 0 && snapname != ""; !ok {
		return "", fmt.Errorf("proxmox: DeleteSnapshot needs vmid and snapname")
	}

	endpoint := "/nodes/" + c.node + "/lxc/" + strconv.Itoa(vmid) + "/snapshot/" + url.PathEscape(snapname)

	// No parameters accompany the DELETE, so a nil form is passed.
	return c.dataString(ctx, http.MethodDelete, endpoint, nil)
}
// SetConfig applies config changes by sending PUT to
// /nodes/{node}/lxc/{vmid}/config (e.g. memory, cores, net0, mpN with a backup
// flag). Every entry of params is sent as one form field.
//
// PVE may apply the change synchronously (no UPID); the returned string is
// empty in that case, and "" is not an error. When a UPID is returned,
// WaitTask on it.
//
// vmid and at least one param are mandatory; otherwise an error is returned
// and no request is sent.
//
// Identity reset after a restore (phase1-2 §2.2) is a SetConfig with
// params{"net0": "name=eth0,bridge=vmbr0,ip=dhcp"} (regenerates the MAC).
func (c *Client) SetConfig(ctx context.Context, vmid int, params map[string]string) (string, error) {
	switch {
	case vmid == 0, len(params) == 0:
		return "", fmt.Errorf("proxmox: SetConfig needs vmid and at least one param")
	}

	form := make(url.Values, len(params))
	for key, value := range params {
		form[key] = []string{value}
	}

	endpoint := "/nodes/" + c.node + "/lxc/" + strconv.Itoa(vmid) + "/config"
	return c.dataString(ctx, http.MethodPut, endpoint, form)
}
// Start starts a guest by POSTing to /nodes/{node}/lxc/{vmid}/status/start
// (VM.PowerMgmt) and returns the UPID of the started task.
//
// vmid is mandatory: like the other mutating operations in this file, a zero
// vmid is rejected locally and no request is sent.
func (c *Client) Start(ctx context.Context, vmid int) (string, error) {
	if vmid == 0 {
		return "", fmt.Errorf("proxmox: Start needs vmid")
	}
	path := fmt.Sprintf("/nodes/%s/lxc/%d/status/start", c.node, vmid)
	// The call takes no parameters; an empty (non-nil) form is sent.
	return c.dataString(ctx, http.MethodPost, path, url.Values{})
}
// Stop stops a guest by POSTing to /nodes/{node}/lxc/{vmid}/status/stop
// (VM.PowerMgmt) and returns the UPID of the started task.
//
// vmid is mandatory: like the other mutating operations in this file, a zero
// vmid is rejected locally and no request is sent.
func (c *Client) Stop(ctx context.Context, vmid int) (string, error) {
	if vmid == 0 {
		return "", fmt.Errorf("proxmox: Stop needs vmid")
	}
	path := fmt.Sprintf("/nodes/%s/lxc/%d/status/stop", c.node, vmid)
	// The call takes no parameters; an empty (non-nil) form is sent.
	return c.dataString(ctx, http.MethodPost, path, url.Values{})
}