feat(agent): scaffold + proxmox interaction layer (slice 1)

Stand up the felhom-agent project (module gitea.dooplex.hu/admin/felhom-agent,
binary felhom-agent) and the internal/proxmox package: the typed library every
other agent module calls to talk to Proxmox.

- API-first Client (hand-rolled REST over net/http, PVEAPIToken auth) with typed
  read ops (version/nodes/status/lxc/config/storage) and async mutating ops
  (restore/vzdump/snapshot/rollback/delete-snapshot/setconfig/start/stop), each
  returning a UPID. WaitTask polls task status until the task has stopped and
  asserts exitstatus OK (authorization failures can surface at task execution
  rather than at the POST — phase1-2 §1.3).
- Fenced Privileged (root-CLI) backend for the THREE proven exceptions only
  (keyctl pct create, USB mount/fstab, SMART/sensors); each cites why it can't be
  the API. The fence is structural (Client never shells out, Privileged never
  makes HTTP requests) and is asserted in routing_test.go.
- TLS: SHA-256 leaf-cert pinning or CA file; insecure mode explicit + off by
  default. No blanket verification disable.
- 403 -> privilege-named APIError; failed task -> privilege-named TaskError.
- JSON config + env overrides (token never logged); slog logging.
- cmd/felhom-agent --selftest (read-only health report) + gated --selftest=task
  (reversible snapshot/rollback/delete exercise of WaitTask). No daemon loop yet.
- Types grounded in the spike findings and exact JSON shapes captured live from
  demo-felhom (PVE 9.2.2). Unit tests use a mock transport + runner.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-08 14:34:32 +02:00
parent 4d84207572
commit a042316d6d
24 changed files with 2240 additions and 0 deletions
+81
View File
@@ -0,0 +1,81 @@
package proxmox
import (
"context"
"errors"
"net/http"
"strings"
"testing"
"time"
)
// testUPID is the task identifier handed to WaitTask and echoed back in the
// mocked task-status responses throughout these tests.
const testUPID = "UPID:demo-felhom:00026454:004E3431:6A265E53:vzsnapshot:9001:root@pam:"
// fastWait is the WaitOptions value shared by the tests that expect WaitTask
// to return promptly: Interval and MaxInterval are a millisecond or two and
// Timeout is one second, which keeps the tests quick.
var fastWait = WaitOptions{Interval: time.Millisecond, MaxInterval: 2 * time.Millisecond, Timeout: time.Second}
func TestWaitTask_RunningThenOK(t *testing.T) {
var n int
d := &mockDoer{fn: func(r *http.Request) (*http.Response, error) {
n++
if n == 1 {
return jsonResp(200, `{"data":{"upid":"`+testUPID+`","status":"running"}}`), nil
}
return jsonResp(200, `{"data":{"upid":"`+testUPID+`","status":"stopped","exitstatus":"OK"}}`), nil
}}
st, err := newTestClient(d).WaitTask(context.Background(), testUPID, fastWait)
if err != nil {
t.Fatalf("WaitTask: %v", err)
}
if !st.OK() {
t.Errorf("status not OK: %+v", st)
}
}
func TestWaitTask_FailedSurfacesPrivilege(t *testing.T) {
// vzdump against an unauthorized vmid: 200+UPID, then the 403 in exitstatus.
d := &mockDoer{fn: func(r *http.Request) (*http.Response, error) {
if strings.Contains(r.URL.Path, "/log") {
return jsonResp(200, `{"data":[{"n":1,"t":"TASK ERROR: 403 Permission check failed (/vms/9000, VM.Backup)"}]}`), nil
}
return jsonResp(200, `{"data":{"upid":"`+testUPID+`","status":"stopped","exitstatus":"403 Permission check failed (/vms/9000, VM.Backup)"}}`), nil
}}
_, err := newTestClient(d).WaitTask(context.Background(), testUPID, fastWait)
var te *TaskError
if !errors.As(err, &te) {
t.Fatalf("want *TaskError, got %T: %v", err, err)
}
if te.Privilege != "VM.Backup" {
t.Errorf("privilege = %q, want VM.Backup", te.Privilege)
}
if te.DeniedPath != "/vms/9000" {
t.Errorf("denied path = %q", te.DeniedPath)
}
if len(te.LogTail) == 0 {
t.Errorf("expected a log tail")
}
}
// TestWaitTask_Timeout points WaitTask at a mock whose task never leaves the
// "running" state and expects the call to fail with an error that matches
// context.DeadlineExceeded once the 30ms Timeout in the options has passed.
func TestWaitTask_Timeout(t *testing.T) {
	doer := &mockDoer{fn: func(_ *http.Request) (*http.Response, error) {
		return jsonResp(200, `{"data":{"upid":"`+testUPID+`","status":"running"}}`), nil
	}}
	shortWait := WaitOptions{
		Interval:    time.Millisecond,
		MaxInterval: time.Millisecond,
		Timeout:     30 * time.Millisecond,
	}

	// errors.Is reports false for a nil error, so a nil result fails here too.
	if _, err := newTestClient(doer).WaitTask(context.Background(), testUPID, shortWait); !errors.Is(err, context.DeadlineExceeded) {
		t.Fatalf("want deadline-exceeded, got %v", err)
	}
}
// TestWaitTask_CtxCancel points WaitTask at a mock whose task never leaves the
// "running" state, cancels the caller's context about 20ms in, and expects
// the call to fail with an error that matches context.Canceled.
func TestWaitTask_CtxCancel(t *testing.T) {
	doer := &mockDoer{fn: func(_ *http.Request) (*http.Response, error) {
		return jsonResp(200, `{"data":{"upid":"`+testUPID+`","status":"running"}}`), nil
	}}
	// The one-minute Timeout is far longer than the cancellation delay, so
	// cancellation is what ends the wait.
	longWait := WaitOptions{
		Interval:    time.Millisecond,
		MaxInterval: time.Millisecond,
		Timeout:     time.Minute,
	}

	ctx, cancel := context.WithCancel(context.Background())
	// Fire cancel from a separate goroutine after 20ms while WaitTask polls.
	time.AfterFunc(20*time.Millisecond, cancel)

	// errors.Is reports false for a nil error, so a nil result fails here too.
	if _, err := newTestClient(doer).WaitTask(ctx, testUPID, longWait); !errors.Is(err, context.Canceled) {
		t.Fatalf("want canceled, got %v", err)
	}
}