Files
felhom-agent/internal/reconcile/queue.go
T
admin 05c450147c v0.4.0-rc1: slice 4 Phase A — reconcile engine (structural, runs live unfed)
New internal/reconcile package: the agent-side control core's structural half.

- Per-guest serializer Queue (doc 03 §10): the single choke point all mutation
  sources funnel through; same-vmid serial in submit order, different vmids
  parallel (cond-var FIFO lanes).
- Desired-state model + DesiredProvider seam; EmptyProvider is the only live
  source at slice 4 (no hub serving until slice 10) so the live engine computes
  an empty action set and performs zero mutations.
- Normalization layer (FieldNormalizers): normalized desired-vs-actual so
  Proxmox round-trip quirks don't read as drift. normDesc promoted out of
  main.go to reconcile.NormDescription; selftest uses the shared helper.
- Plan (pure diff): minimal benign action set (Start/Stop/SetConfig) for guests
  in both desired and actual; provision/destroy out of scope here.
- Engine: dispatches onto the shared queue; honors the dual-mode SetConfig
  contract (UPID -> WaitTask; empty UPID -> synchronous success).
- Durable op journal + idempotency store (mirrors authz.FileNonceStore):
  in-flight task ids for crash detection + AlreadyApplied dedupe across restart.
- Wired into runDaemon alongside the hub loop, sharing the queue; runs cleanly
  with no desired state and no signers.

Full module race-clean and vet-clean on the Linux build server.

CHECKPOINT: Phase A only. Awaiting validation before Phase B (the reversibility
gate + signed-op consuming layer, landing v0.4.0).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-08 23:21:55 +02:00

138 lines
3.4 KiB
Go

package reconcile
import (
"errors"
"sync"
)
// Sentinel errors reported by the queue.
var (
	// ErrQueueClosed is what a job's result channel yields when the job was
	// submitted after Close.
	ErrQueueClosed = errors.New("reconcile: queue closed")
)
// Queue serializes guest mutations per vmid (doc 03 §10). It is intended as the
// one place every mutation source goes through (reconcile today; one-shot jobs and
// the local API later). Jobs that share a vmid execute strictly one at a time, in
// the order they were submitted; jobs for different vmids run in parallel. That
// keeps Proxmox from seeing concurrent conflicting operations on a single guest
// while still letting a host with many guests converge concurrently.
//
// Internally every vmid owns a "lane": one goroutine draining an unbounded FIFO.
// Because the FIFO has no capacity limit, Submit does not wait for queue space; it
// hands back a channel carrying that job's error once the job has run, so a caller
// can wait on one specific job while the rest keep going.
type Queue struct {
	mu     sync.Mutex    // guards lanes and closed
	lanes  map[int]*lane // vmid -> serial worker for that guest, created on first Submit
	closed bool          // set by Close; Submit rejects jobs once true
}
// NewQueue returns a ready-to-use Queue with no lanes yet. A lane is created the
// first time Submit sees its vmid and then stays for the queue's lifetime (the set
// of guests on a host is bounded); Close is what tells the lane goroutines to stop.
func NewQueue() *Queue {
	q := new(Queue)
	q.lanes = map[int]*lane{}
	return q
}
// Submit appends fn to the lane belonging to vmid and returns a channel on which
// exactly one value will arrive: fn's return value once it has run, or
// ErrQueueClosed if the queue no longer accepts work. Jobs on one lane run in the
// order they were enqueued; separate lanes run concurrently. The channel has a
// buffer of one, so the lane can always deliver the result without blocking, even
// when the caller never reads it.
func (q *Queue) Submit(vmid int, fn func() error) <-chan error {
	done := make(chan error, 1)

	// Look up (or lazily create and start) the lane while holding the queue lock.
	// A nil result means the queue has already been closed.
	target := func() *lane {
		q.mu.Lock()
		defer q.mu.Unlock()

		if q.closed {
			return nil
		}
		if existing := q.lanes[vmid]; existing != nil {
			return existing
		}
		fresh := newLane()
		q.lanes[vmid] = fresh
		go fresh.run()
		return fresh
	}()

	if target == nil {
		done <- ErrQueueClosed
		return done
	}

	// Enqueue outside the queue lock. If Close slipped in after the lookup, the
	// lane itself rejects the task with ErrQueueClosed.
	target.enqueue(&laneTask{fn: fn, res: done})
	return done
}
// Close makes the queue reject further submissions and tells every lane to finish
// whatever is already queued and then exit (graceful drain). It only signals the
// lanes; it does not wait for them to finish. Calling Close more than once is a
// no-op.
func (q *Queue) Close() {
	// Flip the closed flag and snapshot the lanes under the queue lock. A repeated
	// call yields an empty snapshot, so nothing is signalled twice.
	toStop := func() []*lane {
		q.mu.Lock()
		defer q.mu.Unlock()

		if q.closed {
			return nil
		}
		q.closed = true

		snapshot := make([]*lane, 0, len(q.lanes))
		for _, ln := range q.lanes {
			snapshot = append(snapshot, ln)
		}
		return snapshot
	}()

	// Signal each lane after releasing the queue lock, so lane locks are never
	// taken while q.mu is held.
	for _, ln := range toStop {
		ln.close()
	}
}
// laneTask is one queued job together with the channel its outcome is reported on.
type laneTask struct {
	fn  func() error // the job to execute on the lane goroutine
	res chan error   // receives fn's result, or ErrQueueClosed if the job was rejected
}
// lane is the serial worker for a single vmid: one goroutine that drains an
// unbounded FIFO protected by a mutex and condition variable. A condition variable
// is used instead of a buffered channel so that submission is unbounded,
// non-blocking and order-preserving without picking an arbitrary capacity.
type lane struct {
	mu      sync.Mutex  // guards pending and closed
	cond    *sync.Cond  // bound to mu; signalled on enqueue, broadcast on close
	pending []*laneTask // jobs not yet started, oldest first
	closed  bool        // once true, enqueue rejects and run exits after draining
}
// newLane allocates an empty lane and ties its condition variable to the lane's own
// mutex. The caller is responsible for starting run in a goroutine.
func newLane() *lane {
	ln := new(lane)
	ln.cond = sync.NewCond(&ln.mu)
	return ln
}
// enqueue appends t to the lane's FIFO and wakes the worker. If the lane has
// already been closed, the task is not queued; ErrQueueClosed is delivered on its
// result channel instead.
func (l *lane) enqueue(t *laneTask) {
	l.mu.Lock()
	accepted := !l.closed
	if accepted {
		l.pending = append(l.pending, t)
		l.cond.Signal()
	}
	l.mu.Unlock()

	// Report the rejection after releasing the lock.
	if !accepted {
		t.res <- ErrQueueClosed
	}
}
// close marks the lane closed and wakes the worker so it can notice: it then
// drains whatever is still pending and exits.
func (l *lane) close() {
	l.mu.Lock()
	defer l.mu.Unlock()

	l.closed = true
	l.cond.Broadcast()
}
// run is the lane's worker loop: it takes jobs off the FIFO one at a time, in
// order, and executes each outside the lock so new submissions are never blocked
// behind a running job. After close it still runs every job that was already
// queued (nothing submitted before close is silently dropped) and only then
// returns.
func (l *lane) run() {
	for {
		l.mu.Lock()
		// Sleep until there is work or the lane has been closed.
		for len(l.pending) == 0 && !l.closed {
			l.cond.Wait()
		}
		if len(l.pending) == 0 { // closed and drained
			l.mu.Unlock()
			return
		}
		t := l.pending[0]
		// Clear the slot before advancing. Re-slicing alone leaves the pointer in
		// the backing array, which would keep the finished job (its closure and
		// result channel) reachable until the array happens to be reallocated.
		l.pending[0] = nil
		l.pending = l.pending[1:]
		if len(l.pending) == 0 {
			// Fully drained: release the backing array so a past burst does not
			// pin its capacity for the lifetime of the lane.
			l.pending = nil
		}
		l.mu.Unlock()

		// The result channel is buffered, so this send cannot block the lane.
		t.res <- t.fn()
	}
}