05c450147c
New internal/reconcile package: the agent-side control core's structural half. - Per-guest serializer Queue (doc 03 §10): the single choke point all mutation sources funnel through; same-vmid serial in submit order, different vmids parallel (cond-var FIFO lanes). - Desired-state model + DesiredProvider seam; EmptyProvider is the only live source at slice 4 (no hub serving until slice 10) so the live engine computes an empty action set and performs zero mutations. - Normalization layer (FieldNormalizers): normalized desired-vs-actual so Proxmox round-trip quirks don't read as drift. normDesc promoted out of main.go to reconcile.NormDescription; selftest uses the shared helper. - Plan (pure diff): minimal benign action set (Start/Stop/SetConfig) for guests in both desired and actual; provision/destroy out of scope here. - Engine: dispatches onto the shared queue; honors the dual-mode SetConfig contract (UPID -> WaitTask; empty UPID -> synchronous success). - Durable op journal + idempotency store (mirrors authz.FileNonceStore): in-flight task ids for crash detection + AlreadyApplied dedupe across restart. - Wired into runDaemon alongside the hub loop, sharing the queue; runs cleanly with no desired state and no signers. Full module race-clean and vet-clean on the Linux build server. CHECKPOINT: Phase A only. Awaiting validation before Phase B (the reversibility gate + signed-op consuming layer, landing v0.4.0). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
138 lines
3.4 KiB
Go
138 lines
3.4 KiB
Go
package reconcile
|
|
|
|
import (
|
|
"errors"
|
|
"sync"
|
|
)
|
|
|
|
// ErrQueueClosed is the error delivered on a job's result channel when the job is
// submitted after Close: either the Queue was already marked closed, or the job's
// lane was closed before the job could be appended to it.
var ErrQueueClosed = errors.New("reconcile: queue closed")
|
|
|
|
// Queue is the per-guest serializer (doc 03 §10): the single choke point ALL
// mutation sources funnel through (reconcile now; one-shot jobs and the local API
// later). Jobs for the SAME vmid run strictly one-at-a-time in submit order;
// independent vmids proceed in parallel. This is what keeps Proxmox from ever seeing
// concurrent conflicting ops on one guest while letting a multi-guest host converge
// concurrently.
//
// Each vmid gets a "lane": a goroutine draining an unbounded FIFO of jobs. Submit
// does not wait for the job to run (the FIFO is unbounded) and returns a channel that
// delivers that job's error once it has run — so a caller can await a specific job
// while others proceed.
type Queue struct {
	mu     sync.Mutex    // guards lanes and closed
	lanes  map[int]*lane // one lane per vmid, created lazily by Submit
	closed bool          // set by Close; Submit rejects jobs once true
}
|
|
|
|
// NewQueue builds an empty queue. Lanes are created lazily on first Submit per vmid
|
|
// and live for the queue's lifetime (a host has a bounded guest set); Close stops them.
|
|
func NewQueue() *Queue {
|
|
return &Queue{lanes: make(map[int]*lane)}
|
|
}
|
|
|
|
// Submit enqueues fn on vmid's lane and returns a buffered channel that receives
|
|
// fn's return value (or ErrQueueClosed) exactly once. Submission preserves per-lane
|
|
// FIFO order; different lanes run concurrently. The result channel is buffered, so the
|
|
// lane never blocks delivering a result even if the caller abandons it.
|
|
func (q *Queue) Submit(vmid int, fn func() error) <-chan error {
|
|
res := make(chan error, 1)
|
|
|
|
q.mu.Lock()
|
|
if q.closed {
|
|
q.mu.Unlock()
|
|
res <- ErrQueueClosed
|
|
return res
|
|
}
|
|
l := q.lanes[vmid]
|
|
if l == nil {
|
|
l = newLane()
|
|
q.lanes[vmid] = l
|
|
go l.run()
|
|
}
|
|
q.mu.Unlock()
|
|
|
|
l.enqueue(&laneTask{fn: fn, res: res})
|
|
return res
|
|
}
|
|
|
|
// Close stops accepting new jobs and signals every lane to drain its pending jobs and
|
|
// exit. Already-queued jobs still run (graceful drain). Idempotent.
|
|
func (q *Queue) Close() {
|
|
q.mu.Lock()
|
|
if q.closed {
|
|
q.mu.Unlock()
|
|
return
|
|
}
|
|
q.closed = true
|
|
lanes := make([]*lane, 0, len(q.lanes))
|
|
for _, l := range q.lanes {
|
|
lanes = append(lanes, l)
|
|
}
|
|
q.mu.Unlock()
|
|
|
|
for _, l := range lanes {
|
|
l.close()
|
|
}
|
|
}
|
|
|
|
// laneTask is one queued job: the function to run and the channel on which its
// outcome is delivered.
type laneTask struct {
	fn  func() error // the job body; invoked by the lane's run loop
	res chan error   // receives fn's return value, or ErrQueueClosed if the lane rejects the task
}
|
|
|
|
// lane is one vmid's serial worker: a goroutine draining an unbounded FIFO guarded by
// a cond var. cond-var (not a buffered channel) gives unbounded, non-blocking,
// order-preserving submission without an arbitrary capacity.
type lane struct {
	mu      sync.Mutex  // guards pending and closed
	cond    *sync.Cond  // bound to mu by newLane; signalled by enqueue, broadcast by close
	pending []*laneTask // FIFO of tasks not yet started; index 0 is the next to run
	closed  bool        // set by close; enqueue rejects tasks once true
}
|
|
|
|
func newLane() *lane {
|
|
l := &lane{}
|
|
l.cond = sync.NewCond(&l.mu)
|
|
return l
|
|
}
|
|
|
|
func (l *lane) enqueue(t *laneTask) {
|
|
l.mu.Lock()
|
|
if l.closed {
|
|
l.mu.Unlock()
|
|
t.res <- ErrQueueClosed
|
|
return
|
|
}
|
|
l.pending = append(l.pending, t)
|
|
l.cond.Signal()
|
|
l.mu.Unlock()
|
|
}
|
|
|
|
func (l *lane) close() {
|
|
l.mu.Lock()
|
|
l.closed = true
|
|
l.cond.Broadcast()
|
|
l.mu.Unlock()
|
|
}
|
|
|
|
// run drains the FIFO one job at a time. On close it finishes any already-queued jobs
|
|
// (so nothing submitted-before-close is silently dropped) and then exits.
|
|
func (l *lane) run() {
|
|
for {
|
|
l.mu.Lock()
|
|
for len(l.pending) == 0 && !l.closed {
|
|
l.cond.Wait()
|
|
}
|
|
if len(l.pending) == 0 { // closed and drained
|
|
l.mu.Unlock()
|
|
return
|
|
}
|
|
t := l.pending[0]
|
|
l.pending = l.pending[1:]
|
|
l.mu.Unlock()
|
|
|
|
t.res <- t.fn()
|
|
}
|
|
}
|