v0.4.0-rc1: slice 4 Phase A — reconcile engine (structural, runs live unfed)
New internal/reconcile package: the agent-side control core's structural half. - Per-guest serializer Queue (doc 03 §10): the single choke point all mutation sources funnel through; same-vmid serial in submit order, different vmids parallel (cond-var FIFO lanes). - Desired-state model + DesiredProvider seam; EmptyProvider is the only live source at slice 4 (no hub serving until slice 10) so the live engine computes an empty action set and performs zero mutations. - Normalization layer (FieldNormalizers): normalized desired-vs-actual so Proxmox round-trip quirks don't read as drift. normDesc promoted out of main.go to reconcile.NormDescription; selftest uses the shared helper. - Plan (pure diff): minimal benign action set (Start/Stop/SetConfig) for guests in both desired and actual; provision/destroy out of scope here. - Engine: dispatches onto the shared queue; honors the dual-mode SetConfig contract (UPID -> WaitTask; empty UPID -> synchronous success). - Durable op journal + idempotency store (mirrors authz.FileNonceStore): in-flight task ids for crash detection + AlreadyApplied dedupe across restart. - Wired into runDaemon alongside the hub loop, sharing the queue; runs cleanly with no desired state and no signers. Full module race-clean and vet-clean on the Linux build server. CHECKPOINT: Phase A only. Awaiting validation before Phase B (the reversibility gate + signed-op consuming layer, landing v0.4.0). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,137 @@
|
||||
package reconcile
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// ErrQueueClosed is returned for a job submitted after Close.
|
||||
var ErrQueueClosed = errors.New("reconcile: queue closed")
|
||||
|
||||
// Queue is the per-guest serializer (doc 03 §10): the single choke point ALL
|
||||
// mutation sources funnel through (reconcile now; one-shot jobs and the local API
|
||||
// later). Jobs for the SAME vmid run strictly one-at-a-time in submit order;
|
||||
// independent vmids proceed in parallel. This is what keeps Proxmox from ever seeing
|
||||
// concurrent conflicting ops on one guest while letting a multi-guest host converge
|
||||
// concurrently.
|
||||
//
|
||||
// Each vmid gets a "lane": a goroutine draining an unbounded FIFO of jobs. Submit
|
||||
// never blocks the caller (the FIFO is unbounded) and returns a channel that delivers
|
||||
// that job's error when it runs — so a caller can await a specific job while others
|
||||
// proceed.
|
||||
type Queue struct {
|
||||
mu sync.Mutex
|
||||
lanes map[int]*lane
|
||||
closed bool
|
||||
}
|
||||
|
||||
// NewQueue builds an empty queue. Lanes are created lazily on first Submit per vmid
|
||||
// and live for the queue's lifetime (a host has a bounded guest set); Close stops them.
|
||||
func NewQueue() *Queue {
|
||||
return &Queue{lanes: make(map[int]*lane)}
|
||||
}
|
||||
|
||||
// Submit enqueues fn on vmid's lane and returns a buffered channel that receives
|
||||
// fn's return value (or ErrQueueClosed) exactly once. Submission preserves per-lane
|
||||
// FIFO order; different lanes run concurrently. The result channel is buffered, so the
|
||||
// lane never blocks delivering a result even if the caller abandons it.
|
||||
func (q *Queue) Submit(vmid int, fn func() error) <-chan error {
|
||||
res := make(chan error, 1)
|
||||
|
||||
q.mu.Lock()
|
||||
if q.closed {
|
||||
q.mu.Unlock()
|
||||
res <- ErrQueueClosed
|
||||
return res
|
||||
}
|
||||
l := q.lanes[vmid]
|
||||
if l == nil {
|
||||
l = newLane()
|
||||
q.lanes[vmid] = l
|
||||
go l.run()
|
||||
}
|
||||
q.mu.Unlock()
|
||||
|
||||
l.enqueue(&laneTask{fn: fn, res: res})
|
||||
return res
|
||||
}
|
||||
|
||||
// Close stops accepting new jobs and signals every lane to drain its pending jobs and
|
||||
// exit. Already-queued jobs still run (graceful drain). Idempotent.
|
||||
func (q *Queue) Close() {
|
||||
q.mu.Lock()
|
||||
if q.closed {
|
||||
q.mu.Unlock()
|
||||
return
|
||||
}
|
||||
q.closed = true
|
||||
lanes := make([]*lane, 0, len(q.lanes))
|
||||
for _, l := range q.lanes {
|
||||
lanes = append(lanes, l)
|
||||
}
|
||||
q.mu.Unlock()
|
||||
|
||||
for _, l := range lanes {
|
||||
l.close()
|
||||
}
|
||||
}
|
||||
|
||||
type laneTask struct {
|
||||
fn func() error
|
||||
res chan error
|
||||
}
|
||||
|
||||
// lane is one vmid's serial worker: a goroutine draining an unbounded FIFO guarded by
|
||||
// a cond var. cond-var (not a buffered channel) gives unbounded, non-blocking,
|
||||
// order-preserving submission without an arbitrary capacity.
|
||||
type lane struct {
|
||||
mu sync.Mutex
|
||||
cond *sync.Cond
|
||||
pending []*laneTask
|
||||
closed bool
|
||||
}
|
||||
|
||||
func newLane() *lane {
|
||||
l := &lane{}
|
||||
l.cond = sync.NewCond(&l.mu)
|
||||
return l
|
||||
}
|
||||
|
||||
func (l *lane) enqueue(t *laneTask) {
|
||||
l.mu.Lock()
|
||||
if l.closed {
|
||||
l.mu.Unlock()
|
||||
t.res <- ErrQueueClosed
|
||||
return
|
||||
}
|
||||
l.pending = append(l.pending, t)
|
||||
l.cond.Signal()
|
||||
l.mu.Unlock()
|
||||
}
|
||||
|
||||
func (l *lane) close() {
|
||||
l.mu.Lock()
|
||||
l.closed = true
|
||||
l.cond.Broadcast()
|
||||
l.mu.Unlock()
|
||||
}
|
||||
|
||||
// run drains the FIFO one job at a time. On close it finishes any already-queued jobs
|
||||
// (so nothing submitted-before-close is silently dropped) and then exits.
|
||||
func (l *lane) run() {
|
||||
for {
|
||||
l.mu.Lock()
|
||||
for len(l.pending) == 0 && !l.closed {
|
||||
l.cond.Wait()
|
||||
}
|
||||
if len(l.pending) == 0 { // closed and drained
|
||||
l.mu.Unlock()
|
||||
return
|
||||
}
|
||||
t := l.pending[0]
|
||||
l.pending = l.pending[1:]
|
||||
l.mu.Unlock()
|
||||
|
||||
t.res <- t.fn()
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user