slice 9: host-health view on the monitoring page (v0.39.0)

Add agentapi HostMetrics() + a thin /api/host-metrics proxy to the agent's
new GET /host/metrics, and a 'Szerver allapota (gazdagep)' card on the
monitoring page rendering host CPU%/load/mem/CPU-temp(n/a)/uptime + per-
storage capacity bars (thin-pool fill, disk temp/wear). Polls every 8s.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-10 16:16:15 +02:00
parent 4c9065381b
commit d8d1e17758
8 changed files with 406 additions and 35 deletions
+72
View File
@@ -267,6 +267,78 @@ func (c *Client) FormatDisk(ctx context.Context, device, fstype string) (FormatR
return out, nil
}
// ---- slice 9: host metrics (the customer host-health view) -------------------------------
// HostMetrics mirrors the `host` block of the agent's GET /host/metrics response (the shared
// HostMetrics wire shape).
// CPUTempC is a pointer so a host with no temp sensor decodes to nil ("n/a"), distinct from a real 0.
type HostMetrics struct {
Node string `json:"node"`
CPUPercent float64 `json:"cpu_percent"` // percent, 0-100
MemoryTotalBytes int64 `json:"memory_total_bytes"`
MemoryUsedBytes int64 `json:"memory_used_bytes"`
MemoryPercent float64 `json:"memory_percent"`
DiskTotalBytes int64 `json:"disk_total_bytes"` // host root fs
DiskUsedBytes int64 `json:"disk_used_bytes"`
DiskPercent float64 `json:"disk_percent"`
LoadAvg []string `json:"loadavg"` // load averages sent as strings (e.g. "0.10"); presumably 1/5/15 min — confirm against the agent
UptimeSeconds int64 `json:"uptime_seconds"`
CPUTempC *int `json:"cpu_temp_c"` // °C or null ("n/a")
}
// ThinPoolFill mirrors the agent's lvmthin pool fill (a full thin-pool corrupts every guest on it).
// Both values are fractions rather than percentages; the monitoring page multiplies
// DataUsedFraction by 100 before display.
type ThinPoolFill struct {
DataUsedFraction float64 `json:"data_used_fraction"`
MetadataUsedFraction *float64 `json:"metadata_used_fraction"` // nil when the agent sends JSON null
}
// SmartSummary mirrors the agent's per-disk SMART health (only the fields the UI renders). The
// pointer fields decode to nil when the agent sends JSON null, i.e. when the device type does not
// expose that attribute.
type SmartSummary struct {
Health string `json:"health"` // PASSED | FAILING | UNKNOWN
TemperatureC *int `json:"temperature_c"` // disk temperature in °C; nil when not reported
PercentageUsed *int `json:"percentage_used"` // NVMe wear (%); null for SATA/USB
}
// StorageTarget mirrors one entry of the agent's GET /host/metrics storage_targets array (the
// per-storage capacity + health the monitoring view renders). It is a SUBSET of the agent's wire
// shape — only the fields the UI reads; unknown JSON keys are ignored by the decoder.
type StorageTarget struct {
Name string `json:"name"`
Type string `json:"type"` // storage type as reported by the agent, e.g. "lvmthin" or "usb"
State string `json:"state"` // the monitoring page treats any non-empty value other than "attached" as unavailable
Reachable bool `json:"reachable"`
TotalBytes int64 `json:"total_bytes"`
UsedBytes int64 `json:"used_bytes"`
AvailBytes int64 `json:"avail_bytes"`
UsedFraction float64 `json:"used_fraction"` // fraction, not percent; the UI multiplies by 100
Content string `json:"content"`
MountPath string `json:"mount_path"`
ClassHint string `json:"class_hint"`
ThinPool *ThinPoolFill `json:"thin_pool,omitempty"` // nil when the entry carries no thin_pool object
Smart SmartSummary `json:"smart"`
}
// HostMetricsResponse mirrors the agent's GET /host/metrics payload (host-wide health + per-storage
// capacity). Host-wide and token-authed (one-customer-per-host); a fresh collect, not a snapshot.
// It is the `data` member of the agent's {ok,data,error} envelope, which Client.get unwraps
// before decoding.
type HostMetricsResponse struct {
VMID int `json:"vmid"`
Host HostMetrics `json:"host"` // host-wide CPU / memory / load / uptime / temperature
StorageTargets []StorageTarget `json:"storage_targets"` // one entry per storage known to the agent
}
// HostMetrics performs an authenticated GET /host/metrics against the agent and decodes the
// unwrapped envelope payload into a HostMetricsResponse (live host health plus per-storage
// capacity). Transport and envelope errors from get are returned unchanged; a payload that does
// not decode is reported as a wrapped "agentapi: decode /host/metrics" error.
func (c *Client) HostMetrics(ctx context.Context) (HostMetricsResponse, error) {
	var metrics HostMetricsResponse
	raw, err := c.get(ctx, "/host/metrics")
	if err == nil {
		if decodeErr := json.Unmarshal(raw, &metrics); decodeErr != nil {
			err = fmt.Errorf("agentapi: decode /host/metrics: %w", decodeErr)
		}
	}
	return metrics, err
}
// get issues an authenticated GET and unwraps the {ok,data,error} envelope.
func (c *Client) get(ctx context.Context, path string) (json.RawMessage, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.baseURL+path, nil)
@@ -0,0 +1,84 @@
package agentapi
import (
"context"
"net/http"
"net/http/httptest"
"strings"
"testing"
)
// hostMetricsStub starts a TLS test server that answers GET /host/metrics with a canned
// {ok,data} envelope: a populated host block and two storage targets — an lvmthin target carrying
// a thin-pool object and SMART temp/wear, and a USB target whose SMART temp/wear are null.
// cpuTempNull selects whether host.cpu_temp_c is emitted as JSON null (true) or as 47 (false).
// It returns the server (the caller must Close it) and its endpoint with the "https://" scheme
// prefix stripped.
func hostMetricsStub(cpuTempNull bool) (*httptest.Server, string) {
temp := `47`
if cpuTempNull {
temp = `null`
}
mux := http.NewServeMux()
mux.HandleFunc("GET /host/metrics", func(w http.ResponseWriter, r *http.Request) {
_, _ = w.Write([]byte(`{"ok":true,"data":{
"vmid":8200,
"host":{"node":"demo-felhom","cpu_percent":12.5,"memory_total_bytes":17179869184,
"memory_used_bytes":4294967296,"memory_percent":25,"loadavg":["0.10","0.20","0.15"],
"uptime_seconds":86400,"cpu_temp_c":` + temp + `},
"storage_targets":[
{"name":"local-lvm","type":"lvmthin","state":"attached","reachable":true,
"total_bytes":100000000000,"used_bytes":42000000000,"used_fraction":0.42,
"thin_pool":{"data_used_fraction":0.42,"metadata_used_fraction":null},
"smart":{"health":"PASSED","temperature_c":38,"percentage_used":2}},
{"name":"usb-backup","type":"usb","state":"attached","reachable":true,
"total_bytes":2000000000000,"used_bytes":500000000000,"used_fraction":0.25,
"smart":{"health":"PASSED","temperature_c":null,"percentage_used":null}}
]}}`))
})
s := httptest.NewTLSServer(mux)
return s, strings.TrimPrefix(s.URL, "https://")
}
// TestHostMetrics_DecodesHostAndStorage checks that a fully populated /host/metrics payload
// decodes into the host block and both storage targets, including the optional pointer fields.
func TestHostMetrics_DecodesHostAndStorage(t *testing.T) {
	srv, endpoint := hostMetricsStub(false)
	defer srv.Close()

	got, err := clientFor(t, srv, endpoint).HostMetrics(context.Background())
	if err != nil {
		t.Fatal(err)
	}

	// Host block: identity, CPU load and a real (non-null) CPU temperature.
	host := got.Host
	if host.Node != "demo-felhom" || host.CPUPercent != 12.5 {
		t.Fatalf("host = %+v", host)
	}
	if temp := host.CPUTempC; temp == nil || *temp != 47 {
		t.Fatalf("cpu_temp_c = %v, want 47", temp)
	}

	if n := len(got.StorageTargets); n != 2 {
		t.Fatalf("storage targets = %d, want 2", n)
	}
	lvm, usb := got.StorageTargets[0], got.StorageTargets[1]

	// lvmthin target: thin-pool fill plus SMART temperature and wear are all present.
	if pool := lvm.ThinPool; pool == nil || pool.DataUsedFraction != 0.42 {
		t.Errorf("thin_pool = %+v", pool)
	}
	if temp := lvm.Smart.TemperatureC; temp == nil || *temp != 38 {
		t.Errorf("smart temp = %v, want 38", temp)
	}
	if wear := lvm.Smart.PercentageUsed; wear == nil || *wear != 2 {
		t.Errorf("smart wear = %v, want 2", wear)
	}

	// USB drive: SMART temp/wear are null (USB bridge exposes no SMART) → graceful null.
	if temp := usb.Smart.TemperatureC; temp != nil {
		t.Errorf("usb smart temp = %v, want nil", temp)
	}
}
// TestHostMetrics_NullCPUTemp verifies that a JSON null cpu_temp_c decodes to a nil pointer,
// which is what the UI renders as "n/a".
func TestHostMetrics_NullCPUTemp(t *testing.T) {
	srv, endpoint := hostMetricsStub(true)
	defer srv.Close()

	got, err := clientFor(t, srv, endpoint).HostMetrics(context.Background())
	if err != nil {
		t.Fatal(err)
	}
	if temp := got.Host.CPUTempC; temp != nil {
		t.Fatalf("cpu_temp_c = %v, want nil (n/a)", temp)
	}
}
@@ -0,0 +1,38 @@
package web
import (
"net/http"
)
// Agent-backed host metrics (slice 9).
//
// The de-privileged controller (slice 8C) sees only its own cgroup, so it cannot read host
// health itself. This thin proxy forwards GET /api/host-metrics to the agent's GET /host/metrics
// and returns the host-wide view (cpu%/mem/load/uptime/cpu-temp + per-storage capacity) for the
// monitoring page. It reuses the same pinned agentapi.Client + {ok,data,error} envelope as the
// disk proxy (agent_disk_handlers.go). Read-only; no CSRF mutation.
// ServeHostMetricsAPI proxies GET /api/host-metrics → agent GET /host/metrics.
// Wired in main.go behind RequireAuth.
//
// Responses use the {ok,data,error} envelope written by writeDiskJSON:
//   - 200 with the agent's HostMetricsResponse as data on success;
//   - 405 (with an Allow header) for any method other than GET;
//   - 503 when no agent client can be built (unprovisioned guest / no local API);
//   - 502 when the agent call itself fails.
func (s *Server) ServeHostMetricsAPI(w http.ResponseWriter, r *http.Request) {
	if s.isDebug() {
		s.logger.Printf("[DEBUG] [web] ServeHostMetricsAPI: %s %s from %s", r.Method, r.URL.Path, r.RemoteAddr)
	}

	if r.Method != http.MethodGet {
		// RFC 9110 §15.5.6: a 405 response must list the supported methods in Allow.
		w.Header().Set("Allow", http.MethodGet)
		writeDiskJSON(w, http.StatusMethodNotAllowed, false, "method not allowed", nil)
		return
	}

	client, err := s.agentClient()
	if err != nil {
		// Unprovisioned guest / no local API configured — the UI shows "host metrics unavailable".
		writeDiskJSON(w, http.StatusServiceUnavailable, false, err.Error(), nil)
		return
	}

	// The request context is forwarded so the agent call is bound to this request's lifetime.
	resp, err := client.HostMetrics(r.Context())
	if err != nil {
		s.logger.Printf("[ERROR] [web] host metrics via agent failed: %v", err)
		writeDiskJSON(w, http.StatusBadGateway, false, err.Error(), nil)
		return
	}

	writeDiskJSON(w, http.StatusOK, true, "", resp)
}
@@ -5,6 +5,44 @@
<h2>Rendszermonitor</h2>
</div>
<!-- Section 0: Host (Proxmox box) health — slice 9, served by the host agent over the local API.
The de-privileged controller can't read the host itself; this card shows the real box. -->
<div class="monitor-card" id="host-health-card">
<div class="monitor-card-header">
<h3>Szerver állapota (gazdagép)</h3>
<span class="sysinfo-value" id="host-health-updated" style="font-size:.75rem;color:var(--text-muted)"></span>
</div>
<div id="host-health-unavailable" class="monitoring-banner monitoring-banner-yellow" style="display:none">
A gazdagép metrikái jelenleg nem elérhetők.
</div>
<div id="host-health-body" style="display:none">
<div class="sysinfo-grid">
<div class="sysinfo-row">
<span class="sysinfo-label">CPU használat</span>
<span class="sysinfo-value" id="host-cpu"></span>
</div>
<div class="sysinfo-row">
<span class="sysinfo-label">Terhelés (load)</span>
<span class="sysinfo-value" id="host-load"></span>
</div>
<div class="sysinfo-row">
<span class="sysinfo-label">Memória</span>
<span class="sysinfo-value" id="host-mem"></span>
</div>
<div class="sysinfo-row">
<span class="sysinfo-label">CPU hőmérséklet</span>
<span class="sysinfo-value" id="host-temp"></span>
</div>
<div class="sysinfo-row">
<span class="sysinfo-label">Üzemidő</span>
<span class="sysinfo-value" id="host-uptime"></span>
</div>
</div>
<h4 style="margin:1rem 0 .5rem">Tárolók kapacitása</h4>
<div class="storage-bars" id="host-storage-bars"></div>
</div>
</div>
<!-- Section 1: System Overview -->
<div class="monitor-card">
<h3>Rendszer áttekintés</h3>
@@ -645,6 +683,118 @@
loadContainerDetail();
});
// =============================================
// HOST (PROXMOX BOX) HEALTH — slice 9, proxied from the host agent
// =============================================
// Maps a usage percentage to the CSS class that colours a usage bar:
// red at 85% and above, yellow from 70%, green otherwise (including non-numeric input).
function usageColorClass(pct) {
    var cls = 'system-bar-green';
    if (pct >= 70) cls = 'system-bar-yellow';
    if (pct >= 85) cls = 'system-bar-red';
    return cls;
}
// Formats a byte count as a short human-readable size in binary units (1 GB = 2^30 bytes):
// two decimals in TB from 1024 GB up, whole GB from 100 GB, one decimal below that.
// Missing, zero or negative input yields '0 GB'.
function fmtBytesGB(bytes) {
    if (!bytes || bytes <= 0) return '0 GB';
    var gib = bytes / 1073741824;
    return gib >= 1024 ? (gib / 1024).toFixed(2) + ' TB'
         : gib >= 100 ? Math.round(gib) + ' GB'
         : gib.toFixed(1) + ' GB';
}
// Escapes the five HTML-significant characters (& < > " ') so a value can be
// concatenated into markup safely. The argument is coerced to a string first.
function escapeHtml(s) {
    var entities = {'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'};
    var text = String(s);
    return text.replace(/[&<>"']/g, function(ch) {
        return entities[ch];
    });
}
// Switches the host-health card to its "unavailable" state: reveals the warning
// banner, hides the metrics body and clears the "last updated" stamp.
function showHostUnavailable() {
    var banner = document.getElementById('host-health-unavailable');
    banner.style.display = '';
    var body = document.getElementById('host-health-body');
    body.style.display = 'none';
    var stamp = document.getElementById('host-health-updated');
    stamp.textContent = '';
}
// Builds the HTML for one storage-target row of the host-health card.
// t is one entry of the agent's storage_targets array. A target whose state is set
// and is not 'attached' is rendered as a disconnected row without a usage bar.
// All agent-supplied text is passed through escapeHtml before it reaches the markup.
function buildHostStorageRowHtml(t) {
    var label = escapeHtml(t.name || '') + (t.type ? ' (' + escapeHtml(t.type) + ')' : '');
    if (t.state && t.state !== 'attached') {
        return '<div class="storage-item storage-disconnected">' +
            '<div class="storage-header"><span class="storage-label">' + label + '</span>' +
            '<span class="storage-value badge-error" style="font-size:.75rem">Nem elérhető</span></div>' +
            '<div class="system-bar"><div class="system-bar-disconnected"></div></div></div>';
    }
    // used_fraction is a 0..1 fraction; anything missing, non-numeric or negative is shown
    // as 0% so the bar width is always a valid CSS percentage.
    var pct = Number(t.used_fraction) * 100;
    if (!isFinite(pct) || pct < 0) pct = 0;
    // Extra detail: thin-pool fill (corrupts every guest if full) + disk temp/wear.
    var extra = [];
    if (t.thin_pool && t.thin_pool.data_used_fraction != null) {
        extra.push('thin-pool: ' + Math.round(t.thin_pool.data_used_fraction * 100) + '%');
    }
    if (t.smart) {
        if (t.smart.temperature_c != null) extra.push(t.smart.temperature_c + ' °C');
        if (t.smart.percentage_used != null) extra.push('kopás: ' + t.smart.percentage_used + '%');
    }
    var extraHtml = extra.length ? ' <span style="color:var(--text-muted);font-size:.75rem">· ' + escapeHtml(extra.join(' · ')) + '</span>' : '';
    return '<div class="storage-item">' +
        '<div class="storage-header">' +
        '<span class="storage-label">' + label + extraHtml + '</span>' +
        '<span class="storage-value">' + fmtBytesGB(t.used_bytes) + ' / ' + fmtBytesGB(t.total_bytes) +
        ' (' + Math.round(pct) + '%)</span></div>' +
        '<div class="system-bar"><div class="system-bar-fill ' + usageColorClass(pct) +
        '" style="width:' + Math.min(100, pct).toFixed(1) + '%"></div></div></div>';
}

// Renders the host-health card from the `data` payload of /api/host-metrics.
// d.host carries CPU / load / memory / temperature / uptime; d.storage_targets carries
// the per-storage capacity rows. Any value the agent did not report is shown as a
// placeholder (CPU temperature keeps its explicit "n/a") instead of a blank cell or a
// bare unit such as "%". Also hides the "unavailable" banner and stamps the update time.
function renderHostHealth(d) {
    var h = d.host || {};
    var missing = '—';
    function setText(id, text) {
        document.getElementById(id).textContent = text;
    }

    document.getElementById('host-health-unavailable').style.display = 'none';
    document.getElementById('host-health-body').style.display = '';

    // CPU % + load average
    var cpuKnown = typeof h.cpu_percent === 'number' && isFinite(h.cpu_percent);
    setText('host-cpu', cpuKnown ? h.cpu_percent.toFixed(1) + '%' : missing);
    var load = Array.isArray(h.loadavg) ? h.loadavg.join(' ') : '';
    setText('host-load', load || missing);

    // Memory used/total
    if (h.memory_total_bytes > 0) {
        setText('host-mem',
            fmtBytesGB(h.memory_used_bytes) + ' / ' + fmtBytesGB(h.memory_total_bytes) +
            ' (' + Math.round(h.memory_percent || 0) + '%)');
    } else {
        setText('host-mem', missing);
    }

    // CPU temp — null renders as "n/a" cleanly (graceful-null from the agent)
    setText('host-temp', (h.cpu_temp_c == null) ? 'n/a' : (h.cpu_temp_c + ' °C'));

    // Uptime (reuse the existing formatter)
    setText('host-uptime', (h.uptime_seconds > 0) ? formatUptime(h.uptime_seconds) : missing);

    // Per-storage capacity bars
    var bars = document.getElementById('host-storage-bars');
    var targets = Array.isArray(d.storage_targets) ? d.storage_targets : [];
    if (!targets.length) {
        bars.innerHTML = '<div class="sysinfo-value" style="color:var(--text-muted)">Nincs tároló adat.</div>';
    } else {
        bars.innerHTML = targets.map(buildHostStorageRowHtml).join('');
    }

    var now = new Date();
    setText('host-health-updated',
        'Frissítve: ' + now.toLocaleTimeString('hu-HU', {timeZone: budaTZ, hour: '2-digit', minute: '2-digit', second: '2-digit'}));
}
// Fetches /api/host-metrics and renders the host-health card. An envelope without
// ok/data, a network or JSON error, or a failure while rendering all fall back to the
// "unavailable" state; errors are logged to the console and never rethrown.
async function loadHostMetrics() {
    try {
        var payload = await (await fetch('/api/host-metrics')).json();
        if (payload.ok && payload.data) {
            renderHostHealth(payload.data);
        } else {
            showHostUnavailable();
        }
    } catch (err) {
        console.error('Failed to load host metrics:', err);
        showHostUnavailable();
    }
}
// =============================================
// STATIC SYSTEM INFO
// =============================================
@@ -693,6 +843,7 @@
loadSysInfo();
loadSystemMetrics();
loadContainerSummary();
loadHostMetrics();
// Auto-refresh every 60 seconds
setInterval(function() {
@@ -701,6 +852,9 @@
if (detailContainer) loadContainerDetail();
}, 60000);
// Host (Proxmox box) health is a live snapshot — poll it more often while the page is open.
setInterval(loadHostMetrics, 8000);
})();
</script>