Files
homelab-manifests/glance-system/idokep-proxy.yaml
T
2026-01-14 15:40:16 +01:00

283 lines
9.2 KiB
YAML

apiVersion: v1
kind: ConfigMap
metadata:
  name: idokep-proxy
  namespace: glance-system
data:
  # Python source of the FastAPI scraper; the key name is the file name the
  # application is loaded from.
  app.py: |
import os
import time
import re
from typing import Any, Dict, Optional, Tuple, List
import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI, Query, Response
from fastapi.responses import JSONResponse
from prometheus_client import Gauge, Counter, Histogram, generate_latest, CONTENT_TYPE_LATEST
# ASGI application object; the route decorators below register handlers on it.
app = FastAPI()
# Root of the scraped site; also used to resolve root-relative asset URLs.
IDOKEP_BASE = "https://www.idokep.hu"
# Place used when a request does not pass ?place=...; overridable via IDOKEP_PLACE.
DEFAULT_PLACE = os.getenv("IDOKEP_PLACE", "Budapest VIII. ker")
# User-Agent header sent with every upstream request; overridable via IDOKEP_UA.
USER_AGENT = os.getenv(
"IDOKEP_UA",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
)
# Cache lifetime in seconds, read from CACHE_TTL_SEC; the default 900 is 15 minutes.
CACHE_TTL_SEC = int(os.getenv("CACHE_TTL_SEC", "900")) # 15 minutes
# In-process cache keyed by place: (expiry time as a unix timestamp, parsed payload).
# NOTE(review): nothing in the visible code evicts entries, so it grows with the
# number of distinct places requested.
_cache: Dict[str, Tuple[float, Dict[str, Any]]] = {}
# --- Prometheus metrics (low-cardinality, place as label) ---
# NOTE(review): the "place" label value is taken from the request's query
# parameter, so cardinality is only low if callers use a small set of places —
# confirm this endpoint is not reachable by untrusted clients.
# "dow" is the day-of-week text scraped from the page.
SCRAPES_TOTAL = Counter("idokep_scrapes_total", "Total Időkép scrapes", ["place", "status"])
SCRAPE_SECONDS = Histogram("idokep_scrape_seconds", "Időkép scrape duration in seconds", ["place"])
CURRENT_TEMP_C = Gauge("idokep_current_temp_c", "Current temperature in Celsius", ["place"])
DAILY_TMIN_C = Gauge("idokep_daily_tmin_c", "Daily minimum temperature in Celsius", ["place", "dow"])
DAILY_TMAX_C = Gauge("idokep_daily_tmax_c", "Daily maximum temperature in Celsius", ["place", "dow"])
DAILY_PREC_MM = Gauge("idokep_daily_precip_mm", "Daily precipitation in mm", ["place", "dow"])
def _num(s: str) -> Optional[float]:
if s is None:
return None
m = re.search(r"-?\d+(\.\d+)?", s.replace(",", "."))
return float(m.group(0)) if m else None
def _abs_url(url: Optional[str]) -> Optional[str]:
if not url:
return None
if url.startswith("//"):
return "https:" + url
if url.startswith("/"):
return IDOKEP_BASE + url
return url
def _pick_text(el) -> Optional[str]:
if not el:
return None
return el.get_text(" ", strip=True)
def _fetch_place_html(place: str) -> str:
    """Download the forecast page for ``place`` and return its HTML text.

    Args:
        place: Place name as it appears after ``/idojaras/`` in the site URL.

    Returns:
        The response body decoded to text.

    Raises:
        requests.RequestException: On connection errors, on timeout (20 s) or
            when the server answers with an HTTP error status.
    """
    # Local import so this block stays self-contained.
    from urllib.parse import quote

    # ``place`` comes from the request's query string. Percent-encode every
    # reserved character (including "/", "?" and "#") so the value cannot add
    # extra path segments, a query string or a fragment to the upstream URL.
    place_path = quote(place, safe="")
    url = f"{IDOKEP_BASE}/idojaras/{place_path}"
    r = requests.get(url, headers={"User-Agent": USER_AGENT}, timeout=20)
    r.raise_for_status()
    return r.text
def _icon_url(el) -> Optional[str]:
    """Return the absolute URL from an element's ``src`` attribute, or None if the element is missing."""
    return _abs_url(el.get("src")) if el else None


def _parse_current(soup) -> Dict[str, Any]:
    """Read the current-conditions values: temperature, Hungarian description and icon."""
    return {
        "temp_c": _num(_pick_text(soup.select_one(".current-temperature")) or ""),
        "condition_hu": _pick_text(soup.select_one(".weather-short-desc")),
        "icon_url": _icon_url(soup.select_one(".forecast-bigicon")),
    }


def _parse_hourly(soup, limit: int = 6) -> List[Dict[str, Any]]:
    """Read the first ``limit`` hourly forecast cards."""
    hourly: List[Dict[str, Any]] = []
    for card in soup.select(".new-hourly-forecast-card")[:limit]:
        hourly.append(
            {
                "hour": _pick_text(card.select_one(".new-hourly-forecast-hour")),  # e.g. "15:00"
                "temp_c": _num(_pick_text(card.select_one(".tempValue .hover-over")) or ""),
                "icon_url": _icon_url(card.select_one(".forecast-icon")),
                "precip_pct": _num(_pick_text(card.select_one(".hourly-rain-chance a")) or ""),
            }
        )
    return hourly


def _parse_daily(soup, limit: int = 5) -> List[Dict[str, Any]]:
    """Read up to ``limit`` daily forecast columns, skipping the first column when there are at least two."""
    cols = soup.select(".dailyForecastCol")
    # The first column is skipped, as the Home Assistant template this was
    # modelled on did; a single column is kept as-is.
    if len(cols) >= 2:
        cols = cols[1:]

    days: List[Dict[str, Any]] = []
    for col in cols[:limit]:
        # The page has more than one layout for min/max; try each selector in turn.
        max_el = col.select_one(".max a") or col.select_one(".min-max-close a:nth-child(1)")
        min_el = col.select_one(".min a") or col.select_one(".min-max-close a:nth-child(2)")
        days.append(
            {
                "dow": _pick_text(col.select_one(".dfDay")),  # e.g. "Sze"
                "daynum": _pick_text(col.select_one(".dfDayNum")),  # e.g. "14"
                "tmax_c": _num(_pick_text(max_el) or ""),
                "tmin_c": _num(_pick_text(min_el) or ""),
                "prec_mm": _num(_pick_text(col.select_one(".mm")) or ""),
                "icon_url": _icon_url(col.select_one(".forecast")),
            }
        )
    return days


def _with_week_bars(days: List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], Optional[float], Optional[float]]:
    """Add ``bar_left_pct``/``bar_width_pct`` to each day and return ``(days, week_min, week_max)``."""
    mins = [d["tmin_c"] for d in days if d.get("tmin_c") is not None]
    maxs = [d["tmax_c"] for d in days if d.get("tmax_c") is not None]
    week_min = min(mins) if mins else None
    week_max = max(maxs) if maxs else None

    # No usable scale when either bound is missing or the range is zero
    # (which would divide by zero); bars are then left as None.
    span = None
    if week_min is not None and week_max is not None and week_max != week_min:
        span = week_max - week_min

    out: List[Dict[str, Any]] = []
    for d in days:
        left = None
        width = None
        if span is not None and d.get("tmin_c") is not None and d.get("tmax_c") is not None:
            # Clamp for safety: left to 0..100, width to 1..100.
            left = max(0.0, min(100.0, ((d["tmin_c"] - week_min) / span) * 100.0))
            width = max(1.0, min(100.0, ((d["tmax_c"] - d["tmin_c"]) / span) * 100.0))
        out.append({**d, "bar_left_pct": left, "bar_width_pct": width})
    return out, week_min, week_max


def _parse_idokep(html: str, place: str) -> Dict[str, Any]:
    """Parse a forecast page into the JSON payload served by the API.

    Args:
        html: HTML text of the place's forecast page.
        place: Place name the page was fetched for; echoed in the payload and
            used to build the source URL.

    Returns:
        A dict with ``source``, ``place``, ``current``, ``hourly`` (up to 6
        entries), ``daily`` (up to 5 entries, each with bar geometry),
        ``weekly`` min/max and ``fetched_at_unix``. Values that cannot be found
        on the page are ``None``.
    """
    # Local import so this block stays self-contained.
    from urllib.parse import quote

    soup = BeautifulSoup(html, "lxml")
    daily, week_min, week_max = _with_week_bars(_parse_daily(soup))
    return {
        "source": {
            "name": "Időkép",
            # Encode the place as a single path segment so the advertised URL
            # cannot gain extra path segments or a query string from the input.
            "url": f"{IDOKEP_BASE}/idojaras/{quote(place, safe='')}",
        },
        "place": place,
        "current": _parse_current(soup),
        "hourly": _parse_hourly(soup),
        "daily": daily,
        "weekly": {
            "tmin_c": week_min,
            "tmax_c": week_max,
        },
        "fetched_at_unix": int(time.time()),
    }
def _record_metrics(place: str, payload: Dict[str, Any]) -> None:
    """Copy the current temperature and daily values from ``payload`` into the Prometheus gauges."""
    t = payload.get("current", {}).get("temp_c")
    if t is not None:
        CURRENT_TEMP_C.labels(place=place).set(float(t))
    for d in payload.get("daily", []):
        # Days without a scraped day-of-week label are grouped under "?".
        dow = d.get("dow") or "?"
        if d.get("tmin_c") is not None:
            DAILY_TMIN_C.labels(place=place, dow=dow).set(float(d["tmin_c"]))
        if d.get("tmax_c") is not None:
            DAILY_TMAX_C.labels(place=place, dow=dow).set(float(d["tmax_c"]))
        if d.get("prec_mm") is not None:
            DAILY_PREC_MM.labels(place=place, dow=dow).set(float(d["prec_mm"]))


@app.get("/api/idokep")
def api_idokep(place: str = Query(default=DEFAULT_PLACE, description="Időkép place name as in /idojaras/<place>")):
    """Return the forecast payload for ``place``, scraping at most once per cache TTL.

    A cached payload is returned while its expiry lies in the future.
    Otherwise the page is fetched and parsed, the result is cached for
    ``CACHE_TTL_SEC`` seconds and the metrics are updated. Any failure is
    logged and reported as a JSON error body with HTTP status 502.
    """
    # Local import so this block stays self-contained.
    import logging

    now = time.time()
    cached = _cache.get(place)
    if cached and cached[0] > now:
        return JSONResponse(cached[1])

    # The timer wraps the whole attempt, so failed scrapes are timed as well.
    with SCRAPE_SECONDS.labels(place=place).time():
        try:
            html = _fetch_place_html(place)
            payload = _parse_idokep(html, place)
            _cache[place] = (now + CACHE_TTL_SEC, payload)
            _record_metrics(place, payload)
            SCRAPES_TOTAL.labels(place=place, status="ok").inc()
            return JSONResponse(payload)
        except Exception:
            # Keep the catch-all so the client always gets a structured
            # answer, but record the traceback instead of discarding it.
            logging.getLogger(__name__).exception("Időkép scrape failed for place %r", place)
            SCRAPES_TOTAL.labels(place=place, status="error").inc()
            # return a structured error Glance can show
            return JSONResponse(
                {
                    "place": place,
                    "error": "Failed to scrape Időkép. Check the place string or Időkép page layout changes.",
                    "fetched_at_unix": int(time.time()),
                },
                status_code=502,
            )
@app.get("/metrics")
def metrics():
    """Serve the output of ``generate_latest()`` with the Prometheus content type."""
    exposition = generate_latest()
    return Response(content=exposition, media_type=CONTENT_TYPE_LATEST)
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: idokep-proxy
  namespace: glance-system
spec:
  replicas: 1
  selector:
    matchLabels:
      app: idokep-proxy
  template:
    metadata:
      labels:
        app: idokep-proxy
      # Annotation values must be strings, hence the quotes.
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8000"
        prometheus.io/path: "/metrics"
    spec:
      containers:
        - name: idokep-proxy
          image: python:3.12-slim
          ports:
            - containerPort: 8000
          env:
            - name: IDOKEP_PLACE
              value: "Budapest VIII. ker"
            - name: CACHE_TTL_SEC
              value: "900"
          resources:
            requests:
              cpu: 25m
              memory: 128Mi
            limits:
              memory: 256Mi
          # Dependencies are installed when the container starts, so the
          # server only begins listening once pip has finished. The probe
          # keeps the pod out of the Service endpoints until it answers.
          readinessProbe:
            httpGet:
              path: /metrics
              port: 8000
            periodSeconds: 10
            failureThreshold: 3
          volumeMounts:
            - name: app
              mountPath: /app
          workingDir: /app
          command: ["/bin/sh", "-lc"]
          args:
            # NOTE(review): packages are unpinned, so each restart may pull
            # different versions; consider pinning or a prebuilt image.
            - |
              pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 lxml prometheus_client &&
              uvicorn app:app --host 0.0.0.0 --port 8000
      volumes:
        # Mounts the ConfigMap keys as files under /app.
        - name: app
          configMap:
            name: idokep-proxy
---
apiVersion: v1
kind: Service
metadata:
  name: idokep-proxy
  namespace: glance-system
spec:
  type: ClusterIP
  # Selects pods labelled app=idokep-proxy.
  selector:
    app: idokep-proxy
  ports:
    - name: http
      port: 8000
      targetPort: 8000