# ConfigMap carrying the FastAPI scraper that is mounted into the Deployment
# below as /app/app.py.
apiVersion: v1
kind: ConfigMap
metadata:
  name: idokep-proxy
  namespace: glance-system
data:
  app.py: |
    """HTTP proxy that scrapes an Időkép forecast page into JSON and Prometheus metrics."""
    import logging
    import os
    import re
    import time
    from typing import Any, Dict, List, Optional, Tuple
    from urllib.parse import quote

    import requests
    from bs4 import BeautifulSoup
    from fastapi import FastAPI, Query, Response
    from fastapi.responses import JSONResponse
    from prometheus_client import Gauge, Counter, Histogram, generate_latest, CONTENT_TYPE_LATEST

    logger = logging.getLogger("idokep-proxy")

    app = FastAPI()

    IDOKEP_BASE = "https://www.idokep.hu"
    DEFAULT_PLACE = os.getenv("IDOKEP_PLACE", "Budapest VIII. ker")
    USER_AGENT = os.getenv(
        "IDOKEP_UA",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
    )

    CACHE_TTL_SEC = int(os.getenv("CACHE_TTL_SEC", "900"))  # 15 minutes
    # Upper bound on cached places; the cache key comes from the query string,
    # so without a cap arbitrary requests would grow the dict forever.
    CACHE_MAX_ENTRIES = max(1, int(os.getenv("CACHE_MAX_ENTRIES", "256")))
    # place -> (expiry as unix time, payload)
    _cache: Dict[str, Tuple[float, Dict[str, Any]]] = {}

    # --- Prometheus metrics (place as label) ---
    # NOTE(review): `place` is taken from the request query string, so these
    # labels stay low-cardinality only while callers use a handful of places.
    SCRAPES_TOTAL = Counter("idokep_scrapes_total", "Total Időkép scrapes", ["place", "status"])
    SCRAPE_SECONDS = Histogram("idokep_scrape_seconds", "Időkép scrape duration in seconds", ["place"])

    CURRENT_TEMP_C = Gauge("idokep_current_temp_c", "Current temperature in Celsius", ["place"])
    DAILY_TMIN_C = Gauge("idokep_daily_tmin_c", "Daily minimum temperature in Celsius", ["place", "dow"])
    DAILY_TMAX_C = Gauge("idokep_daily_tmax_c", "Daily maximum temperature in Celsius", ["place", "dow"])
    DAILY_PREC_MM = Gauge("idokep_daily_precip_mm", "Daily precipitation in mm", ["place", "dow"])

    _NUM_RE = re.compile(r"-?\d+(\.\d+)?")


    def _num(s: Optional[str]) -> Optional[float]:
        """Return the first number found in *s*, or None when there is none.

        A decimal comma is accepted, and the typographic minus sign (U+2212)
        is treated like an ASCII hyphen so negative values keep their sign.
        """
        if s is None:
            return None
        m = _NUM_RE.search(s.replace(",", ".").replace("\u2212", "-"))
        return float(m.group(0)) if m else None


    def _abs_url(url: Optional[str]) -> Optional[str]:
        """Turn a protocol-relative or site-relative URL into an absolute one."""
        if not url:
            return None
        if url.startswith("//"):
            return "https:" + url
        if url.startswith("/"):
            return IDOKEP_BASE + url
        return url


    def _pick_text(el) -> Optional[str]:
        """Return the whitespace-normalised text of *el*, or None if *el* is missing."""
        if el is None:
            return None
        return el.get_text(" ", strip=True)


    def _src_url(el) -> Optional[str]:
        """Return the absolute ``src`` URL of *el*, or None if *el* is missing."""
        if el is None:
            return None
        return _abs_url(el.get("src"))


    def _place_url(place: str) -> str:
        """Build the forecast page URL for *place*.

        The place is quoted as a single path segment (``safe=""``) so that
        ``/``, ``?`` or ``#`` in the caller-supplied value cannot change the
        path or query of the upstream request.
        """
        return f"{IDOKEP_BASE}/idojaras/{quote(place, safe='')}"


    def _fetch_place_html(place: str) -> str:
        """Download the forecast page for *place*.

        Raises:
            requests.RequestException: on network errors or a non-2xx status.
        """
        r = requests.get(_place_url(place), headers={"User-Agent": USER_AGENT}, timeout=20)
        r.raise_for_status()
        return r.text


    def _parse_idokep(html: str, place: str) -> Dict[str, Any]:
        """Extract current, hourly and daily data from a forecast page.

        Fields whose element is not found in *html* are returned as None
        (or as empty lists for ``hourly`` / ``daily``).
        """
        soup = BeautifulSoup(html, "lxml")

        # CURRENT
        temp_c = _num(_pick_text(soup.select_one(".current-temperature")) or "")
        icon_url = _src_url(soup.select_one(".forecast-bigicon"))
        condition_hu = _pick_text(soup.select_one(".weather-short-desc"))

        # HOURLY (first 6)
        hourly: List[Dict[str, Any]] = []
        for card in soup.select(".new-hourly-forecast-card")[:6]:
            hourly.append(
                {
                    "hour": _pick_text(card.select_one(".new-hourly-forecast-hour")),  # e.g. "15:00"
                    "temp_c": _num(_pick_text(card.select_one(".tempValue .hover-over")) or ""),
                    "icon_url": _src_url(card.select_one(".forecast-icon")),
                    "precip_pct": _num(_pick_text(card.select_one(".hourly-rain-chance a")) or ""),
                }
            )

        # DAILY (next 5; the first column is skipped when there are at least two)
        daily_cols = soup.select(".dailyForecastCol")
        cols = daily_cols[1:] if len(daily_cols) >= 2 else daily_cols

        daily_raw: List[Dict[str, Any]] = []
        for col in cols[:5]:
            # various layouts: try a few
            max_el = col.select_one(".max a") or col.select_one(".min-max-close a:nth-child(1)")
            min_el = col.select_one(".min a") or col.select_one(".min-max-close a:nth-child(2)")
            daily_raw.append(
                {
                    "dow": _pick_text(col.select_one(".dfDay")),  # e.g. "Sze"
                    "daynum": _pick_text(col.select_one(".dfDayNum")),  # e.g. "14"
                    "tmax_c": _num(_pick_text(max_el) or ""),
                    "tmin_c": _num(_pick_text(min_el) or ""),
                    "prec_mm": _num(_pick_text(col.select_one(".mm")) or ""),
                    "icon_url": _src_url(col.select_one(".forecast")),
                }
            )

        # Weekly min/max drive the position and width of the per-day range bars.
        mins = [d["tmin_c"] for d in daily_raw if d["tmin_c"] is not None]
        maxs = [d["tmax_c"] for d in daily_raw if d["tmax_c"] is not None]
        week_min = min(mins) if mins else None
        week_max = max(maxs) if maxs else None

        # No usable range (missing data or a flat week) -> bars stay None.
        denom = None
        if week_min is not None and week_max is not None and week_max != week_min:
            denom = week_max - week_min

        daily: List[Dict[str, Any]] = []
        for d in daily_raw:
            left = None
            width = None
            if denom is not None and d["tmin_c"] is not None and d["tmax_c"] is not None:
                left = ((d["tmin_c"] - week_min) / denom) * 100.0
                width = ((d["tmax_c"] - d["tmin_c"]) / denom) * 100.0
                # clamp for safety; width keeps a 1% floor so a bar is never invisible
                left = max(0.0, min(100.0, left))
                width = max(1.0, min(100.0, width))
            daily.append({**d, "bar_left_pct": left, "bar_width_pct": width})

        return {
            "source": {
                "name": "Időkép",
                "url": _place_url(place),
            },
            "place": place,
            "current": {
                "temp_c": temp_c,
                "condition_hu": condition_hu,
                "icon_url": icon_url,
            },
            "hourly": hourly,
            "daily": daily,
            "weekly": {
                "tmin_c": week_min,
                "tmax_c": week_max,
            },
            "fetched_at_unix": int(time.time()),
        }


    def _cache_put(place: str, payload: Dict[str, Any], now: float) -> None:
        """Store *payload* for *place*, keeping the cache within CACHE_MAX_ENTRIES."""
        # Iterate over snapshots: the endpoint is a sync handler, so other
        # requests may touch the dict while this one is pruning it.
        for key, (expires_at, _) in list(_cache.items()):
            if expires_at <= now:
                _cache.pop(key, None)

        # Still full after dropping expired entries: evict those closest to expiry.
        overflow = len(_cache) - CACHE_MAX_ENTRIES + 1
        if overflow > 0:
            oldest_first = sorted(_cache.items(), key=lambda kv: kv[1][0])
            for key, _ in oldest_first[:overflow]:
                _cache.pop(key, None)

        _cache[place] = (now + CACHE_TTL_SEC, payload)


    @app.get("/api/idokep")
    def api_idokep(place: str = Query(default=DEFAULT_PLACE, description="Időkép place name as in /idojaras/")):
        """Return the forecast for *place* as JSON, served from cache while fresh.

        On any scrape or parse failure a structured JSON error is returned
        with status 502 and the failure is logged with its traceback.
        """
        now = time.time()
        cached = _cache.get(place)
        if cached and cached[0] > now:
            return JSONResponse(cached[1])

        with SCRAPE_SECONDS.labels(place=place).time():
            try:
                html = _fetch_place_html(place)
                payload = _parse_idokep(html, place)
                _cache_put(place, payload, now)

                # update metrics (best-effort)
                t = payload.get("current", {}).get("temp_c")
                if t is not None:
                    CURRENT_TEMP_C.labels(place=place).set(float(t))

                for d in payload.get("daily", []):
                    dow = d.get("dow") or "?"
                    if d.get("tmin_c") is not None:
                        DAILY_TMIN_C.labels(place=place, dow=dow).set(float(d["tmin_c"]))
                    if d.get("tmax_c") is not None:
                        DAILY_TMAX_C.labels(place=place, dow=dow).set(float(d["tmax_c"]))
                    if d.get("prec_mm") is not None:
                        DAILY_PREC_MM.labels(place=place, dow=dow).set(float(d["prec_mm"]))

                SCRAPES_TOTAL.labels(place=place, status="ok").inc()
                return JSONResponse(payload)

            except Exception:
                # Top-level boundary: keep the response generic, but record the
                # cause so failures can be diagnosed from the pod logs.
                logger.exception("Scrape failed for place %r", place)
                SCRAPES_TOTAL.labels(place=place, status="error").inc()
                # return a structured error Glance can show
                return JSONResponse(
                    {
                        "place": place,
                        "error": "Failed to scrape Időkép. Check the place string or Időkép page layout changes.",
                        "fetched_at_unix": int(time.time()),
                    },
                    status_code=502,
                )


    @app.get("/metrics")
    def metrics():
        """Expose the collected metrics in the Prometheus text format."""
        return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST)
---
# Single-replica Deployment that installs the dependencies at start-up and
# runs the app from the ConfigMap mounted at /app.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: idokep-proxy
  namespace: glance-system
spec:
  replicas: 1
  selector:
    matchLabels:
      app: idokep-proxy
  template:
    metadata:
      labels:
        app: idokep-proxy
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8000"
        prometheus.io/path: "/metrics"
    spec:
      containers:
        - name: idokep-proxy
          image: python:3.12-slim
          ports:
            - containerPort: 8000
          # Keep the pod out of the Service endpoints until uvicorn answers;
          # the pip install below runs first and can take a while.
          readinessProbe:
            httpGet:
              path: /metrics
              port: 8000
            periodSeconds: 10
          env:
            - name: IDOKEP_PLACE
              value: "Budapest VIII. ker"
            - name: CACHE_TTL_SEC
              value: "900"
          resources:
            requests:
              cpu: 25m
              memory: 128Mi
            limits:
              memory: 256Mi
          volumeMounts:
            - name: app
              mountPath: /app
          workingDir: /app
          command: ["/bin/sh","-lc"]
          # NOTE: dependencies are installed unpinned on every container start;
          # a prebuilt image with pinned versions would start faster and be
          # reproducible. `exec` replaces the shell so uvicorn receives SIGTERM
          # directly and can shut down gracefully.
          args:
            - |
              pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 lxml prometheus_client &&
              exec uvicorn app:app --host 0.0.0.0 --port 8000
      volumes:
        - name: app
          configMap:
            name: idokep-proxy
---
# Cluster-internal Service in front of the proxy pod.
apiVersion: v1
kind: Service
metadata:
  name: idokep-proxy
  namespace: glance-system
spec:
  selector:
    app: idokep-proxy
  ports:
    - name: http
      port: 8000
      targetPort: 8000
  type: ClusterIP