# ConfigMap carrying the proxy application source; it is mounted at /app by the
# Deployment below and started with `uvicorn app:app`.
apiVersion: v1
kind: ConfigMap
metadata:
  name: idokep-proxy
  namespace: glance-system
data:
  app.py: |
    """Tiny JSON proxy that scrapes an Időkép forecast page.

    Exposes GET /api/idokep?place=<name>, which returns the current
    conditions, the first hourly cards and the first daily columns parsed
    from https://www.idokep.hu/idojaras/<place>. Results are kept in a small
    in-memory cache for CACHE_TTL_SEC seconds per place.
    """
    import os
    import re
    import threading
    import time
    from typing import Any, Dict, List, Optional, Tuple
    from urllib.parse import quote

    import requests
    from bs4 import BeautifulSoup
    from fastapi import FastAPI, HTTPException, Query
    from fastapi.responses import JSONResponse

    app = FastAPI()

    IDOKEP_BASE = "https://www.idokep.hu"
    DEFAULT_PLACE = os.getenv("IDOKEP_PLACE", "Budapest VII. ker")
    USER_AGENT = os.getenv(
        "IDOKEP_UA",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
    )

    # Simple in-memory cache: {place: (expires_epoch, payload)}
    CACHE_TTL_SEC = int(os.getenv("CACHE_TTL_SEC", "600"))  # 10 min
    # Upper bound on cached places; keys come from the request, so the cache
    # must not be allowed to grow without limit.
    CACHE_MAX_ENTRIES = max(1, int(os.getenv("CACHE_MAX_ENTRIES", "256")))
    _cache: Dict[str, Tuple[float, Dict[str, Any]]] = {}
    # Sync endpoints run in FastAPI's thread pool, so cache mutation is guarded.
    _cache_lock = threading.Lock()


    def _num(s: Optional[str]) -> Optional[float]:
        """Return the first decimal number found in *s*, or None if there is none.

        A decimal comma is accepted ("3,5" -> 3.5).
        """
        if s is None:
            return None
        m = re.search(r"-?\d+(\.\d+)?", s.replace(",", "."))
        return float(m.group(0)) if m else None


    def _abs_url(url: Optional[str]) -> Optional[str]:
        """Turn a protocol-relative or site-relative URL into an absolute one.

        Returns None for an empty/None input; any other URL is returned as is.
        """
        if not url:
            return None
        if url.startswith("//"):
            return "https:" + url
        if url.startswith("/"):
            return IDOKEP_BASE + url
        return url


    def _pick_text(el) -> Optional[str]:
        """Return the whitespace-normalised text of a parsed element, or None."""
        if not el:
            return None
        return el.get_text(" ", strip=True)


    def _place_url(place: str) -> str:
        """Build the Időkép forecast URL for *place*.

        The place name is percent-encoded as a single path segment
        (safe="" also escapes "/", "?" and "#"), so a caller-supplied value
        cannot alter the path or add a query string to the upstream request.
        """
        return f"{IDOKEP_BASE}/idojaras/{quote(place, safe='')}"


    def _fetch_place_html(place: str) -> str:
        """Download the forecast page for *place* and return its HTML.

        Raises:
            requests.RequestException: on connection errors, timeouts or a
                non-2xx response (via raise_for_status).
        """
        r = requests.get(
            _place_url(place),
            headers={"User-Agent": USER_AGENT},
            timeout=15,
        )
        r.raise_for_status()
        return r.text


    def _parse_idokep(html: str, place: str) -> Dict[str, Any]:
        """Extract current, hourly and daily forecast data from a forecast page.

        Every value is looked up by CSS selector; a selector that matches
        nothing yields None for that field rather than an error.

        Returns:
            A JSON-serialisable dict with the keys "source", "place",
            "current", "hourly", "daily" and "fetched_at_unix".
        """
        soup = BeautifulSoup(html, "lxml")

        # CURRENT
        temp_el = soup.select_one(".current-temperature")
        temp_c = _num(_pick_text(temp_el) or "")

        icon_el = soup.select_one(".forecast-bigicon")
        icon_url = _abs_url(icon_el.get("src") if icon_el else None)

        cond_hu_el = soup.select_one(".weather-short-desc")
        condition_hu = _pick_text(cond_hu_el)

        # HOURLY (take first 6)
        hourly_cards = soup.select(".new-hourly-forecast-card")
        hourly: List[Dict[str, Any]] = []
        for card in hourly_cards[:6]:
            hour_el = card.select_one(".new-hourly-forecast-hour")
            hour_txt = _pick_text(hour_el)

            htemp_el = card.select_one(".tempValue .hover-over")
            htemp_c = _num(_pick_text(htemp_el) or "")

            hicon_el = card.select_one(".forecast-icon")
            hicon_url = _abs_url(hicon_el.get("src") if hicon_el else None)

            hprec_el = card.select_one(".hourly-rain-chance a")
            hprec_pct = _num(_pick_text(hprec_el) or "")

            hourly.append(
                {
                    "hour": hour_txt,  # e.g. "15:00"
                    "temp_c": htemp_c,
                    "icon_url": hicon_url,
                    "precip_pct": hprec_pct,
                }
            )

        # DAILY (take next 5 columns; Időkép layout usually has an extra leading
        # column, HA used nth-child(2) as day1)
        daily_cols = soup.select(".dailyForecastCol")
        daily: List[Dict[str, Any]] = []
        # Skip first column if it looks like a header-ish column; keep behavior
        # close to your HA selectors.
        cols = daily_cols[1:] if len(daily_cols) >= 2 else daily_cols
        for col in cols[:5]:
            dow_el = col.select_one(".dfDay")
            dow = _pick_text(dow_el)

            daynum_el = col.select_one(".dfDayNum")
            daynum = _pick_text(daynum_el)

            dicon_el = col.select_one(".forecast")
            dicon_url = _abs_url(dicon_el.get("src") if dicon_el else None)

            # max/min sometimes show in different wrappers; try a few options
            max_el = col.select_one(".max a") or col.select_one(".min-max-close a:nth-child(1)")
            min_el = col.select_one(".min a") or col.select_one(".min-max-close a:nth-child(2)")
            tmax_c = _num(_pick_text(max_el) or "")
            tmin_c = _num(_pick_text(min_el) or "")

            mm_el = col.select_one(".mm")
            prec_mm = _num(_pick_text(mm_el) or "")

            daily.append(
                {
                    "dow": dow,  # e.g. "Sze"
                    "daynum": daynum,  # e.g. "14"
                    "tmax_c": tmax_c,
                    "tmin_c": tmin_c,
                    "prec_mm": prec_mm,
                    "icon_url": dicon_url,
                }
            )

        return {
            "source": {
                "name": "Időkép",
                "url": _place_url(place),
            },
            "place": place,
            "current": {
                "temp_c": temp_c,
                "condition_hu": condition_hu,
                "icon_url": icon_url,
            },
            "hourly": hourly,
            "daily": daily,
            "fetched_at_unix": int(time.time()),
        }


    def _cache_put(place: str, payload: Dict[str, Any], now: float) -> None:
        """Store *payload* for *place*, keeping the cache bounded.

        Expired entries are dropped first; if the cache is still full, the
        entry that would expire soonest is evicted to make room.
        """
        with _cache_lock:
            expired = [key for key, (expires, _) in _cache.items() if expires <= now]
            for key in expired:
                del _cache[key]
            if place not in _cache and len(_cache) >= CACHE_MAX_ENTRIES:
                soonest = min(_cache, key=lambda key: _cache[key][0])
                del _cache[soonest]
            _cache[place] = (now + CACHE_TTL_SEC, payload)


    @app.get("/api/idokep")
    def api_idokep(
        place: str = Query(
            default=DEFAULT_PLACE,
            description="Időkép place name as used in the /idojaras/ URL",
        ),
    ):
        """Return the parsed forecast for *place*, served from cache when fresh.

        Responds with HTTP 502 when the upstream page cannot be fetched.
        """
        now = time.time()
        with _cache_lock:
            cached = _cache.get(place)
        if cached and cached[0] > now:
            return JSONResponse(cached[1])

        try:
            html = _fetch_place_html(place)
        except requests.RequestException as exc:
            # Report an upstream problem as a gateway error instead of an
            # unhandled exception (HTTP 500).
            raise HTTPException(
                status_code=502,
                detail="Upstream Időkép request failed",
            ) from exc

        payload = _parse_idokep(html, place)
        _cache_put(place, payload, now)
        return JSONResponse(payload)
---
# Runs the proxy from a stock Python image; dependencies are installed at
# container start and the app source comes from the ConfigMap above.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: idokep-proxy
  namespace: glance-system
spec:
  replicas: 1
  selector:
    matchLabels:
      app: idokep-proxy
  template:
    metadata:
      labels:
        app: idokep-proxy
    spec:
      containers:
        - name: idokep-proxy
          image: python:3.12-slim
          ports:
            - containerPort: 8000
          env:
            - name: IDOKEP_PLACE
              value: "Budapest VII. ker"
            - name: CACHE_TTL_SEC
              value: "900" # 15 minutes, matches your HA scan_interval philosophy
          volumeMounts:
            - name: app
              mountPath: /app
          workingDir: /app
          command: ["/bin/sh","-lc"]
          args:
            - |
              pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 lxml &&
              uvicorn app:app --host 0.0.0.0 --port 8000
          # The container spends its first seconds in `pip install`; keep the
          # pod out of the Service endpoints until uvicorn is listening.
          # A TCP check is used so the probe never triggers an upstream fetch.
          readinessProbe:
            tcpSocket:
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 5
      volumes:
        - name: app
          configMap:
            name: idokep-proxy
---
# Cluster-internal endpoint for the proxy.
apiVersion: v1
kind: Service
metadata:
  name: idokep-proxy
  namespace: glance-system
spec:
  selector:
    app: idokep-proxy
  ports:
    - name: http
      port: 8000
      targetPort: 8000
  type: ClusterIP