diff --git a/glance-system/idokep-proxy.yaml b/glance-system/idokep-proxy.yaml
new file mode 100644
index 0000000..50ca6c5
--- /dev/null
+++ b/glance-system/idokep-proxy.yaml
@@ -0,0 +1,273 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: idokep-proxy
+  namespace: glance-system
+data:
+  app.py: |
+    """FastAPI proxy that scrapes an Időkép forecast page and serves it as JSON."""
+    import os
+    import re
+    import time
+    from typing import Any, Dict, List, Optional, Tuple
+    from urllib.parse import quote, unquote
+
+    import requests
+    from bs4 import BeautifulSoup
+    from fastapi import FastAPI, Query
+    from fastapi.responses import JSONResponse
+
+    app = FastAPI()
+
+    IDOKEP_BASE = "https://www.idokep.hu"
+    DEFAULT_PLACE = os.getenv("IDOKEP_PLACE", "Budapest VII. ker")
+    USER_AGENT = os.getenv(
+        "IDOKEP_UA",
+        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
+    )
+
+    # Simple in-memory cache: {place: (expires_epoch, payload)}.
+    CACHE_TTL_SEC = int(os.getenv("CACHE_TTL_SEC", "600"))  # 10 min
+    # `place` comes from the query string, so cap how many keys are kept.
+    CACHE_MAX_ENTRIES = int(os.getenv("CACHE_MAX_ENTRIES", "256"))
+    _cache: Dict[str, Tuple[float, Dict[str, Any]]] = {}
+
+    def _num(s: Optional[str]) -> Optional[float]:
+        """Return the first number found in *s* as a float, or None."""
+        if s is None:
+            return None
+        # Accept a decimal comma and the typographic minus sign (U+2212).
+        text = s.replace(",", ".").replace("\u2212", "-")
+        m = re.search(r"-?\d+(\.\d+)?", text)
+        return float(m.group(0)) if m else None
+
+    def _abs_url(url: Optional[str]) -> Optional[str]:
+        """Make a protocol-relative or site-relative URL absolute."""
+        if not url:
+            return None
+        if url.startswith("//"):
+            return "https:" + url
+        if url.startswith("/"):
+            return IDOKEP_BASE + url
+        return url
+
+    def _pick_text(el) -> Optional[str]:
+        """Return the element's text joined with single spaces, or None if *el* is missing."""
+        if not el:
+            return None
+        return el.get_text(" ", strip=True)
+
+    def _place_url(place: str) -> str:
+        """Build the Időkép forecast URL for *place*.
+
+        The place is percent-encoded as ONE path segment (safe=""), so "/", "?"
+        or "#" in the user-supplied value cannot alter the upstream path, query
+        or fragment. Input that is already percent-encoded is decoded first so
+        it is not encoded twice.
+        """
+        return f"{IDOKEP_BASE}/idojaras/{quote(unquote(place), safe='')}"
+
+    def _fetch_place_html(place: str) -> str:
+        """Download the forecast page for *place*.
+
+        Raises requests.RequestException on network errors or HTTP 4xx/5xx responses.
+        """
+        r = requests.get(_place_url(place), headers={"User-Agent": USER_AGENT}, timeout=15)
+        r.raise_for_status()
+        return r.text
+
+    def _parse_idokep(html: str, place: str) -> Dict[str, Any]:
+        """Extract current conditions, up to 6 hourly and up to 5 daily entries from *html*.
+
+        Values whose element is not found are returned as None rather than raising.
+        """
+        soup = BeautifulSoup(html, "lxml")
+
+        # CURRENT
+        temp_el = soup.select_one(".current-temperature")
+        temp_c = _num(_pick_text(temp_el) or "")
+
+        icon_el = soup.select_one(".forecast-bigicon")
+        icon_url = _abs_url(icon_el.get("src") if icon_el else None)
+
+        cond_hu_el = soup.select_one(".weather-short-desc")
+        condition_hu = _pick_text(cond_hu_el)
+
+        # HOURLY (take first 6)
+        hourly_cards = soup.select(".new-hourly-forecast-card")
+        hourly: List[Dict[str, Any]] = []
+        for card in hourly_cards[:6]:
+            hour_el = card.select_one(".new-hourly-forecast-hour")
+            hour_txt = _pick_text(hour_el)
+
+            htemp_el = card.select_one(".tempValue .hover-over")
+            htemp_c = _num(_pick_text(htemp_el) or "")
+
+            hicon_el = card.select_one(".forecast-icon")
+            hicon_url = _abs_url(hicon_el.get("src") if hicon_el else None)
+
+            hprec_el = card.select_one(".hourly-rain-chance a")
+            hprec_pct = _num(_pick_text(hprec_el) or "")
+
+            hourly.append(
+                {
+                    "hour": hour_txt,  # e.g. "15:00"
+                    "temp_c": htemp_c,
+                    "icon_url": hicon_url,
+                    "precip_pct": hprec_pct,
+                }
+            )
+
+        # DAILY: take 5 columns. The Időkép layout usually has an extra leading
+        # column (the HA selectors used nth-child(2) as day 1), so skip the
+        # first column whenever there is more than one.
+        daily_cols = soup.select(".dailyForecastCol")
+        daily: List[Dict[str, Any]] = []
+        cols = daily_cols[1:] if len(daily_cols) >= 2 else daily_cols
+
+        for col in cols[:5]:
+            dow_el = col.select_one(".dfDay")
+            dow = _pick_text(dow_el)
+
+            daynum_el = col.select_one(".dfDayNum")
+            daynum = _pick_text(daynum_el)
+
+            dicon_el = col.select_one(".forecast")
+            dicon_url = _abs_url(dicon_el.get("src") if dicon_el else None)
+
+            # max/min sometimes show in different wrappers; try a few options
+            max_el = col.select_one(".max a") or col.select_one(".min-max-close a:nth-child(1)")
+            min_el = col.select_one(".min a") or col.select_one(".min-max-close a:nth-child(2)")
+            tmax_c = _num(_pick_text(max_el) or "")
+            tmin_c = _num(_pick_text(min_el) or "")
+
+            mm_el = col.select_one(".mm")
+            prec_mm = _num(_pick_text(mm_el) or "")
+
+            daily.append(
+                {
+                    "dow": dow,  # e.g. "Sze"
+                    "daynum": daynum,  # e.g. "14"
+                    "tmax_c": tmax_c,
+                    "tmin_c": tmin_c,
+                    "prec_mm": prec_mm,
+                    "icon_url": dicon_url,
+                }
+            )
+
+        return {
+            "source": {
+                "name": "Időkép",
+                "url": _place_url(place),
+            },
+            "place": place,
+            "current": {
+                "temp_c": temp_c,
+                "condition_hu": condition_hu,
+                "icon_url": icon_url,
+            },
+            "hourly": hourly,
+            "daily": daily,
+            "fetched_at_unix": int(time.time()),
+        }
+
+    def _cache_put(place: str, payload: Dict[str, Any], now: float) -> None:
+        """Store *payload* for *place*, keeping at most CACHE_MAX_ENTRIES entries."""
+        if place not in _cache and len(_cache) >= CACHE_MAX_ENTRIES:
+            # Iterate over a snapshot: sync handlers run in a thread pool, so the
+            # live dict may change while we prune.
+            for key, (expires, _) in list(_cache.items()):
+                if expires <= now:
+                    _cache.pop(key, None)
+            if len(_cache) >= CACHE_MAX_ENTRIES:
+                _cache.clear()  # crude, but bounds memory without an LRU
+        _cache[place] = (now + CACHE_TTL_SEC, payload)
+
+    @app.get("/api/idokep")
+    def api_idokep(place: str = Query(default=DEFAULT_PLACE, description="Időkép place name as used in the /idojaras/ URL")):
+        """Return the parsed forecast for *place*, served from cache while it is fresh.
+
+        Responds 400 for an empty or dot-only place and 502 when Időkép cannot be fetched.
+        """
+        place = place.strip()
+        if unquote(place) in ("", ".", ".."):
+            return JSONResponse({"error": "invalid place"}, status_code=400)
+
+        now = time.time()
+        cached = _cache.get(place)
+        if cached and cached[0] > now:
+            return JSONResponse(cached[1])
+
+        try:
+            html = _fetch_place_html(place)
+        except requests.RequestException as exc:
+            # Upstream trouble is a gateway error, not an unhandled 500.
+            return JSONResponse(
+                {"error": "failed to fetch Időkép page", "detail": str(exc)},
+                status_code=502,
+            )
+
+        payload = _parse_idokep(html, place)
+        _cache_put(place, payload, now)
+        return JSONResponse(payload)
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: idokep-proxy
+  namespace: glance-system
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: idokep-proxy
+  template:
+    metadata:
+      labels:
+        app: idokep-proxy
+    spec:
+      containers:
+        - name: idokep-proxy
+          image: python:3.12-slim
+          ports:
+            - containerPort: 8000
+          env:
+            - name: IDOKEP_PLACE
+              value: "Budapest VII. ker"
+            - name: CACHE_TTL_SEC
+              value: "900" # 15 minutes; overrides the 10-minute default in app.py
+          volumeMounts:
+            - name: app
+              mountPath: /app
+          workingDir: /app
+          command: ["/bin/sh","-lc"]
+          args:
+            - |
+              pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 lxml &&
+              exec uvicorn app:app --host 0.0.0.0 --port 8000
+          # Dependencies are installed at container start, so keep the pod out of
+          # the Service endpoints until uvicorn is actually listening.
+          readinessProbe:
+            tcpSocket:
+              port: 8000
+            periodSeconds: 10
+      volumes:
+        - name: app
+          configMap:
+            name: idokep-proxy
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: idokep-proxy
+  namespace: glance-system
+spec:
+  selector:
+    app: idokep-proxy
+  ports:
+    - name: http
+      port: 8000
+      targetPort: 8000
+  type: ClusterIP