Files
homelab-manifests/glance-system/idokep-proxy.yaml
T
2026-01-14 15:33:20 +01:00

216 lines
6.5 KiB
YAML

# ConfigMap that carries the FastAPI proxy source as the single file "app.py".
# The literal block scalar below must hold the Python source indented one
# level deeper than the "app.py" key.
apiVersion: v1
kind: ConfigMap
metadata:
  name: idokep-proxy
  namespace: glance-system
data:
  app.py: |
import os
import time
import re
from typing import Any, Dict, Optional, Tuple, List
import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI, Query
from fastapi.responses import JSONResponse
# ASGI application object; the route handlers below register themselves on it.
app = FastAPI()

# Root of the upstream site; also used to absolutize site-relative URLs.
IDOKEP_BASE = "https://www.idokep.hu"

# Place used when a request carries no ?place= parameter (env: IDOKEP_PLACE).
DEFAULT_PLACE = os.environ.get("IDOKEP_PLACE", "Budapest VII. ker")

# User-Agent header sent with upstream requests (env: IDOKEP_UA).
USER_AGENT = os.environ.get(
    "IDOKEP_UA",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
)

# Lifetime of a cached payload in seconds (env: CACHE_TTL_SEC, default 600 = 10 min).
CACHE_TTL_SEC = int(os.environ.get("CACHE_TTL_SEC", "600"))

# Simple in-memory cache, shaped as {place: (expires_epoch, payload)}.
_cache: Dict[str, Tuple[float, Dict[str, Any]]] = {}
def _num(s: str) -> Optional[float]:
if s is None:
return None
m = re.search(r"-?\d+(\.\d+)?", s.replace(",", "."))
return float(m.group(0)) if m else None
def _abs_url(url: Optional[str]) -> Optional[str]:
if not url:
return None
if url.startswith("//"):
return "https:" + url
if url.startswith("/"):
return IDOKEP_BASE + url
return url
def _pick_text(el) -> Optional[str]:
if not el:
return None
return el.get_text(" ", strip=True)
def _fetch_place_html(place: str) -> str:
    """Download the Időkép forecast page for ``place`` and return its HTML.

    Args:
        place: Raw (not percent-encoded) place name, e.g. "Budapest VII. ker".

    Returns:
        The response body as text.

    Raises:
        requests.RequestException: On connection errors, on timeout (15 s),
            or when the upstream answers with an error status
            (via ``raise_for_status``).
    """
    from urllib.parse import quote

    # Encode the place as ONE path segment. safe="" also escapes "/", "?" and
    # "#", which requote_uri leaves untouched; without that a caller-supplied
    # place could change the upstream path or smuggle in a query string.
    # Spaces become %20 and non-ASCII characters are UTF-8 percent-encoded.
    place_path = quote(place, safe="")
    url = f"{IDOKEP_BASE}/idojaras/{place_path}"
    r = requests.get(url, headers={"User-Agent": USER_AGENT}, timeout=15)
    r.raise_for_status()
    return r.text
def _parse_idokep(html: str, place: str) -> Dict[str, Any]:
    """Scrape current, hourly and daily forecast data out of an Időkép page.

    Args:
        html: Markup of the forecast page.
        place: Place name the page was fetched for; echoed in the payload and
            used to build the reported source URL.

    Returns:
        A dict with the keys "source", "place", "current", "hourly" (at most
        6 entries), "daily" (at most 5 entries) and "fetched_at_unix".
        A field is None whenever its element is missing or, for numeric
        fields, holds no number.
    """
    doc = BeautifulSoup(html, "lxml")

    def number_at(scope, selector: str) -> Optional[float]:
        # Number from the text of the first match of `selector`, else None.
        return _num(_pick_text(scope.select_one(selector)) or "")

    def icon_at(scope, selector: str) -> Optional[str]:
        # Absolute "src" of the first match of `selector`, else None.
        img = scope.select_one(selector)
        return _abs_url(img.get("src") if img else None)

    # CURRENT
    current = {
        "temp_c": number_at(doc, ".current-temperature"),
        "condition_hu": _pick_text(doc.select_one(".weather-short-desc")),
        "icon_url": icon_at(doc, ".forecast-bigicon"),
    }

    # HOURLY: only the first 6 cards are reported.
    hourly: List[Dict[str, Any]] = [
        {
            "hour": _pick_text(card.select_one(".new-hourly-forecast-hour")),  # e.g. "15:00"
            "temp_c": number_at(card, ".tempValue .hover-over"),
            "icon_url": icon_at(card, ".forecast-icon"),
            "precip_pct": number_at(card, ".hourly-rain-chance a"),
        }
        for card in doc.select(".new-hourly-forecast-card")[:6]
    ]

    # DAILY: the layout usually has an extra leading column, so the first
    # column is dropped whenever at least two exist; then up to 5 are taken.
    columns = doc.select(".dailyForecastCol")
    if len(columns) >= 2:
        columns = columns[1:]

    daily: List[Dict[str, Any]] = []
    for column in columns[:5]:
        # max/min sometimes sit in different wrappers; try both variants.
        high_el = column.select_one(".max a") or column.select_one(".min-max-close a:nth-child(1)")
        low_el = column.select_one(".min a") or column.select_one(".min-max-close a:nth-child(2)")
        daily.append(
            {
                "dow": _pick_text(column.select_one(".dfDay")),  # e.g. "Sze"
                "daynum": _pick_text(column.select_one(".dfDayNum")),  # e.g. "14"
                "tmax_c": _num(_pick_text(high_el) or ""),
                "tmin_c": _num(_pick_text(low_el) or ""),
                "prec_mm": number_at(column, ".mm"),
                "icon_url": icon_at(column, ".forecast"),
            }
        )

    source = {
        "name": "Időkép",
        "url": f"{IDOKEP_BASE}/idojaras/{requests.utils.requote_uri(place)}",
    }
    return {
        "source": source,
        "place": place,
        "current": current,
        "hourly": hourly,
        "daily": daily,
        "fetched_at_unix": int(time.time()),
    }
@app.get("/api/idokep")
def api_idokep(place: str = Query(default=DEFAULT_PLACE, description="Időkép place name as used in the /idojaras/<place> URL")):
    """Return the scraped forecast for ``place`` as JSON, using the cache.

    A cached payload is served while it is younger than CACHE_TTL_SEC.
    Otherwise the page is fetched and parsed again and the cache entry is
    replaced.

    If the upstream request fails, the last cached payload for the place is
    served even though it has expired (its "fetched_at_unix" shows its age).
    With nothing cached, a 502 response with an "error" body is returned
    instead of letting the exception surface as an unhandled 500.
    """
    now = time.time()
    cached = _cache.get(place)
    if cached and cached[0] > now:
        return JSONResponse(cached[1])

    try:
        html = _fetch_place_html(place)
    except requests.RequestException as exc:
        if cached:
            # Stale data beats no data for a dashboard widget.
            return JSONResponse(cached[1])
        return JSONResponse(
            {"error": "upstream_unavailable", "place": place, "detail": str(exc)},
            status_code=502,
        )

    payload = _parse_idokep(html, place)
    _cache[place] = (now + CACHE_TTL_SEC, payload)
    return JSONResponse(payload)
---
# Runs the FastAPI proxy from the idokep-proxy ConfigMap on a stock Python
# image; dependencies are installed with pip each time the container starts.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: idokep-proxy
  namespace: glance-system
spec:
  replicas: 1
  selector:
    matchLabels:
      app: idokep-proxy
  template:
    metadata:
      labels:
        app: idokep-proxy
    spec:
      containers:
        - name: idokep-proxy
          image: python:3.12-slim
          ports:
            - containerPort: 8000
          env:
            - name: IDOKEP_PLACE
              value: "Budapest VII. ker"
            - name: CACHE_TTL_SEC
              # 15 minutes; overrides the app's 600 s default
              # (chosen to match the HA scan_interval).
              value: "900"
          # The port only opens after "pip install" finishes and uvicorn is
          # listening; keep the pod out of the Service endpoints until then.
          readinessProbe:
            tcpSocket:
              port: 8000
            periodSeconds: 10
          volumeMounts:
            - name: app
              mountPath: /app
          workingDir: /app
          command: ["/bin/sh", "-lc"]
          args:
            # "exec" replaces the shell with uvicorn so that SIGTERM reaches
            # the server directly on pod shutdown.
            - |
              pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 lxml &&
              exec uvicorn app:app --host 0.0.0.0 --port 8000
      volumes:
        - name: app
          configMap:
            name: idokep-proxy
---
# Cluster-internal endpoint for the proxy: port 8000 is forwarded to port 8000
# of the pods labelled app=idokep-proxy.
apiVersion: v1
kind: Service
metadata:
  name: idokep-proxy
  namespace: glance-system
spec:
  selector:
    app: idokep-proxy
  ports:
    - name: http
      port: 8000
      targetPort: 8000
  type: ClusterIP