Files
homelab-manifests/glance-system/idokep-proxy.yaml
T
2026-01-14 19:03:44 +01:00

238 lines
8.5 KiB
YAML

# Runs the Időkép scraper: a stock python:3.12-slim container that installs its
# dependencies at startup and then serves app.py (provided by the
# idokep-scraper-app ConfigMap, mounted at /app) with uvicorn on port 8000.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: idokep-scraper
  namespace: glance-system
spec:
  replicas: 1
  selector:
    matchLabels:
      app: idokep-scraper
  template:
    metadata:
      labels:
        app: idokep-scraper
    spec:
      containers:
        - name: idokep-scraper
          image: python:3.12-slim
          imagePullPolicy: IfNotPresent
          # app.py lives here, so "app:APP" is importable from the working dir.
          workingDir: /app
          env:
            - name: IDOKEP_URL
              value: "https://www.idokep.hu/idojaras/Budapest%20VII.%20ker"
            - name: PLACE_NAME
              value: "Budapest VII. ker"
          ports:
            - name: http
              containerPort: 8000
          command: ["/bin/sh", "-lc"]
          args:
            # `exec` replaces the shell with the Python process so that the
            # server itself receives SIGTERM on pod shutdown instead of the
            # wrapping shell.
            - |
              pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 prometheus-client &&
              exec python -c "import uvicorn; uvicorn.run('app:APP', host='0.0.0.0', port=8000)"
          # All probes hit /metrics: it only renders in-process metrics, so
          # probing never triggers a scrape of the upstream site.
          # Dependencies are installed on every start; allow up to
          # 60 x 5s = 5 minutes before the container is considered failed.
          startupProbe:
            httpGet:
              path: /metrics
              port: http
            periodSeconds: 5
            failureThreshold: 60
          readinessProbe:
            httpGet:
              path: /metrics
              port: http
            periodSeconds: 10
            timeoutSeconds: 5
          livenessProbe:
            httpGet:
              path: /metrics
              port: http
            periodSeconds: 20
            timeoutSeconds: 5
            failureThreshold: 3
          volumeMounts:
            - name: app
              mountPath: /app
      volumes:
        - name: app
          configMap:
            name: idokep-scraper-app
---
# Python source of the scraper. The idokep-scraper Deployment mounts this
# ConfigMap at /app and starts app.py from there with uvicorn ("app:APP").
apiVersion: v1
kind: ConfigMap
metadata:
  name: idokep-scraper-app
  namespace: glance-system
data:
  app.py: |
import json
import logging
import math
import os
import re
import time
from typing import List, Dict, Any, Optional
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI, Response
from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST
# ASGI application object; the container launches it through uvicorn as "app:APP".
APP = FastAPI()

# Page to scrape and the place name reported alongside it. Both may be
# overridden through environment variables of the same name.
# NOTE(review): these fallbacks say "VIII. ker" while the Deployment in this
# manifest sets "VII. ker" -- confirm which one is intended.
IDOKEP_URL = os.environ.get(
    "IDOKEP_URL",
    "https://www.idokep.hu/idojaras/Budapest%20VIII.%20ker",
)
PLACE_NAME = os.environ.get("PLACE_NAME", "Budapest VIII. ker")
SOURCE_NAME = "Időkép"

# User-Agent header sent with the scrape request (override with USER_AGENT).
UA = os.environ.get(
    "USER_AGENT",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome Safari",
)

# Prometheus metrics (optional), served by the /metrics endpoint.
SCRAPES = Counter(
    "idokep_scrapes_total",
    "Total Időkép scrapes",
    ["place", "status"],
)
SCRAPE_SECONDS = Histogram(
    "idokep_scrape_seconds",
    "Időkép scrape duration in seconds",
    ["place"],
)
CURRENT_TEMP = Gauge(
    "idokep_current_temp_c",
    "Current temperature in Celsius",
    ["place"],
)
DAILY_TMIN = Gauge(
    "idokep_daily_tmin_c",
    "Daily minimum temperature in Celsius",
    ["place", "dow"],
)
DAILY_TMAX = Gauge(
    "idokep_daily_tmax_c",
    "Daily maximum temperature in Celsius",
    ["place", "dow"],
)
HOURLY_TEMP = Gauge(
    "idokep_hourly_temp_c",
    "Hourly temperature in Celsius",
    ["place", "time"],
)
def _abs_url(maybe_relative: Optional[str]) -> Optional[str]:
if not maybe_relative:
return None
if maybe_relative.startswith("http://") or maybe_relative.startswith("https://"):
return maybe_relative
# Időkép uses /assets/... paths
return "https://www.idokep.hu" + maybe_relative
def _to_int_temp(s: str) -> Optional[float]:
if not s:
return None
s = s.strip().replace("˚C", "").replace("°C", "").replace("°", "")
try:
return float(s)
except Exception:
return None
def _text(el) -> str:
    """Return the stripped text of a parsed element, or "" when it is missing."""
    return el.get_text(strip=True) if el else ""


def _icon_url(el) -> Optional[str]:
    """Return the element's ``src`` attribute as an absolute URL, or None when missing."""
    return _abs_url(el.get("src") if el else None)


def _parse_current(soup) -> Dict[str, Any]:
    """Extract the current temperature, condition text and icon from the page."""
    return {
        "temp_c": _to_int_temp(_text(soup.select_one(".current-temperature"))),
        "condition": _text(soup.select_one(".current-weather")),
        "icon_url": _icon_url(soup.select_one(".forecast-bigicon")),
    }


def _parse_hourly(soup, limit: int = 8) -> List[Dict[str, Any]]:
    """Extract the first ``limit`` hourly cards (.ik.hourly-forecast-card) that parse cleanly."""
    hours: List[Dict[str, Any]] = []
    for card in soup.select(".ik.hourly-forecast-card")[:limit]:
        label = _text(card.select_one(".ik.hourly-forecast-hour"))
        temp = _to_int_temp(_text(card.select_one(".ik.temperature-circled")))
        # Skip cards without a time label or without a parseable temperature.
        if not label or temp is None:
            continue
        hours.append(
            {
                "time": label,  # e.g. "18:00"
                "temp_c": temp,  # e.g. -2
                "icon_url": _icon_url(card.select_one("img.ik.forecast-icon")),  # absolute URL
            }
        )
    return hours


def _parse_daily(soup, max_columns: int = 15, max_days: int = 5) -> List[Dict[str, Any]]:
    """Extract daily forecast columns from the bottom forecast table.

    Looks at the first ``max_columns`` ``.ik.dailyForecastCol`` columns, keeps
    those with a day name and both temperatures, and returns at most
    ``max_days`` of them.
    """
    days: List[Dict[str, Any]] = []
    for col in soup.select(".ik.daily-forecast-container .ik.dailyForecastCol")[:max_columns]:
        dow = _text(col.select_one(".ik.dfDay"))
        # Normal structure (most days).
        tmax = _to_int_temp(_text(col.select_one("div.ik.max")))
        tmin = _to_int_temp(_text(col.select_one("div.ik.min")))
        # Fallback structure (e.g. "vacation" days) where div.ik.max/min are
        # missing. In those cases the visible temps are usually the first two
        # numeric <a> texts inside .ik.min-max-container (order: max, min).
        if tmax is None or tmin is None:
            link_texts = (a.get_text(strip=True) for a in col.select(".ik.min-max-container a"))
            numeric = [txt for txt in link_texts if re.fullmatch(r"-?\d+", txt or "")]
            if len(numeric) >= 2:
                tmax = _to_int_temp(numeric[0])
                tmin = _to_int_temp(numeric[1])
        # Keep only rows that look valid.
        if dow and tmin is not None and tmax is not None:
            days.append(
                {
                    "daynum": _text(col.select_one(".ik.dfDayNum")),
                    "dow": dow,  # e.g. "Cs", "P", "Sz"
                    "tmin_c": tmin,
                    "tmax_c": tmax,
                    "icon_url": _icon_url(col.select_one("img.ik.forecast-icon")),
                }
            )
    # Limit to the first valid columns for the widget (including "vacation" days).
    return days[:max_days]


def scrape() -> Dict[str, Any]:
    """Fetch the configured Időkép page and extract the forecast from it.

    Returns:
        A dict with the keys ``source``, ``location``, ``current``,
        ``hourly`` (up to 8 entries), ``daily`` (up to 5 entries) and
        ``fetched_at_unix``.

    Raises:
        requests.RequestException: when the page cannot be fetched or the
            server answers with an error status.
    """
    r = requests.get(IDOKEP_URL, headers={"User-Agent": UA}, timeout=15)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")
    return {
        "source": {"name": SOURCE_NAME, "url": IDOKEP_URL},
        "location": {"name": PLACE_NAME},
        "current": _parse_current(soup),
        "hourly": _parse_hourly(soup),
        "daily": _parse_daily(soup),
        "fetched_at_unix": int(time.time()),
    }
def _update_gauges(data: Dict[str, Any]) -> None:
    """Mirror one scrape result into the Prometheus gauges.

    Best-effort: any failure is logged and never fails the /api request.
    """
    try:
        current = data.get("current", {}).get("temp_c")
        if current is not None:
            CURRENT_TEMP.labels(place=PLACE_NAME).set(float(current))
        # The "dow" and "time" label values differ from scrape to scrape, so
        # remove the previous series first; otherwise days/hours that are no
        # longer part of the forecast keep exporting their last value.
        for gauge in (DAILY_TMIN, DAILY_TMAX, HOURLY_TEMP):
            gauge.clear()
        for d in data.get("daily", []):
            DAILY_TMIN.labels(place=PLACE_NAME, dow=d["dow"]).set(float(d["tmin_c"]))
            DAILY_TMAX.labels(place=PLACE_NAME, dow=d["dow"]).set(float(d["tmax_c"]))
        for h in data.get("hourly", []):
            HOURLY_TEMP.labels(place=PLACE_NAME, time=h["time"]).set(float(h["temp_c"]))
    except Exception:
        logging.getLogger(__name__).exception("Failed to update Prometheus gauges")


@APP.get("/api")
def api():
    """Scrape Időkép on demand and return the forecast as JSON.

    The scrape is timed and counted per outcome ("ok"/"error"). When the
    scrape raises, the error is counted and the exception propagates to
    FastAPI unchanged.
    """
    with SCRAPE_SECONDS.labels(place=PLACE_NAME).time():
        try:
            data = scrape()
        except Exception:
            SCRAPES.labels(place=PLACE_NAME, status="error").inc()
            raise
        SCRAPES.labels(place=PLACE_NAME, status="ok").inc()
    _update_gauges(data)
    # IMPORTANT: force JSON content-type so Glance exposes `.JSON`
    return Response(
        content=json.dumps(data, ensure_ascii=False),
        media_type="application/json; charset=utf-8",
    )
@APP.get("/metrics")
def metrics():
    """Serve the current Prometheus metrics in the text exposition format."""
    payload = generate_latest()
    return Response(content=payload, media_type=CONTENT_TYPE_LATEST)
---
# Exposes the scraper pods (selected by the label app=idokep-scraper) on
# port 8000, forwarding to the container's port 8000.
apiVersion: v1
kind: Service
metadata:
  name: idokep-scraper
  namespace: glance-system
spec:
  selector:
    app: idokep-scraper
  ports:
    - name: http
      port: 8000
      targetPort: 8000