---
# Deployment: runs the Időkép scraper API (FastAPI + uvicorn) as one replica.
# The application code is not baked into the image; it is mounted from the
# `idokep-scraper-app` ConfigMap defined further down in this file.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: idokep-scraper
  namespace: glance-system
spec:
  replicas: 1
  selector:
    matchLabels:
      app: idokep-scraper
  template:
    metadata:
      labels:
        app: idokep-scraper
    spec:
      containers:
        - name: idokep-scraper
          image: python:3.12-slim
          imagePullPolicy: IfNotPresent
          env:
            - name: IDOKEP_URL
              value: "https://www.idokep.hu/idojaras/Budapest%20VII.%20ker"
            - name: PLACE_NAME
              value: "Budapest VII. ker"
          ports:
            - name: http
              containerPort: 8000
          command: ["/bin/sh", "-lc"]
          # Dependencies are installed at container start, then the shell is
          # replaced (`exec`) by the Python process so that uvicorn receives
          # SIGTERM directly and can shut down gracefully.
          args:
            - |
              pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 prometheus-client &&
              exec python -c "import uvicorn; uvicorn.run('app:APP', host='0.0.0.0', port=8000)"
          # The server only starts listening after `pip install` finishes, so
          # keep the pod out of the Service endpoints until it really answers.
          # /metrics is used because it does not trigger an upstream scrape.
          readinessProbe:
            httpGet:
              path: /metrics
              port: http
            periodSeconds: 10
            failureThreshold: 3
          volumeMounts:
            - name: app
              mountPath: /app
          workingDir: /app
      volumes:
        - name: app
          configMap:
            name: idokep-scraper-app
---
# ConfigMap: holds the scraper application source, mounted at /app/app.py.
apiVersion: v1
kind: ConfigMap
metadata:
  name: idokep-scraper-app
  namespace: glance-system
data:
  app.py: |
    """Időkép weather scraper exposing a JSON API and Prometheus metrics.

    Endpoints:
      GET /api      -- scrape the configured Időkép page and return JSON.
      GET /metrics  -- Prometheus exposition of the scrape metrics.
    """
    import json
    import logging
    import os
    import re
    import time
    from typing import Any, Dict, List, Optional
    from urllib.parse import urljoin

    import requests
    from bs4 import BeautifulSoup
    from fastapi import FastAPI, Response
    from prometheus_client import (
        CONTENT_TYPE_LATEST,
        Counter,
        Gauge,
        Histogram,
        generate_latest,
    )

    LOG = logging.getLogger("idokep-scraper")

    APP = FastAPI()

    # Defaults below are used only when the environment variable is unset;
    # the Deployment in this manifest sets both IDOKEP_URL and PLACE_NAME.
    IDOKEP_URL = os.getenv(
        "IDOKEP_URL",
        "https://www.idokep.hu/idojaras/Budapest%20VIII.%20ker",
    )
    PLACE_NAME = os.getenv("PLACE_NAME", "Budapest VIII. ker")
    SOURCE_NAME = "Időkép"
    BASE_URL = "https://www.idokep.hu"

    UA = os.getenv(
        "USER_AGENT",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome Safari",
    )

    # Prometheus metrics (optional)
    SCRAPES = Counter("idokep_scrapes_total", "Total Időkép scrapes", ["place", "status"])
    SCRAPE_SECONDS = Histogram("idokep_scrape_seconds", "Időkép scrape duration in seconds", ["place"])
    CURRENT_TEMP = Gauge("idokep_current_temp_c", "Current temperature in Celsius", ["place"])
    DAILY_TMIN = Gauge("idokep_daily_tmin_c", "Daily minimum temperature in Celsius", ["place", "dow"])
    DAILY_TMAX = Gauge("idokep_daily_tmax_c", "Daily maximum temperature in Celsius", ["place", "dow"])
    HOURLY_TEMP = Gauge("idokep_hourly_temp_c", "Hourly temperature in Celsius", ["place", "time"])


    def _abs_url(maybe_relative: Optional[str]) -> Optional[str]:
        """Return an absolute URL for an asset reference, or None if empty.

        Already-absolute http(s) URLs are returned unchanged. Anything else
        is resolved against the Időkép origin, so "/assets/x", "assets/x"
        and protocol-relative "//host/x" all yield a usable URL.
        """
        if not maybe_relative:
            return None
        if maybe_relative.startswith(("http://", "https://")):
            return maybe_relative
        # Időkép uses /assets/... paths
        return urljoin(BASE_URL + "/", maybe_relative)


    def _to_int_temp(s: str) -> Optional[float]:
        """Parse a temperature label such as "-2˚C" into a number.

        Despite the historical name, the value is returned as a float.
        Returns None for empty or non-numeric input.
        """
        if not s:
            return None
        s = s.strip().replace("˚C", "").replace("°C", "").replace("°", "")
        try:
            return float(s)
        except ValueError:
            return None


    def _text(el) -> str:
        """Return the stripped text of a parsed element, or "" if missing."""
        return el.get_text(strip=True) if el else ""


    def _icon(el) -> Optional[str]:
        """Return the absolute `src` URL of an <img> element, or None."""
        return _abs_url(el.get("src") if el else None)


    def scrape() -> Dict[str, Any]:
        """Fetch IDOKEP_URL and extract current, hourly and daily forecasts.

        Returns a dict with the keys "source", "location", "current",
        "hourly", "daily" and "fetched_at_unix".

        Raises whatever `requests` raises on network errors, timeouts
        (15 s) or non-2xx responses.
        """
        headers = {"User-Agent": UA}
        r = requests.get(IDOKEP_URL, headers=headers, timeout=15)
        r.raise_for_status()

        soup = BeautifulSoup(r.text, "html.parser")

        # Current conditions
        cur_temp = _to_int_temp(_text(soup.select_one(".current-temperature")))
        cur_cond = _text(soup.select_one(".current-weather"))
        cur_icon = _icon(soup.select_one(".forecast-bigicon"))

        # Hourly cards (.ik.hourly-forecast-card); at most the first 8.
        hourly: List[Dict[str, Any]] = []
        for card in soup.select(".ik.hourly-forecast-card")[:8]:
            t = _text(card.select_one(".ik.hourly-forecast-hour"))
            temp = _to_int_temp(_text(card.select_one(".ik.temperature-circled")))
            icon = _icon(card.select_one("img.ik.forecast-icon"))

            # Skip cards without both a time label and a parsable temperature.
            if t and temp is not None:
                hourly.append(
                    {
                        "time": t,  # e.g. "18:00"
                        "temp_c": temp,  # e.g. -2.0
                        "icon_url": icon,  # absolute URL
                    }
                )

        # Daily columns (bottom forecast table:
        # .ik.daily-forecast-container .ik.dailyForecastCol)
        daily: List[Dict[str, Any]] = []
        for col in soup.select(".ik.daily-forecast-container .ik.dailyForecastCol")[:15]:
            dow = _text(col.select_one(".ik.dfDay"))
            icon = _icon(col.select_one("img.ik.forecast-icon"))

            # Normal structure (most days)
            tmax = _to_int_temp(_text(col.select_one("div.ik.max")))
            tmin = _to_int_temp(_text(col.select_one("div.ik.min")))

            # Fallback structure (e.g. "vacation" days) where div.ik.max/min are missing.
            # In those cases the visible temps are usually the first two numeric texts
            # inside .ik.min-max-container (order: max, min).
            if tmax is None or tmin is None:
                vals: List[str] = []
                for a in col.select(".ik.min-max-container a"):
                    txt = a.get_text(strip=True)
                    if re.fullmatch(r"-?\d+", txt or ""):
                        vals.append(txt)
                if len(vals) >= 2:
                    tmax = _to_int_temp(vals[0])
                    tmin = _to_int_temp(vals[1])

            # Keep only rows that look valid
            if dow and (tmin is not None) and (tmax is not None):
                daily.append(
                    {
                        "dow": dow,  # e.g. "Cs", "P", "Sz"
                        "tmin_c": tmin,
                        "tmax_c": tmax,
                        "icon_url": icon,
                    }
                )

        # Limit to the first 5 valid days (including "vacation" days).
        daily = daily[:5]

        return {
            "source": {"name": SOURCE_NAME, "url": IDOKEP_URL},
            "location": {"name": PLACE_NAME},
            "current": {"temp_c": cur_temp, "condition": cur_cond, "icon_url": cur_icon},
            "hourly": hourly,
            "daily": daily,
            "fetched_at_unix": int(time.time()),
        }


    def _update_gauges(data: Dict[str, Any]) -> None:
        """Mirror one scrape result into the Prometheus gauges."""
        cur = data.get("current", {}).get("temp_c")
        if cur is not None:
            CURRENT_TEMP.labels(place=PLACE_NAME).set(float(cur))

        # The forecast gauges are keyed by day-of-week / hour labels. Drop the
        # previous label sets first so that slots which are no longer part of
        # the forecast stop being exported with stale values.
        DAILY_TMIN.clear()
        DAILY_TMAX.clear()
        HOURLY_TEMP.clear()

        for d in data.get("daily", []):
            DAILY_TMIN.labels(place=PLACE_NAME, dow=d["dow"]).set(float(d["tmin_c"]))
            DAILY_TMAX.labels(place=PLACE_NAME, dow=d["dow"]).set(float(d["tmax_c"]))

        for h in data.get("hourly", []):
            HOURLY_TEMP.labels(place=PLACE_NAME, time=h["time"]).set(float(h["temp_c"]))


    @APP.get("/api")
    def api():
        """Scrape Időkép on demand and return the result as JSON.

        Every call is timed and counted (status "ok" or "error"). A failed
        scrape is counted and then re-raised, so the request fails.
        """
        with SCRAPE_SECONDS.labels(place=PLACE_NAME).time():
            try:
                data = scrape()
            except Exception:
                SCRAPES.labels(place=PLACE_NAME, status="error").inc()
                raise
        SCRAPES.labels(place=PLACE_NAME, status="ok").inc()

        # Gauge updates are best-effort: a metrics problem must not fail the
        # API response, but it is logged instead of being silently dropped.
        try:
            _update_gauges(data)
        except Exception:
            LOG.exception("Failed to update Prometheus gauges")

        # IMPORTANT: force JSON content-type so Glance exposes `.JSON`
        return Response(
            content=json.dumps(data, ensure_ascii=False),
            media_type="application/json; charset=utf-8",
        )


    @APP.get("/metrics")
    def metrics():
        """Expose the Prometheus metrics in text exposition format."""
        return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST)
---
# Service: cluster-internal access to the scraper on port 8000.
apiVersion: v1
kind: Service
metadata:
  name: idokep-scraper
  namespace: glance-system
spec:
  selector:
    app: idokep-scraper
  ports:
    - name: http
      port: 8000
      targetPort: 8000