diff --git a/glance-system/idokep-proxy.yaml b/glance-system/idokep-proxy.yaml index 50ca6c5..2a9505e 100644 --- a/glance-system/idokep-proxy.yaml +++ b/glance-system/idokep-proxy.yaml @@ -12,22 +12,31 @@ data: import requests from bs4 import BeautifulSoup - from fastapi import FastAPI, Query + from fastapi import FastAPI, Query, Response from fastapi.responses import JSONResponse + from prometheus_client import Gauge, Counter, Histogram, generate_latest, CONTENT_TYPE_LATEST + app = FastAPI() IDOKEP_BASE = "https://www.idokep.hu" - DEFAULT_PLACE = os.getenv("IDOKEP_PLACE", "Budapest VII. ker") + DEFAULT_PLACE = os.getenv("IDOKEP_PLACE", "Budapest VIII. ker") USER_AGENT = os.getenv( "IDOKEP_UA", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36", ) - # simple in-memory cache: {place: (expires_epoch, payload)} - CACHE_TTL_SEC = int(os.getenv("CACHE_TTL_SEC", "600")) # 10 min + CACHE_TTL_SEC = int(os.getenv("CACHE_TTL_SEC", "900")) # 15 minutes _cache: Dict[str, Tuple[float, Dict[str, Any]]] = {} + # --- Prometheus metrics (low-cardinality, place as label) --- + SCRAPES_TOTAL = Counter("idokep_scrapes_total", "Total Időkép scrapes", ["place", "status"]) + SCRAPE_SECONDS = Histogram("idokep_scrape_seconds", "Időkép scrape duration in seconds", ["place"]) + CURRENT_TEMP_C = Gauge("idokep_current_temp_c", "Current temperature in Celsius", ["place"]) + DAILY_TMIN_C = Gauge("idokep_daily_tmin_c", "Daily minimum temperature in Celsius", ["place", "dow"]) + DAILY_TMAX_C = Gauge("idokep_daily_tmax_c", "Daily maximum temperature in Celsius", ["place", "dow"]) + DAILY_PREC_MM = Gauge("idokep_daily_precip_mm", "Daily precipitation in mm", ["place", "dow"]) + def _num(s: str) -> Optional[float]: if s is None: return None @@ -49,11 +58,9 @@ data: return el.get_text(" ", strip=True) def _fetch_place_html(place: str) -> str: - # Időkép uses the place slug in path; requests will percent-encode automatically if we build it carefully. - # We'll do a conservative encoding by replacing spaces with %20, keep UTF-8. place_path = requests.utils.requote_uri(place) url = f"{IDOKEP_BASE}/idojaras/{place_path}" - r = requests.get(url, headers={"User-Agent": USER_AGENT}, timeout=15) + r = requests.get(url, headers={"User-Agent": USER_AGENT}, timeout=20) r.raise_for_status() return r.text @@ -70,21 +77,17 @@ data: cond_hu_el = soup.select_one(".weather-short-desc") condition_hu = _pick_text(cond_hu_el) - # HOURLY (take first 6) + # HOURLY (first 6) hourly_cards = soup.select(".new-hourly-forecast-card") hourly: List[Dict[str, Any]] = [] for card in hourly_cards[:6]: - hour_el = card.select_one(".new-hourly-forecast-hour") - hour_txt = _pick_text(hour_el) + hour_txt = _pick_text(card.select_one(".new-hourly-forecast-hour")) - htemp_el = card.select_one(".tempValue .hover-over") - htemp_c = _num(_pick_text(htemp_el) or "") + htemp_c = _num(_pick_text(card.select_one(".tempValue .hover-over")) or "") - hicon_el = card.select_one(".forecast-icon") - hicon_url = _abs_url(hicon_el.get("src") if hicon_el else None) + hicon_url = _abs_url((card.select_one(".forecast-icon") or {}).get("src")) if card.select_one(".forecast-icon") else None - hprec_el = card.select_one(".hourly-rain-chance a") - hprec_pct = _num(_pick_text(hprec_el) or "") + hprec_pct = _num(_pick_text(card.select_one(".hourly-rain-chance a")) or "") hourly.append( { @@ -95,33 +98,26 @@ data: } ) - # DAILY (take next 5 columns; Időkép layout usually has an extra leading column, HA used nth-child(2) as day1) + # DAILY (next 5, skip first like your HA template did) daily_cols = soup.select(".dailyForecastCol") - daily: List[Dict[str, Any]] = [] - - # Skip first column if it looks like a header-ish column; keep behavior close to your HA selectors. cols = daily_cols[1:] if len(daily_cols) >= 2 else daily_cols + daily_raw: List[Dict[str, Any]] = [] for col in cols[:5]: - dow_el = col.select_one(".dfDay") - dow = _pick_text(dow_el) + dow = _pick_text(col.select_one(".dfDay")) + daynum = _pick_text(col.select_one(".dfDayNum")) - daynum_el = col.select_one(".dfDayNum") - daynum = _pick_text(daynum_el) + dicon_url = _abs_url((col.select_one(".forecast") or {}).get("src")) if col.select_one(".forecast") else None - dicon_el = col.select_one(".forecast") - dicon_url = _abs_url(dicon_el.get("src") if dicon_el else None) - - # max/min sometimes show in different wrappers; try a few options + # various layouts: try a few max_el = col.select_one(".max a") or col.select_one(".min-max-close a:nth-child(1)") min_el = col.select_one(".min a") or col.select_one(".min-max-close a:nth-child(2)") tmax_c = _num(_pick_text(max_el) or "") tmin_c = _num(_pick_text(min_el) or "") - mm_el = col.select_one(".mm") - prec_mm = _num(_pick_text(mm_el) or "") + prec_mm = _num(_pick_text(col.select_one(".mm")) or "") - daily.append( + daily_raw.append( { "dow": dow, # e.g. "Sze" "daynum": daynum, # e.g. "14" @@ -132,6 +128,28 @@ data: } ) + # Compute weekly min/max for HA-like bars (left/width) + mins = [d["tmin_c"] for d in daily_raw if d.get("tmin_c") is not None] + maxs = [d["tmax_c"] for d in daily_raw if d.get("tmax_c") is not None] + week_min = min(mins) if mins else None + week_max = max(maxs) if maxs else None + denom = (week_max - week_min) if (week_min is not None and week_max is not None and week_max != week_min) else None + + daily: List[Dict[str, Any]] = [] + for d in daily_raw: + left = None + width = None + if denom is not None and d.get("tmin_c") is not None and d.get("tmax_c") is not None: + left = ((d["tmin_c"] - week_min) / denom) * 100.0 + width = ((d["tmax_c"] - d["tmin_c"]) / denom) * 100.0 + # clamp for safety + left = max(0.0, min(100.0, left)) + width = max(1.0, min(100.0, width)) + d2 = dict(d) + d2["bar_left_pct"] = left + d2["bar_width_pct"] = width + daily.append(d2) + return { "source": { "name": "Időkép", @@ -145,20 +163,59 @@ data: }, "hourly": hourly, "daily": daily, + "weekly": { + "tmin_c": week_min, + "tmax_c": week_max, + }, "fetched_at_unix": int(time.time()), } @app.get("/api/idokep") - def api_idokep(place: str = Query(default=DEFAULT_PLACE, description="Időkép place name as used in the /idojaras/ URL")): + def api_idokep(place: str = Query(default=DEFAULT_PLACE, description="Időkép place name as in /idojaras/")): now = time.time() + cached = _cache.get(place) if cached and cached[0] > now: return JSONResponse(cached[1]) - html = _fetch_place_html(place) - payload = _parse_idokep(html, place) - _cache[place] = (now + CACHE_TTL_SEC, payload) - return JSONResponse(payload) + with SCRAPE_SECONDS.labels(place=place).time(): + try: + html = _fetch_place_html(place) + payload = _parse_idokep(html, place) + _cache[place] = (now + CACHE_TTL_SEC, payload) + + # update metrics (best-effort) + t = payload.get("current", {}).get("temp_c") + if t is not None: + CURRENT_TEMP_C.labels(place=place).set(float(t)) + + for d in payload.get("daily", []): + dow = d.get("dow") or "?" + if d.get("tmin_c") is not None: + DAILY_TMIN_C.labels(place=place, dow=dow).set(float(d["tmin_c"])) + if d.get("tmax_c") is not None: + DAILY_TMAX_C.labels(place=place, dow=dow).set(float(d["tmax_c"])) + if d.get("prec_mm") is not None: + DAILY_PREC_MM.labels(place=place, dow=dow).set(float(d["prec_mm"])) + + SCRAPES_TOTAL.labels(place=place, status="ok").inc() + return JSONResponse(payload) + + except Exception: + SCRAPES_TOTAL.labels(place=place, status="error").inc() + # return a structured error Glance can show + return JSONResponse( + { + "place": place, + "error": "Failed to scrape Időkép. Check the place string or Időkép page layout changes.", + "fetched_at_unix": int(time.time()), + }, + status_code=502, + ) + + @app.get("/metrics") + def metrics(): + return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST) --- apiVersion: apps/v1 @@ -175,6 +232,10 @@ spec: metadata: labels: app: idokep-proxy + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8000" + prometheus.io/path: "/metrics" spec: containers: - name: idokep-proxy @@ -183,9 +244,15 @@ spec: - containerPort: 8000 env: - name: IDOKEP_PLACE - value: "Budapest VII. ker" + value: "Budapest VIII. ker" - name: CACHE_TTL_SEC - value: "900" # 15 minutes, matches your HA scan_interval philosophy + value: "900" + resources: + requests: + cpu: 25m + memory: 128Mi + limits: + memory: 256Mi volumeMounts: - name: app mountPath: /app @@ -193,7 +260,7 @@ spec: command: ["/bin/sh","-lc"] args: - | - pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 lxml && + pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 lxml prometheus_client && uvicorn app:app --host 0.0.0.0 --port 8000 volumes: - name: app