From 6eb7b5fa119bf666a77bd1cfded7886eac746666 Mon Sep 17 00:00:00 2001
From: kisfenyo
Date: Thu, 15 Jan 2026 10:06:49 +0100
Subject: [PATCH] added glance-helper

---
Notes:
  * TANDOOR_TOKEN and GLANCE_HELPER_KEY are read from the
    "glance-helper-secrets" Secret (keys: tandoor-token, glance-helper-key).
    Create it before applying, and rotate any value that was ever committed
    in plain text.
  * The blob id on the "index" line was not recomputed; regenerate the patch
    with git format-patch before sending it.

 glance-system/glance-helper.yaml | 596 +++++++++++++++++++++++++++++++
 glance-system/idokep-proxy.yaml  | 238 ------------
 2 files changed, 596 insertions(+), 238 deletions(-)
 create mode 100644 glance-system/glance-helper.yaml
 delete mode 100644 glance-system/idokep-proxy.yaml

diff --git a/glance-system/glance-helper.yaml b/glance-system/glance-helper.yaml
new file mode 100644
index 0000000..47eceff
--- /dev/null
+++ b/glance-system/glance-helper.yaml
@@ -0,0 +1,596 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: glance-helper-data
+  namespace: glance-system
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 200Mi
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: glance-helper
+  namespace: glance-system
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: glance-helper
+  template:
+    metadata:
+      labels:
+        app: glance-helper
+    spec:
+      containers:
+        - name: glance-helper
+          image: python:3.12-slim
+          imagePullPolicy: IfNotPresent
+          env:
+            - name: IDOKEP_URL
+              value: https://www.idokep.hu/idojaras/Budapest%20VII.%20ker
+            - name: PLACE_NAME
+              value: Budapest VII. ker
+            - name: TANDOOR_INTERNAL_URL
+              value: http://tandoor.tandoor-system.svc.cluster.local:8080
+            - name: TANDOOR_PUBLIC_URL
+              value: https://tandoor.dooplex.hu
+            # Credentials come from the "glance-helper-secrets" Secret, which is
+            # created out-of-band; never commit literal values here.
+            - name: TANDOOR_TOKEN
+              valueFrom:
+                secretKeyRef:
+                  name: glance-helper-secrets
+                  key: tandoor-token
+            - name: GLANCE_HELPER_PUBLIC_URL
+              value: https://glance-helper.dooplex.hu
+            - name: DATA_DIR
+              value: /data
+            - name: GLANCE_HELPER_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: glance-helper-secrets
+                  key: glance-helper-key
+          ports:
+            - containerPort: 8000
+          command:
+            - /bin/sh
+            - -lc
+          args:
+            - |
+              pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 prometheus-client &&
+              python -c "import uvicorn; uvicorn.run('app:APP', host='0.0.0.0', port=8000)"
+          volumeMounts:
+            - name: app
+              mountPath: /app
+            - name: data
+              mountPath: /data
+          workingDir: /app
+      volumes:
+        - name: app
+          configMap:
+            name: glance-helper-app
+        - name: data
+          persistentVolumeClaim:
+            claimName: glance-helper-data
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: glance-helper-app
+  namespace: glance-system
+data:
+  app.py: |
+    import json
+    import logging
+    import os
+    import random
+    import re
+    import secrets
+    import time
+    from datetime import datetime, timezone
+    from pathlib import Path
+    from typing import Any, Dict, List, Optional
+    from urllib.parse import urlencode, urlparse, urlunparse
+    from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
+
+    import requests
+    from bs4 import BeautifulSoup
+    from fastapi import FastAPI, HTTPException, Query, Request, Response
+    from fastapi.responses import JSONResponse, RedirectResponse
+    from prometheus_client import CONTENT_TYPE_LATEST, Counter, Gauge, Histogram, generate_latest
+
+    LOG = logging.getLogger("glance-helper")
+
+    APP = FastAPI()
+
+    IDOKEP_URL = os.getenv(
+        "IDOKEP_URL",
+        "https://www.idokep.hu/idojaras/Budapest%20VIII.%20ker",
+    )
+    PLACE_NAME = os.getenv("PLACE_NAME", "Budapest VIII. ker")
+    SOURCE_NAME = "Időkép"
+
+    UA = os.getenv(
+        "USER_AGENT",
+        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome Safari",
+    )
+
+    # Tandoor / helper settings; the Deployment supplies all of them via env.
+    TANDOOR_INTERNAL_URL = os.getenv("TANDOOR_INTERNAL_URL", "").rstrip("/")
+    TANDOOR_PUBLIC_URL = os.getenv("TANDOOR_PUBLIC_URL", "").rstrip("/")
+    GLANCE_HELPER_PUBLIC_URL = os.getenv("GLANCE_HELPER_PUBLIC_URL", "").rstrip("/")
+    GLANCE_HELPER_KEY = os.getenv("GLANCE_HELPER_KEY", "")
+
+    # Per-day state (cooked recipes, current picks) lives on the mounted volume.
+    DATA_DIR = Path(os.getenv("DATA_DIR", "/data"))
+    COOKED_PATH = DATA_DIR / "cooked.json"
+    PICKS_PATH = DATA_DIR / "picks.json"
+
+    try:
+        LOCAL_TZ = ZoneInfo("Europe/Budapest")
+    except ZoneInfoNotFoundError:
+        # Slim images may ship without tz data; degrade to UTC day boundaries.
+        LOCAL_TZ = timezone.utc
+        LOG.warning("tz database not available, using UTC for day boundaries")
+
+    # Prometheus metrics (optional)
+    SCRAPES = Counter("idokep_scrapes_total", "Total Időkép scrapes", ["place", "status"])
+    SCRAPE_SECONDS = Histogram("idokep_scrape_seconds", "Időkép scrape duration in seconds", ["place"])
+    CURRENT_TEMP = Gauge("idokep_current_temp_c", "Current temperature in Celsius", ["place"])
+    DAILY_TMIN = Gauge("idokep_daily_tmin_c", "Daily minimum temperature in Celsius", ["place", "dow"])
+    DAILY_TMAX = Gauge("idokep_daily_tmax_c", "Daily maximum temperature in Celsius", ["place", "dow"])
+    HOURLY_TEMP = Gauge("idokep_hourly_temp_c", "Hourly temperature in Celsius", ["place", "time"])
+
+
+    def _abs_url(maybe_relative: Optional[str]) -> Optional[str]:
+        """Return an absolute URL for an Időkép asset path (None stays None)."""
+        if not maybe_relative:
+            return None
+        if maybe_relative.startswith(("http://", "https://")):
+            return maybe_relative
+        # Időkép uses /assets/... paths
+        return "https://www.idokep.hu" + maybe_relative
+
+
+    def _to_int_temp(s: str) -> Optional[float]:
+        """Parse a temperature label such as "-2˚C" into a float, or None."""
+        if not s:
+            return None
+        s = s.strip().replace("˚C", "").replace("°C", "").replace("°", "")
+        try:
+            return float(s)
+        except ValueError:
+            return None
+
+
+    def scrape() -> Dict[str, Any]:
+        """Fetch IDOKEP_URL and extract current, hourly and daily forecast data."""
+        headers = {"User-Agent": UA}
+        r = requests.get(IDOKEP_URL, headers=headers, timeout=15)
+        r.raise_for_status()
+
+        soup = BeautifulSoup(r.text, "html.parser")
+
+        # Current
+        cur_temp_el = soup.select_one(".current-temperature")
+        cur_cond_el = soup.select_one(".current-weather")
+        cur_icon_el = soup.select_one(".forecast-bigicon")
+
+        cur_temp = _to_int_temp(cur_temp_el.get_text(strip=True) if cur_temp_el else "")
+        cur_cond = cur_cond_el.get_text(strip=True) if cur_cond_el else ""
+        cur_icon = _abs_url(cur_icon_el.get("src") if cur_icon_el else None)
+
+        # Hourly cards (.ik.hourly-forecast-card)
+        hourly: List[Dict[str, Any]] = []
+        for card in soup.select(".ik.hourly-forecast-card")[:8]:
+            t_el = card.select_one(".ik.hourly-forecast-hour")
+            temp_el = card.select_one(".ik.temperature-circled")
+            icon_el = card.select_one("img.ik.forecast-icon")
+
+            t = t_el.get_text(strip=True) if t_el else ""
+            temp = _to_int_temp(temp_el.get_text(strip=True) if temp_el else "")
+            icon = _abs_url(icon_el.get("src") if icon_el else None)
+
+            if t and temp is not None:
+                hourly.append(
+                    {
+                        "time": t,  # e.g. "18:00"
+                        "temp_c": temp,  # e.g. -2
+                        "icon_url": icon,  # absolute URL
+                    }
+                )
+
+        # Daily columns (bottom forecast table: .ik.daily-forecast-container .ik.dailyForecastCol)
+        daily: List[Dict[str, Any]] = []
+        for col in soup.select(".ik.daily-forecast-container .ik.dailyForecastCol")[:15]:
+            dow_el = col.select_one(".ik.dfDay")
+            icon_el = col.select_one("img.ik.forecast-icon")
+            daynum_el = col.select_one(".ik.dfDayNum")
+
+            # Normal structure (most days)
+            tmax_el = col.select_one("div.ik.max")
+            tmin_el = col.select_one("div.ik.min")
+
+            daynum = daynum_el.get_text(strip=True) if daynum_el else ""
+            dow = dow_el.get_text(strip=True) if dow_el else ""
+            icon = _abs_url(icon_el.get("src") if icon_el else None)
+
+            tmax = _to_int_temp(tmax_el.get_text(strip=True) if tmax_el else "")
+            tmin = _to_int_temp(tmin_el.get_text(strip=True) if tmin_el else "")
+
+            # Fallback structure (e.g. "vacation" days) where div.ik.max/min are missing.
+            # In those cases the visible temps are usually the first two numeric texts
+            # inside .ik.min-max-container (order: max, min).
+            if tmax is None or tmin is None:
+                vals: List[str] = []
+                for a in col.select(".ik.min-max-container a"):
+                    txt = a.get_text(strip=True)
+                    if re.fullmatch(r"-?\d+", txt or ""):
+                        vals.append(txt)
+
+                if len(vals) >= 2:
+                    tmax = _to_int_temp(vals[0])
+                    tmin = _to_int_temp(vals[1])
+
+            # Keep only rows that look valid
+            if dow and (tmin is not None) and (tmax is not None):
+                daily.append(
+                    {
+                        "daynum": daynum,
+                        "dow": dow,  # e.g. "Cs", "P", "Sz"
+                        "tmin_c": tmin,
+                        "tmax_c": tmax,
+                        "icon_url": icon,
+                    }
+                )
+
+        # Limit to 5 days for the widget (first 5 valid columns, including "vacation" days)
+        daily = daily[:5]
+
+        return {
+            "source": {"name": SOURCE_NAME, "url": IDOKEP_URL},
+            "location": {"name": PLACE_NAME},
+            "current": {"temp_c": cur_temp, "condition": cur_cond, "icon_url": cur_icon},
+            "hourly": hourly,
+            "daily": daily,
+            "fetched_at_unix": int(time.time()),
+        }
+
+
+    @APP.get("/api")
+    def api():
+        """Scrape Időkép, refresh the Prometheus gauges and return the data as JSON."""
+        status = "ok"
+        with SCRAPE_SECONDS.labels(place=PLACE_NAME).time():
+            try:
+                data = scrape()
+            except Exception:
+                status = "error"
+                SCRAPES.labels(place=PLACE_NAME, status=status).inc()
+                raise
+
+        SCRAPES.labels(place=PLACE_NAME, status=status).inc()
+
+        # Update Prometheus gauges (best-effort: never fail the request over metrics)
+        try:
+            if data.get("current", {}).get("temp_c") is not None:
+                CURRENT_TEMP.labels(place=PLACE_NAME).set(float(data["current"]["temp_c"]))
+            for d in data.get("daily", []):
+                DAILY_TMIN.labels(place=PLACE_NAME, dow=d["dow"]).set(float(d["tmin_c"]))
+                DAILY_TMAX.labels(place=PLACE_NAME, dow=d["dow"]).set(float(d["tmax_c"]))
+            for h in data.get("hourly", []):
+                HOURLY_TEMP.labels(place=PLACE_NAME, time=h["time"]).set(float(h["temp_c"]))
+        except Exception:
+            LOG.exception("failed to update Prometheus gauges")
+
+        # IMPORTANT: force JSON content-type so Glance exposes `.JSON`
+        return Response(
+            content=json.dumps(data, ensure_ascii=False),
+            media_type="application/json; charset=utf-8",
+        )
+
+
+    @APP.get("/metrics")
+    def metrics():
+        """Expose the Prometheus metrics registry."""
+        return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST)
+
+
+    # -------------------------------
+    # Tandoor helpers
+    # -------------------------------
+    def _today_str() -> str:
+        """Return today's ISO date using LOCAL_TZ for the day boundary."""
+        return datetime.now(tz=LOCAL_TZ).date().isoformat()
+
+
+    def _load_json(path: Path, default):
+        """Read JSON from path; return default if it is missing or unreadable."""
+        try:
+            with path.open("r", encoding="utf-8") as f:
+                return json.load(f)
+        except (OSError, ValueError):
+            return default
+
+
+    def _save_json(path: Path, data) -> None:
+        """Write data as JSON via a temp file + rename so readers never see a partial file."""
+        path.parent.mkdir(parents=True, exist_ok=True)
+        tmp = path.with_suffix(path.suffix + ".tmp")
+        with tmp.open("w", encoding="utf-8") as f:
+            json.dump(data, f, ensure_ascii=False, indent=2)
+        tmp.replace(path)
+
+
+    def _tandoor_headers() -> Dict[str, str]:
+        """Build request headers for Tandoor, adding the bearer token when configured."""
+        token = os.getenv("TANDOOR_TOKEN", "")
+        if not token:
+            return {"Accept": "application/json"}
+        return {"Accept": "application/json", "Authorization": f"Bearer {token}"}
+
+
+    def _rewrite_to_public(maybe_url: Optional[str]) -> Optional[str]:
+        """Map a relative or cluster-internal Tandoor URL to its public equivalent."""
+        if not maybe_url:
+            return None
+
+        # Relative path -> public
+        if maybe_url.startswith("/"):
+            return TANDOOR_PUBLIC_URL + maybe_url
+
+        # If the API returns internal host URLs, rewrite scheme+host to public
+        try:
+            u = urlparse(maybe_url)
+            pub = urlparse(TANDOOR_PUBLIC_URL)
+            internal = urlparse(TANDOOR_INTERNAL_URL)
+            if u.netloc and internal.netloc and u.netloc == internal.netloc:
+                u = u._replace(scheme=pub.scheme, netloc=pub.netloc)
+                return urlunparse(u)
+        except ValueError:
+            pass
+
+        return maybe_url
+
+
+    def _normalize_recipes(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Reduce Tandoor recipe dicts to {id, name, image}, dropping incomplete rows."""
+        out = []
+        for x in items:
+            out.append(
+                {
+                    "id": int(x.get("id", 0)),
+                    "name": str(x.get("name", "")),
+                    "image": _rewrite_to_public(x.get("image")),
+                }
+            )
+        return [x for x in out if x["id"] and x["name"]]
+
+
+    def _fetch_recipes_flat() -> List[Dict[str, Any]]:
+        """Fetch the recipe list from Tandoor, preferring the flat endpoint."""
+        # Prefer /api/recipe/flat/ because it's already {id,name,image} list
+        flat_url = f"{TANDOOR_INTERNAL_URL}/api/recipe/flat/"
+        r = requests.get(flat_url, headers=_tandoor_headers(), timeout=15)
+        if r.status_code == 200:
+            data = r.json()
+            # Expected: list
+            if isinstance(data, list):
+                return _normalize_recipes(data)
+
+        # Fallback: paginated /api/recipe/ (first page only)
+        list_url = f"{TANDOOR_INTERNAL_URL}/api/recipe/?page_size=250"
+        r = requests.get(list_url, headers=_tandoor_headers(), timeout=15)
+        r.raise_for_status()
+        data = r.json()
+        items = data.get("results", []) if isinstance(data, dict) else []
+        return _normalize_recipes(items)
+
+
+    def _load_state(path: Path) -> Dict[str, Any]:
+        """Load a {date: [ids]} mapping; anything else on disk counts as empty."""
+        state = _load_json(path, {})
+        return state if isinstance(state, dict) else {}
+
+
+    def _ids_for_today(path: Path) -> List[int]:
+        """Return today's ids from the state file at path ([] when absent or malformed)."""
+        try:
+            return [int(i) for i in _load_state(path).get(_today_str(), [])]
+        except (TypeError, ValueError):
+            return []
+
+
+    def _store_ids_today(path: Path, ids: List[int]) -> None:
+        """Store ids under today's date and keep only the 14 most recent days."""
+        state = _load_state(path)
+        state[_today_str()] = ids
+        # ISO dates sort chronologically, so the oldest keys come first.
+        for stale in sorted(state)[:-14]:
+            state.pop(stale, None)
+        _save_json(path, state)
+
+
+    def _get_cooked_for_today() -> List[int]:
+        """Return the recipe ids marked as cooked today."""
+        return _ids_for_today(COOKED_PATH)
+
+
+    def _set_cooked_today(ids: List[int]) -> None:
+        """Persist today's cooked recipe ids, de-duplicated and sorted."""
+        _store_ids_today(COOKED_PATH, sorted({int(i) for i in ids}))
+
+
+    def _get_picks_today() -> List[int]:
+        """Return the recipe ids currently suggested for today."""
+        return _ids_for_today(PICKS_PATH)
+
+
+    def _set_picks_today(ids: List[int]) -> None:
+        """Persist today's suggested recipe ids, keeping their order."""
+        _store_ids_today(PICKS_PATH, [int(i) for i in ids if int(i) > 0])
+
+
+    def _ensure_daily_picks(recipes: List[Dict[str, Any]], count: int) -> List[int]:
+        """Return today's picks, topping them up with random uncooked recipes."""
+        cooked = set(_get_cooked_for_today())
+        picks = _get_picks_today()
+
+        # Remove picks that are cooked today
+        picks = [i for i in picks if i not in cooked]
+
+        # Top up to requested count if needed
+        if len(picks) < count:
+            available = [r["id"] for r in recipes if r["id"] not in cooked and r["id"] not in picks]
+            # If everything is cooked (or too few recipes), allow repeats from all recipes
+            if len(available) < (count - len(picks)):
+                available = [r["id"] for r in recipes if r["id"] not in picks]
+
+            need = max(0, count - len(picks))
+            if need > 0 and available:
+                picks += random.sample(available, k=min(need, len(available)))
+
+        # If no picks yet (first call today), choose fresh
+        if not picks:
+            available = [r["id"] for r in recipes if r["id"] not in cooked]
+            if not available:
+                available = [r["id"] for r in recipes]
+            picks = random.sample(available, k=min(count, len(available)))
+
+        _set_picks_today(picks)
+        return picks
+
+
+    @APP.get("/tandoor/daily")
+    def tandoor_daily(count: int = Query(3, ge=1, le=10)):
+        """Return today's recipe suggestions with public recipe and cook URLs."""
+        try:
+            recipes = _fetch_recipes_flat()
+        except Exception as e:
+            raise HTTPException(
+                status_code=502,
+                detail=f"Failed to fetch recipes from Tandoor: {e}",
+            ) from e
+
+        if not recipes:
+            return JSONResponse({"date": _today_str(), "total_recipes": 0, "items": []})
+
+        ids = _ensure_daily_picks(recipes, count)
+        by_id = {r["id"]: r for r in recipes}
+
+        items = []
+        for rid in ids:
+            r = by_id.get(rid)
+            if not r:
+                continue
+            # state-changing endpoint requires key if set
+            cook_params = {"id": r["id"]}
+            if GLANCE_HELPER_KEY:
+                cook_params["key"] = GLANCE_HELPER_KEY
+            items.append(
+                {
+                    "id": r["id"],
+                    "name": r["name"],
+                    "image": r.get("image"),
+                    "url": f"{TANDOOR_PUBLIC_URL}/recipe/{r['id']}",
+                    "cook_url": f"{GLANCE_HELPER_PUBLIC_URL}/tandoor/cook?{urlencode(cook_params)}",
+                }
+            )
+
+        return JSONResponse(
+            {
+                "date": _today_str(),
+                "total_recipes": len(recipes),
+                "items": items,
+            }
+        )
+
+
+    @APP.get("/tandoor/cook")
+    def tandoor_cook(
+        id: int = Query(..., ge=1),
+        key: str = Query("", alias="key"),
+        redirect: str = Query("", alias="redirect"),
+    ):
+        """Mark a recipe as cooked today and drop it from today's picks."""
+        # Protect state-changing calls with a shared key (recommended).
+        # Compare bytes in constant time so the key cannot be guessed via timing.
+        if GLANCE_HELPER_KEY and not secrets.compare_digest(
+            key.encode("utf-8"), GLANCE_HELPER_KEY.encode("utf-8")
+        ):
+            raise HTTPException(status_code=403, detail="Forbidden")
+
+        cooked = set(_get_cooked_for_today())
+        cooked.add(int(id))
+        _set_cooked_today(list(cooked))
+
+        # Also remove from today's picks (so daily list can refill)
+        picks = [i for i in _get_picks_today() if i != int(id)]
+        _set_picks_today(picks)
+
+        if redirect:
+            # NOTE(review): `redirect` is caller-supplied and not validated; consider
+            # restricting it to known hosts to avoid an open redirect.
+            return RedirectResponse(url=redirect, status_code=302)
+
+        return JSONResponse({"ok": True, "date": _today_str(), "cooked_today": sorted(cooked)})
+---
+# Legacy Service name (same as the one removed with idokep-proxy.yaml), now
+# routed to the glance-helper pods.
+apiVersion: v1
+kind: Service
+metadata:
+  name: idokep-scraper
+  namespace: glance-system
+spec:
+  selector:
+    app: glance-helper
+  ports:
+    - name: http
+      port: 8000
+      targetPort: 8000
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: glance-helper
+  namespace: glance-system
+spec:
+  selector:
+    app: glance-helper
+  ports:
+    - name: http
+      port: 8000
+      targetPort: 8000
+---
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: glance-helper
+  namespace: glance-system
+  annotations:
+    cert-manager.io/cluster-issuer: letsencrypt-prod
+    external-dns.alpha.kubernetes.io/hostname: glance-helper.dooplex.hu,glance-helper.home
+    nginx.ingress.kubernetes.io/ssl-redirect: "true"
+    nginx.ingress.kubernetes.io/proxy-body-size: 10m
+spec:
+  ingressClassName: nginx-internal
+  rules:
+    - host: glance-helper.dooplex.hu
+      http:
+        paths:
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: glance-helper
+                port:
+                  number: 8000
+  tls:
+    - hosts:
+        - glance-helper.dooplex.hu
+      secretName: glance-helper-tls
diff --git a/glance-system/idokep-proxy.yaml b/glance-system/idokep-proxy.yaml
deleted file mode 100644
index 27b4e40..0000000
--- a/glance-system/idokep-proxy.yaml
+++ /dev/null
@@ -1,238 +0,0 @@
-apiVersion:
apps/v1 -kind: Deployment -metadata: - name: idokep-scraper - namespace: glance-system -spec: - replicas: 1 - selector: - matchLabels: - app: idokep-scraper - template: - metadata: - labels: - app: idokep-scraper - spec: - containers: - - name: idokep-scraper - image: python:3.12-slim - imagePullPolicy: IfNotPresent - env: - - name: IDOKEP_URL - value: "https://www.idokep.hu/idojaras/Budapest%20VII.%20ker" - - name: PLACE_NAME - value: "Budapest VII. ker" - ports: - - containerPort: 8000 - command: ["/bin/sh", "-lc"] - args: - - | - pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 prometheus-client && - python -c "import uvicorn; uvicorn.run('app:APP', host='0.0.0.0', port=8000)" - volumeMounts: - - name: app - mountPath: /app - workingDir: /app - volumes: - - name: app - configMap: - name: idokep-scraper-app ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: idokep-scraper-app - namespace: glance-system -data: - app.py: | - import os - import time - import re - from typing import List, Dict, Any, Optional - - import requests - from bs4 import BeautifulSoup - from fastapi import FastAPI, Response - from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST - - APP = FastAPI() - - IDOKEP_URL = os.getenv( - "IDOKEP_URL", - "https://www.idokep.hu/idojaras/Budapest%20VIII.%20ker", - ) - PLACE_NAME = os.getenv("PLACE_NAME", "Budapest VIII. 
ker") - SOURCE_NAME = "Időkép" - - UA = os.getenv( - "USER_AGENT", - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome Safari", - ) - - # Prometheus metrics (optional) - SCRAPES = Counter("idokep_scrapes_total", "Total Időkép scrapes", ["place", "status"]) - SCRAPE_SECONDS = Histogram("idokep_scrape_seconds", "Időkép scrape duration in seconds", ["place"]) - CURRENT_TEMP = Gauge("idokep_current_temp_c", "Current temperature in Celsius", ["place"]) - DAILY_TMIN = Gauge("idokep_daily_tmin_c", "Daily minimum temperature in Celsius", ["place", "dow"]) - DAILY_TMAX = Gauge("idokep_daily_tmax_c", "Daily maximum temperature in Celsius", ["place", "dow"]) - HOURLY_TEMP = Gauge("idokep_hourly_temp_c", "Hourly temperature in Celsius", ["place", "time"]) - - - def _abs_url(maybe_relative: Optional[str]) -> Optional[str]: - if not maybe_relative: - return None - if maybe_relative.startswith("http://") or maybe_relative.startswith("https://"): - return maybe_relative - # Időkép uses /assets/... 
paths - return "https://www.idokep.hu" + maybe_relative - - - def _to_int_temp(s: str) -> Optional[float]: - if not s: - return None - s = s.strip().replace("˚C", "").replace("°C", "").replace("°", "") - try: - return float(s) - except Exception: - return None - - - def scrape() -> Dict[str, Any]: - headers = {"User-Agent": UA} - r = requests.get(IDOKEP_URL, headers=headers, timeout=15) - r.raise_for_status() - - soup = BeautifulSoup(r.text, "html.parser") - - # Current - cur_temp_el = soup.select_one(".current-temperature") - cur_cond_el = soup.select_one(".current-weather") - cur_icon_el = soup.select_one(".forecast-bigicon") - - cur_temp = _to_int_temp(cur_temp_el.get_text(strip=True) if cur_temp_el else "") - cur_cond = cur_cond_el.get_text(strip=True) if cur_cond_el else "" - cur_icon = _abs_url(cur_icon_el.get("src") if cur_icon_el else None) - - # Hourly cards (the block you highlighted in devtools: .ik.hourly-forecast-card) - hourly: List[Dict[str, Any]] = [] - for card in soup.select(".ik.hourly-forecast-card")[:8]: - t_el = card.select_one(".ik.hourly-forecast-hour") - temp_el = card.select_one(".ik.temperature-circled") - icon_el = card.select_one("img.ik.forecast-icon") - - t = t_el.get_text(strip=True) if t_el else "" - temp = _to_int_temp(temp_el.get_text(strip=True) if temp_el else "") - icon = _abs_url(icon_el.get("src") if icon_el else None) - - if t and temp is not None: - hourly.append( - { - "time": t, # e.g. "18:00" - "temp_c": temp, # e.g. 
-2 - "icon_url": icon, # absolute URL - } - ) - - # Daily columns (bottom forecast table: .ik.daily-forecast-container .ik.dailyForecastCol) - daily: List[Dict[str, Any]] = [] - for col in soup.select(".ik.daily-forecast-container .ik.dailyForecastCol")[:15]: - dow_el = col.select_one(".ik.dfDay") - icon_el = col.select_one("img.ik.forecast-icon") - daynum_el = col.select_one(".ik.dfDayNum") - - # Normal structure (most days) - tmax_el = col.select_one("div.ik.max") - tmin_el = col.select_one("div.ik.min") - - daynum = daynum_el.get_text(strip=True) if daynum_el else "" - dow = dow_el.get_text(strip=True) if dow_el else "" - icon = _abs_url(icon_el.get("src") if icon_el else None) - - tmax = _to_int_temp(tmax_el.get_text(strip=True) if tmax_el else "") - tmin = _to_int_temp(tmin_el.get_text(strip=True) if tmin_el else "") - - # Fallback structure (e.g. "vacation" days) where div.ik.max/min are missing - # In those cases the visible temps are usually the first two numeric texts - # inside .ik.min-max-container (order: max, min). - if tmax is None or tmin is None: - vals: List[str] = [] - for a in col.select(".ik.min-max-container a"): - txt = a.get_text(strip=True) - if re.fullmatch(r"-?\d+", txt or ""): - vals.append(txt) - - if len(vals) >= 2: - tmax = _to_int_temp(vals[0]) - tmin = _to_int_temp(vals[1]) - - # Keep only rows that look valid - if dow and (tmin is not None) and (tmax is not None): - daily.append( - { - "daynum": daynum, - "dow": dow, # e.g. 
"Cs", "P", "Sz" - "tmin_c": tmin, - "tmax_c": tmax, - "icon_url": icon, - } - ) - - # Limit to 5 days for your widget (first 5 columns in the table, including "vacation" days) - daily = daily[:5] - - return { - "source": {"name": SOURCE_NAME, "url": IDOKEP_URL}, - "location": {"name": PLACE_NAME}, - "current": {"temp_c": cur_temp, "condition": cur_cond, "icon_url": cur_icon}, - "hourly": hourly, - "daily": daily, - "fetched_at_unix": int(time.time()), - } - - - @APP.get("/api") - def api(): - status = "ok" - with SCRAPE_SECONDS.labels(place=PLACE_NAME).time(): - try: - data = scrape() - except Exception: - status = "error" - SCRAPES.labels(place=PLACE_NAME, status=status).inc() - raise - - SCRAPES.labels(place=PLACE_NAME, status=status).inc() - - # Update Prometheus gauges (best-effort) - try: - if data.get("current", {}).get("temp_c") is not None: - CURRENT_TEMP.labels(place=PLACE_NAME).set(float(data["current"]["temp_c"])) - for d in data.get("daily", []): - DAILY_TMIN.labels(place=PLACE_NAME, dow=d["dow"]).set(float(d["tmin_c"])) - DAILY_TMAX.labels(place=PLACE_NAME, dow=d["dow"]).set(float(d["tmax_c"])) - for h in data.get("hourly", []): - HOURLY_TEMP.labels(place=PLACE_NAME, time=h["time"]).set(float(h["temp_c"])) - except Exception: - pass - - # IMPORTANT: force JSON content-type so Glance exposes `.JSON` - import json - return Response(content=json.dumps(data, ensure_ascii=False), media_type="application/json; charset=utf-8") - - - @APP.get("/metrics") - def metrics(): - return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST) ---- -apiVersion: v1 -kind: Service -metadata: - name: idokep-scraper - namespace: glance-system -spec: - selector: - app: idokep-scraper - ports: - - name: http - port: 8000 - targetPort: 8000 \ No newline at end of file