added glance-helper

This commit is contained in:
2026-01-15 10:06:49 +01:00
parent d567984538
commit 6eb7b5fa11
2 changed files with 314 additions and 238 deletions
+314
View File
@@ -0,0 +1,314 @@
# Persistent storage claimed by the glance-helper Deployment, which mounts it
# at /data (the container's DATA_DIR).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: glance-system
  name: glance-helper-data
spec:
  # Single-node read/write access; no storageClassName is set, so the
  # cluster's default StorageClass is used.
  accessModes: [ReadWriteOnce]
  resources:
    requests:
      storage: "200Mi"
---
# glance-helper: single-replica FastAPI service. The application source comes
# from the glance-helper-app ConfigMap (mounted at /app) and its state is kept
# on the glance-helper-data PVC (mounted at /data).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: glance-helper
  namespace: glance-system
spec:
  replicas: 1
  # The pod mounts a ReadWriteOnce volume. A rolling update would start the
  # replacement pod while the old one still holds the volume, so the old pod
  # is terminated first.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: glance-helper
  template:
    metadata:
      labels:
        app: glance-helper
    spec:
      containers:
        - name: glance-helper
          image: python:3.12-slim
          imagePullPolicy: IfNotPresent
          workingDir: /app
          command:
            - /bin/sh
            - -lc
          args:
            # Dependencies are installed on every start; `exec` replaces the
            # shell so uvicorn receives SIGTERM directly on pod shutdown.
            - |
              pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 prometheus-client &&
              exec python -c "import uvicorn; uvicorn.run('app:APP', host='0.0.0.0', port=8000)"
          env:
            - name: IDOKEP_URL
              value: 'https://www.idokep.hu/idojaras/Budapest%20VII.%20ker'
            - name: PLACE_NAME
              value: 'Budapest VII. ker'
            - name: TANDOOR_INTERNAL_URL
              value: 'http://tandoor.tandoor-system.svc.cluster.local:8080'
            - name: TANDOOR_PUBLIC_URL
              value: 'https://tandoor.dooplex.hu'
            - name: GLANCE_HELPER_PUBLIC_URL
              value: 'https://glance-helper.dooplex.hu'
            - name: DATA_DIR
              value: /data
            # Credentials are read from a Secret instead of being committed
            # in this manifest. Create it out of band, e.g.:
            #   kubectl -n glance-system create secret generic glance-helper-secrets \
            #     --from-literal=tandoor-token=... --from-literal=helper-key=...
            # The values that were previously committed here should be
            # rotated, since they remain in the repository history.
            - name: TANDOOR_TOKEN
              valueFrom:
                secretKeyRef:
                  name: glance-helper-secrets
                  key: tandoor-token
            - name: GLANCE_HELPER_KEY
              valueFrom:
                secretKeyRef:
                  name: glance-helper-secrets
                  key: helper-key
          ports:
            - containerPort: 8000
          # Keep the pod out of Service endpoints until uvicorn is listening
          # (the pip install above runs before the server starts).
          readinessProbe:
            tcpSocket:
              port: 8000
            periodSeconds: 10
          volumeMounts:
            - name: app
              mountPath: /app
            - name: data
              mountPath: /data
      volumes:
        - name: app
          configMap:
            name: glance-helper-app
        - name: data
          persistentVolumeClaim:
            claimName: glance-helper-data
---
# Application source for the glance-helper Deployment, which mounts this
# ConfigMap at /app and starts it with uvicorn as `app:APP`.
# The file is stored as a literal block so the Python can be read and diffed.
apiVersion: v1
kind: ConfigMap
metadata:
  name: glance-helper-app
  namespace: glance-system
data:
  app.py: |
    """Glance helper: Időkép weather scraper plus Tandoor "recipe of the day" endpoints.

    Endpoints:
      GET /api            scrape Időkép and return current/hourly/daily weather as JSON
      GET /metrics        Prometheus metrics
      GET /tandoor/daily  today's recipe picks (persisted under DATA_DIR)
      GET /tandoor/cook   mark a recipe as cooked today (guarded by GLANCE_HELPER_KEY)
    """
    import hmac
    import json
    import os
    import random
    import re
    import time
    from datetime import datetime
    from pathlib import Path
    from typing import List, Dict, Any, Optional
    from urllib.parse import urlparse, urlunparse
    from zoneinfo import ZoneInfo

    import requests
    from bs4 import BeautifulSoup
    from fastapi import FastAPI, Response, Request, HTTPException, Query
    from fastapi.responses import JSONResponse, RedirectResponse
    from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST

    APP = FastAPI()

    IDOKEP_URL = os.getenv(
        "IDOKEP_URL",
        "https://www.idokep.hu/idojaras/Budapest%20VIII.%20ker",
    )
    PLACE_NAME = os.getenv("PLACE_NAME", "Budapest VIII. ker")
    SOURCE_NAME = "Időkép"

    UA = os.getenv(
        "USER_AGENT",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome Safari",
    )

    # Tandoor / helper configuration, read from the environment.
    # Trailing slashes are stripped because paths are appended with a leading "/".
    TANDOOR_INTERNAL_URL = os.getenv("TANDOOR_INTERNAL_URL", "").rstrip("/")
    TANDOOR_PUBLIC_URL = os.getenv("TANDOOR_PUBLIC_URL", "").rstrip("/")
    GLANCE_HELPER_PUBLIC_URL = os.getenv("GLANCE_HELPER_PUBLIC_URL", "").rstrip("/")
    # Shared key for state-changing endpoints; an empty value disables the check.
    GLANCE_HELPER_KEY = os.getenv("GLANCE_HELPER_KEY", "")

    # Per-day state files: {"YYYY-MM-DD": [recipe_id, ...]}
    DATA_DIR = Path(os.getenv("DATA_DIR", "/data"))
    COOKED_PATH = DATA_DIR / "cooked.json"
    PICKS_PATH = DATA_DIR / "picks.json"

    # Prometheus metrics (optional)
    SCRAPES = Counter("idokep_scrapes_total", "Total Időkép scrapes", ["place", "status"])
    SCRAPE_SECONDS = Histogram("idokep_scrape_seconds", "Időkép scrape duration in seconds", ["place"])
    CURRENT_TEMP = Gauge("idokep_current_temp_c", "Current temperature in Celsius", ["place"])
    DAILY_TMIN = Gauge("idokep_daily_tmin_c", "Daily minimum temperature in Celsius", ["place", "dow"])
    DAILY_TMAX = Gauge("idokep_daily_tmax_c", "Daily maximum temperature in Celsius", ["place", "dow"])
    HOURLY_TEMP = Gauge("idokep_hourly_temp_c", "Hourly temperature in Celsius", ["place", "time"])


    def _abs_url(maybe_relative: Optional[str]) -> Optional[str]:
        """Return an absolute URL; relative paths are resolved against www.idokep.hu."""
        if not maybe_relative:
            return None
        if maybe_relative.startswith("http://") or maybe_relative.startswith("https://"):
            return maybe_relative
        # Időkép uses /assets/... paths
        return "https://www.idokep.hu" + maybe_relative


    def _to_int_temp(s: str) -> Optional[float]:
        """Parse a temperature label such as "-2°C" into a float; None if not numeric."""
        if not s:
            return None
        s = s.strip().replace("˚C", "").replace("°C", "").replace("°", "")
        try:
            return float(s)
        except Exception:
            return None


    def scrape() -> Dict[str, Any]:
        """Fetch IDOKEP_URL and extract current, hourly and daily forecast data.

        Raises requests exceptions on network/HTTP errors.
        """
        headers = {"User-Agent": UA}
        r = requests.get(IDOKEP_URL, headers=headers, timeout=15)
        r.raise_for_status()

        soup = BeautifulSoup(r.text, "html.parser")

        # Current
        cur_temp_el = soup.select_one(".current-temperature")
        cur_cond_el = soup.select_one(".current-weather")
        cur_icon_el = soup.select_one(".forecast-bigicon")

        cur_temp = _to_int_temp(cur_temp_el.get_text(strip=True) if cur_temp_el else "")
        cur_cond = cur_cond_el.get_text(strip=True) if cur_cond_el else ""
        cur_icon = _abs_url(cur_icon_el.get("src") if cur_icon_el else None)

        # Hourly cards (the block you highlighted in devtools: .ik.hourly-forecast-card)
        hourly: List[Dict[str, Any]] = []
        for card in soup.select(".ik.hourly-forecast-card")[:8]:
            t_el = card.select_one(".ik.hourly-forecast-hour")
            temp_el = card.select_one(".ik.temperature-circled")
            icon_el = card.select_one("img.ik.forecast-icon")

            t = t_el.get_text(strip=True) if t_el else ""
            temp = _to_int_temp(temp_el.get_text(strip=True) if temp_el else "")
            icon = _abs_url(icon_el.get("src") if icon_el else None)

            if t and temp is not None:
                hourly.append(
                    {
                        "time": t,          # e.g. "18:00"
                        "temp_c": temp,     # e.g. -2
                        "icon_url": icon,   # absolute URL
                    }
                )

        # Daily columns (bottom forecast table: .ik.daily-forecast-container .ik.dailyForecastCol)
        daily: List[Dict[str, Any]] = []
        for col in soup.select(".ik.daily-forecast-container .ik.dailyForecastCol")[:15]:
            dow_el = col.select_one(".ik.dfDay")
            icon_el = col.select_one("img.ik.forecast-icon")
            daynum_el = col.select_one(".ik.dfDayNum")

            # Normal structure (most days)
            tmax_el = col.select_one("div.ik.max")
            tmin_el = col.select_one("div.ik.min")

            daynum = daynum_el.get_text(strip=True) if daynum_el else ""
            dow = dow_el.get_text(strip=True) if dow_el else ""
            icon = _abs_url(icon_el.get("src") if icon_el else None)

            tmax = _to_int_temp(tmax_el.get_text(strip=True) if tmax_el else "")
            tmin = _to_int_temp(tmin_el.get_text(strip=True) if tmin_el else "")

            # Fallback structure (e.g. "vacation" days) where div.ik.max/min are missing
            # In those cases the visible temps are usually the first two numeric <a> texts
            # inside .ik.min-max-container (order: max, min).
            if tmax is None or tmin is None:
                vals: List[str] = []
                for a in col.select(".ik.min-max-container a"):
                    txt = a.get_text(strip=True)
                    if re.fullmatch(r"-?\d+", txt or ""):
                        vals.append(txt)

                if len(vals) >= 2:
                    tmax = _to_int_temp(vals[0])
                    tmin = _to_int_temp(vals[1])

            # Keep only rows that look valid
            if dow and (tmin is not None) and (tmax is not None):
                daily.append(
                    {
                        "daynum": daynum,
                        "dow": dow,  # e.g. "Cs", "P", "Sz"
                        "tmin_c": tmin,
                        "tmax_c": tmax,
                        "icon_url": icon,
                    }
                )

        # Limit to 5 days for your widget (first 5 columns in the table, including "vacation" days)
        daily = daily[:5]

        return {
            "source": {"name": SOURCE_NAME, "url": IDOKEP_URL},
            "location": {"name": PLACE_NAME},
            "current": {"temp_c": cur_temp, "condition": cur_cond, "icon_url": cur_icon},
            "hourly": hourly,
            "daily": daily,
            "fetched_at_unix": int(time.time()),
        }


    @APP.get("/api")
    def api():
        """Scrape Időkép, update the Prometheus gauges and return the data as JSON."""
        status = "ok"
        with SCRAPE_SECONDS.labels(place=PLACE_NAME).time():
            try:
                data = scrape()
            except Exception:
                status = "error"
                SCRAPES.labels(place=PLACE_NAME, status=status).inc()
                raise

        SCRAPES.labels(place=PLACE_NAME, status=status).inc()

        # Update Prometheus gauges (best-effort)
        try:
            if data.get("current", {}).get("temp_c") is not None:
                CURRENT_TEMP.labels(place=PLACE_NAME).set(float(data["current"]["temp_c"]))
            for d in data.get("daily", []):
                DAILY_TMIN.labels(place=PLACE_NAME, dow=d["dow"]).set(float(d["tmin_c"]))
                DAILY_TMAX.labels(place=PLACE_NAME, dow=d["dow"]).set(float(d["tmax_c"]))
            for h in data.get("hourly", []):
                HOURLY_TEMP.labels(place=PLACE_NAME, time=h["time"]).set(float(h["temp_c"]))
        except Exception:
            pass

        # IMPORTANT: force JSON content-type so Glance exposes `.JSON`
        return Response(content=json.dumps(data, ensure_ascii=False), media_type="application/json; charset=utf-8")


    @APP.get("/metrics")
    def metrics():
        """Expose the Prometheus registry in text exposition format."""
        return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST)


    # -------------------------------
    # Tandoor helpers
    # -------------------------------
    def _today_str() -> str:
        """Return today's date (ISO format) using Europe/Budapest day boundaries."""
        return datetime.now(tz=ZoneInfo("Europe/Budapest")).date().isoformat()


    def _load_json(path: Path, default):
        """Load JSON from `path`; return `default` if the file is missing or unusable."""
        try:
            with path.open("r", encoding="utf-8") as f:
                loaded = json.load(f)
        except Exception:
            return default
        # A payload of the wrong shape (e.g. a list where a dict is expected)
        # is treated like a missing file instead of failing later on `.get`.
        if default is not None and not isinstance(loaded, type(default)):
            return default
        return loaded


    def _save_json(path: Path, data) -> None:
        """Write `data` to `path` via a temp file + rename so readers never see a partial file."""
        path.parent.mkdir(parents=True, exist_ok=True)
        tmp = path.with_suffix(path.suffix + ".tmp")
        with tmp.open("w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        tmp.replace(path)


    def _tandoor_headers() -> Dict[str, str]:
        """Build request headers for Tandoor, adding the bearer token when configured."""
        token = os.getenv("TANDOOR_TOKEN", "")
        if not token:
            return {"Accept": "application/json"}
        return {"Accept": "application/json", "Authorization": f"Bearer {token}"}


    def _rewrite_to_public(maybe_url: Optional[str]) -> Optional[str]:
        """Make an image URL reachable from a browser by pointing it at TANDOOR_PUBLIC_URL."""
        if not maybe_url:
            return None

        # Relative path -> public
        if maybe_url.startswith("/"):
            return TANDOOR_PUBLIC_URL + maybe_url

        # If the API returns internal host URLs, rewrite scheme+host to public
        try:
            u = urlparse(maybe_url)
            pub = urlparse(TANDOOR_PUBLIC_URL)
            internal = urlparse(TANDOOR_INTERNAL_URL)
            if u.netloc and internal.netloc and u.netloc == internal.netloc:
                u = u._replace(scheme=pub.scheme, netloc=pub.netloc)
                return urlunparse(u)
        except Exception:
            pass

        return maybe_url


    def _normalize_recipes(items) -> List[Dict[str, Any]]:
        """Reduce Tandoor recipe objects to {id, name, image}, dropping entries without id or name."""
        out = []
        for x in items:
            out.append({
                "id": int(x.get("id", 0)),
                "name": str(x.get("name", "")),
                "image": _rewrite_to_public(x.get("image")),
            })
        return [x for x in out if x["id"] and x["name"]]


    def _fetch_recipes_flat() -> List[Dict[str, Any]]:
        """Fetch all recipes from Tandoor as a list of {id, name, image} dicts.

        Raises requests exceptions when the fallback request fails.
        """
        # Prefer /api/recipe/flat/ because it's already {id,name,image} list
        flat_url = f"{TANDOOR_INTERNAL_URL}/api/recipe/flat/"
        r = requests.get(flat_url, headers=_tandoor_headers(), timeout=15)
        if r.status_code == 200:
            data = r.json()
            # Expected: list
            if isinstance(data, list):
                return _normalize_recipes(data)

        # Fallback: paginated /api/recipe/
        list_url = f"{TANDOOR_INTERNAL_URL}/api/recipe/?page_size=250"
        r = requests.get(list_url, headers=_tandoor_headers(), timeout=15)
        r.raise_for_status()
        data = r.json()
        items = data.get("results", []) if isinstance(data, dict) else []
        return _normalize_recipes(items)


    def _get_cooked_for_today() -> List[int]:
        """Return the recipe ids marked as cooked today."""
        today = _today_str()
        cooked = _load_json(COOKED_PATH, {})
        ids = cooked.get(today, [])
        # normalize
        try:
            return [int(i) for i in ids]
        except Exception:
            return []


    def _set_cooked_today(ids: List[int]) -> None:
        """Persist today's cooked recipe ids (deduplicated, sorted)."""
        today = _today_str()
        cooked = _load_json(COOKED_PATH, {})
        cooked[today] = sorted(list({int(i) for i in ids}))
        # Optional cleanup: keep only last 14 days
        try:
            keys = sorted(cooked.keys())
            if len(keys) > 14:
                for k in keys[:-14]:
                    cooked.pop(k, None)
        except Exception:
            pass
        _save_json(COOKED_PATH, cooked)


    def _get_picks_today() -> List[int]:
        """Return the recipe ids picked for today."""
        today = _today_str()
        picks = _load_json(PICKS_PATH, {})
        ids = picks.get(today, [])
        try:
            return [int(i) for i in ids]
        except Exception:
            return []


    def _set_picks_today(ids: List[int]) -> None:
        """Persist today's picks, keeping their order."""
        today = _today_str()
        picks = _load_json(PICKS_PATH, {})
        picks[today] = [int(i) for i in ids if int(i) > 0]
        # cleanup old days
        try:
            keys = sorted(picks.keys())
            if len(keys) > 14:
                for k in keys[:-14]:
                    picks.pop(k, None)
        except Exception:
            pass
        _save_json(PICKS_PATH, picks)


    def _ensure_daily_picks(recipes: List[Dict[str, Any]], count: int) -> List[int]:
        """Return today's picks, topping them up to `count` and persisting the result."""
        cooked = set(_get_cooked_for_today())
        picks = _get_picks_today()

        # Remove picks that are cooked today
        picks = [i for i in picks if i not in cooked]

        # Top up to requested count if needed
        if len(picks) < count:
            available = [r["id"] for r in recipes if r["id"] not in cooked and r["id"] not in picks]
            # If everything is cooked (or too few recipes), allow repeats from all recipes
            if len(available) < (count - len(picks)):
                available = [r["id"] for r in recipes if r["id"] not in picks]

            need = max(0, count - len(picks))
            if need > 0 and available:
                picks += random.sample(available, k=min(need, len(available)))

        # If no picks yet (first call today), choose fresh
        if not picks:
            available = [r["id"] for r in recipes if r["id"] not in cooked]
            if not available:
                available = [r["id"] for r in recipes]
            picks = random.sample(available, k=min(count, len(available)))

        _set_picks_today(picks)
        return picks


    @APP.get("/tandoor/daily")
    def tandoor_daily(count: int = Query(3, ge=1, le=10)):
        """Return today's recipe picks with public recipe, image and "cook" URLs."""
        try:
            recipes = _fetch_recipes_flat()
        except Exception as e:
            raise HTTPException(status_code=502, detail=f"Failed to fetch recipes from Tandoor: {e}")

        if not recipes:
            return JSONResponse({"date": _today_str(), "total_recipes": 0, "items": []})

        ids = _ensure_daily_picks(recipes, count)
        by_id = {r["id"]: r for r in recipes}

        items = []
        for rid in ids:
            r = by_id.get(rid)
            if not r:
                continue
            items.append({
                "id": r["id"],
                "name": r["name"],
                "image": r.get("image"),
                "url": f"{TANDOOR_PUBLIC_URL}/recipe/{r['id']}",
                # state-changing endpoint requires key if set
                "cook_url": f"{GLANCE_HELPER_PUBLIC_URL}/tandoor/cook?id={r['id']}" + (f"&key={GLANCE_HELPER_KEY}" if GLANCE_HELPER_KEY else ""),
            })

        return JSONResponse({
            "date": _today_str(),
            "total_recipes": len(recipes),
            "items": items,
        })


    @APP.get("/tandoor/cook")
    def tandoor_cook(
        id: int = Query(..., ge=1),
        key: str = Query("", alias="key"),
        redirect: str = Query("", alias="redirect")
    ):
        """Mark recipe `id` as cooked today and drop it from today's picks.

        Responds 403 when GLANCE_HELPER_KEY is set and `key` does not match.
        """
        # Protect state-changing calls with a shared key (recommended).
        # Constant-time comparison avoids leaking the key through timing.
        if GLANCE_HELPER_KEY and not hmac.compare_digest(
            key.encode("utf-8"), GLANCE_HELPER_KEY.encode("utf-8")
        ):
            raise HTTPException(status_code=403, detail="Forbidden")

        cooked = set(_get_cooked_for_today())
        cooked.add(int(id))
        _set_cooked_today(list(cooked))

        # Also remove from today's picks (so daily list can refill)
        picks = [i for i in _get_picks_today() if i != int(id)]
        _set_picks_today(picks)

        if redirect:
            # NOTE(review): `redirect` is caller-supplied and not validated, so this
            # endpoint can redirect to any URL. Consider restricting it to known hosts.
            return RedirectResponse(url=redirect, status_code=302)

        return JSONResponse({"ok": True, "date": _today_str(), "cooked_today": sorted(list(cooked))})
---
# Service named idokep-scraper that routes to the glance-helper pods.
# NOTE(review): presumably kept so clients still using the old
# idokep-scraper:8000 address keep working - confirm before removing.
apiVersion: v1
kind: Service
metadata:
  namespace: glance-system
  name: idokep-scraper
spec:
  selector: {app: glance-helper}
  ports:
    - name: http
      targetPort: 8000
      port: 8000
---
# Cluster-internal Service for the glance-helper pods; this is the backend
# referenced by the glance-helper Ingress.
apiVersion: v1
kind: Service
metadata:
  namespace: glance-system
  name: glance-helper
spec:
  selector: {app: glance-helper}
  ports:
    - name: http
      targetPort: 8000
      port: 8000
---
# Exposes the glance-helper Service at https://glance-helper.dooplex.hu through
# the nginx-internal ingress class, with a cert-manager issued certificate.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: glance-helper
  namespace: glance-system
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
    # NOTE(review): glance-helper.home is published to DNS here but has no
    # matching rule below - confirm whether a rule for it is intended.
    external-dns.alpha.kubernetes.io/hostname: glance-helper.dooplex.hu,glance-helper.home
    # Annotation values are strings; the value must be exactly true/false.
    # The previous value carried literal double quotes inside the string,
    # which is not a parseable boolean.
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/proxy-body-size: 10m
spec:
  ingressClassName: nginx-internal
  rules:
    - host: glance-helper.dooplex.hu
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: glance-helper
                port:
                  number: 8000
  tls:
    - hosts:
        - glance-helper.dooplex.hu
      secretName: glance-helper-tls
-238
View File
@@ -1,238 +0,0 @@
# idokep-scraper: single-replica FastAPI scraper. The application source comes
# from the idokep-scraper-app ConfigMap, mounted at /app.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: glance-system
  name: idokep-scraper
spec:
  replicas: 1
  selector:
    matchLabels: {app: idokep-scraper}
  template:
    metadata:
      labels: {app: idokep-scraper}
    spec:
      volumes:
        - name: app
          configMap:
            name: idokep-scraper-app
      containers:
        - name: idokep-scraper
          image: python:3.12-slim
          imagePullPolicy: IfNotPresent
          workingDir: /app
          # Dependencies are installed at container start, then uvicorn
          # serves app:APP on port 8000.
          command:
            - /bin/sh
            - -lc
          args:
            - |
              pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 prometheus-client &&
              python -c "import uvicorn; uvicorn.run('app:APP', host='0.0.0.0', port=8000)"
          env:
            - name: IDOKEP_URL
              value: 'https://www.idokep.hu/idojaras/Budapest%20VII.%20ker'
            - name: PLACE_NAME
              value: 'Budapest VII. ker'
          ports:
            - containerPort: 8000
          volumeMounts:
            - mountPath: /app
              name: app
---
# ConfigMap holding app.py for the idokep-scraper Deployment, which mounts it
# at /app and starts it with uvicorn as `app:APP`.
# The app defines two routes: GET /api (scrapes IDOKEP_URL and returns the
# current, hourly and daily forecast as JSON) and GET /metrics (Prometheus).
# NOTE(review): in this view the embedded Python appears without its original
# indentation - restore it from the repository rather than copying from here.
apiVersion: v1
kind: ConfigMap
metadata:
name: idokep-scraper-app
namespace: glance-system
data:
app.py: |
import os
import time
import re
from typing import List, Dict, Any, Optional
import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI, Response
from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST
APP = FastAPI()
IDOKEP_URL = os.getenv(
"IDOKEP_URL",
"https://www.idokep.hu/idojaras/Budapest%20VIII.%20ker",
)
PLACE_NAME = os.getenv("PLACE_NAME", "Budapest VIII. ker")
SOURCE_NAME = "Időkép"
UA = os.getenv(
"USER_AGENT",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome Safari",
)
# Prometheus metrics (optional)
SCRAPES = Counter("idokep_scrapes_total", "Total Időkép scrapes", ["place", "status"])
SCRAPE_SECONDS = Histogram("idokep_scrape_seconds", "Időkép scrape duration in seconds", ["place"])
CURRENT_TEMP = Gauge("idokep_current_temp_c", "Current temperature in Celsius", ["place"])
DAILY_TMIN = Gauge("idokep_daily_tmin_c", "Daily minimum temperature in Celsius", ["place", "dow"])
DAILY_TMAX = Gauge("idokep_daily_tmax_c", "Daily maximum temperature in Celsius", ["place", "dow"])
HOURLY_TEMP = Gauge("idokep_hourly_temp_c", "Hourly temperature in Celsius", ["place", "time"])
def _abs_url(maybe_relative: Optional[str]) -> Optional[str]:
if not maybe_relative:
return None
if maybe_relative.startswith("http://") or maybe_relative.startswith("https://"):
return maybe_relative
# Időkép uses /assets/... paths
return "https://www.idokep.hu" + maybe_relative
def _to_int_temp(s: str) -> Optional[float]:
if not s:
return None
s = s.strip().replace("˚C", "").replace("°C", "").replace("°", "")
try:
return float(s)
except Exception:
return None
def scrape() -> Dict[str, Any]:
headers = {"User-Agent": UA}
r = requests.get(IDOKEP_URL, headers=headers, timeout=15)
r.raise_for_status()
soup = BeautifulSoup(r.text, "html.parser")
# Current
cur_temp_el = soup.select_one(".current-temperature")
cur_cond_el = soup.select_one(".current-weather")
cur_icon_el = soup.select_one(".forecast-bigicon")
cur_temp = _to_int_temp(cur_temp_el.get_text(strip=True) if cur_temp_el else "")
cur_cond = cur_cond_el.get_text(strip=True) if cur_cond_el else ""
cur_icon = _abs_url(cur_icon_el.get("src") if cur_icon_el else None)
# Hourly cards (the block you highlighted in devtools: .ik.hourly-forecast-card)
hourly: List[Dict[str, Any]] = []
for card in soup.select(".ik.hourly-forecast-card")[:8]:
t_el = card.select_one(".ik.hourly-forecast-hour")
temp_el = card.select_one(".ik.temperature-circled")
icon_el = card.select_one("img.ik.forecast-icon")
t = t_el.get_text(strip=True) if t_el else ""
temp = _to_int_temp(temp_el.get_text(strip=True) if temp_el else "")
icon = _abs_url(icon_el.get("src") if icon_el else None)
if t and temp is not None:
hourly.append(
{
"time": t, # e.g. "18:00"
"temp_c": temp, # e.g. -2
"icon_url": icon, # absolute URL
}
)
# Daily columns (bottom forecast table: .ik.daily-forecast-container .ik.dailyForecastCol)
daily: List[Dict[str, Any]] = []
for col in soup.select(".ik.daily-forecast-container .ik.dailyForecastCol")[:15]:
dow_el = col.select_one(".ik.dfDay")
icon_el = col.select_one("img.ik.forecast-icon")
daynum_el = col.select_one(".ik.dfDayNum")
# Normal structure (most days)
tmax_el = col.select_one("div.ik.max")
tmin_el = col.select_one("div.ik.min")
daynum = daynum_el.get_text(strip=True) if daynum_el else ""
dow = dow_el.get_text(strip=True) if dow_el else ""
icon = _abs_url(icon_el.get("src") if icon_el else None)
tmax = _to_int_temp(tmax_el.get_text(strip=True) if tmax_el else "")
tmin = _to_int_temp(tmin_el.get_text(strip=True) if tmin_el else "")
# Fallback structure (e.g. "vacation" days) where div.ik.max/min are missing
# In those cases the visible temps are usually the first two numeric <a> texts
# inside .ik.min-max-container (order: max, min).
if tmax is None or tmin is None:
vals: List[str] = []
for a in col.select(".ik.min-max-container a"):
txt = a.get_text(strip=True)
if re.fullmatch(r"-?\d+", txt or ""):
vals.append(txt)
if len(vals) >= 2:
tmax = _to_int_temp(vals[0])
tmin = _to_int_temp(vals[1])
# Keep only rows that look valid
if dow and (tmin is not None) and (tmax is not None):
daily.append(
{
"daynum": daynum,
"dow": dow, # e.g. "Cs", "P", "Sz"
"tmin_c": tmin,
"tmax_c": tmax,
"icon_url": icon,
}
)
# Limit to 5 days for your widget (first 5 columns in the table, including "vacation" days)
daily = daily[:5]
return {
"source": {"name": SOURCE_NAME, "url": IDOKEP_URL},
"location": {"name": PLACE_NAME},
"current": {"temp_c": cur_temp, "condition": cur_cond, "icon_url": cur_icon},
"hourly": hourly,
"daily": daily,
"fetched_at_unix": int(time.time()),
}
@APP.get("/api")
def api():
status = "ok"
with SCRAPE_SECONDS.labels(place=PLACE_NAME).time():
try:
data = scrape()
except Exception:
status = "error"
SCRAPES.labels(place=PLACE_NAME, status=status).inc()
raise
SCRAPES.labels(place=PLACE_NAME, status=status).inc()
# Update Prometheus gauges (best-effort)
try:
if data.get("current", {}).get("temp_c") is not None:
CURRENT_TEMP.labels(place=PLACE_NAME).set(float(data["current"]["temp_c"]))
for d in data.get("daily", []):
DAILY_TMIN.labels(place=PLACE_NAME, dow=d["dow"]).set(float(d["tmin_c"]))
DAILY_TMAX.labels(place=PLACE_NAME, dow=d["dow"]).set(float(d["tmax_c"]))
for h in data.get("hourly", []):
HOURLY_TEMP.labels(place=PLACE_NAME, time=h["time"]).set(float(h["temp_c"]))
except Exception:
pass
# IMPORTANT: force JSON content-type so Glance exposes `.JSON`
import json
return Response(content=json.dumps(data, ensure_ascii=False), media_type="application/json; charset=utf-8")
@APP.get("/metrics")
def metrics():
return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST)
---
# Cluster-internal Service in front of the idokep-scraper pods (port 8000).
apiVersion: v1
kind: Service
metadata:
  namespace: glance-system
  name: idokep-scraper
spec:
  selector: {app: idokep-scraper}
  ports:
    - name: http
      targetPort: 8000
      port: 8000