added glance-helper
This commit is contained in:
@@ -0,0 +1,314 @@
|
||||
# Volume claim backing the glance-helper pod's /data mount (the Deployment
# references it by claimName and points DATA_DIR at the mount path).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: glance-helper-data
  namespace: glance-system
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 200Mi
|
||||
---
|
||||
# Runs the glance-helper FastAPI app (app.py from the glance-helper-app
# ConfigMap) on a stock python:3.12-slim image; dependencies are installed
# with pip when the container starts.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: glance-helper
  namespace: glance-system
spec:
  replicas: 1
  # The pod mounts a ReadWriteOnce claim. Recreate stops the old pod before
  # the replacement starts, so a rollout never has two pods competing for
  # the same volume.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: glance-helper
  template:
    metadata:
      labels:
        app: glance-helper
    spec:
      containers:
        - name: glance-helper
          image: python:3.12-slim
          imagePullPolicy: IfNotPresent
          env:
            - name: IDOKEP_URL
              value: https://www.idokep.hu/idojaras/Budapest%20VII.%20ker
            - name: PLACE_NAME
              value: Budapest VII. ker
            - name: TANDOOR_INTERNAL_URL
              value: http://tandoor.tandoor-system.svc.cluster.local:8080
            - name: TANDOOR_PUBLIC_URL
              value: https://tandoor.dooplex.hu
            # Credentials must not live in the manifest. Both values are read
            # from a Secret that has to exist in this namespace before the pod
            # can start, e.g.:
            #   kubectl -n glance-system create secret generic glance-helper-secrets \
            #     --from-literal=TANDOOR_TOKEN=... --from-literal=GLANCE_HELPER_KEY=...
            # The token and key that were previously committed in plain text
            # should be treated as leaked and rotated.
            - name: TANDOOR_TOKEN
              valueFrom:
                secretKeyRef:
                  name: glance-helper-secrets
                  key: TANDOOR_TOKEN
            - name: GLANCE_HELPER_PUBLIC_URL
              value: https://glance-helper.dooplex.hu
            - name: DATA_DIR
              value: /data
            - name: GLANCE_HELPER_KEY
              valueFrom:
                secretKeyRef:
                  name: glance-helper-secrets
                  key: GLANCE_HELPER_KEY
          ports:
            - containerPort: 8000
          command:
            - /bin/sh
            - -lc
          args:
            - |
              pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 prometheus-client &&
              python -c "import uvicorn; uvicorn.run('app:APP', host='0.0.0.0', port=8000)"
          volumeMounts:
            - name: app
              mountPath: /app
            - name: data
              mountPath: /data
          workingDir: /app
      volumes:
        - name: app
          configMap:
            name: glance-helper-app
        - name: data
          persistentVolumeClaim:
            claimName: glance-helper-data
|
||||
---
|
||||
# Source of the glance-helper service. Mounted at /app by the glance-helper
# Deployment, which starts it with uvicorn as "app:APP".
apiVersion: v1
kind: ConfigMap
metadata:
  name: glance-helper-app
  namespace: glance-system
data:
  app.py: |
    """glance-helper: Időkép weather scraper plus Tandoor "recipe of the day" helper.

    Endpoints:
      GET /api            scraped weather as JSON
      GET /metrics        Prometheus metrics
      GET /tandoor/daily  today's recipe picks
      GET /tandoor/cook   mark a recipe as cooked today
    """
    import json
    import logging
    import math
    import os
    import random
    import re
    import secrets
    import time
    from datetime import datetime, timezone
    from pathlib import Path
    from typing import Any, Dict, List, Optional
    from urllib.parse import urlencode, urljoin, urlparse, urlunparse
    from zoneinfo import ZoneInfo, ZoneInfoNotFoundError

    import requests
    from bs4 import BeautifulSoup
    from fastapi import FastAPI, Response, Request, HTTPException, Query
    from fastapi.responses import JSONResponse, RedirectResponse
    from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST

    logger = logging.getLogger("glance-helper")

    APP = FastAPI()

    # -------------------------------
    # Configuration (all overridable through the environment)
    # -------------------------------
    IDOKEP_URL = os.getenv(
        "IDOKEP_URL",
        "https://www.idokep.hu/idojaras/Budapest%20VIII.%20ker",
    )
    PLACE_NAME = os.getenv("PLACE_NAME", "Budapest VIII. ker")
    SOURCE_NAME = "Időkép"

    UA = os.getenv(
        "USER_AGENT",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome Safari",
    )

    # Tandoor settings. Trailing slashes are stripped because the URLs are
    # joined below with "/api/..." and "/recipe/..." suffixes.
    TANDOOR_INTERNAL_URL = os.getenv("TANDOOR_INTERNAL_URL", "").rstrip("/")
    TANDOOR_PUBLIC_URL = os.getenv("TANDOOR_PUBLIC_URL", "").rstrip("/")
    GLANCE_HELPER_PUBLIC_URL = os.getenv("GLANCE_HELPER_PUBLIC_URL", "").rstrip("/")
    # Shared key guarding the state-changing /tandoor/cook endpoint; empty disables the check.
    GLANCE_HELPER_KEY = os.getenv("GLANCE_HELPER_KEY", "")

    # Per-day state ({"YYYY-MM-DD": [recipe_id, ...]}) kept as JSON files in DATA_DIR.
    DATA_DIR = Path(os.getenv("DATA_DIR", "/data"))
    COOKED_PATH = DATA_DIR / "cooked.json"
    PICKS_PATH = DATA_DIR / "picks.json"

    # "Today" is evaluated in Europe/Budapest. If the image ships no time zone
    # database, fall back to UTC instead of failing every request.
    try:
        LOCAL_TZ = ZoneInfo("Europe/Budapest")
    except ZoneInfoNotFoundError:
        logger.warning("No tz database found; day boundaries will use UTC")
        LOCAL_TZ = timezone.utc

    # Prometheus metrics (optional)
    SCRAPES = Counter("idokep_scrapes_total", "Total Időkép scrapes", ["place", "status"])
    SCRAPE_SECONDS = Histogram("idokep_scrape_seconds", "Időkép scrape duration in seconds", ["place"])
    CURRENT_TEMP = Gauge("idokep_current_temp_c", "Current temperature in Celsius", ["place"])
    DAILY_TMIN = Gauge("idokep_daily_tmin_c", "Daily minimum temperature in Celsius", ["place", "dow"])
    DAILY_TMAX = Gauge("idokep_daily_tmax_c", "Daily maximum temperature in Celsius", ["place", "dow"])
    HOURLY_TEMP = Gauge("idokep_hourly_temp_c", "Hourly temperature in Celsius", ["place", "time"])


    # -------------------------------
    # Időkép scraping
    # -------------------------------
    def _abs_url(maybe_relative: Optional[str]) -> Optional[str]:
        """Resolve an Időkép asset reference to an absolute URL (None for empty input)."""
        if not maybe_relative:
            return None
        # urljoin leaves absolute URLs untouched and also copes with
        # "assets/x" (no leading slash) and protocol-relative "//host/x".
        return urljoin("https://www.idokep.hu/", maybe_relative)


    def _to_int_temp(s: str) -> Optional[float]:
        """Parse a temperature label such as "-2°C" into a float; None if it is not a number."""
        if not s:
            return None
        cleaned = s.strip()
        for unit in ("˚C", "°C", "°", "˚"):
            cleaned = cleaned.replace(unit, "")
        # Accept the typographic minus sign and a decimal comma as well.
        cleaned = cleaned.replace("\u2212", "-").replace(",", ".").strip()
        try:
            value = float(cleaned)
        except ValueError:
            return None
        # NaN/inf would be emitted as invalid JSON by json.dumps.
        return value if math.isfinite(value) else None


    def _el_text(el) -> str:
        """Return the stripped text of a parsed element, or "" when it is missing."""
        return el.get_text(strip=True) if el else ""


    def _el_icon(el) -> Optional[str]:
        """Return the absolute URL of an element's ``src`` attribute, or None when missing."""
        return _abs_url(el.get("src") if el else None)


    def _scrape_current(soup) -> Dict[str, Any]:
        """Extract the current temperature, condition text and icon."""
        return {
            "temp_c": _to_int_temp(_el_text(soup.select_one(".current-temperature"))),
            "condition": _el_text(soup.select_one(".current-weather")),
            "icon_url": _el_icon(soup.select_one(".forecast-bigicon")),
        }


    def _scrape_hourly(soup, limit: int = 8) -> List[Dict[str, Any]]:
        """Extract up to *limit* hourly cards (.ik.hourly-forecast-card)."""
        hourly: List[Dict[str, Any]] = []
        for card in soup.select(".ik.hourly-forecast-card")[:limit]:
            label = _el_text(card.select_one(".ik.hourly-forecast-hour"))
            temp = _to_int_temp(_el_text(card.select_one(".ik.temperature-circled")))
            if not label or temp is None:
                continue
            hourly.append(
                {
                    "time": label,  # e.g. "18:00"
                    "temp_c": temp,  # e.g. -2
                    "icon_url": _el_icon(card.select_one("img.ik.forecast-icon")),  # absolute URL
                }
            )
        return hourly


    def _scrape_daily(soup, limit: int = 5) -> List[Dict[str, Any]]:
        """Extract the first *limit* valid columns of the daily forecast table."""
        daily: List[Dict[str, Any]] = []
        for col in soup.select(".ik.daily-forecast-container .ik.dailyForecastCol")[:15]:
            dow = _el_text(col.select_one(".ik.dfDay"))
            daynum = _el_text(col.select_one(".ik.dfDayNum"))

            # Normal structure (most days)
            tmax = _to_int_temp(_el_text(col.select_one("div.ik.max")))
            tmin = _to_int_temp(_el_text(col.select_one("div.ik.min")))

            # Fallback structure (e.g. "vacation" days) where div.ik.max/min are
            # missing: the visible temps are usually the first two numeric <a>
            # texts inside .ik.min-max-container (order: max, min).
            if tmax is None or tmin is None:
                vals = [
                    txt
                    for txt in (a.get_text(strip=True) for a in col.select(".ik.min-max-container a"))
                    if re.fullmatch(r"-?\d+", txt or "")
                ]
                if len(vals) >= 2:
                    tmax = _to_int_temp(vals[0])
                    tmin = _to_int_temp(vals[1])

            # Keep only rows that look valid
            if dow and tmin is not None and tmax is not None:
                daily.append(
                    {
                        "daynum": daynum,
                        "dow": dow,  # e.g. "Cs", "P", "Sz"
                        "tmin_c": tmin,
                        "tmax_c": tmax,
                        "icon_url": _el_icon(col.select_one("img.ik.forecast-icon")),
                    }
                )
        return daily[:limit]


    def scrape() -> Dict[str, Any]:
        """Fetch IDOKEP_URL and return current, hourly and daily weather as a dict.

        Raises requests exceptions on network errors or a non-2xx response.
        """
        r = requests.get(IDOKEP_URL, headers={"User-Agent": UA}, timeout=15)
        r.raise_for_status()
        soup = BeautifulSoup(r.text, "html.parser")
        return {
            "source": {"name": SOURCE_NAME, "url": IDOKEP_URL},
            "location": {"name": PLACE_NAME},
            "current": _scrape_current(soup),
            "hourly": _scrape_hourly(soup),
            "daily": _scrape_daily(soup),
            "fetched_at_unix": int(time.time()),
        }


    @APP.get("/api")
    def api():
        """Scrape Időkép, update the Prometheus gauges and return the data as JSON."""
        with SCRAPE_SECONDS.labels(place=PLACE_NAME).time():
            try:
                data = scrape()
            except Exception:
                SCRAPES.labels(place=PLACE_NAME, status="error").inc()
                raise

        SCRAPES.labels(place=PLACE_NAME, status="ok").inc()

        # Update Prometheus gauges (best-effort: a metrics problem must not fail the request)
        try:
            if data.get("current", {}).get("temp_c") is not None:
                CURRENT_TEMP.labels(place=PLACE_NAME).set(float(data["current"]["temp_c"]))
            for d in data.get("daily", []):
                DAILY_TMIN.labels(place=PLACE_NAME, dow=d["dow"]).set(float(d["tmin_c"]))
                DAILY_TMAX.labels(place=PLACE_NAME, dow=d["dow"]).set(float(d["tmax_c"]))
            for h in data.get("hourly", []):
                HOURLY_TEMP.labels(place=PLACE_NAME, time=h["time"]).set(float(h["temp_c"]))
        except Exception:
            logger.debug("Could not update weather gauges", exc_info=True)

        # IMPORTANT: force JSON content-type so Glance exposes `.JSON`
        return Response(
            content=json.dumps(data, ensure_ascii=False),
            media_type="application/json; charset=utf-8",
        )


    @APP.get("/metrics")
    def metrics():
        """Expose the Prometheus metrics registry."""
        return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST)


    # -------------------------------
    # Tandoor helpers
    # -------------------------------
    def _today_str() -> str:
        """Return today's date (ISO format) using LOCAL_TZ for the day boundary."""
        return datetime.now(tz=LOCAL_TZ).date().isoformat()


    def _load_json(path: Path, default):
        """Load JSON from *path*; return *default* if unreadable, invalid or of another type."""
        try:
            with path.open("r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError):
            return default
        # Callers index the result like *default*, so reject e.g. a list where a dict is expected.
        return data if isinstance(data, type(default)) else default


    def _save_json(path: Path, data) -> None:
        """Write *data* to *path* via a temp file + rename so readers never see a partial file."""
        path.parent.mkdir(parents=True, exist_ok=True)
        tmp = path.with_suffix(path.suffix + ".tmp")
        with tmp.open("w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        tmp.replace(path)


    def _tandoor_headers() -> Dict[str, str]:
        """Build request headers for Tandoor, adding a Bearer token when TANDOOR_TOKEN is set."""
        headers = {"Accept": "application/json"}
        token = os.getenv("TANDOOR_TOKEN", "")
        if token:
            headers["Authorization"] = f"Bearer {token}"
        return headers


    def _rewrite_to_public(maybe_url: Optional[str]) -> Optional[str]:
        """Turn a relative or internal-host Tandoor URL into one on TANDOOR_PUBLIC_URL."""
        if not maybe_url:
            return None

        # Relative path -> public
        if maybe_url.startswith("/"):
            return TANDOOR_PUBLIC_URL + maybe_url

        # If the API returns internal host URLs, rewrite scheme+host to public
        try:
            u = urlparse(maybe_url)
            pub = urlparse(TANDOOR_PUBLIC_URL)
            internal = urlparse(TANDOOR_INTERNAL_URL)
            if u.netloc and internal.netloc and u.netloc == internal.netloc:
                return urlunparse(u._replace(scheme=pub.scheme, netloc=pub.netloc))
        except ValueError:
            pass

        return maybe_url


    def _normalize_recipes(items) -> List[Dict[str, Any]]:
        """Reduce raw Tandoor recipe dicts to {id, name, image}, dropping entries without id or name."""
        out = []
        for x in items:
            out.append(
                {
                    # "or" also covers explicit nulls, which str()/int() would mishandle.
                    "id": int(x.get("id") or 0),
                    "name": str(x.get("name") or ""),
                    "image": _rewrite_to_public(x.get("image")),
                }
            )
        return [r for r in out if r["id"] and r["name"]]


    def _fetch_recipes_flat() -> List[Dict[str, Any]]:
        """Fetch the recipe list from Tandoor as [{id, name, image}, ...]."""
        # Prefer /api/recipe/flat/ because it's already {id,name,image} list
        r = requests.get(f"{TANDOOR_INTERNAL_URL}/api/recipe/flat/", headers=_tandoor_headers(), timeout=15)
        if r.status_code == 200:
            data = r.json()
            if isinstance(data, list):
                return _normalize_recipes(data)

        # Fallback: paginated /api/recipe/ (only the first page of 250 is read)
        r = requests.get(f"{TANDOOR_INTERNAL_URL}/api/recipe/?page_size=250", headers=_tandoor_headers(), timeout=15)
        r.raise_for_status()
        data = r.json()
        return _normalize_recipes(data.get("results", []) if isinstance(data, dict) else [])


    def _ids_for_today(path: Path) -> List[int]:
        """Return today's id list stored in the per-day JSON file at *path* ([] if unusable)."""
        ids = _load_json(path, {}).get(_today_str(), [])
        try:
            return [int(i) for i in ids]
        except (TypeError, ValueError):
            return []


    def _store_ids_for_today(path: Path, ids: List[int]) -> None:
        """Store *ids* under today's date in *path*, keeping only the 14 most recent days."""
        by_day = _load_json(path, {})
        by_day[_today_str()] = ids
        for stale in sorted(by_day)[:-14]:
            by_day.pop(stale, None)
        _save_json(path, by_day)


    def _get_cooked_for_today() -> List[int]:
        """Return the ids of recipes marked as cooked today."""
        return _ids_for_today(COOKED_PATH)


    def _set_cooked_today(ids: List[int]) -> None:
        """Persist today's cooked recipe ids (deduplicated and sorted)."""
        _store_ids_for_today(COOKED_PATH, sorted({int(i) for i in ids}))


    def _get_picks_today() -> List[int]:
        """Return the ids of today's recipe picks."""
        return _ids_for_today(PICKS_PATH)


    def _set_picks_today(ids: List[int]) -> None:
        """Persist today's picks in the given order, dropping non-positive ids."""
        _store_ids_for_today(PICKS_PATH, [int(i) for i in ids if int(i) > 0])


    def _ensure_daily_picks(recipes: List[Dict[str, Any]], count: int) -> List[int]:
        """Return today's picks, topping them up to *count* at random and persisting the result."""
        cooked = set(_get_cooked_for_today())

        # Remove picks that are cooked today
        picks = [i for i in _get_picks_today() if i not in cooked]

        # Top up to requested count if needed
        if len(picks) < count:
            available = [r["id"] for r in recipes if r["id"] not in cooked and r["id"] not in picks]
            # If everything is cooked (or too few recipes), allow repeats from all recipes
            if len(available) < (count - len(picks)):
                available = [r["id"] for r in recipes if r["id"] not in picks]

            need = max(0, count - len(picks))
            if need > 0 and available:
                picks += random.sample(available, k=min(need, len(available)))

        # If no picks yet (first call today), choose fresh
        if not picks:
            available = [r["id"] for r in recipes if r["id"] not in cooked]
            if not available:
                available = [r["id"] for r in recipes]
            picks = random.sample(available, k=min(count, len(available)))

        _set_picks_today(picks)
        return picks


    @APP.get("/tandoor/daily")
    def tandoor_daily(count: int = Query(3, ge=1, le=10)):
        """Return today's recipe picks with public recipe URLs and "mark cooked" links."""
        try:
            recipes = _fetch_recipes_flat()
        except Exception as e:
            raise HTTPException(status_code=502, detail=f"Failed to fetch recipes from Tandoor: {e}")

        if not recipes:
            return JSONResponse({"date": _today_str(), "total_recipes": 0, "items": []})

        ids = _ensure_daily_picks(recipes, count)
        by_id = {r["id"]: r for r in recipes}

        items = []
        for rid in ids:
            r = by_id.get(rid)
            if not r:
                continue
            # state-changing endpoint requires key if set
            # NOTE(review): this hands the shared key to every caller of this
            # unauthenticated endpoint - confirm that is acceptable.
            cook_params = {"id": r["id"]}
            if GLANCE_HELPER_KEY:
                cook_params["key"] = GLANCE_HELPER_KEY
            items.append(
                {
                    "id": r["id"],
                    "name": r["name"],
                    "image": r.get("image"),
                    "url": f"{TANDOOR_PUBLIC_URL}/recipe/{r['id']}",
                    # urlencode keeps the link valid even if the key has reserved characters
                    "cook_url": f"{GLANCE_HELPER_PUBLIC_URL}/tandoor/cook?{urlencode(cook_params)}",
                }
            )

        return JSONResponse(
            {
                "date": _today_str(),
                "total_recipes": len(recipes),
                "items": items,
            }
        )


    @APP.get("/tandoor/cook")
    def tandoor_cook(
        id: int = Query(..., ge=1),
        key: str = Query("", alias="key"),
        redirect: str = Query("", alias="redirect"),
    ):
        """Mark recipe *id* as cooked today and remove it from today's picks.

        Responds 403 when GLANCE_HELPER_KEY is set and *key* does not match it.
        Redirects (302) to *redirect* when given, otherwise returns a JSON summary.
        """
        # Protect state-changing calls with a shared key (recommended).
        # Compared as bytes in constant time; bytes also accept non-ASCII input.
        if GLANCE_HELPER_KEY and not secrets.compare_digest(
            key.encode("utf-8"), GLANCE_HELPER_KEY.encode("utf-8")
        ):
            raise HTTPException(status_code=403, detail="Forbidden")

        cooked = set(_get_cooked_for_today())
        cooked.add(int(id))
        _set_cooked_today(list(cooked))

        # Also remove from today's picks (so daily list can refill)
        _set_picks_today([i for i in _get_picks_today() if i != int(id)])

        if redirect:
            # NOTE(review): the target is caller-controlled (open redirect once the
            # key check passes) - consider restricting it to known dashboard hosts.
            return RedirectResponse(url=redirect, status_code=302)

        return JSONResponse({"ok": True, "date": _today_str(), "cooked_today": sorted(cooked)})
|
||||
---
|
||||
# Service published under the idokep-scraper name; its selector targets the
# glance-helper pods.
# NOTE(review): presumably kept so consumers of the former idokep-scraper
# deployment keep resolving - confirm before removing.
apiVersion: v1
kind: Service
metadata:
  name: idokep-scraper
  namespace: glance-system
spec:
  selector:
    app: glance-helper
  ports:
    - name: http
      port: 8000
      targetPort: 8000
|
||||
---
|
||||
# In-cluster Service for the glance-helper pods (port 8000 -> container port 8000).
apiVersion: v1
kind: Service
metadata:
  name: glance-helper
  namespace: glance-system
spec:
  selector:
    app: glance-helper
  ports:
    - name: http
      port: 8000
      targetPort: 8000
|
||||
---
|
||||
# Routes https://glance-helper.dooplex.hu to the glance-helper Service through
# the nginx-internal ingress class, with a certificate requested from the
# letsencrypt-prod cluster issuer.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: glance-helper
  namespace: glance-system
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
    # NOTE(review): glance-helper.home gets a DNS record here but has no rule or
    # TLS entry below - confirm whether that host is meant to be served.
    external-dns.alpha.kubernetes.io/hostname: glance-helper.dooplex.hu,glance-helper.home
    # Annotation values are strings; the value must be exactly true/false.
    # The previous value carried literal quote characters ('"true"'), which is
    # not a parseable boolean.
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/proxy-body-size: 10m
spec:
  ingressClassName: nginx-internal
  rules:
    - host: glance-helper.dooplex.hu
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: glance-helper
                port:
                  number: 8000
  tls:
    - hosts:
        - glance-helper.dooplex.hu
      secretName: glance-helper-tls
|
||||
@@ -1,238 +0,0 @@
|
||||
# Runs the Időkép scraper (app.py from the idokep-scraper-app ConfigMap) on a
# stock python:3.12-slim image; dependencies are installed with pip when the
# container starts.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: idokep-scraper
  namespace: glance-system
spec:
  replicas: 1
  selector:
    matchLabels:
      app: idokep-scraper
  template:
    metadata:
      labels:
        app: idokep-scraper
    spec:
      containers:
        - name: idokep-scraper
          image: python:3.12-slim
          imagePullPolicy: IfNotPresent
          env:
            - name: IDOKEP_URL
              value: "https://www.idokep.hu/idojaras/Budapest%20VII.%20ker"
            - name: PLACE_NAME
              value: "Budapest VII. ker"
          ports:
            - containerPort: 8000
          command: ["/bin/sh", "-lc"]
          args:
            - |
              pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 prometheus-client &&
              python -c "import uvicorn; uvicorn.run('app:APP', host='0.0.0.0', port=8000)"
          volumeMounts:
            - name: app
              mountPath: /app
          workingDir: /app
      volumes:
        - name: app
          configMap:
            name: idokep-scraper-app
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: idokep-scraper-app
|
||||
namespace: glance-system
|
||||
data:
|
||||
app.py: |
|
||||
import os
|
||||
import time
|
||||
import re
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from fastapi import FastAPI, Response
|
||||
from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST
|
||||
|
||||
# ASGI application object; the container starts uvicorn against "app:APP".
APP = FastAPI()

# Page to scrape and the place name reported for it (both overridable via env).
IDOKEP_URL = os.environ.get("IDOKEP_URL", "https://www.idokep.hu/idojaras/Budapest%20VIII.%20ker")
PLACE_NAME = os.environ.get("PLACE_NAME", "Budapest VIII. ker")
SOURCE_NAME = "Időkép"

# User-Agent header sent with the scrape request.
_DEFAULT_UA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome Safari"
UA = os.environ.get("USER_AGENT", _DEFAULT_UA)

# Prometheus metrics (optional)
SCRAPES = Counter(
    "idokep_scrapes_total",
    "Total Időkép scrapes",
    ["place", "status"],
)
SCRAPE_SECONDS = Histogram(
    "idokep_scrape_seconds",
    "Időkép scrape duration in seconds",
    ["place"],
)
CURRENT_TEMP = Gauge(
    "idokep_current_temp_c",
    "Current temperature in Celsius",
    ["place"],
)
DAILY_TMIN = Gauge(
    "idokep_daily_tmin_c",
    "Daily minimum temperature in Celsius",
    ["place", "dow"],
)
DAILY_TMAX = Gauge(
    "idokep_daily_tmax_c",
    "Daily maximum temperature in Celsius",
    ["place", "dow"],
)
HOURLY_TEMP = Gauge(
    "idokep_hourly_temp_c",
    "Hourly temperature in Celsius",
    ["place", "time"],
)
|
||||
|
||||
|
||||
def _abs_url(maybe_relative: Optional[str]) -> Optional[str]:
|
||||
if not maybe_relative:
|
||||
return None
|
||||
if maybe_relative.startswith("http://") or maybe_relative.startswith("https://"):
|
||||
return maybe_relative
|
||||
# Időkép uses /assets/... paths
|
||||
return "https://www.idokep.hu" + maybe_relative
|
||||
|
||||
|
||||
def _to_int_temp(s: str) -> Optional[float]:
|
||||
if not s:
|
||||
return None
|
||||
s = s.strip().replace("˚C", "").replace("°C", "").replace("°", "")
|
||||
try:
|
||||
return float(s)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _el_text(el) -> str:
    """Return the stripped text of a parsed element, or "" when it is missing."""
    return el.get_text(strip=True) if el else ""


def _el_icon(el) -> Optional[str]:
    """Return the absolute URL of an element's ``src`` attribute, or None when missing."""
    return _abs_url(el.get("src") if el else None)


def _scrape_current(soup) -> Dict[str, Any]:
    """Extract the current temperature, condition text and icon."""
    return {
        "temp_c": _to_int_temp(_el_text(soup.select_one(".current-temperature"))),
        "condition": _el_text(soup.select_one(".current-weather")),
        "icon_url": _el_icon(soup.select_one(".forecast-bigicon")),
    }


def _scrape_hourly(soup, limit: int = 8) -> List[Dict[str, Any]]:
    """Extract up to *limit* hourly cards (.ik.hourly-forecast-card)."""
    hourly: List[Dict[str, Any]] = []
    for card in soup.select(".ik.hourly-forecast-card")[:limit]:
        label = _el_text(card.select_one(".ik.hourly-forecast-hour"))
        temp = _to_int_temp(_el_text(card.select_one(".ik.temperature-circled")))
        # Skip cards without a time label or a parseable temperature.
        if not label or temp is None:
            continue
        hourly.append(
            {
                "time": label,  # e.g. "18:00"
                "temp_c": temp,  # e.g. -2
                "icon_url": _el_icon(card.select_one("img.ik.forecast-icon")),  # absolute URL
            }
        )
    return hourly


def _scrape_daily(soup, limit: int = 5) -> List[Dict[str, Any]]:
    """Extract the first *limit* valid columns of the daily forecast table."""
    daily: List[Dict[str, Any]] = []
    for col in soup.select(".ik.daily-forecast-container .ik.dailyForecastCol")[:15]:
        dow = _el_text(col.select_one(".ik.dfDay"))
        daynum = _el_text(col.select_one(".ik.dfDayNum"))

        # Normal structure (most days)
        tmax = _to_int_temp(_el_text(col.select_one("div.ik.max")))
        tmin = _to_int_temp(_el_text(col.select_one("div.ik.min")))

        # Fallback structure (e.g. "vacation" days) where div.ik.max/min are
        # missing: the visible temps are usually the first two numeric <a>
        # texts inside .ik.min-max-container (order: max, min).
        if tmax is None or tmin is None:
            vals = [
                txt
                for txt in (a.get_text(strip=True) for a in col.select(".ik.min-max-container a"))
                if re.fullmatch(r"-?\d+", txt or "")
            ]
            if len(vals) >= 2:
                tmax = _to_int_temp(vals[0])
                tmin = _to_int_temp(vals[1])

        # Keep only rows that look valid
        if dow and tmin is not None and tmax is not None:
            daily.append(
                {
                    "daynum": daynum,
                    "dow": dow,  # e.g. "Cs", "P", "Sz"
                    "tmin_c": tmin,
                    "tmax_c": tmax,
                    "icon_url": _el_icon(col.select_one("img.ik.forecast-icon")),
                }
            )
    # Limit to the first `limit` valid days for the widget (including "vacation" days)
    return daily[:limit]


def scrape() -> Dict[str, Any]:
    """Fetch IDOKEP_URL and return current, hourly and daily weather as a dict.

    The result has the keys "source", "location", "current", "hourly", "daily"
    and "fetched_at_unix". Raises requests exceptions on network errors or a
    non-2xx response.
    """
    r = requests.get(IDOKEP_URL, headers={"User-Agent": UA}, timeout=15)
    r.raise_for_status()

    soup = BeautifulSoup(r.text, "html.parser")

    return {
        "source": {"name": SOURCE_NAME, "url": IDOKEP_URL},
        "location": {"name": PLACE_NAME},
        "current": _scrape_current(soup),
        "hourly": _scrape_hourly(soup),
        "daily": _scrape_daily(soup),
        "fetched_at_unix": int(time.time()),
    }
|
||||
|
||||
|
||||
@APP.get("/api")
def api():
    """Scrape Időkép, update the Prometheus gauges and return the data as JSON.

    A failing scrape is counted under status="error" and the exception is
    re-raised; a successful one is counted under status="ok".
    """
    import json

    place = PLACE_NAME

    # Time the scrape whether or not it succeeds.
    with SCRAPE_SECONDS.labels(place=place).time():
        try:
            payload = scrape()
        except Exception:
            SCRAPES.labels(place=place, status="error").inc()
            raise

    SCRAPES.labels(place=place, status="ok").inc()

    # Gauges are best-effort: a metrics problem must not fail the request.
    try:
        current_temp = payload.get("current", {}).get("temp_c")
        if current_temp is not None:
            CURRENT_TEMP.labels(place=place).set(float(current_temp))
        for day in payload.get("daily", []):
            day_of_week = day["dow"]
            DAILY_TMIN.labels(place=place, dow=day_of_week).set(float(day["tmin_c"]))
            DAILY_TMAX.labels(place=place, dow=day_of_week).set(float(day["tmax_c"]))
        for hour in payload.get("hourly", []):
            HOURLY_TEMP.labels(place=place, time=hour["time"]).set(float(hour["temp_c"]))
    except Exception:
        pass

    # IMPORTANT: force JSON content-type so Glance exposes `.JSON`
    body = json.dumps(payload, ensure_ascii=False)
    return Response(content=body, media_type="application/json; charset=utf-8")
|
||||
|
||||
|
||||
@APP.get("/metrics")
def metrics():
    """Return the current Prometheus metrics in the exposition format."""
    exposition = generate_latest()
    return Response(exposition, media_type=CONTENT_TYPE_LATEST)
|
||||
---
|
||||
# In-cluster Service for the idokep-scraper pods (port 8000 -> container port 8000).
apiVersion: v1
kind: Service
metadata:
  name: idokep-scraper
  namespace: glance-system
spec:
  selector:
    app: idokep-scraper
  ports:
    - name: http
      port: 8000
      targetPort: 8000
|
||||
Reference in New Issue
Block a user