Files
homelab-manifests/glance-system/glance-helper.yaml
T
2026-01-15 12:01:23 +01:00

476 lines
17 KiB
YAML

# Volume claim referenced by the glance-helper Deployment (claimName: glance-helper-data).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: glance-helper-data
  namespace: glance-system
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 200Mi
---
apiVersion: v1
kind: ConfigMap
metadata:
name: glance-helper-app
namespace: glance-system
data:
app.py: |-
import os
import time
import re
from typing import List, Dict, Any, Optional
import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI, Response
from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST
# ASGI application object; the Deployment in this manifest starts it with `uvicorn app:APP`.
APP = FastAPI()
# Page downloaded by scrape(); override with the IDOKEP_URL environment variable.
IDOKEP_URL = os.getenv(
    "IDOKEP_URL",
    "https://www.idokep.hu/idojaras/Budapest%20VII.%20ker",
)
# Location name returned in the /api payload and used as the `place` label on every metric.
PLACE_NAME = os.getenv("PLACE_NAME", "Budapest VII. ker")
# Source name returned in the /api payload.
SOURCE_NAME = "Időkép"
# User-Agent header sent with the scrape request; override with USER_AGENT.
UA = os.getenv(
    "USER_AGENT",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome Safari",
)
# Prometheus metrics (optional)
# Scrape counter; `status` is set to "ok" or "error" by the /api handler.
SCRAPES = Counter("idokep_scrapes_total", "Total Időkép scrapes", ["place", "status"])
# Wall-clock duration of each scrape, observed by the /api handler.
SCRAPE_SECONDS = Histogram("idokep_scrape_seconds", "Időkép scrape duration in seconds", ["place"])
CURRENT_TEMP = Gauge("idokep_current_temp_c", "Current temperature in Celsius", ["place"])
# `dow` is the day-of-week text scraped from the page (e.g. "Cs", "P", "Sz").
DAILY_TMIN = Gauge("idokep_daily_tmin_c", "Daily minimum temperature in Celsius", ["place", "dow"])
DAILY_TMAX = Gauge("idokep_daily_tmax_c", "Daily maximum temperature in Celsius", ["place", "dow"])
# `time` is the hour text scraped from the page (e.g. "18:00").
HOURLY_TEMP = Gauge("idokep_hourly_temp_c", "Hourly temperature in Celsius", ["place", "time"])
def _abs_url(maybe_relative: Optional[str]) -> Optional[str]:
if not maybe_relative:
return None
if maybe_relative.startswith("http://") or maybe_relative.startswith("https://"):
return maybe_relative
# Időkép uses /assets/... paths
return "https://www.idokep.hu" + maybe_relative
def _to_int_temp(s: str) -> Optional[float]:
if not s:
return None
s = s.strip().replace("˚C", "").replace("°C", "").replace("°", "")
try:
return float(s)
except Exception:
return None
def _el_text(el) -> str:
    """Return the stripped text of a parsed element, or "" when the element is missing."""
    return el.get_text(strip=True) if el else ""


def _el_src(el) -> Optional[str]:
    """Return the element's ``src`` attribute as an absolute URL, or None when it is missing."""
    return _abs_url(el.get("src") if el else None)


def _parse_hourly(soup) -> List[Dict[str, Any]]:
    """Extract the first eight hourly forecast cards (``.ik.hourly-forecast-card``)."""
    hourly: List[Dict[str, Any]] = []
    for card in soup.select(".ik.hourly-forecast-card")[:8]:
        t = _el_text(card.select_one(".ik.hourly-forecast-hour"))
        temp = _to_int_temp(_el_text(card.select_one(".ik.temperature-circled")))
        icon = _el_src(card.select_one("img.ik.forecast-icon"))
        # A card without both an hour label and a parsable temperature is skipped.
        if t and temp is not None:
            hourly.append(
                {
                    "time": t,  # e.g. "18:00"
                    "temp_c": temp,  # e.g. -2
                    "icon_url": icon,  # absolute URL
                }
            )
    return hourly


def _parse_daily(soup) -> List[Dict[str, Any]]:
    """Extract up to five valid daily forecast columns from the bottom forecast table."""
    daily: List[Dict[str, Any]] = []
    for col in soup.select(".ik.daily-forecast-container .ik.dailyForecastCol")[:15]:
        daynum = _el_text(col.select_one(".ik.dfDayNum"))
        dow = _el_text(col.select_one(".ik.dfDay"))
        icon = _el_src(col.select_one("img.ik.forecast-icon"))
        # Normal structure (most days)
        tmax = _to_int_temp(_el_text(col.select_one("div.ik.max")))
        tmin = _to_int_temp(_el_text(col.select_one("div.ik.min")))
        # Fallback structure (e.g. "vacation" days) where div.ik.max/min are missing.
        # In those cases the visible temps are usually the first two numeric <a> texts
        # inside .ik.min-max-container (order: max, min).
        if tmax is None or tmin is None:
            vals: List[str] = []
            for a in col.select(".ik.min-max-container a"):
                txt = a.get_text(strip=True)
                if re.fullmatch(r"-?\d+", txt or ""):
                    vals.append(txt)
            if len(vals) >= 2:
                tmax = _to_int_temp(vals[0])
                tmin = _to_int_temp(vals[1])
        # Keep only rows that look valid
        if dow and (tmin is not None) and (tmax is not None):
            daily.append(
                {
                    "daynum": daynum,
                    "dow": dow,  # e.g. "Cs", "P", "Sz"
                    "tmin_c": tmin,
                    "tmax_c": tmax,
                    "icon_url": icon,
                }
            )
    # Limit to 5 days for the widget (first 5 valid columns, including "vacation" days)
    return daily[:5]


def scrape() -> Dict[str, Any]:
    """Download the Időkép page at ``IDOKEP_URL`` and extract current, hourly and daily data.

    Returns:
        A dict with the keys ``source``, ``location``, ``current``, ``hourly``,
        ``daily`` and ``fetched_at_unix`` (integer seconds from ``time.time()``).
        Values that cannot be found on the page come back as ``None`` / ``""``;
        incomplete hourly cards and daily columns are dropped.

    Raises:
        requests.RequestException: on network failure, timeout (15 s), or a
            non-success HTTP status (via ``raise_for_status``).
    """
    headers = {"User-Agent": UA}
    r = requests.get(IDOKEP_URL, headers=headers, timeout=15)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")
    # Current conditions
    cur_temp = _to_int_temp(_el_text(soup.select_one(".current-temperature")))
    cur_cond = _el_text(soup.select_one(".current-weather"))
    cur_icon = _el_src(soup.select_one(".forecast-bigicon"))
    return {
        "source": {"name": SOURCE_NAME, "url": IDOKEP_URL},
        "location": {"name": PLACE_NAME},
        "current": {"temp_c": cur_temp, "condition": cur_cond, "icon_url": cur_icon},
        "hourly": _parse_hourly(soup),
        "daily": _parse_daily(soup),
        "fetched_at_unix": int(time.time()),
    }
@APP.get("/api")
def api():
    """Scrape Időkép, refresh the Prometheus gauges, and return the data as JSON.

    The scrape is timed with ``SCRAPE_SECONDS`` and counted in ``SCRAPES`` with
    ``status`` "ok" or "error". A scrape failure is re-raised unchanged after it
    has been counted. Gauge updates are best-effort: a failure there is logged
    and does not affect the response.
    """
    import json
    import logging

    status = "ok"
    with SCRAPE_SECONDS.labels(place=PLACE_NAME).time():
        try:
            data = scrape()
        except Exception:
            status = "error"
            raise
        finally:
            # Runs on both paths, so each request increments exactly one status.
            SCRAPES.labels(place=PLACE_NAME, status=status).inc()
    # Update Prometheus gauges (best-effort)
    try:
        if data.get("current", {}).get("temp_c") is not None:
            CURRENT_TEMP.labels(place=PLACE_NAME).set(float(data["current"]["temp_c"]))
        for d in data.get("daily", []):
            DAILY_TMIN.labels(place=PLACE_NAME, dow=d["dow"]).set(float(d["tmin_c"]))
            DAILY_TMAX.labels(place=PLACE_NAME, dow=d["dow"]).set(float(d["tmax_c"]))
        for h in data.get("hourly", []):
            HOURLY_TEMP.labels(place=PLACE_NAME, time=h["time"]).set(float(h["temp_c"]))
    except Exception:
        # Metrics must never break the widget, but a silent failure hides bugs.
        logging.getLogger(__name__).warning("Failed to update Prometheus gauges", exc_info=True)
    # IMPORTANT: force JSON content-type so Glance exposes `.JSON`
    return Response(content=json.dumps(data, ensure_ascii=False), media_type="application/json; charset=utf-8")
@APP.get("/metrics")
def metrics():
    """Serve the output of ``generate_latest()`` with the Prometheus content type."""
    payload = generate_latest()
    return Response(content=payload, media_type=CONTENT_TYPE_LATEST)
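# NOTE: the lines below are a stray, column-truncated paste of a Kubernetes
# Service manifest. They are not Python, so they are kept only as a comment
# (with the cut-off characters restored) to keep app.py importable.
#   apiVersion: v1
#   kind: Service
#   metadata:
#     name: idokep-scraper
#     namespace: glance-system
#   spec:
#     selector:
#       app: idokep-scraper
#     ports:
#       - name: http
#         port: 8000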
# -------------------------
# Tandoor "Meal of the Day"
# -------------------------
from datetime import datetime, timezone
from zoneinfo import ZoneInfo
from urllib.parse import urlencode
from fastapi import HTTPException, Query
from fastapi.responses import RedirectResponse
import json
import random
from pathlib import Path
# Base URL used for API calls to Tandoor (trailing slash removed).
TANDOOR_INTERNAL_URL = os.getenv("TANDOOR_INTERNAL_URL", "").rstrip("/")
# Base URL used when building recipe links and rewriting image URLs for the browser.
TANDOOR_PUBLIC_URL = os.getenv("TANDOOR_PUBLIC_URL", "").rstrip("/")
# Base URL of this service, used to build the "cook" links returned by /tandoor/daily.
GLANCE_HELPER_PUBLIC_URL = os.getenv("GLANCE_HELPER_PUBLIC_URL", "").rstrip("/")
# Shared key required by /tandoor/cook; when empty, the key check is skipped.
GLANCE_HELPER_KEY = os.getenv("GLANCE_HELPER_KEY", "")
# Directory for the JSON state files; created at import time if it does not exist.
DATA_DIR = Path(os.getenv("DATA_DIR", "/data"))
DATA_DIR.mkdir(parents=True, exist_ok=True)
# Maps a date string (YYYY-MM-DD) to the list of recipe ids marked as cooked that day.
COOKED_PATH = DATA_DIR / "tandoor-cooked.json"
# Holds the current picks as {"date": ..., "count": ..., "ids": [...]}.
PICKS_PATH = DATA_DIR / "tandoor-picks.json"
def _today_str() -> str:
"""YYYY-MM-DD in Europe/Budapest if tzdata exists, else UTC."""
try:
tz = ZoneInfo("Europe/Budapest")
return datetime.now(tz).strftime("%Y-%m-%d")
except Exception:
return datetime.now(timezone.utc).strftime("%Y-%m-%d")
def _load_json(path: Path, default):
try:
if path.exists():
return json.loads(path.read_text(encoding="utf-8"))
except Exception:
pass
return default
def _save_json(path: Path, obj) -> None:
tmp = path.with_suffix(path.suffix + ".tmp")
tmp.write_text(json.dumps(obj, ensure_ascii=False, indent=2), encoding="utf-8")
tmp.replace(path)
def _tandoor_headers():
token = os.getenv("TANDOOR_TOKEN", "")
if not token:
raise HTTPException(status_code=500, detail="TANDOOR_TOKEN is not set")
return {"Authorization": f"Bearer {token}", "Accept": "application/json"}
def _rewrite_to_public(url: str) -> str:
"""Turn internal URLs into public ones (images/links)."""
if not url:
return url
if TANDOOR_PUBLIC_URL and TANDOOR_INTERNAL_URL and url.startswith(TANDOOR_INTERNAL_URL):
return TANDOOR_PUBLIC_URL + url[len(TANDOOR_INTERNAL_URL):]
return url
def _fetch_all_recipes() -> list[dict]:
    """Fetch every recipe from Tandoor's ``/api/recipe/`` endpoint.

    Follows the ``next`` link of each page (at most 100 pages, 200 items
    requested per page). A bare JSON list is accepted as a non-paginated reply.

    Returns:
        The recipe objects from all pages, in response order.

    Raises:
        HTTPException: 500 when TANDOOR_INTERNAL_URL or TANDOOR_TOKEN is not
            set; 502 when Tandoor is unreachable, answers with a non-200
            status, or returns something that is not the expected JSON.
    """
    if not TANDOOR_INTERNAL_URL:
        raise HTTPException(status_code=500, detail="TANDOOR_INTERNAL_URL is not set")
    # The headers are the same for every page, so build them once.
    headers = _tandoor_headers()
    # Prefer paginated /api/recipe/
    url = f"{TANDOOR_INTERNAL_URL}/api/recipe/?page_size=200"
    out: list[dict] = []
    for _ in range(100):  # safety
        try:
            r = requests.get(url, headers=headers, timeout=15)
        except requests.RequestException as exc:
            # Report upstream connectivity problems like any other upstream failure.
            raise HTTPException(
                status_code=502, detail=f"Tandoor request failed: {type(exc).__name__}"
            ) from exc
        if r.status_code != 200:
            raise HTTPException(status_code=502, detail=f"Tandoor returned {r.status_code}: {r.text[:200]}")
        try:
            j = r.json()
        except ValueError as exc:
            raise HTTPException(status_code=502, detail="Tandoor returned a non-JSON response") from exc
        if isinstance(j, list):
            # Non-paginated reply: the body is the full result set.
            out.extend(j)
            break
        if not isinstance(j, dict):
            raise HTTPException(status_code=502, detail="Tandoor returned an unexpected JSON payload")
        out.extend(j.get("results", []))
        # `next` may be absolute internal URL already; keep as-is.
        url = j.get("next")
        if not url:
            break
    return out
def _compute_daily_picks(count: int) -> dict:
    """Return today's picks document, creating and storing it if needed.

    The stored document is reused while its date is today, its ``ids`` value is
    a list, and its ``count`` equals the requested ``count``. Otherwise a new
    random selection is drawn, saved to ``PICKS_PATH`` and returned.

    Returns:
        A dict of the form ``{"date": ..., "count": ..., "ids": [...]}``.
    """
    today = _today_str()
    cooked_by_day = _load_json(COOKED_PATH, {})
    cooked_today = set(cooked_by_day.get(today, []))
    stored = _load_json(PICKS_PATH, {})
    still_valid = (
        stored.get("date") == today
        and isinstance(stored.get("ids"), list)
        and stored.get("count") == count
    )
    if still_valid:
        return stored  # stable for the day
    recipes = _fetch_all_recipes()
    # The draw itself is unseeded; the picks stay stable only because the
    # result is written to disk and reused for the rest of the day.
    pool = [rec for rec in recipes if rec.get("id") not in cooked_today]
    if not pool:
        pool = recipes[:]  # fallback: if everything cooked, ignore filter
    if pool:
        chosen = random.sample(pool, k=min(count, len(pool)))
    else:
        chosen = []
    picked_ids = [rec.get("id") for rec in chosen if rec.get("id") is not None]
    fresh = {"date": today, "count": count, "ids": picked_ids}
    _save_json(PICKS_PATH, fresh)
    return fresh
def _build_items_from_ids(ids: list[int]) -> tuple[list[dict], int]:
    """Resolve recipe ids into widget items.

    Fetches the full recipe list and, for every id that is still present,
    builds a dict with ``id``, ``name``, ``image``, ``url`` and ``cook_url``.
    Ids that are not found are skipped.

    Returns:
        ``(items, total)`` where ``total`` is the number of recipes fetched.
    """
    recipes = _fetch_all_recipes()
    by_id = {}
    for rec in recipes:
        rec_id = rec.get("id")
        if rec_id is not None:
            by_id[rec_id] = rec
    items = []
    for rid in ids:
        recipe = by_id.get(rid)
        if not recipe:
            continue
        # The link to this service carries the shared key only when one is configured.
        cook_params = {"id": rid}
        if GLANCE_HELPER_KEY:
            cook_params["key"] = GLANCE_HELPER_KEY
        if GLANCE_HELPER_PUBLIC_URL:
            cook_url = f"{GLANCE_HELPER_PUBLIC_URL}/tandoor/cook?{urlencode(cook_params)}"
        else:
            cook_url = ""
        if TANDOOR_PUBLIC_URL:
            url = f"{TANDOOR_PUBLIC_URL}/recipe/{rid}"
        else:
            url = ""
        items.append(
            {
                "id": rid,
                "name": recipe.get("name") or "",
                "image": _rewrite_to_public(recipe.get("image") or ""),
                "url": url,
                "cook_url": cook_url,
            }
        )
    return items, len(recipes)
@APP.get("/tandoor/daily")
def tandoor_daily(count: int = Query(3, ge=1, le=10)):
    """Return today's recipe picks (1 to 10, default 3) with the total recipe count."""
    picks = _compute_daily_picks(count)
    items, total = _build_items_from_ids(picks.get("ids", []))
    response = {"date": picks.get("date")}
    response["total_recipes"] = total
    response["items"] = items
    return response
@APP.get("/tandoor/cook")
def tandoor_cook(id: int, key: str | None = None, redirect: str | None = None):
    """Mark a recipe as cooked today and replace it in today's picks.

    Args:
        id: Recipe id to mark as cooked.
        key: Shared key; must equal GLANCE_HELPER_KEY when that is configured.
        redirect: Optional URL to redirect to (302) after the update.

    Returns:
        A 302 redirect when ``redirect`` is given, otherwise
        ``{"ok": True, "date": ..., "cooked": id}``.

    Raises:
        HTTPException: 403 when a key is configured and ``key`` does not match.
    """
    # Imported locally so the handler does not rely on the file's import header.
    import hmac

    # Compare in constant time so response timing does not leak the key.
    # Encoding to bytes keeps compare_digest from rejecting non-ASCII input.
    if GLANCE_HELPER_KEY and not hmac.compare_digest(
        (key or "").encode("utf-8"), GLANCE_HELPER_KEY.encode("utf-8")
    ):
        raise HTTPException(status_code=403, detail="Invalid key")
    today = _today_str()
    cooked = _load_json(COOKED_PATH, {})
    cooked_today = set(cooked.get(today, []))
    cooked_today.add(id)
    cooked[today] = sorted(cooked_today)
    _save_json(COOKED_PATH, cooked)
    # Remove from today's picks and try to refill to keep count
    picks = _load_json(PICKS_PATH, {})
    if picks.get("date") == today and isinstance(picks.get("ids"), list):
        ids = [x for x in picks["ids"] if x != id]
        target = int(picks.get("count") or len(ids))
        if len(ids) < target:
            recipes = _fetch_all_recipes()
            avoid = set(ids) | cooked_today
            candidates = [r.get("id") for r in recipes if r.get("id") is not None and r.get("id") not in avoid]
            if candidates:
                ids.append(random.choice(candidates))
        picks["ids"] = ids[:target]
        _save_json(PICKS_PATH, picks)
    if redirect:
        # NOTE(review): `redirect` is caller-controlled, so this is an open
        # redirect. Consider an allow-list once the legitimate targets are known.
        return RedirectResponse(url=redirect, status_code=302)
    return {"ok": True, "date": today, "cooked": id}
---
# Runs the helper app from the glance-helper-app ConfigMap inside a stock Python image.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: glance-helper
  namespace: glance-system
  labels:
    app.kubernetes.io/name: glance-helper
spec:
  replicas: 1
  # The data volume is ReadWriteOnce and the app keeps its state in plain JSON
  # files, so the old pod must be gone before the new one starts.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/name: glance-helper
  template:
    metadata:
      labels:
        app.kubernetes.io/name: glance-helper
    spec:
      containers:
        - name: glance-helper
          image: python:3.12-bookworm
          ports:
            - containerPort: 8000
          env:
            - name: IDOKEP_URL
              value: "https://www.idokep.hu/idojaras/Budapest%20VII.%20ker"
            - name: PLACE_NAME
              value: "Budapest VII. ker"
            - name: TZ
              value: "Europe/Budapest"
            - name: TANDOOR_INTERNAL_URL
              value: "http://tandoor.tandoor-system.svc.cluster.local:8080"
            - name: TANDOOR_PUBLIC_URL
              value: "https://tandoor.dooplex.hu"
            - name: GLANCE_HELPER_PUBLIC_URL
              value: "https://glance-helper.dooplex.hu"
            # Credentials must not live in the manifest. Create the Secret out of
            # band before applying this Deployment, e.g.:
            #   kubectl -n glance-system create secret generic glance-helper-secrets \
            #     --from-literal=GLANCE_HELPER_KEY=... --from-literal=TANDOOR_TOKEN=...
            # The values that used to be inline here have been exposed and
            # should be rotated.
            - name: GLANCE_HELPER_KEY
              valueFrom:
                secretKeyRef:
                  name: glance-helper-secrets
                  key: GLANCE_HELPER_KEY
            - name: DATA_DIR
              value: "/data"
            - name: TANDOOR_TOKEN
              valueFrom:
                secretKeyRef:
                  name: glance-helper-secrets
                  key: TANDOOR_TOKEN
          # Dependencies are installed at container start, so the port opens late;
          # keep the pod out of the Services until the app answers.
          readinessProbe:
            httpGet:
              path: /metrics
              port: 8000
            periodSeconds: 10
            failureThreshold: 3
          volumeMounts:
            - name: app
              mountPath: /app
            - name: data
              mountPath: /data
          command: ["/bin/bash", "-lc"]
          args:
            - |
              set -e
              apt-get update
              DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends curl tzdata dnsutils iputils-ping
              pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 prometheus_client
              cd /app
              uvicorn app:APP --host 0.0.0.0 --port 8000
      volumes:
        - name: app
          configMap:
            name: glance-helper-app
        - name: data
          persistentVolumeClaim:
            claimName: glance-helper-data
---
# Cluster-internal Service for the helper pods; the Ingress in this manifest routes to it.
apiVersion: v1
kind: Service
metadata:
  name: glance-helper
  namespace: glance-system
spec:
  selector:
    app.kubernetes.io/name: glance-helper
  ports:
    - name: http
      port: 8000
      targetPort: 8000
---
# Second Service name that selects the same glance-helper pods.
# NOTE(review): presumably kept so existing clients of the old name keep working - confirm.
apiVersion: v1
kind: Service
metadata:
  name: idokep-scraper
  namespace: glance-system
spec:
  selector:
    app.kubernetes.io/name: glance-helper
  ports:
    - name: http
      port: 8000
      targetPort: 8000
---
# Routes glance-helper.dooplex.hu to the glance-helper Service over TLS.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: glance-helper
  namespace: glance-system
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
    external-dns.alpha.kubernetes.io/hostname: glance-helper.dooplex.hu
    # Annotation values must be strings, hence the quotes.
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
spec:
  ingressClassName: nginx-internal
  rules:
    - host: glance-helper.dooplex.hu
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: glance-helper
                port:
                  number: 8000
  tls:
    - hosts:
        - glance-helper.dooplex.hu
      secretName: glance-helper-tls