# File: homelab-manifests/glance-system/glance-helper.yaml
# Snapshot: 2026-01-15 11:41:44 +01:00 (564 lines, 20 KiB, YAML)
# Persistent storage claim for glance-helper state (200Mi, single-node read/write).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: glance-system
  name: glance-helper-data
spec:
  resources:
    requests:
      storage: 200Mi
  accessModes: [ReadWriteOnce]
---
# glance-helper: single-replica Python service listening on port 8000.
# The application source is mounted from the glance-helper-app ConfigMap at /app
# and its state lives on the glance-helper-data PVC at /data.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: glance-helper
  namespace: glance-system
spec:
  replicas: 1
  # The data volume is ReadWriteOnce: stop the old pod before starting the new
  # one so a rollout cannot stall waiting for a volume the old pod still holds.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: glance-helper
  template:
    metadata:
      labels:
        app: glance-helper
    spec:
      containers:
        - name: glance-helper
          image: python:3.12-bookworm
          imagePullPolicy: IfNotPresent
          workingDir: /app
          env:
            - name: IDOKEP_URL
              value: 'https://www.idokep.hu/idojaras/Budapest%20VII.%20ker'
            - name: PLACE_NAME
              value: 'Budapest VII. ker'
            - name: TANDOOR_INTERNAL_URL
              value: 'http://tandoor.tandoor-system.svc.cluster.local:8080'
            - name: TANDOOR_PUBLIC_URL
              value: 'https://tandoor.dooplex.hu'
            - name: GLANCE_HELPER_PUBLIC_URL
              value: 'https://glance-helper.dooplex.hu'
            - name: DATA_DIR
              value: /data
            - name: TZ
              value: Europe/Budapest
            # Credentials are read from a Secret instead of being committed in
            # plain text. The Secret "glance-helper-secrets" (keys below) must
            # exist in this namespace before the pod can start; any value that
            # was previously stored inline in this manifest should be rotated.
            - name: TANDOOR_TOKEN
              valueFrom:
                secretKeyRef:
                  name: glance-helper-secrets
                  key: tandoor-token
            - name: GLANCE_HELPER_KEY
              valueFrom:
                secretKeyRef:
                  name: glance-helper-secrets
                  key: glance-helper-key
          ports:
            - name: http
              containerPort: 8000
          # Packages are installed at container start, so the port opens late;
          # keep the pod out of Service endpoints until uvicorn is listening.
          readinessProbe:
            tcpSocket:
              port: 8000
            periodSeconds: 10
            failureThreshold: 6
          command:
            - /bin/sh
            - '-lc'
          # NOTE(review): dependencies are unpinned and re-installed on every
          # start; consider a prebuilt image with pinned versions.
          args:
            - |-
              set -eux
              export DEBIAN_FRONTEND=noninteractive
              apt-get update
              apt-get install -y --no-install-recommends \
                curl ca-certificates iputils-ping dnsutils tzdata net-tools
              rm -rf /var/lib/apt/lists/*
              pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 prometheus-client
              # exec so the Python server replaces the shell and receives signals directly
              exec python -c "import uvicorn; uvicorn.run('app:APP', host='0.0.0.0', port=8000)"
          volumeMounts:
            - name: app
              mountPath: /app
            - name: data
              mountPath: /data
      volumes:
        - name: app
          configMap:
            name: glance-helper-app
        - name: data
          persistentVolumeClaim:
            claimName: glance-helper-data
---
apiVersion: v1
kind: ConfigMap
metadata:
name: glance-helper-app
namespace: glance-system
data:
app.py: |-
import os
import time
import re
from typing import List, Dict, Any,\ Optional
from datetime import datetime, timezone
from zoneinfo import ZoneInfo
from urllib.parse import urlparse, urlunparse
import json
import random
from datetime import datetime
from\ zoneinfo import ZoneInfo
from pathlib import Path
from urllib.parse import\ urlparse, urlunparse
import requests
from bs4 import BeautifulSoup
from\ fastapi import FastAPI, Response, Request, HTTPException, Query
from fastapi.responses\ import JSONResponse, RedirectResponse
from prometheus_client import Counter,\ Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST
# ASGI application object; route handlers below register themselves on it.
APP = FastAPI()

# --- Időkép scraper configuration (overridable through environment variables) ---
IDOKEP_URL = os.getenv(
    "IDOKEP_URL",
    "https://www.idokep.hu/idojaras/Budapest%20VIII.%20ker",
)
PLACE_NAME = os.getenv("PLACE_NAME", "Budapest VIII. ker")
SOURCE_NAME = "Időkép"
UA = os.getenv(
    "USER_AGENT",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome Safari",
)

# --- Glance-helper / Tandoor configuration ---
# Base URLs are stored without a trailing slash so paths can be appended directly.
TANDOOR_INTERNAL_URL = os.getenv("TANDOOR_INTERNAL_URL", "").rstrip("/")
TANDOOR_PUBLIC_URL = os.getenv("TANDOOR_PUBLIC_URL", "").rstrip("/")
GLANCE_HELPER_PUBLIC_URL = os.getenv("GLANCE_HELPER_PUBLIC_URL", "").rstrip("/")
# Shared key for state-changing endpoints; an empty value disables the check.
GLANCE_HELPER_KEY = os.getenv("GLANCE_HELPER_KEY", "")

# DATA_DIR is defined exactly once, as a Path, and created at import time so the
# JSON state files below can always be written.
DATA_DIR = Path(os.getenv("DATA_DIR", "/data"))
DATA_DIR.mkdir(parents=True, exist_ok=True)
COOKED_PATH = DATA_DIR / "tandoor-cooked.json"
PICKS_PATH = DATA_DIR / "tandoor-picks.json"

# --- Prometheus metrics (optional) ---
SCRAPES = Counter("idokep_scrapes_total", "Total Időkép scrapes", ["place", "status"])
SCRAPE_SECONDS = Histogram("idokep_scrape_seconds", "Időkép scrape duration in seconds", ["place"])
CURRENT_TEMP = Gauge("idokep_current_temp_c", "Current temperature in Celsius", ["place"])
DAILY_TMIN = Gauge("idokep_daily_tmin_c", "Daily minimum temperature in Celsius", ["place", "dow"])
DAILY_TMAX = Gauge("idokep_daily_tmax_c", "Daily maximum temperature in Celsius", ["place", "dow"])
HOURLY_TEMP = Gauge("idokep_hourly_temp_c", "Hourly temperature in Celsius", ["place", "time"])
def _abs_url(maybe_relative: Optional[str]) -> Optional[str]:
\ if not maybe_relative:
return None
if maybe_relative.startswith("http://") or maybe_relative.startswith("https://"):
return maybe_relative
\ # Időkép uses /assets/... paths
return "https://www.idokep.hu"\ + maybe_relative
def _to_int_temp(s: str) -> Optional[float]:
if not\ s:
return None
s = s.strip().replace("˚C", "").replace("°C", "").replace("°", "")
try:
return float(s)
\ except Exception:
return None
def scrape() -> Dict[str, Any]:
    """Download the configured Időkép page and extract the weather data.

    Returns a dict with ``source``, ``location``, ``current``, ``hourly``
    (at most 8 entries), ``daily`` (at most 5 entries) and ``fetched_at_unix``.
    Raises whatever ``requests`` raises for network or HTTP-status failures.
    """

    def _text(el) -> str:
        # Stripped text of an element, or "" when the selector matched nothing.
        return el.get_text(strip=True) if el else ""

    def _icon(el) -> Optional[str]:
        # Absolute icon URL of an <img>-like element, or None when it is missing.
        return _abs_url(el.get("src") if el else None)

    response = requests.get(IDOKEP_URL, headers={"User-Agent": UA}, timeout=15)
    response.raise_for_status()
    page = BeautifulSoup(response.text, "html.parser")

    # Current conditions
    current = {
        "temp_c": _to_int_temp(_text(page.select_one(".current-temperature"))),
        "condition": _text(page.select_one(".current-weather")),
        "icon_url": _icon(page.select_one(".forecast-bigicon")),
    }

    # Hourly cards (.ik.hourly-forecast-card); only the first 8 are considered
    hourly: List[Dict[str, Any]] = []
    for card in page.select(".ik.hourly-forecast-card")[:8]:
        hour = _text(card.select_one(".ik.hourly-forecast-hour"))
        temp = _to_int_temp(_text(card.select_one(".ik.temperature-circled")))
        icon = _icon(card.select_one("img.ik.forecast-icon"))
        if not hour or temp is None:
            continue
        hourly.append(
            {
                "time": hour,      # e.g. "18:00"
                "temp_c": temp,    # e.g. -2
                "icon_url": icon,  # absolute URL
            }
        )

    # Daily columns (.ik.daily-forecast-container .ik.dailyForecastCol); first 15 considered
    daily: List[Dict[str, Any]] = []
    for col in page.select(".ik.daily-forecast-container .ik.dailyForecastCol")[:15]:
        dow = _text(col.select_one(".ik.dfDay"))
        daynum = _text(col.select_one(".ik.dfDayNum"))
        icon = _icon(col.select_one("img.ik.forecast-icon"))

        # Normal structure (most days)
        tmax = _to_int_temp(_text(col.select_one("div.ik.max")))
        tmin = _to_int_temp(_text(col.select_one("div.ik.min")))

        # Fallback structure (e.g. "vacation" days) where div.ik.max/min are missing:
        # take the first two purely numeric <a> texts inside .ik.min-max-container
        # (order: max, min).
        if tmax is None or tmin is None:
            link_texts = (a.get_text(strip=True) for a in col.select(".ik.min-max-container a"))
            numeric = [txt for txt in link_texts if re.fullmatch(r"-?\d+", txt or "")]
            if len(numeric) >= 2:
                tmax = _to_int_temp(numeric[0])
                tmin = _to_int_temp(numeric[1])

        # Keep only rows that look valid
        if dow and tmin is not None and tmax is not None:
            daily.append(
                {
                    "daynum": daynum,
                    "dow": dow,  # e.g. "Cs", "P", "Sz"
                    "tmin_c": tmin,
                    "tmax_c": tmax,
                    "icon_url": icon,
                }
            )

    return {
        "source": {"name": SOURCE_NAME, "url": IDOKEP_URL},
        "location": {"name": PLACE_NAME},
        "current": current,
        "hourly": hourly,
        # Only the first 5 valid days are exposed to the widget
        "daily": daily[:5],
        "fetched_at_unix": int(time.time()),
    }
@APP.get("/api")
def api():
    """Scrape Időkép, refresh the Prometheus gauges and return the data as JSON.

    Raises:
        HTTPException(502): when the scrape fails (the ``error`` counter is
        incremented first). This mirrors how ``/tandoor/daily`` reports an
        upstream failure.
    """
    import logging  # local import: logging is not imported at file level

    with SCRAPE_SECONDS.labels(place=PLACE_NAME).time():
        try:
            data = scrape()
        except Exception as e:
            SCRAPES.labels(place=PLACE_NAME, status="error").inc()
            raise HTTPException(status_code=502, detail=f"Failed to scrape Időkép: {e}") from e
    SCRAPES.labels(place=PLACE_NAME, status="ok").inc()

    # Update Prometheus gauges (best-effort: a metrics problem must not fail the request)
    try:
        current_temp = data.get("current", {}).get("temp_c")
        if current_temp is not None:
            CURRENT_TEMP.labels(place=PLACE_NAME).set(float(current_temp))

        # The dow/time label values change from scrape to scrape; drop the old
        # label sets first so values from earlier scrapes are not exported forever.
        DAILY_TMIN.clear()
        DAILY_TMAX.clear()
        HOURLY_TEMP.clear()
        for d in data.get("daily", []):
            DAILY_TMIN.labels(place=PLACE_NAME, dow=d["dow"]).set(float(d["tmin_c"]))
            DAILY_TMAX.labels(place=PLACE_NAME, dow=d["dow"]).set(float(d["tmax_c"]))
        for h in data.get("hourly", []):
            HOURLY_TEMP.labels(place=PLACE_NAME, time=h["time"]).set(float(h["temp_c"]))
    except Exception:
        logging.getLogger(__name__).exception("Failed to update Prometheus gauges")

    # IMPORTANT: force JSON content-type so Glance exposes `.JSON`
    return Response(
        content=json.dumps(data, ensure_ascii=False),
        media_type="application/json; charset=utf-8",
    )
@APP.get("/metrics")
def metrics():
    """Expose the Prometheus registry in the text exposition format."""
    payload = generate_latest()
    return Response(payload, media_type=CONTENT_TYPE_LATEST)
# -------------------------------
# Tandoor helpers
# -------------------------------
def _today_str() -> str:
# Use Europe/Budapest for "day" boundaries (fallback\ to UTC if tzdata missing)
try:
return datetime.now(tz=ZoneInfo("Europe/Budapest")).date().isoformat()
except Exception:
return\ datetime.utcnow().date().isoformat()
def _load_json(path: Path, default):
\ try:
with path.open("r", encoding="utf-8") as f:
\ return json.load(f)
except Exception:
return default
def\ _save_json(path: Path, data) -> None:
tmp = path.with_suffix(path.suffix\ + ".tmp")
with tmp.open("w", encoding="utf-8") as f:
json.dump(data,\ f, ensure_ascii=False, indent=2)
tmp.replace(path)
def _tandoor_headers()\ -> Dict[str, str]:
token = os.getenv("TANDOOR_TOKEN", "")
if not\ token:
return {"Accept": "application/json"}
return {"Accept": "application/json", "Authorization": f"Bearer {token}"}
def _rewrite_to_public(maybe_url: Optional[str]) -> Optional[str]:
    """Turn a Tandoor URL into one that is valid on the public hostname.

    * empty/None        -> None
    * path starting "/" -> prefixed with TANDOOR_PUBLIC_URL
    * absolute URL whose host equals the internal Tandoor host -> scheme and
      host swapped for those of TANDOOR_PUBLIC_URL
    * anything else (including URLs that fail to parse) -> returned unchanged
    """
    if not maybe_url:
        return None

    # Relative path -> public
    if maybe_url.startswith("/"):
        return TANDOOR_PUBLIC_URL + maybe_url

    # If the API returns internal host URLs, rewrite scheme+host to public
    try:
        parsed = urlparse(maybe_url)
        public = urlparse(TANDOOR_PUBLIC_URL)
        internal_host = urlparse(TANDOOR_INTERNAL_URL).netloc
        if parsed.netloc and internal_host and parsed.netloc == internal_host:
            return urlunparse(parsed._replace(scheme=public.scheme, netloc=public.netloc))
    except Exception:
        pass
    return maybe_url
def _normalize_recipes(items: Any) -> List[Dict[str, Any]]:
    """Reduce raw Tandoor recipe objects to ``{id, name, image}`` dicts, skipping unusable entries."""
    recipes: List[Dict[str, Any]] = []
    if not isinstance(items, list):
        return recipes
    for raw in items:
        if not isinstance(raw, dict):
            continue
        try:
            rid = int(raw.get("id") or 0)
        except (TypeError, ValueError):
            # A malformed id invalidates this entry only, not the whole listing.
            continue
        # `or ""` keeps a null name from becoming the literal string "None".
        name = str(raw.get("name") or "")
        if rid and name:
            recipes.append({
                "id": rid,
                "name": name,
                "image": _rewrite_to_public(raw.get("image")),
            })
    return recipes


def _fetch_recipes_flat() -> List[Dict[str, Any]]:
    """Fetch all recipes from Tandoor as a list of ``{id, name, image}`` dicts.

    Tries ``/api/recipe/flat/`` first. If that does not answer 200 with a JSON
    list, falls back to the paginated ``/api/recipe/`` listing and follows its
    ``next`` links.

    Raises:
        requests exceptions for network failures and for a non-2xx status on
        the fallback endpoint; JSON decoding errors from either endpoint.
    """
    headers = _tandoor_headers()

    # Prefer /api/recipe/flat/ because it's already a {id,name,image} list
    flat_url = f"{TANDOOR_INTERNAL_URL}/api/recipe/flat/"
    r = requests.get(flat_url, headers=headers, timeout=15)
    if r.status_code == 200:
        data = r.json()
        if isinstance(data, list):
            return _normalize_recipes(data)

    # Fallback: paginated /api/recipe/
    internal_host = urlparse(TANDOOR_INTERNAL_URL).netloc
    page_url = f"{TANDOOR_INTERNAL_URL}/api/recipe/?page_size=250"
    collected: List[Any] = []
    max_pages = 20  # hard stop so a misbehaving "next" chain cannot loop forever
    for _ in range(max_pages):
        r = requests.get(page_url, headers=headers, timeout=15)
        r.raise_for_status()
        data = r.json()
        if not isinstance(data, dict):
            break
        results = data.get("results", [])
        if isinstance(results, list):
            collected.extend(results)
        # NOTE(review): assumes the listing exposes the following page as an
        # absolute URL under "next" — confirm against the Tandoor API.
        next_url = data.get("next")
        if not isinstance(next_url, str) or not next_url:
            break
        # Only follow links that stay on the internal Tandoor host, so the
        # Authorization header is never sent anywhere else.
        if urlparse(next_url).netloc != internal_host:
            break
        page_url = next_url
    return _normalize_recipes(collected)
def _get_cooked_for_today() -> List[int]:
    """Return the recipe ids recorded as cooked today (empty list if none or unparsable)."""
    day = _today_str()
    stored = _load_json(COOKED_PATH, {}).get(day, [])
    # normalize to ints; any bad entry invalidates the whole list
    try:
        return list(map(int, stored))
    except Exception:
        return []
def _set_cooked_today(ids: List[int]) -> None:
    """Store today's cooked recipe ids (deduplicated, ascending) in COOKED_PATH."""
    day = _today_str()
    history = _load_json(COOKED_PATH, {})
    history[day] = sorted(set(map(int, ids)))

    # Optional cleanup: keep only the last 14 days.
    # Slicing with [:-14] is empty whenever there are 14 keys or fewer.
    try:
        for stale_day in sorted(history.keys())[:-14]:
            history.pop(stale_day, None)
    except Exception:
        pass

    _save_json(COOKED_PATH, history)
def _get_picks_today() -> List[int]:
    """Return the recipe ids picked for today (empty list if none or unparsable)."""
    day = _today_str()
    stored = _load_json(PICKS_PATH, {}).get(day, [])
    try:
        return list(map(int, stored))
    except Exception:
        return []
def _set_picks_today(ids: List[int]) -> None:
    """Store today's picks in PICKS_PATH, keeping order and dropping non-positive ids."""
    day = _today_str()
    history = _load_json(PICKS_PATH, {})

    todays: List[int] = []
    for raw in ids:
        rid = int(raw)
        if rid > 0:
            todays.append(rid)
    history[day] = todays

    # cleanup old days: keep only the last 14 (the slice is empty for 14 keys or fewer)
    try:
        for stale_day in sorted(history.keys())[:-14]:
            history.pop(stale_day, None)
    except Exception:
        pass

    _save_json(PICKS_PATH, history)
def\ _ensure_daily_picks(recipes: List[Dict[str, Any]], count: int) -> List[int]:
\ cooked = set(_get_cooked_for_today())
picks = _get_picks_today()
\ # Remove picks that are cooked today
picks = [i for i in picks if i\ not in cooked]
# Top up to requested count if needed
if len(picks)\ < count:
available = [r["id"] for r in recipes if r["id"] not in\ cooked and r["id"] not in picks]
# If everything is cooked (or too\ few recipes), allow repeats from all recipes
if len(available) < (count\ - len(picks)):
available = [r["id"] for r in recipes if r["id"] not in picks]
need = max(0, count - len(picks))
if need\ > 0 and available:
picks += random.sample(available, k=min(need,\ len(available)))
# If no picks yet (first call today), choose fresh
\ if not picks:
available = [r["id"] for r in recipes if r["id"] not in cooked]
if not available:
available = [r["id"] for r in recipes]
picks = random.sample(available, k=min(count, len(available)))
_set_picks_today(picks)
return picks
@APP.get("/tandoor/daily")
def tandoor_daily(count: int = Query(3, ge=1, le=10)):
    """Return today's recipe suggestions as JSON.

    Args:
        count: number of suggestions wanted (1-10, default 3).

    Returns:
        JSON with ``date``, ``total_recipes`` and ``items``; each item carries
        ``id``, ``name``, ``image``, the public recipe ``url`` and a
        ``cook_url`` that marks the recipe as cooked.

    Raises:
        HTTPException(502): when the recipe list cannot be fetched from Tandoor.
    """
    # Local import: urlencode is not among the file-level urllib.parse imports.
    from urllib.parse import urlencode

    try:
        recipes = _fetch_recipes_flat()
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Failed to fetch recipes from Tandoor: {e}")

    today = _today_str()
    if not recipes:
        return JSONResponse({"date": today, "total_recipes": 0, "items": []})

    ids = _ensure_daily_picks(recipes, count)
    by_id = {r["id"]: r for r in recipes}

    items = []
    for rid in ids:
        r = by_id.get(rid)
        if not r:
            continue
        # state-changing endpoint requires key if set
        # NOTE(review): this hands the shared key to every caller of this
        # unauthenticated endpoint — confirm that is acceptable for the deployment.
        cook_params = {"id": r["id"]}
        if GLANCE_HELPER_KEY:
            cook_params["key"] = GLANCE_HELPER_KEY
        items.append({
            "id": r["id"],
            "name": r["name"],
            "image": r.get("image"),
            "url": f"{TANDOOR_PUBLIC_URL}/recipe/{r['id']}",
            # urlencode escapes the key, so characters such as "&", "+" or "#"
            # can no longer corrupt the query string.
            "cook_url": f"{GLANCE_HELPER_PUBLIC_URL}/tandoor/cook?{urlencode(cook_params)}",
        })

    return JSONResponse({
        "date": today,
        "total_recipes": len(recipes),
        "items": items,
    })
@APP.get("/tandoor/cook")
def tandoor_cook(
    id: int = Query(..., ge=1),
    key: str = Query("", alias="key"),
    redirect: str = Query("", alias="redirect"),
):
    """Mark a recipe as cooked today and remove it from today's picks.

    Args:
        id: recipe id (>= 1).
        key: shared key; must match GLANCE_HELPER_KEY when that is configured.
        redirect: optional URL to redirect to (302) instead of returning JSON.

    Raises:
        HTTPException(403): when a key is configured and ``key`` does not match.
    """
    import hmac  # local import: hmac is not imported at file level

    # Protect state-changing calls with a shared key (recommended).
    # compare_digest keeps the comparison time independent of how many leading
    # characters match; bytes are compared so non-ASCII input cannot raise.
    if GLANCE_HELPER_KEY and not hmac.compare_digest(
        key.encode("utf-8"), GLANCE_HELPER_KEY.encode("utf-8")
    ):
        raise HTTPException(status_code=403, detail="Forbidden")

    cooked = set(_get_cooked_for_today())
    cooked.add(int(id))
    _set_cooked_today(list(cooked))

    # Also remove from today's picks (so daily list can refill)
    picks = [i for i in _get_picks_today() if i != int(id)]
    _set_picks_today(picks)

    if redirect:
        # NOTE(review): the target is caller-supplied and not validated (open
        # redirect); consider restricting it to known hosts.
        return RedirectResponse(url=redirect, status_code=302)
    return JSONResponse({"ok": True, "date": _today_str(), "cooked_today": sorted(list(cooked))})
---
# Service "idokep-scraper": exposes port 8000 of the pods labelled app=glance-helper.
# NOTE(review): selects the same pods as the glance-helper Service — presumably a
# legacy name kept for existing consumers; confirm before removing.
apiVersion: v1
kind: Service
metadata:
  namespace: glance-system
  name: idokep-scraper
spec:
  ports:
    - name: http
      port: 8000
      targetPort: 8000
  selector:
    app: glance-helper
---
# Service "glance-helper": exposes port 8000 of the pods labelled app=glance-helper.
apiVersion: v1
kind: Service
metadata:
  namespace: glance-system
  name: glance-helper
spec:
  ports:
    - name: http
      port: 8000
      targetPort: 8000
  selector:
    app: glance-helper
---
# Ingress for glance-helper.dooplex.hu -> Service glance-helper:8000, with TLS
# issued through the letsencrypt-prod cluster issuer.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: glance-helper
  namespace: glance-system
  annotations:
    # NOTE(review): tracking ids are normally written by Argo CD itself; consider
    # dropping this from the committed manifest.
    argocd.argoproj.io/tracking-id: 'glance:networking.k8s.io/Ingress:glance-system/glance-helper'
    cert-manager.io/cluster-issuer: letsencrypt-prod
    # NOTE(review): glance-helper.home is published to DNS but has no matching
    # rule or TLS host below — confirm that is intended.
    external-dns.alpha.kubernetes.io/hostname: 'glance-helper.dooplex.hu,glance-helper.home'
    nginx.ingress.kubernetes.io/proxy-body-size: 10m
    # Annotation values are strings; the value must be exactly true (quoted once).
    # The previous value carried an extra pair of literal double quotes.
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
spec:
  ingressClassName: nginx-internal
  rules:
    - host: glance-helper.dooplex.hu
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: glance-helper
                port:
                  number: 8000
  tls:
    - hosts:
        - glance-helper.dooplex.hu
      secretName: glance-helper-tls
# The status stanza is populated by the cluster and does not belong in a
# declarative manifest, so it is omitted here.