This commit is contained in:
2026-01-14 15:40:16 +01:00
parent eb84dbc387
commit 9da7594052
+106 -39
View File
@@ -12,22 +12,31 @@ data:
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from fastapi import FastAPI, Query from fastapi import FastAPI, Query, Response
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from prometheus_client import Gauge, Counter, Histogram, generate_latest, CONTENT_TYPE_LATEST
# FastAPI application object; the route handlers in this module register on it.
app = FastAPI()

# Upstream site and request defaults. Place and User-Agent can be overridden
# through environment variables.
IDOKEP_BASE = "https://www.idokep.hu"
DEFAULT_PLACE = os.getenv("IDOKEP_PLACE", "Budapest VIII. ker")
USER_AGENT = os.getenv(
    "IDOKEP_UA",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
)

# Simple in-memory cache: {place: (expires_epoch, payload)}.
CACHE_TTL_SEC = int(os.getenv("CACHE_TTL_SEC", "900"))  # 15 minutes
_cache: Dict[str, Tuple[float, Dict[str, Any]]] = {}

# --- Prometheus metrics (place as label; daily gauges also carry "dow") ---
SCRAPES_TOTAL = Counter(
    "idokep_scrapes_total",
    "Total Időkép scrapes",
    ["place", "status"],
)
SCRAPE_SECONDS = Histogram(
    "idokep_scrape_seconds",
    "Időkép scrape duration in seconds",
    ["place"],
)
CURRENT_TEMP_C = Gauge(
    "idokep_current_temp_c",
    "Current temperature in Celsius",
    ["place"],
)
DAILY_TMIN_C = Gauge(
    "idokep_daily_tmin_c",
    "Daily minimum temperature in Celsius",
    ["place", "dow"],
)
DAILY_TMAX_C = Gauge(
    "idokep_daily_tmax_c",
    "Daily maximum temperature in Celsius",
    ["place", "dow"],
)
DAILY_PREC_MM = Gauge(
    "idokep_daily_precip_mm",
    "Daily precipitation in mm",
    ["place", "dow"],
)
def _num(s: str) -> Optional[float]: def _num(s: str) -> Optional[float]:
if s is None: if s is None:
return None return None
@@ -49,11 +58,9 @@ data:
return el.get_text(" ", strip=True) return el.get_text(" ", strip=True)
def _fetch_place_html(place: str) -> str:
    """Download the Időkép forecast page for *place* and return its HTML text.

    Args:
        place: Place name as it appears after ``/idojaras/`` in the Időkép
            URL, given unencoded (e.g. ``"Budapest VIII. ker"``).

    Returns:
        The response body as text.

    Raises:
        requests.HTTPError: the server answered with a 4xx/5xx status.
        requests.RequestException: connection problems or the 20 s timeout.
    """
    # Local import so this function does not depend on the module's import block.
    from urllib.parse import quote

    # Encode the place as ONE path segment. With safe="" the characters
    # "/", "?" and "#" are escaped too, so a caller-supplied value cannot
    # change the path, add a query string, or truncate the URL with a
    # fragment. Spaces and non-ASCII letters are percent-encoded as UTF-8.
    place_path = quote(place, safe="")
    url = f"{IDOKEP_BASE}/idojaras/{place_path}"
    r = requests.get(url, headers={"User-Agent": USER_AGENT}, timeout=20)
    r.raise_for_status()
    return r.text
@@ -70,21 +77,17 @@ data:
cond_hu_el = soup.select_one(".weather-short-desc") cond_hu_el = soup.select_one(".weather-short-desc")
condition_hu = _pick_text(cond_hu_el) condition_hu = _pick_text(cond_hu_el)
# HOURLY (take first 6) # HOURLY (first 6)
hourly_cards = soup.select(".new-hourly-forecast-card") hourly_cards = soup.select(".new-hourly-forecast-card")
hourly: List[Dict[str, Any]] = [] hourly: List[Dict[str, Any]] = []
for card in hourly_cards[:6]: for card in hourly_cards[:6]:
hour_el = card.select_one(".new-hourly-forecast-hour") hour_txt = _pick_text(card.select_one(".new-hourly-forecast-hour"))
hour_txt = _pick_text(hour_el)
htemp_el = card.select_one(".tempValue .hover-over") htemp_c = _num(_pick_text(card.select_one(".tempValue .hover-over")) or "")
htemp_c = _num(_pick_text(htemp_el) or "")
hicon_el = card.select_one(".forecast-icon") hicon_url = _abs_url((card.select_one(".forecast-icon") or {}).get("src")) if card.select_one(".forecast-icon") else None
hicon_url = _abs_url(hicon_el.get("src") if hicon_el else None)
hprec_el = card.select_one(".hourly-rain-chance a") hprec_pct = _num(_pick_text(card.select_one(".hourly-rain-chance a")) or "")
hprec_pct = _num(_pick_text(hprec_el) or "")
hourly.append( hourly.append(
{ {
@@ -95,33 +98,26 @@ data:
} }
) )
# DAILY (take next 5 columns; Időkép layout usually has an extra leading column, HA used nth-child(2) as day1) # DAILY (next 5, skip first like your HA template did)
daily_cols = soup.select(".dailyForecastCol") daily_cols = soup.select(".dailyForecastCol")
daily: List[Dict[str, Any]] = []
# Skip first column if it looks like a header-ish column; keep behavior close to your HA selectors.
cols = daily_cols[1:] if len(daily_cols) >= 2 else daily_cols cols = daily_cols[1:] if len(daily_cols) >= 2 else daily_cols
daily_raw: List[Dict[str, Any]] = []
for col in cols[:5]: for col in cols[:5]:
dow_el = col.select_one(".dfDay") dow = _pick_text(col.select_one(".dfDay"))
dow = _pick_text(dow_el) daynum = _pick_text(col.select_one(".dfDayNum"))
daynum_el = col.select_one(".dfDayNum") dicon_url = _abs_url((col.select_one(".forecast") or {}).get("src")) if col.select_one(".forecast") else None
daynum = _pick_text(daynum_el)
dicon_el = col.select_one(".forecast") # various layouts: try a few
dicon_url = _abs_url(dicon_el.get("src") if dicon_el else None)
# max/min sometimes show in different wrappers; try a few options
max_el = col.select_one(".max a") or col.select_one(".min-max-close a:nth-child(1)") max_el = col.select_one(".max a") or col.select_one(".min-max-close a:nth-child(1)")
min_el = col.select_one(".min a") or col.select_one(".min-max-close a:nth-child(2)") min_el = col.select_one(".min a") or col.select_one(".min-max-close a:nth-child(2)")
tmax_c = _num(_pick_text(max_el) or "") tmax_c = _num(_pick_text(max_el) or "")
tmin_c = _num(_pick_text(min_el) or "") tmin_c = _num(_pick_text(min_el) or "")
mm_el = col.select_one(".mm") prec_mm = _num(_pick_text(col.select_one(".mm")) or "")
prec_mm = _num(_pick_text(mm_el) or "")
daily.append( daily_raw.append(
{ {
"dow": dow, # e.g. "Sze" "dow": dow, # e.g. "Sze"
"daynum": daynum, # e.g. "14" "daynum": daynum, # e.g. "14"
@@ -132,6 +128,28 @@ data:
} }
) )
# Compute weekly min/max for HA-like bars (left/width)
mins = [d["tmin_c"] for d in daily_raw if d.get("tmin_c") is not None]
maxs = [d["tmax_c"] for d in daily_raw if d.get("tmax_c") is not None]
week_min = min(mins) if mins else None
week_max = max(maxs) if maxs else None
denom = (week_max - week_min) if (week_min is not None and week_max is not None and week_max != week_min) else None
daily: List[Dict[str, Any]] = []
for d in daily_raw:
left = None
width = None
if denom is not None and d.get("tmin_c") is not None and d.get("tmax_c") is not None:
left = ((d["tmin_c"] - week_min) / denom) * 100.0
width = ((d["tmax_c"] - d["tmin_c"]) / denom) * 100.0
# clamp for safety
left = max(0.0, min(100.0, left))
width = max(1.0, min(100.0, width))
d2 = dict(d)
d2["bar_left_pct"] = left
d2["bar_width_pct"] = width
daily.append(d2)
return { return {
"source": { "source": {
"name": "Időkép", "name": "Időkép",
@@ -145,20 +163,59 @@ data:
}, },
"hourly": hourly, "hourly": hourly,
"daily": daily, "daily": daily,
"weekly": {
"tmin_c": week_min,
"tmax_c": week_max,
},
"fetched_at_unix": int(time.time()), "fetched_at_unix": int(time.time()),
} }
@app.get("/api/idokep")
def api_idokep(place: str = Query(default=DEFAULT_PLACE, description="Időkép place name as in /idojaras/<place>")):
    """Return the parsed Időkép forecast for *place* as JSON.

    A cached payload is served while its expiry time is in the future.
    Otherwise the page is fetched and parsed, the result is cached for
    ``CACHE_TTL_SEC`` seconds, and the Prometheus gauges are refreshed.

    Args:
        place: Place name as used in the Időkép ``/idojaras/<place>`` URL.

    Returns:
        ``JSONResponse`` with the forecast payload, or a 502 response holding
        ``place``, ``error`` and ``fetched_at_unix`` when fetching or parsing
        fails.
    """
    # Local import so this handler does not depend on the module's import block.
    import logging

    log = logging.getLogger(__name__)

    now = time.time()
    cached = _cache.get(place)
    if cached and cached[0] > now:
        return JSONResponse(cached[1])

    # NOTE(review): `place` comes from the query string and is used as a
    # metric label and cache key, so distinct values grow both without bound.
    # Time only the fetch + parse; the histogram is observed when the `with`
    # block exits, including on the early error return.
    with SCRAPE_SECONDS.labels(place=place).time():
        try:
            html = _fetch_place_html(place)
            payload = _parse_idokep(html, place)
        except Exception:
            # Boundary handler: keep the traceback in the logs so a layout
            # change or bad place string can be diagnosed.
            log.exception("Időkép scrape failed for place %r", place)
            SCRAPES_TOTAL.labels(place=place, status="error").inc()
            # return a structured error Glance can show
            return JSONResponse(
                {
                    "place": place,
                    "error": "Failed to scrape Időkép. Check the place string or Időkép page layout changes.",
                    "fetched_at_unix": int(time.time()),
                },
                status_code=502,
            )

    _cache[place] = (now + CACHE_TTL_SEC, payload)
    SCRAPES_TOTAL.labels(place=place, status="ok").inc()

    # Gauge updates are best-effort: a failure here must not turn a
    # successful (and already cached) scrape into a 502.
    try:
        t = payload.get("current", {}).get("temp_c")
        if t is not None:
            CURRENT_TEMP_C.labels(place=place).set(float(t))

        for d in payload.get("daily", []):
            dow = d.get("dow") or "?"
            if d.get("tmin_c") is not None:
                DAILY_TMIN_C.labels(place=place, dow=dow).set(float(d["tmin_c"]))
            if d.get("tmax_c") is not None:
                DAILY_TMAX_C.labels(place=place, dow=dow).set(float(d["tmax_c"]))
            if d.get("prec_mm") is not None:
                DAILY_PREC_MM.labels(place=place, dow=dow).set(float(d["prec_mm"]))
    except Exception:
        log.exception("Updating Időkép gauges failed for place %r", place)

    return JSONResponse(payload)
@app.get("/metrics")
def metrics():
    """Expose the collected Prometheus metrics for scraping.

    Returns:
        A ``Response`` whose body is the output of ``generate_latest()`` and
        whose media type is ``CONTENT_TYPE_LATEST``.
    """
    exposition = generate_latest()
    return Response(content=exposition, media_type=CONTENT_TYPE_LATEST)
--- ---
apiVersion: apps/v1 apiVersion: apps/v1
@@ -175,6 +232,10 @@ spec:
metadata: metadata:
labels: labels:
app: idokep-proxy app: idokep-proxy
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8000"
prometheus.io/path: "/metrics"
spec: spec:
containers: containers:
- name: idokep-proxy - name: idokep-proxy
@@ -183,9 +244,15 @@ spec:
- containerPort: 8000 - containerPort: 8000
env: env:
- name: IDOKEP_PLACE - name: IDOKEP_PLACE
value: "Budapest VII. ker" value: "Budapest VIII. ker"
- name: CACHE_TTL_SEC - name: CACHE_TTL_SEC
value: "900" # 15 minutes, matches your HA scan_interval philosophy value: "900"
resources:
requests:
cpu: 25m
memory: 128Mi
limits:
memory: 256Mi
volumeMounts: volumeMounts:
- name: app - name: app
mountPath: /app mountPath: /app
@@ -193,7 +260,7 @@ spec:
command: ["/bin/sh","-lc"] command: ["/bin/sh","-lc"]
args: args:
- | - |
pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 lxml && pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 lxml prometheus_client &&
uvicorn app:app --host 0.0.0.0 --port 8000 uvicorn app:app --host 0.0.0.0 --port 8000
volumes: volumes:
- name: app - name: app