wonder if it works
This commit is contained in:
+185
-246
@@ -1,283 +1,222 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: idokep-proxy
|
||||
namespace: glance-system
|
||||
data:
|
||||
app.py: |
|
||||
import os
|
||||
import time
|
||||
import re
|
||||
from typing import Any, Dict, Optional, Tuple, List
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from fastapi import FastAPI, Query, Response
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from prometheus_client import Gauge, Counter, Histogram, generate_latest, CONTENT_TYPE_LATEST
|
||||
|
||||
# ASGI application object; the HTTP routes below are registered on it.
app = FastAPI()

# Upstream site plus request defaults. The default place and the User-Agent
# header can be overridden through the environment.
IDOKEP_BASE = "https://www.idokep.hu"
DEFAULT_PLACE = os.environ.get("IDOKEP_PLACE", "Budapest VIII. ker")
USER_AGENT = os.environ.get(
    "IDOKEP_UA",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/120 Safari/537.36",
)

# In-process response cache: place -> (expiry as unix time, payload).
# The TTL defaults to 900 seconds (15 minutes).
CACHE_TTL_SEC = int(os.environ.get("CACHE_TTL_SEC", "900"))
_cache: Dict[str, Tuple[float, Dict[str, Any]]] = {}

# Prometheus metrics. Every series carries a "place" label; scrape counts
# add "status" and the daily gauges add "dow" (day of week).
SCRAPES_TOTAL = Counter(
    "idokep_scrapes_total",
    "Total Időkép scrapes",
    labelnames=["place", "status"],
)
SCRAPE_SECONDS = Histogram(
    "idokep_scrape_seconds",
    "Időkép scrape duration in seconds",
    labelnames=["place"],
)
CURRENT_TEMP_C = Gauge(
    "idokep_current_temp_c",
    "Current temperature in Celsius",
    labelnames=["place"],
)
DAILY_TMIN_C = Gauge(
    "idokep_daily_tmin_c",
    "Daily minimum temperature in Celsius",
    labelnames=["place", "dow"],
)
DAILY_TMAX_C = Gauge(
    "idokep_daily_tmax_c",
    "Daily maximum temperature in Celsius",
    labelnames=["place", "dow"],
)
DAILY_PREC_MM = Gauge(
    "idokep_daily_precip_mm",
    "Daily precipitation in mm",
    labelnames=["place", "dow"],
)
|
||||
|
||||
def _num(s: Optional[str]) -> Optional[float]:
    """Extract the first decimal number found in *s*.

    A decimal comma is accepted ("12,5" -> 12.5). The typographic minus sign
    (U+2212) is treated like an ASCII hyphen-minus, so a negative value
    rendered as "−2" keeps its sign instead of being read as 2.

    Returns None when *s* is None or contains no digits.
    """
    if s is None:
        return None
    normalized = s.replace(",", ".").replace("\u2212", "-")
    match = re.search(r"-?\d+(\.\d+)?", normalized)
    return float(match.group(0)) if match else None
|
||||
|
||||
def _abs_url(url: Optional[str]) -> Optional[str]:
    """Make an asset reference absolute.

    Protocol-relative references ("//host/...") get an "https:" scheme,
    root-relative ones ("/path") are prefixed with IDOKEP_BASE, and anything
    else is returned unchanged. Empty or missing input yields None.
    """
    if not url:
        return None
    if url.startswith("//"):
        prefix = "https:"
    elif url.startswith("/"):
        prefix = IDOKEP_BASE
    else:
        prefix = ""
    return prefix + url
|
||||
|
||||
def _pick_text(el) -> Optional[str]:
    """Return ``el.get_text(" ", strip=True)``, or None when *el* is falsy."""
    return el.get_text(" ", strip=True) if el else None
|
||||
|
||||
def _fetch_place_html(place: str) -> str:
    """Download the Időkép forecast page for *place* and return its HTML text.

    The place name is URI-quoted before being appended to
    ``{IDOKEP_BASE}/idojaras/``. The request uses the configured User-Agent
    and a 20 second timeout; ``raise_for_status()`` raises on HTTP error
    responses.
    """
    page_url = "{}/idojaras/{}".format(IDOKEP_BASE, requests.utils.requote_uri(place))
    response = requests.get(page_url, headers={"User-Agent": USER_AGENT}, timeout=20)
    response.raise_for_status()
    return response.text
|
||||
|
||||
def _parse_idokep(html: str, place: str) -> Dict[str, Any]:
    """Parse an Időkép forecast page into the JSON payload served by the API.

    The payload contains the current conditions, up to six hourly cards, up
    to five daily columns (the first column is skipped when more than one is
    present) and the min/max over those daily columns. Each daily entry also
    carries ``bar_left_pct`` / ``bar_width_pct``, the position and width of
    its temperature range inside the weekly range in percent. Both are None
    when the weekly range is empty or the day lacks a min or max.
    """
    soup = BeautifulSoup(html, "lxml")

    def number_at(node, selector: str) -> Optional[float]:
        # First number in the text of the first match, or None.
        return _num(_pick_text(node.select_one(selector)) or "")

    def image_at(node, selector: str) -> Optional[str]:
        # Absolute "src" of the first match, or None.
        img = node.select_one(selector)
        return _abs_url(img.get("src")) if img else None

    # Current conditions
    current = {
        "temp_c": number_at(soup, ".current-temperature"),
        "condition_hu": _pick_text(soup.select_one(".weather-short-desc")),
        "icon_url": image_at(soup, ".forecast-bigicon"),
    }

    # Hourly forecast: first six cards
    hourly: List[Dict[str, Any]] = [
        {
            "hour": _pick_text(card.select_one(".new-hourly-forecast-hour")),  # e.g. "15:00"
            "temp_c": number_at(card, ".tempValue .hover-over"),
            "icon_url": image_at(card, ".forecast-icon"),
            "precip_pct": number_at(card, ".hourly-rain-chance a"),
        }
        for card in soup.select(".new-hourly-forecast-card")[:6]
    ]

    # Daily forecast: skip the first column when there is more than one,
    # then keep at most five.
    columns = soup.select(".dailyForecastCol")
    if len(columns) > 1:
        columns = columns[1:]

    raw_days: List[Dict[str, Any]] = []
    for col in columns[:5]:
        # The min/max markup varies between layouts, so try both selectors.
        high_el = col.select_one(".max a") or col.select_one(".min-max-close a:nth-child(1)")
        low_el = col.select_one(".min a") or col.select_one(".min-max-close a:nth-child(2)")
        raw_days.append(
            {
                "dow": _pick_text(col.select_one(".dfDay")),  # e.g. "Sze"
                "daynum": _pick_text(col.select_one(".dfDayNum")),  # e.g. "14"
                "tmax_c": _num(_pick_text(high_el) or ""),
                "tmin_c": _num(_pick_text(low_el) or ""),
                "prec_mm": number_at(col, ".mm"),
                "icon_url": image_at(col, ".forecast"),
            }
        )

    # Weekly extremes; the span between them scales the per-day bars.
    week_min = min((d["tmin_c"] for d in raw_days if d.get("tmin_c") is not None), default=None)
    week_max = max((d["tmax_c"] for d in raw_days if d.get("tmax_c") is not None), default=None)
    span = None
    if week_min is not None and week_max is not None and week_max != week_min:
        span = week_max - week_min

    daily: List[Dict[str, Any]] = []
    for day in raw_days:
        low, high = day.get("tmin_c"), day.get("tmax_c")
        bar_left = bar_width = None
        if span is not None and low is not None and high is not None:
            # Clamp to the 0-100 % range; the width never drops below 1 %.
            bar_left = max(0.0, min(100.0, ((low - week_min) / span) * 100.0))
            bar_width = max(1.0, min(100.0, ((high - low) / span) * 100.0))
        daily.append({**day, "bar_left_pct": bar_left, "bar_width_pct": bar_width})

    return {
        "source": {
            "name": "Időkép",
            "url": f"{IDOKEP_BASE}/idojaras/{requests.utils.requote_uri(place)}",
        },
        "place": place,
        "current": current,
        "hourly": hourly,
        "daily": daily,
        "weekly": {
            "tmin_c": week_min,
            "tmax_c": week_max,
        },
        "fetched_at_unix": int(time.time()),
    }
|
||||
|
||||
@app.get("/api/idokep")
def api_idokep(place: str = Query(default=DEFAULT_PLACE, description="Időkép place name as in /idojaras/<place>")):
    """Serve the parsed forecast for *place* as JSON.

    An unexpired cache entry is returned directly. Otherwise the page is
    fetched and parsed while the scrape histogram times the work, the result
    is cached for CACHE_TTL_SEC seconds and the Prometheus gauges are
    refreshed. Any exception during that work is counted as an error scrape
    and answered with a structured JSON body and HTTP status 502.
    """
    requested_at = time.time()

    entry = _cache.get(place)
    if entry:
        expires_at, cached_payload = entry
        if expires_at > requested_at:
            return JSONResponse(cached_payload)

    with SCRAPE_SECONDS.labels(place=place).time():
        try:
            payload = _parse_idokep(_fetch_place_html(place), place)
            _cache[place] = (requested_at + CACHE_TTL_SEC, payload)

            # Refresh gauges from the new payload, skipping missing values.
            current_temp = payload.get("current", {}).get("temp_c")
            if current_temp is not None:
                CURRENT_TEMP_C.labels(place=place).set(float(current_temp))

            for day in payload.get("daily", []):
                weekday = day.get("dow") or "?"
                for gauge, key in (
                    (DAILY_TMIN_C, "tmin_c"),
                    (DAILY_TMAX_C, "tmax_c"),
                    (DAILY_PREC_MM, "prec_mm"),
                ):
                    value = day.get(key)
                    if value is not None:
                        gauge.labels(place=place, dow=weekday).set(float(value))

            SCRAPES_TOTAL.labels(place=place, status="ok").inc()
            return JSONResponse(payload)

        except Exception:
            SCRAPES_TOTAL.labels(place=place, status="error").inc()
            # Structured error body that the dashboard can display.
            failure = {
                "place": place,
                "error": "Failed to scrape Időkép. Check the place string or Időkép page layout changes.",
                "fetched_at_unix": int(time.time()),
            }
            return JSONResponse(failure, status_code=502)
|
||||
|
||||
@app.get("/metrics")
def metrics():
    """Return the output of ``generate_latest()`` with the Prometheus content type."""
    exposition = generate_latest()
    return Response(exposition, media_type=CONTENT_TYPE_LATEST)
|
||||
|
||||
name: glance-system
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: idokep-proxy
|
||||
name: idokep-scraper
|
||||
namespace: glance-system
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: idokep-proxy
|
||||
app: idokep-scraper
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: idokep-proxy
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "8000"
|
||||
prometheus.io/path: "/metrics"
|
||||
app: idokep-scraper
|
||||
spec:
|
||||
containers:
|
||||
- name: idokep-proxy
|
||||
- name: idokep-scraper
|
||||
image: python:3.12-slim
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: IDOKEP_URL
|
||||
value: "https://www.idokep.hu/idojaras/Budapest%20VIII.%20ker"
|
||||
- name: PLACE_NAME
|
||||
value: "Budapest VIII. ker"
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
env:
|
||||
- name: IDOKEP_PLACE
|
||||
value: "Budapest VIII. ker"
|
||||
- name: CACHE_TTL_SEC
|
||||
value: "900"
|
||||
resources:
|
||||
requests:
|
||||
cpu: 25m
|
||||
memory: 128Mi
|
||||
limits:
|
||||
memory: 256Mi
|
||||
command: ["/bin/sh", "-lc"]
|
||||
args:
|
||||
- |
|
||||
pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 prometheus-client &&
|
||||
python -c "import uvicorn; uvicorn.run('app:APP', host='0.0.0.0', port=8000)"
|
||||
volumeMounts:
|
||||
- name: app
|
||||
mountPath: /app
|
||||
workingDir: /app
|
||||
command: ["/bin/sh","-lc"]
|
||||
args:
|
||||
- |
|
||||
pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 lxml prometheus_client &&
|
||||
uvicorn app:app --host 0.0.0.0 --port 8000
|
||||
volumes:
|
||||
- name: app
|
||||
configMap:
|
||||
name: idokep-proxy
|
||||
name: idokep-scraper-app
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: idokep-scraper-app
|
||||
namespace: glance-system
|
||||
data:
|
||||
app.py: |
|
||||
import os
|
||||
import time
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from fastapi import FastAPI, Response
|
||||
from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST
|
||||
|
||||
# ASGI application object; the HTTP routes below are registered on it.
APP = FastAPI()

# Page to scrape and the display name reported for it. Both can be
# overridden through the environment.
IDOKEP_URL = os.environ.get(
    "IDOKEP_URL",
    "https://www.idokep.hu/idojaras/Budapest%20VIII.%20ker",
)
PLACE_NAME = os.environ.get("PLACE_NAME", "Budapest VIII. ker")
SOURCE_NAME = "Időkép"

# User-Agent header sent with the upstream request.
UA = os.environ.get(
    "USER_AGENT",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome Safari",
)

# Prometheus metrics. Every series carries a "place" label; scrape counts
# add "status", daily gauges add "dow" and hourly gauges add "time".
SCRAPES = Counter(
    "idokep_scrapes_total",
    "Total Időkép scrapes",
    labelnames=["place", "status"],
)
SCRAPE_SECONDS = Histogram(
    "idokep_scrape_seconds",
    "Időkép scrape duration in seconds",
    labelnames=["place"],
)
CURRENT_TEMP = Gauge(
    "idokep_current_temp_c",
    "Current temperature in Celsius",
    labelnames=["place"],
)
DAILY_TMIN = Gauge(
    "idokep_daily_tmin_c",
    "Daily minimum temperature in Celsius",
    labelnames=["place", "dow"],
)
DAILY_TMAX = Gauge(
    "idokep_daily_tmax_c",
    "Daily maximum temperature in Celsius",
    labelnames=["place", "dow"],
)
HOURLY_TEMP = Gauge(
    "idokep_hourly_temp_c",
    "Hourly temperature in Celsius",
    labelnames=["place", "time"],
)
|
||||
|
||||
|
||||
def _abs_url(maybe_relative: Optional[str]) -> Optional[str]:
    """Resolve an asset reference against the Időkép site root.

    Absolute http(s) URLs are returned unchanged. Root-relative paths
    ("/assets/x.svg"), bare relative paths ("assets/x.svg") and
    protocol-relative references ("//cdn.host/x.svg") are resolved the way a
    browser would resolve them against https://www.idokep.hu/. Plain string
    concatenation would produce a broken URL for the last two forms.

    Returns None for None, empty or whitespace-only input.
    """
    from urllib.parse import urljoin

    reference = (maybe_relative or "").strip()
    if not reference:
        return None
    return urljoin("https://www.idokep.hu/", reference)
|
||||
|
||||
|
||||
def _to_int_temp(s: Optional[str]) -> Optional[float]:
    """Parse a temperature label such as "-2˚C" or "3,5°" into a float.

    Despite the historical name the result is a float. Degree markers
    ("˚C", "°C", "°", "˚") are removed, the typographic minus sign (U+2212)
    is mapped to "-" and a decimal comma to ".", because ``float()`` accepts
    neither and the reading would otherwise be dropped.

    Returns None for empty input, for text that is not a number, and for
    non-finite values ("nan", "inf"), which have no valid JSON encoding.
    """
    import math

    if not s:
        return None

    cleaned = s.strip()
    for marker in ("˚C", "°C", "°", "˚"):
        cleaned = cleaned.replace(marker, "")
    cleaned = cleaned.replace("\u2212", "-").replace(",", ".")

    try:
        value = float(cleaned)
    except ValueError:
        return None
    return value if math.isfinite(value) else None
|
||||
|
||||
|
||||
def scrape() -> Dict[str, Any]:
    """Fetch IDOKEP_URL and parse it into the payload served by the API.

    The payload holds the current conditions, hourly entries taken from the
    first eight hourly cards, and daily entries taken from the first seven
    daily columns and then cut to five. Hourly cards without a time or
    temperature are skipped, as are daily columns without a day name, minimum
    or maximum.

    ``raise_for_status()`` raises on HTTP error responses.
    """
    response = requests.get(IDOKEP_URL, headers={"User-Agent": UA}, timeout=15)
    response.raise_for_status()
    page = BeautifulSoup(response.text, "html.parser")

    def text_of(node, selector: str) -> str:
        # Stripped text of the first match, "" when nothing matches.
        found = node.select_one(selector)
        return found.get_text(strip=True) if found else ""

    def temp_of(node, selector: str) -> Optional[float]:
        return _to_int_temp(text_of(node, selector))

    def icon_of(node, selector: str) -> Optional[str]:
        # Absolute "src" of the first match, None when nothing matches.
        found = node.select_one(selector)
        return _abs_url(found.get("src") if found else None)

    # Current conditions
    current = {
        "temp_c": temp_of(page, ".current-temperature"),
        "condition": text_of(page, ".current-weather"),
        "icon_url": icon_of(page, ".forecast-bigicon"),
    }

    # Hourly cards (.ik.hourly-forecast-card)
    hourly: List[Dict[str, Any]] = []
    for card in page.select(".ik.hourly-forecast-card")[:8]:
        clock = text_of(card, ".ik.hourly-forecast-hour")
        degrees = temp_of(card, ".ik.temperature-circled")
        if not clock or degrees is None:
            continue
        hourly.append(
            {
                "time": clock,  # e.g. "18:00"
                "temp_c": degrees,  # e.g. -2
                "icon_url": icon_of(card, "img.ik.forecast-icon"),  # absolute URL
            }
        )

    # Daily columns (.ik.daily-forecast-container .ik.dailyForecastCol)
    days: List[Dict[str, Any]] = []
    for col in page.select(".ik.daily-forecast-container .ik.dailyForecastCol")[:7]:
        weekday = text_of(col, ".ik.dfDay")
        high = temp_of(col, "div.ik.max")
        low = temp_of(col, "div.ik.min")
        # Keep only columns that look valid.
        if not weekday or low is None or high is None:
            continue
        days.append(
            {
                "dow": weekday,  # e.g. "Cs", "P", "Sz"
                "tmin_c": low,
                "tmax_c": high,
                "icon_url": icon_of(col, "img.ik.forecast-icon"),
            }
        )

    return {
        "source": {"name": SOURCE_NAME, "url": IDOKEP_URL},
        "location": {"name": PLACE_NAME},
        "current": current,
        "hourly": hourly,
        "daily": days[:5],  # the widget shows five days
        "fetched_at_unix": int(time.time()),
    }
|
||||
|
||||
|
||||
@APP.get("/api")
def api():
    """Scrape Időkép and return the forecast payload as JSON.

    The scrape is timed by the SCRAPE_SECONDS histogram and counted in
    SCRAPES with status "ok" or "error". A scrape failure is re-raised after
    being counted. Updating the gauges is best-effort: a failure there is
    logged and does not affect the response.
    """
    import json
    import logging

    with SCRAPE_SECONDS.labels(place=PLACE_NAME).time():
        try:
            data = scrape()
        except Exception:
            SCRAPES.labels(place=PLACE_NAME, status="error").inc()
            raise

    SCRAPES.labels(place=PLACE_NAME, status="ok").inc()

    # Update Prometheus gauges (best-effort).
    try:
        current_temp = data.get("current", {}).get("temp_c")
        if current_temp is not None:
            CURRENT_TEMP.labels(place=PLACE_NAME).set(float(current_temp))

        # Drop label sets left over from earlier scrapes. Otherwise hours and
        # weekdays that have dropped out of the forecast window keep
        # exporting their last value indefinitely.
        DAILY_TMIN.clear()
        DAILY_TMAX.clear()
        HOURLY_TEMP.clear()

        for day in data.get("daily", []):
            DAILY_TMIN.labels(place=PLACE_NAME, dow=day["dow"]).set(float(day["tmin_c"]))
            DAILY_TMAX.labels(place=PLACE_NAME, dow=day["dow"]).set(float(day["tmax_c"]))
        for hour in data.get("hourly", []):
            HOURLY_TEMP.labels(place=PLACE_NAME, time=hour["time"]).set(float(hour["temp_c"]))
    except Exception:
        # Metrics must never break the API response, but a failure should be
        # visible in the logs rather than silently discarded.
        logging.getLogger(__name__).exception("Failed to update Prometheus gauges")

    # IMPORTANT: force a JSON content type so Glance exposes `.JSON`.
    return Response(content=json.dumps(data, ensure_ascii=False), media_type="application/json; charset=utf-8")
|
||||
|
||||
|
||||
@APP.get("/metrics")
def metrics():
    """Return the output of ``generate_latest()`` with the Prometheus content type."""
    exposition = generate_latest()
    return Response(exposition, media_type=CONTENT_TYPE_LATEST)
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: idokep-proxy
|
||||
name: idokep-scraper
|
||||
namespace: glance-system
|
||||
spec:
|
||||
selector:
|
||||
app: idokep-proxy
|
||||
app: idokep-scraper
|
||||
ports:
|
||||
- name: http
|
||||
port: 8000
|
||||
targetPort: 8000
|
||||
type: ClusterIP
|
||||
targetPort: 8000
|
||||
Reference in New Issue
Block a user