replaced
This commit is contained in:
+102
-35
@@ -12,22 +12,31 @@ data:
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from fastapi import FastAPI, Query
|
from fastapi import FastAPI, Query, Response
|
||||||
from fastapi.responses import JSONResponse
|
from fastapi.responses import JSONResponse
|
||||||
|
|
||||||
|
from prometheus_client import Gauge, Counter, Histogram, generate_latest, CONTENT_TYPE_LATEST
|
||||||
|
|
||||||
# ASGI application object; the container starts it with `uvicorn app:app`.
app = FastAPI()

# Upstream site and request defaults. Both the place and the User-Agent can be
# overridden through environment variables.
IDOKEP_BASE = "https://www.idokep.hu"
DEFAULT_PLACE = os.environ.get("IDOKEP_PLACE", "Budapest VIII. ker")
USER_AGENT = os.environ.get(
    "IDOKEP_UA",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
)

# Simple in-memory cache: {place: (expires_epoch, payload)}.
# Entries are considered fresh for CACHE_TTL_SEC seconds (default 900 = 15 minutes).
CACHE_TTL_SEC = int(os.environ.get("CACHE_TTL_SEC", "900"))
_cache: Dict[str, Tuple[float, Dict[str, Any]]] = {}

# --- Prometheus metrics ---
# Every metric is labelled by `place`. The label value is the place string of
# the request, so the number of series grows with the number of distinct
# places requested.
SCRAPES_TOTAL = Counter(
    "idokep_scrapes_total",
    "Total Időkép scrapes",
    labelnames=["place", "status"],
)
SCRAPE_SECONDS = Histogram(
    "idokep_scrape_seconds",
    "Időkép scrape duration in seconds",
    labelnames=["place"],
)
CURRENT_TEMP_C = Gauge(
    "idokep_current_temp_c",
    "Current temperature in Celsius",
    labelnames=["place"],
)
# Daily gauges carry an additional `dow` (day-of-week text) label.
DAILY_TMIN_C = Gauge(
    "idokep_daily_tmin_c",
    "Daily minimum temperature in Celsius",
    labelnames=["place", "dow"],
)
DAILY_TMAX_C = Gauge(
    "idokep_daily_tmax_c",
    "Daily maximum temperature in Celsius",
    labelnames=["place", "dow"],
)
DAILY_PREC_MM = Gauge(
    "idokep_daily_precip_mm",
    "Daily precipitation in mm",
    labelnames=["place", "dow"],
)
|
||||||
|
|
||||||
def _num(s: str) -> Optional[float]:
|
def _num(s: str) -> Optional[float]:
|
||||||
if s is None:
|
if s is None:
|
||||||
return None
|
return None
|
||||||
@@ -49,11 +58,9 @@ data:
|
|||||||
return el.get_text(" ", strip=True)
|
return el.get_text(" ", strip=True)
|
||||||
|
|
||||||
def _fetch_place_html(place: str) -> str:
    """Download the Időkép forecast page for *place* and return its HTML.

    Args:
        place: Place name as used in the ``/idojaras/<place>`` URL, either raw
            (``"Budapest VIII. ker"``) or already percent-encoded.

    Returns:
        The response body decoded as text.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status (via ``raise_for_status``).
    """
    from urllib.parse import quote, unquote

    # Encode the place as exactly ONE path segment. `requote_uri` keeps
    # reserved characters such as "/", "?" and "#", which would let a
    # caller-supplied place add extra path segments, a query string or a
    # fragment to the upstream URL. Unquoting first keeps already
    # percent-encoded input from being double-encoded.
    place_path = quote(unquote(place), safe="")
    url = f"{IDOKEP_BASE}/idojaras/{place_path}"
    r = requests.get(url, headers={"User-Agent": USER_AGENT}, timeout=20)
    r.raise_for_status()
    return r.text
|
||||||
|
|
||||||
@@ -70,21 +77,17 @@ data:
|
|||||||
cond_hu_el = soup.select_one(".weather-short-desc")
|
cond_hu_el = soup.select_one(".weather-short-desc")
|
||||||
condition_hu = _pick_text(cond_hu_el)
|
condition_hu = _pick_text(cond_hu_el)
|
||||||
|
|
||||||
# HOURLY (take first 6)
|
# HOURLY (first 6)
|
||||||
hourly_cards = soup.select(".new-hourly-forecast-card")
|
hourly_cards = soup.select(".new-hourly-forecast-card")
|
||||||
hourly: List[Dict[str, Any]] = []
|
hourly: List[Dict[str, Any]] = []
|
||||||
for card in hourly_cards[:6]:
|
for card in hourly_cards[:6]:
|
||||||
hour_el = card.select_one(".new-hourly-forecast-hour")
|
hour_txt = _pick_text(card.select_one(".new-hourly-forecast-hour"))
|
||||||
hour_txt = _pick_text(hour_el)
|
|
||||||
|
|
||||||
htemp_el = card.select_one(".tempValue .hover-over")
|
htemp_c = _num(_pick_text(card.select_one(".tempValue .hover-over")) or "")
|
||||||
htemp_c = _num(_pick_text(htemp_el) or "")
|
|
||||||
|
|
||||||
hicon_el = card.select_one(".forecast-icon")
|
hicon_url = _abs_url((card.select_one(".forecast-icon") or {}).get("src")) if card.select_one(".forecast-icon") else None
|
||||||
hicon_url = _abs_url(hicon_el.get("src") if hicon_el else None)
|
|
||||||
|
|
||||||
hprec_el = card.select_one(".hourly-rain-chance a")
|
hprec_pct = _num(_pick_text(card.select_one(".hourly-rain-chance a")) or "")
|
||||||
hprec_pct = _num(_pick_text(hprec_el) or "")
|
|
||||||
|
|
||||||
hourly.append(
|
hourly.append(
|
||||||
{
|
{
|
||||||
@@ -95,33 +98,26 @@ data:
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
# DAILY (take next 5 columns; Időkép layout usually has an extra leading column, HA used nth-child(2) as day1)
|
# DAILY (next 5, skip first like your HA template did)
|
||||||
daily_cols = soup.select(".dailyForecastCol")
|
daily_cols = soup.select(".dailyForecastCol")
|
||||||
daily: List[Dict[str, Any]] = []
|
|
||||||
|
|
||||||
# Skip first column if it looks like a header-ish column; keep behavior close to your HA selectors.
|
|
||||||
cols = daily_cols[1:] if len(daily_cols) >= 2 else daily_cols
|
cols = daily_cols[1:] if len(daily_cols) >= 2 else daily_cols
|
||||||
|
|
||||||
|
daily_raw: List[Dict[str, Any]] = []
|
||||||
for col in cols[:5]:
|
for col in cols[:5]:
|
||||||
dow_el = col.select_one(".dfDay")
|
dow = _pick_text(col.select_one(".dfDay"))
|
||||||
dow = _pick_text(dow_el)
|
daynum = _pick_text(col.select_one(".dfDayNum"))
|
||||||
|
|
||||||
daynum_el = col.select_one(".dfDayNum")
|
dicon_url = _abs_url((col.select_one(".forecast") or {}).get("src")) if col.select_one(".forecast") else None
|
||||||
daynum = _pick_text(daynum_el)
|
|
||||||
|
|
||||||
dicon_el = col.select_one(".forecast")
|
# various layouts: try a few
|
||||||
dicon_url = _abs_url(dicon_el.get("src") if dicon_el else None)
|
|
||||||
|
|
||||||
# max/min sometimes show in different wrappers; try a few options
|
|
||||||
max_el = col.select_one(".max a") or col.select_one(".min-max-close a:nth-child(1)")
|
max_el = col.select_one(".max a") or col.select_one(".min-max-close a:nth-child(1)")
|
||||||
min_el = col.select_one(".min a") or col.select_one(".min-max-close a:nth-child(2)")
|
min_el = col.select_one(".min a") or col.select_one(".min-max-close a:nth-child(2)")
|
||||||
tmax_c = _num(_pick_text(max_el) or "")
|
tmax_c = _num(_pick_text(max_el) or "")
|
||||||
tmin_c = _num(_pick_text(min_el) or "")
|
tmin_c = _num(_pick_text(min_el) or "")
|
||||||
|
|
||||||
mm_el = col.select_one(".mm")
|
prec_mm = _num(_pick_text(col.select_one(".mm")) or "")
|
||||||
prec_mm = _num(_pick_text(mm_el) or "")
|
|
||||||
|
|
||||||
daily.append(
|
daily_raw.append(
|
||||||
{
|
{
|
||||||
"dow": dow, # e.g. "Sze"
|
"dow": dow, # e.g. "Sze"
|
||||||
"daynum": daynum, # e.g. "14"
|
"daynum": daynum, # e.g. "14"
|
||||||
@@ -132,6 +128,28 @@ data:
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Compute weekly min/max for HA-like bars (left/width)
|
||||||
|
mins = [d["tmin_c"] for d in daily_raw if d.get("tmin_c") is not None]
|
||||||
|
maxs = [d["tmax_c"] for d in daily_raw if d.get("tmax_c") is not None]
|
||||||
|
week_min = min(mins) if mins else None
|
||||||
|
week_max = max(maxs) if maxs else None
|
||||||
|
denom = (week_max - week_min) if (week_min is not None and week_max is not None and week_max != week_min) else None
|
||||||
|
|
||||||
|
daily: List[Dict[str, Any]] = []
|
||||||
|
for d in daily_raw:
|
||||||
|
left = None
|
||||||
|
width = None
|
||||||
|
if denom is not None and d.get("tmin_c") is not None and d.get("tmax_c") is not None:
|
||||||
|
left = ((d["tmin_c"] - week_min) / denom) * 100.0
|
||||||
|
width = ((d["tmax_c"] - d["tmin_c"]) / denom) * 100.0
|
||||||
|
# clamp for safety
|
||||||
|
left = max(0.0, min(100.0, left))
|
||||||
|
width = max(1.0, min(100.0, width))
|
||||||
|
d2 = dict(d)
|
||||||
|
d2["bar_left_pct"] = left
|
||||||
|
d2["bar_width_pct"] = width
|
||||||
|
daily.append(d2)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"source": {
|
"source": {
|
||||||
"name": "Időkép",
|
"name": "Időkép",
|
||||||
@@ -145,21 +163,60 @@ data:
|
|||||||
},
|
},
|
||||||
"hourly": hourly,
|
"hourly": hourly,
|
||||||
"daily": daily,
|
"daily": daily,
|
||||||
|
"weekly": {
|
||||||
|
"tmin_c": week_min,
|
||||||
|
"tmax_c": week_max,
|
||||||
|
},
|
||||||
"fetched_at_unix": int(time.time()),
|
"fetched_at_unix": int(time.time()),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _record_scrape_metrics(place: str, payload: Dict[str, Any]) -> None:
    """Copy the numeric values of a freshly scraped payload into the gauges."""
    current_temp = payload.get("current", {}).get("temp_c")
    if current_temp is not None:
        CURRENT_TEMP_C.labels(place=place).set(float(current_temp))

    # Payload key -> gauge that mirrors it, per forecast day.
    daily_gauges = (
        ("tmin_c", DAILY_TMIN_C),
        ("tmax_c", DAILY_TMAX_C),
        ("prec_mm", DAILY_PREC_MM),
    )
    for day in payload.get("daily", []):
        dow = day.get("dow") or "?"  # label values must be non-empty strings here
        for key, gauge in daily_gauges:
            value = day.get(key)
            if value is not None:
                gauge.labels(place=place, dow=dow).set(float(value))


@app.get("/api/idokep")
def api_idokep(place: str = Query(default=DEFAULT_PLACE, description="Időkép place name as in /idojaras/<place>")):
    """Return the Időkép forecast for *place* as JSON, using the in-memory cache.

    A cached payload that has not yet expired is returned as-is. Otherwise the
    page is fetched and parsed, the result is cached for ``CACHE_TTL_SEC``
    seconds, Prometheus metrics are updated, and the payload is returned.

    Args:
        place: Place name as used in the ``/idojaras/<place>`` URL.

    Returns:
        ``JSONResponse`` with the payload (HTTP 200), or a structured error
        object with HTTP 502 when fetching or parsing fails.
    """
    import logging

    log = logging.getLogger(__name__)
    now = time.time()

    cached = _cache.get(place)
    if cached and cached[0] > now:
        return JSONResponse(cached[1])

    # The timer covers the whole scrape attempt, successful or not.
    with SCRAPE_SECONDS.labels(place=place).time():
        try:
            html = _fetch_place_html(place)
            payload = _parse_idokep(html, place)
            _cache[place] = (now + CACHE_TTL_SEC, payload)

            # Metrics are best-effort: a failure here must not turn a
            # successful (and already cached) scrape into an error response.
            try:
                _record_scrape_metrics(place, payload)
            except Exception:
                log.exception("Failed to update metrics for place %r", place)

            SCRAPES_TOTAL.labels(place=place, status="ok").inc()
            return JSONResponse(payload)

        except Exception:
            # Top-level boundary of the request: record why the scrape failed
            # before converting it into an error payload for the client.
            log.exception("Failed to scrape Időkép for place %r", place)
            SCRAPES_TOTAL.labels(place=place, status="error").inc()
            # return a structured error Glance can show
            return JSONResponse(
                {
                    "place": place,
                    "error": "Failed to scrape Időkép. Check the place string or Időkép page layout changes.",
                    "fetched_at_unix": int(time.time()),
                },
                status_code=502,
            )
|
||||||
|
|
||||||
|
@app.get("/metrics")
def metrics():
    """Serve the current Prometheus metrics in the text exposition format."""
    exposition = generate_latest()
    return Response(content=exposition, media_type=CONTENT_TYPE_LATEST)
|
||||||
|
|
||||||
---
|
---
|
||||||
apiVersion: apps/v1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
@@ -175,6 +232,10 @@ spec:
|
|||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
app: idokep-proxy
|
app: idokep-proxy
|
||||||
|
annotations:
|
||||||
|
prometheus.io/scrape: "true"
|
||||||
|
prometheus.io/port: "8000"
|
||||||
|
prometheus.io/path: "/metrics"
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: idokep-proxy
|
- name: idokep-proxy
|
||||||
@@ -183,9 +244,15 @@ spec:
|
|||||||
- containerPort: 8000
|
- containerPort: 8000
|
||||||
env:
|
env:
|
||||||
- name: IDOKEP_PLACE
|
- name: IDOKEP_PLACE
|
||||||
value: "Budapest VII. ker"
|
value: "Budapest VIII. ker"
|
||||||
- name: CACHE_TTL_SEC
|
- name: CACHE_TTL_SEC
|
||||||
value: "900" # 15 minutes, matches your HA scan_interval philosophy
|
value: "900"
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 25m
|
||||||
|
memory: 128Mi
|
||||||
|
limits:
|
||||||
|
memory: 256Mi
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: app
|
- name: app
|
||||||
mountPath: /app
|
mountPath: /app
|
||||||
@@ -193,7 +260,7 @@ spec:
|
|||||||
command: ["/bin/sh","-lc"]
|
command: ["/bin/sh","-lc"]
|
||||||
args:
|
args:
|
||||||
- |
|
- |
|
||||||
pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 lxml &&
|
pip install --no-cache-dir fastapi uvicorn requests beautifulsoup4 lxml prometheus_client &&
|
||||||
uvicorn app:app --host 0.0.0.0 --port 8000
|
uvicorn app:app --host 0.0.0.0 --port 8000
|
||||||
volumes:
|
volumes:
|
||||||
- name: app
|
- name: app
|
||||||
|
|||||||
Reference in New Issue
Block a user