v0.6.0: Sobors.hu parser, HTTP auth, recipe validation, UI polish
- New sobors.hu parser with ingredient groups and section headers
- Incomplete recipe warnings (missing ingredients/instructions)
- Optional HTTP Basic Auth (configurable on settings page)
- Brand text: "Recept" in white, "Importáló" in blue
- Larger logo (36px), favicon using logo_notext.svg

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -418,6 +418,98 @@ def _parse_nosalty_ingredient(li, ingredients: list):
|
||||
})
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# sobors.hu
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@_register("sobors")
def _parse_sobors(soup: BeautifulSoup, url: str) -> dict:
    """Extract a recipe from a parsed sobors.hu page.

    Args:
        soup: Parsed HTML document of the recipe page.
        url: Address the page came from; returned unchanged as
            ``original_url``.

    Returns:
        A dict with the keys ``title``, ``description``, ``image_url``,
        ``ingredients``, ``instructions``, ``tags`` and ``original_url``.
        ``ingredients`` mixes two entry shapes: ``{"group": name}`` markers
        for group headers, and ``{"quantity", "unit", "food", "extra"}``
        rows for the ingredients themselves. ``instructions`` is a list of
        strings in which section headers are rendered as ``--- header ---``.
    """

    def _flat_text(el) -> str:
        # get_text(strip=True) strips every text node on its own and joins
        # the pieces with "", which glues words together across inline tags
        # ("Add <b>salt</b> now" -> "Addsaltnow"). Collapsing whitespace on
        # the unstripped text keeps the author's word spacing intact.
        return " ".join(el.get_text().split())

    # Title: h3.recept_nev, falling back to og:title and then <title>.
    title = ""
    title_el = soup.find("h3", class_="recept_nev")
    if title_el:
        title = _flat_text(title_el)
    if not title:
        title = _og(soup, "og:title") or _text(soup.find("title"))
    if title:
        # Drop a trailing " - SóBors ..." style site suffix.
        title = re.sub(
            r"\s*[-–|]\s*SóBors.*$", "", title, flags=re.IGNORECASE
        ).strip()

    description = _og(soup, "og:description") or ""
    image_url = _og(soup, "og:image")

    # --- Ingredients ---
    # Container: div.hozzavalok-container
    # Groups: section > h4 (group header), section > ul > li
    # Each li > span > span.mennyiseg, span.mertekegyseg, span.hozzavalo
    ingredients = []
    ing_container = soup.find("div", class_="hozzavalok-container")
    if ing_container:
        for section in ing_container.find_all("section"):
            h4 = section.find("h4")
            if h4:
                group_name = _flat_text(h4).rstrip(":")
                if group_name:
                    ingredients.append({"group": group_name})
            for li in section.find_all("li"):
                food = _text(li.find("span", class_="hozzavalo"))
                if not food:
                    # A row without an ingredient name carries no usable data.
                    continue
                ingredients.append({
                    "quantity": _text(li.find("span", class_="mennyiseg")),
                    "unit": _text(li.find("span", class_="mertekegyseg")),
                    "food": food,
                    "extra": "",
                })

    # --- Instructions ---
    # Container: div.recept_leiras.recept_he-elkeszites
    # Content: <p> tags for steps, <h3><strong>Section</strong></h3> for
    # section headers
    instructions = []
    inst_container = soup.find("div", class_="recept_leiras")
    if inst_container:
        for el in inst_container.find_all(["h3", "p"]):
            text = _flat_text(el)
            if not text:
                continue
            if el.name == "h3":
                instructions.append(f"--- {text} ---")
            else:
                # Strip leading numbering like "1. " from reader recipes.
                instructions.append(re.sub(r"^\d+\.\s+", "", text))

    # --- Tags ---
    # Container: div.cikk-cimkek > ul.cikk-cimkek-list > li > a
    # Skip the generic "Receptek" category tag and "Olvasói receptek" tag
    tags = []
    tag_container = soup.find("div", class_="cikk-cimkek")
    if tag_container:
        tag_list = tag_container.find("ul", class_="cikk-cimkek-list")
        if tag_list:
            skip = {"receptek", "olvasói receptek"}
            for a in tag_list.find_all("a"):
                tag_text = _flat_text(a)
                if tag_text and tag_text.lower() not in skip:
                    tags.append(tag_text)

    return {
        "title": title or "Ismeretlen recept",
        "description": description,
        "image_url": image_url,
        "ingredients": ingredients,
        "instructions": instructions,
        "tags": tags,
        "original_url": url,
    }
|
||||
|
||||
|
||||
def _split_qty_unit(raw: str) -> tuple[str, str]:
|
||||
"""Split a merged quantity+unit string like '200g' into ('200', 'g')."""
|
||||
raw = raw.strip()
|
||||
|
||||
Reference in New Issue
Block a user