Add nosalty.hu parser

Extracts ingredients (with groups), instructions (with section
headers), tags, and story-as-description from nosalty.hu recipe pages.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-24 16:40:16 +01:00
parent d948abf4f7
commit 73a2319f5a
+117
View File
@@ -298,6 +298,123 @@ def _parse_streetkitchen(soup: BeautifulSoup, url: str) -> dict:
} }
# ---------------------------------------------------------------------------
# nosalty.hu
# ---------------------------------------------------------------------------
def _nosalty_text(node) -> str:
    """Return the text of *node* with whitespace runs collapsed to one space.

    ``get_text(strip=True)`` strips every text fragment separately and joins
    the fragments with an empty string, so inline markup glues neighbouring
    words together (``Add <a>flour</a> now`` becomes ``Addflournow``).
    Reading the raw text and normalising whitespace afterwards keeps the
    spacing the page actually had.
    """
    return " ".join(node.get_text().split())


@_register("nosalty")
def _parse_nosalty(soup: BeautifulSoup, url: str) -> dict:
    """Build a recipe dict from a parsed nosalty.hu recipe page.

    Args:
        soup: Parsed HTML of the recipe page.
        url: Address the page was fetched from; returned as ``original_url``.

    Returns:
        A dict with the keys ``title``, ``description``, ``image_url``,
        ``ingredients``, ``instructions``, ``tags`` and ``original_url``.
        ``ingredients`` mixes ``{"group": ...}`` header entries with the
        entries appended by ``_parse_nosalty_ingredient``; ``instructions``
        is a list of strings in which section headers are rendered as
        ``--- name ---``.
    """
    title = _og(soup, "og:title") or _text(soup.find("title"))
    if title:
        # Drop everything from the first "|" onwards (the title suffix).
        title = re.sub(r"\s*\|.*$", "", title).strip()

    # Story as description (no dedicated description on nosalty)
    description = ""
    story = soup.find("div", id="recipe-story")
    if story:
        paragraphs = [_nosalty_text(p) for p in story.find_all("p")]
        description = " ".join(text for text in paragraphs if text)

    image_url = _og(soup, "og:image")

    # --- Ingredients ---
    # Scoped to div#ingredients to avoid per-serving / nutrition duplicates.
    # Structure: h3.m-list__title = group header, ul.m-list__list = ingredient rows.
    ingredients = []
    ing_container = soup.find("div", id="ingredients")
    if ing_container:
        # find_all yields headers and lists in document order, so each group
        # header lands directly before the rows that belong to it.
        for el in ing_container.find_all(["h3", "ul"]):
            cls = el.get("class") or []
            if el.name == "h3" and "m-list__title" in cls:
                group_name = _nosalty_text(el)
                if group_name:
                    ingredients.append({"group": group_name})
            elif el.name == "ul" and "m-list__list" in cls:
                for li in el.find_all("li", class_="m-list__item"):
                    _parse_nosalty_ingredient(li, ingredients)

    # --- Instructions ---
    # Container: div#select inside div.p-recipe__directions.
    # h4.m-list__title = section header, ol.m-list__list = steps.
    instructions = []
    dir_container = soup.find("div", id="select")
    if dir_container:
        for el in dir_container.find_all(["h4", "ol"]):
            cls = el.get("class") or []
            if el.name == "h4" and "m-list__title" in cls:
                section_name = _nosalty_text(el)
                if section_name:
                    instructions.append(f"--- {section_name} ---")
            elif el.name == "ol" and "m-list__list" in cls:
                for li in el.find_all("li", class_="m-list__item"):
                    step = _nosalty_text(li)
                    if step:
                        instructions.append(step)

    # --- Tags ---
    tags = []
    for a in soup.find_all("a", class_="m-tags__tagItem"):
        tag_text = _nosalty_text(a)
        if tag_text:
            tags.append(tag_text)

    return {
        "title": title or "Ismeretlen recept",
        "description": description,
        "image_url": image_url,
        "ingredients": ingredients,
        "instructions": instructions,
        "tags": tags,
        "original_url": url,
    }
def _parse_nosalty_ingredient(li, ingredients: list):
    """Append one ingredient dict, built from a nosalty <li>, to *ingredients*.

    The row is skipped (nothing is appended) when the <li> has no <div>,
    when that <div> has no ``a.a-link`` element, or when the link text is
    empty.  Otherwise a dict with the keys ``quantity``, ``unit``, ``food``
    and ``extra`` is appended in place; the function returns nothing.
    """
    wrapper = li.find("div")
    link = wrapper.find("a", class_="a-link") if wrapper else None
    name = link.get_text(strip=True) if link else ""
    if not name:
        return

    # Direct children of the wrapper are visited in document order:
    # text seen before the food link is the quantity (the last non-empty
    # one wins), text seen after it is collected as a note.
    amount_text = ""
    notes = []
    seen_link = False
    for node in wrapper.children:
        if node is link:
            seen_link = True
        elif hasattr(node, "get_text"):
            content = node.get_text(strip=True)
            if not content:
                continue
            if seen_link:
                # Notes lose their surrounding parentheses and spaces.
                notes.append(content.strip("() "))
            else:
                amount_text = content

    note = "; ".join(part for part in notes if part)
    amount, measure = _split_qty_unit(amount_text)
    entry = {
        "quantity": amount,
        "unit": measure,
        "food": name,
        "extra": note,
    }
    ingredients.append(entry)
def _split_qty_unit(raw: str) -> tuple[str, str]: def _split_qty_unit(raw: str) -> tuple[str, str]:
"""Split a merged quantity+unit string like '200g' into ('200', 'g').""" """Split a merged quantity+unit string like '200g' into ('200', 'g')."""
raw = raw.strip() raw = raw.strip()