Add nosalty.hu parser

Extracts ingredients (with groups), instructions (with section headers), tags, and story-as-description from nosalty.hu recipe pages.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-24 16:40:16 +01:00
parent d948abf4f7
commit 73a2319f5a
1 changed files with 117 additions and 0 deletions
@@ -298,6 +298,123 @@ def _parse_streetkitchen(soup: BeautifulSoup, url: str) -> dict:
    }
 # ---------------------------------------------------------------------------
 # nosalty.hu
 # ---------------------------------------------------------------------------
def _nosalty_clean_text(el) -> str:
    """Return the text of *el* with each run of whitespace collapsed to one space.

    ``get_text(strip=True)`` strips every text node separately and joins the
    pieces with no separator, so inline markup glues neighbouring words
    together ("Mix <b>flour</b> in" -> "Mixflourin"). Collapsing whitespace
    on the unstripped text keeps the spacing written on the page.
    """
    return " ".join(el.get_text().split())


@_register("nosalty")
def _parse_nosalty(soup: BeautifulSoup, url: str) -> dict:
    """Parse a nosalty.hu recipe page into a recipe dict.

    Args:
        soup: Parsed HTML of the recipe page.
        url: Address of the page; returned unchanged as ``original_url``.

    Returns:
        A dict with the keys ``title``, ``description``, ``image_url``,
        ``ingredients``, ``instructions``, ``tags`` and ``original_url``.
        ``ingredients`` mixes ``{"group": name}`` header markers with the
        entries appended by ``_parse_nosalty_ingredient``. ``instructions``
        is a list of step strings in which section headers appear as
        ``"--- name ---"``.
    """
    title = _og(soup, "og:title") or _text(soup.find("title"))
    if title:
        # Drop everything from the first "|" onwards.
        title = re.sub(r"\s*\|.*$", "", title).strip()

    # Story as description (no dedicated description on nosalty).
    description = ""
    story = soup.find("div", id="recipe-story")
    if story:
        paragraphs = (_nosalty_clean_text(p) for p in story.find_all("p"))
        description = " ".join(text for text in paragraphs if text)

    image_url = _og(soup, "og:image")

    # --- Ingredients ---
    # Scoped to div#ingredients to avoid per-serving / nutrition duplicates.
    # Structure: h3.m-list__title = group header, ul.m-list__list = ingredient rows.
    ingredients = []
    ing_container = soup.find("div", id="ingredients")
    if ing_container:
        # find_all yields headers and lists in document order, so each group
        # marker lands directly before the rows that belong to it.
        for el in ing_container.find_all(["h3", "ul"]):
            cls = el.get("class") or []
            if el.name == "h3" and "m-list__title" in cls:
                group_name = _nosalty_clean_text(el)
                if group_name:
                    ingredients.append({"group": group_name})
            elif el.name == "ul" and "m-list__list" in cls:
                for li in el.find_all("li", class_="m-list__item"):
                    _parse_nosalty_ingredient(li, ingredients)

    # --- Instructions ---
    # Container: div#select inside div.p-recipe__directions.
    # h4.m-list__title = section header, ol.m-list__list = steps.
    instructions = []
    dir_container = soup.find("div", id="select")
    if dir_container:
        for el in dir_container.find_all(["h4", "ol"]):
            cls = el.get("class") or []
            if el.name == "h4" and "m-list__title" in cls:
                section_name = _nosalty_clean_text(el)
                if section_name:
                    instructions.append(f"--- {section_name} ---")
            elif el.name == "ol" and "m-list__list" in cls:
                for li in el.find_all("li", class_="m-list__item"):
                    txt = _nosalty_clean_text(li)
                    if txt:
                        instructions.append(txt)

    # --- Tags ---
    tag_texts = (
        _nosalty_clean_text(a)
        for a in soup.find_all("a", class_="m-tags__tagItem")
    )
    tags = [tag_text for tag_text in tag_texts if tag_text]

    return {
        "title": title or "Ismeretlen recept",
        "description": description,
        "image_url": image_url,
        "ingredients": ingredients,
        "instructions": instructions,
        "tags": tags,
        "original_url": url,
    }
 def _parse_nosalty_ingredient(li, ingredients: list):
    """Append the ingredient described by one nosalty ``<li>`` to *ingredients*.

    Nothing is appended when the ``<li>`` has no ``<div>``, when that div has
    no ``a.a-link`` element, or when the link has no text.

    Args:
        li: Ingredient row element.
        ingredients: Output list, extended in place with a dict holding the
            keys ``quantity``, ``unit``, ``food`` and ``extra``.
    """
    wrapper = li.find("div")
    link = wrapper.find("a", class_="a-link") if wrapper else None
    name = link.get_text(strip=True) if link else ""
    if not name:
        return

    # Direct children ahead of the link carry the amount; those behind it are
    # notes. Only the last non-empty piece before the link is kept.
    amount_text = ""
    notes = []
    link_seen = False
    for node in wrapper.children:
        if node is link:
            link_seen = True
        elif hasattr(node, "get_text"):
            piece = node.get_text(strip=True)
            if not piece:
                continue
            if link_seen:
                # Trim any surrounding parentheses and spaces.
                notes.append(piece.strip("() "))
            else:
                amount_text = piece

    note_text = "; ".join(filter(None, notes))
    quantity, unit = _split_qty_unit(amount_text)
    entry = {
        "quantity": quantity,
        "unit": unit,
        "food": name,
        "extra": note_text,
    }
    ingredients.append(entry)
 def _split_qty_unit(raw: str) -> tuple[str, str]:
    """Split a merged quantity+unit string like '200g' into ('200', 'g')."""
    raw = raw.strip()