# ---------------------------------------------------------------------------
# nosalty.hu
# ---------------------------------------------------------------------------


def _nosalty_clean_text(node) -> str:
    """Return the text of *node* with whitespace runs collapsed to one space.

    ``node.get_text(strip=True)`` strips every text fragment and then joins
    the fragments with an empty string, so inline markup glues words together
    (``Add the <a>flour</a> to`` becomes ``Add theflourto``).  Taking the
    unstripped text and normalising whitespace afterwards keeps exactly the
    spacing the HTML had, without inserting spaces before punctuation.
    """
    return " ".join(node.get_text().split())


@_register("nosalty")
def _parse_nosalty(soup: BeautifulSoup, url: str) -> dict:
    """Extract a recipe from a parsed nosalty.hu page.

    Args:
        soup: Parsed HTML document of the recipe page.
        url: Address the page was fetched from; returned unchanged as
            ``original_url``.

    Returns:
        A dict with the keys ``title``, ``description``, ``image_url``,
        ``ingredients``, ``instructions``, ``tags`` and ``original_url``.
        ``ingredients`` contains ``{"group": name}`` marker dicts for group
        headers plus whatever ``_parse_nosalty_ingredient`` adds per row.
        ``instructions`` is a list of strings in which section headers appear
        as ``--- name ---`` entries.
    """
    title = _og(soup, "og:title") or _text(soup.find("title"))
    if title:
        # Drop everything from the first "|" onwards (e.g. a site-name suffix).
        title = re.sub(r"\s*\|.*$", "", title).strip()

    # Story as description (no dedicated description on nosalty).
    description = ""
    story = soup.find("div", id="recipe-story")
    if story:
        paragraph_texts = (_nosalty_clean_text(p) for p in story.find_all("p"))
        description = " ".join(text for text in paragraph_texts if text)

    image_url = _og(soup, "og:image")

    # --- Ingredients ---
    # Scoped to div#ingredients to avoid per-serving / nutrition duplicates.
    # Structure: h3.m-list__title = group header, ul.m-list__list = ingredient rows.
    ingredients = []
    ing_container = soup.find("div", id="ingredients")
    if ing_container:
        # find_all with a list of names yields matches in document order, so
        # every group marker lands in front of the rows that follow it.
        for el in ing_container.find_all(["h3", "ul"]):
            cls = el.get("class") or []
            if el.name == "h3" and "m-list__title" in cls:
                group_name = _nosalty_clean_text(el)
                if group_name:
                    ingredients.append({"group": group_name})
            elif el.name == "ul" and "m-list__list" in cls:
                for li in el.find_all("li", class_="m-list__item"):
                    # NOTE(review): the helper is defined elsewhere; it
                    # presumably appends the parsed row to `ingredients`.
                    _parse_nosalty_ingredient(li, ingredients)

    # --- Instructions ---
    # Container: div#select inside div.p-recipe__directions.
    # h4.m-list__title = section header, ol.m-list__list = steps.
    instructions = []
    dir_container = soup.find("div", id="select")
    if dir_container:
        for el in dir_container.find_all(["h4", "ol"]):
            cls = el.get("class") or []
            if el.name == "h4" and "m-list__title" in cls:
                section_name = _nosalty_clean_text(el)
                if section_name:
                    instructions.append(f"--- {section_name} ---")
            elif el.name == "ol" and "m-list__list" in cls:
                for li in el.find_all("li", class_="m-list__item"):
                    step_text = _nosalty_clean_text(li)
                    if step_text:
                        instructions.append(step_text)

    # --- Tags ---
    tag_texts = (
        _nosalty_clean_text(a)
        for a in soup.find_all("a", class_="m-tags__tagItem")
    )
    tags = [text for text in tag_texts if text]

    return {
        # Hungarian fallback title ("Unknown recipe") when the page has none.
        "title": title or "Ismeretlen recept",
        "description": description,
        "image_url": image_url,
        "ingredients": ingredients,
        "instructions": instructions,
        "tags": tags,
        "original_url": url,
    }