fix: group title on first ingredient + multi-site parser registry

- Fix ingredient groups creating empty entries in Mealie: set title
  field on the first ingredient after the group marker instead
- Refactor scraper with @_register decorator for URL-based site dispatch
- Update README with structured ingredients, groups, MEALIE_INTERNAL_URL

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-24 08:51:14 +01:00
parent c235d5caa7
commit a27b322409
3 changed files with 52 additions and 22 deletions
+11 -12
View File
@@ -145,27 +145,26 @@ class MealieClient:
def _build_payload(self, recipe: dict) -> dict:
ingredients = []
pending_group = ""
for item in recipe.get("ingredients", []):
if isinstance(item, dict):
# Group header marker
# Group header marker — apply title to the next real ingredient
if "group" in item and "food" not in item:
ingredients.append({
"referenceId": str(uuid.uuid4()),
"title": item["group"],
"note": "",
"isFood": False,
"disableAmount": True,
})
else:
ingredients.append(self._build_ingredient(item))
pending_group = item["group"]
continue
ing = self._build_ingredient(item)
else:
# Legacy: plain string
ingredients.append({
ing = {
"referenceId": str(uuid.uuid4()),
"note": str(item),
"isFood": False,
"disableAmount": True,
})
}
if pending_group:
ing["title"] = pending_group
pending_group = ""
ingredients.append(ing)
instructions = []
for text in recipe.get("instructions", []):
+27 -6
View File
@@ -1,6 +1,7 @@
"""Recipe scraper — parses Hungarian recipe sites into a structured dict.
Currently supported: mindmegette.hu
Each supported site has a parser registered via _PARSERS.
Unsupported sites fall back to generic schema.org / og-tag extraction.
"""
import re
@@ -12,6 +13,19 @@ _HEADERS = {
"Accept-Language": "hu-HU,hu;q=0.9,en;q=0.5",
}
# Parser registry: (hostname substring, parser function) pairs, appended to
# by the @_register decorator as each parser function is defined.
# scrape() walks this list in order and calls the first parser whose
# substring occurs in the URL's host, so registration order decides which
# parser wins when more than one substring matches.
_PARSERS: list[tuple[str, "callable"]] = []
def _register(host_substring: str):
    """Build a decorator that adds a parser function to ``_PARSERS``.

    The decorated function is appended to ``_PARSERS`` as a
    ``(host_substring, function)`` pair and is returned unchanged, so it
    remains callable under its own name.
    """
    def _add_parser(parser_fn):
        entry = (host_substring, parser_fn)
        _PARSERS.append(entry)
        return parser_fn

    return _add_parser
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
@@ -39,11 +53,17 @@ def scrape(url: str) -> dict:
soup = BeautifulSoup(resp.text, "lxml")
host = _host(url)
if "mindmegette" in host:
return _parse_mindmegette(soup, url)
else:
# Fallback: try generic schema.org / og-tag extraction
return _parse_generic(soup, url)
for substring, parser in _PARSERS:
if substring in host:
return parser(soup, url)
# Fallback: try generic schema.org / og-tag extraction
return _parse_generic(soup, url)
def supported_sites() -> list[str]:
    """Return the hostname substrings that have a registered parser.

    The substrings are listed in the same order as the entries of
    ``_PARSERS``; the parser functions themselves are not included.
    """
    return [host_substring for host_substring, _parser in _PARSERS]
# ---------------------------------------------------------------------------
@@ -51,6 +71,7 @@ def scrape(url: str) -> dict:
# ---------------------------------------------------------------------------
@_register("mindmegette")
def _parse_mindmegette(soup: BeautifulSoup, url: str) -> dict:
title = _og(soup, "og:title") or _text(soup.find("title"))
# Strip " | Mindmegette.hu" suffix