diff --git a/app/scraper.py b/app/scraper.py index a61cc54..e705ce2 100644 --- a/app/scraper.py +++ b/app/scraper.py @@ -600,7 +600,8 @@ def _parse_kiskegyed(soup: BeautifulSoup, url: str) -> dict: ingredients.append({"group": group_text}) elif el.name == "ul" and "list" in (el.get("class") or []): for li in el.find_all("li"): - line = li.get_text(strip=True) + # Use separator to preserve spaces around tags + line = re.sub(r"\s+", " ", li.get_text(" ")).strip() if not line: continue qty, unit, food, extra = _parse_kiskegyed_ingredient(line) @@ -679,8 +680,9 @@ def _parse_kiskegyed_ingredient(line: str) -> tuple[str, str, str, str]: extras = [] # Try: qty unit (alt_measurement) food... + # Unit can be multi-word (e.g. "kis fej"), so use .+? (non-greedy) m = re.match( - r"^([0-9][0-9.,/½¼¾-]*)\s+(\S+)\s+\(([^)]+)\)\s+(.+)$", line + r"^([0-9][0-9.,/½¼¾-]*)\s+(.+?)\s+\(([^)]+)\)\s+(.+)$", line ) if m: qty = m.group(1).strip()