fix: ingredient parsing — match actual HTML elements and add spaces

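The scraper looked for span.quantity/span.unit/span.name, which don't
exist in the page. The real HTML uses <strong> for the quantity, a plain
(class-less) <span> for the unit, <a class="ingredients-link"> for the
name, and <small> for extras like "(darált)". Extras are wrapped in
parentheses when they lack them, and the whole-row fallback now joins
element text with spaces instead of running it together. Also add
referenceId to Mealie ingredients (required field).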

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-24 08:11:28 +01:00
parent 9a59b38fd6
commit 0192de5177
2 changed files with 22 additions and 8 deletions
+1
View File
@@ -69,6 +69,7 @@ class MealieClient:
ingredients = []
for line in recipe.get("ingredients", []):
ingredients.append({
"referenceId": uuid.uuid4().hex,
"note": line,
"isFood": False,
"disableAmount": True,
+21 -8
View File
@@ -66,11 +66,19 @@ def _parse_mindmegette(soup: BeautifulSoup, url: str) -> dict:
if ing_container:
for row in ing_container.find_all("div", class_="ingredients-meta"):
parts = []
# Quantity spans: <span class="quantity">1</span> <span class="unit">kg</span>
qty_el = row.find("span", class_="quantity")
unit_el = row.find("span", class_="unit")
name_el = row.find("span", class_="name")
extra_el = row.find("span", class_="extra")
# Actual HTML: <strong>qty</strong> <span>unit</span>
# <a class="ingredients-link">name</a>
qty_el = row.find("strong")
# Unit: first plain <span> (not one with a specific class like
# "ingredients-checkbox" etc.)
unit_el = None
for sp in row.find_all("span"):
if not sp.get("class"):
unit_el = sp
break
name_el = row.find("a", class_="ingredients-link")
# Extra info: <small>(darált)</small> or <span class="extra">
extra_el = row.find("small") or row.find("span", class_="extra")
if qty_el:
parts.append(_text(qty_el))
@@ -79,12 +87,17 @@ def _parse_mindmegette(soup: BeautifulSoup, url: str) -> dict:
if name_el:
parts.append(_text(name_el))
if extra_el:
parts.append(_text(extra_el))
extra = _text(extra_el)
if extra:
# Wrap in parens if not already
if not extra.startswith("("):
extra = f"({extra})"
parts.append(extra)
line = " ".join(p for p in parts if p)
if not line:
# Fallback: just grab the whole text of the row
line = _text(row)
# Fallback: grab whole row text with spaces between elements
line = row.get_text(separator=" ", strip=True)
if line:
ingredients.append(line)