fix: ingredient parsing — match actual HTML elements and add spaces
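The scraper looked for span.quantity/span.unit/span.name, which don't exist in the page. The real HTML uses <strong> for the quantity, a plain (class-less) <span> for the unit, <a class="ingredients-link"> for the name, and <small> for extras like "(darált)". Extras are now wrapped in parentheses when they don't already start with one, and the whole-row fallback joins the element texts with spaces instead of running them together. Also add referenceId to Mealie ingredients (required field). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>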
This commit is contained in:
@@ -69,6 +69,7 @@ class MealieClient:
|
||||
ingredients = []
|
||||
for line in recipe.get("ingredients", []):
|
||||
ingredients.append({
|
||||
"referenceId": uuid.uuid4().hex,
|
||||
"note": line,
|
||||
"isFood": False,
|
||||
"disableAmount": True,
|
||||
|
||||
+21
-8
@@ -66,11 +66,19 @@ def _parse_mindmegette(soup: BeautifulSoup, url: str) -> dict:
|
||||
if ing_container:
|
||||
for row in ing_container.find_all("div", class_="ingredients-meta"):
|
||||
parts = []
|
||||
# Quantity spans: <span class="quantity">1</span> <span class="unit">kg</span>
|
||||
qty_el = row.find("span", class_="quantity")
|
||||
unit_el = row.find("span", class_="unit")
|
||||
name_el = row.find("span", class_="name")
|
||||
extra_el = row.find("span", class_="extra")
|
||||
# Actual HTML: <strong>qty</strong> <span>unit</span>
|
||||
# <a class="ingredients-link">name</a>
|
||||
qty_el = row.find("strong")
|
||||
# Unit: first plain <span> (not one with a specific class like
|
||||
# "ingredients-checkbox" etc.)
|
||||
unit_el = None
|
||||
for sp in row.find_all("span"):
|
||||
if not sp.get("class"):
|
||||
unit_el = sp
|
||||
break
|
||||
name_el = row.find("a", class_="ingredients-link")
|
||||
# Extra info: <small>(darált)</small> or <span class="extra">
|
||||
extra_el = row.find("small") or row.find("span", class_="extra")
|
||||
|
||||
if qty_el:
|
||||
parts.append(_text(qty_el))
|
||||
@@ -79,12 +87,17 @@ def _parse_mindmegette(soup: BeautifulSoup, url: str) -> dict:
|
||||
if name_el:
|
||||
parts.append(_text(name_el))
|
||||
if extra_el:
|
||||
parts.append(_text(extra_el))
|
||||
extra = _text(extra_el)
|
||||
if extra:
|
||||
# Wrap in parens if not already
|
||||
if not extra.startswith("("):
|
||||
extra = f"({extra})"
|
||||
parts.append(extra)
|
||||
|
||||
line = " ".join(p for p in parts if p)
|
||||
if not line:
|
||||
# Fallback: just grab the whole text of the row
|
||||
line = _text(row)
|
||||
# Fallback: grab whole row text with spaces between elements
|
||||
line = row.get_text(separator=" ", strip=True)
|
||||
if line:
|
||||
ingredients.append(line)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user