fix: ingredient parsing — match actual HTML elements and add spaces
The scraper looked for span.quantity/span.unit/span.name, which don't exist in the page. The real HTML uses <strong> for the quantity, a plain (class-less) <span> for the unit, <a class="ingredients-link"> for the name, and <small> for extras like "(darált)". Also add referenceId to Mealie ingredients (required field).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -69,6 +69,7 @@ class MealieClient:
|
|||||||
ingredients = []
|
ingredients = []
|
||||||
for line in recipe.get("ingredients", []):
|
for line in recipe.get("ingredients", []):
|
||||||
ingredients.append({
|
ingredients.append({
|
||||||
|
"referenceId": uuid.uuid4().hex,
|
||||||
"note": line,
|
"note": line,
|
||||||
"isFood": False,
|
"isFood": False,
|
||||||
"disableAmount": True,
|
"disableAmount": True,
|
||||||
|
|||||||
+21
-8
@@ -66,11 +66,19 @@ def _parse_mindmegette(soup: BeautifulSoup, url: str) -> dict:
|
|||||||
if ing_container:
|
if ing_container:
|
||||||
for row in ing_container.find_all("div", class_="ingredients-meta"):
|
for row in ing_container.find_all("div", class_="ingredients-meta"):
|
||||||
parts = []
|
parts = []
|
||||||
# Quantity spans: <span class="quantity">1</span> <span class="unit">kg</span>
|
# Actual HTML: <strong>qty</strong> <span>unit</span>
|
||||||
qty_el = row.find("span", class_="quantity")
|
# <a class="ingredients-link">name</a>
|
||||||
unit_el = row.find("span", class_="unit")
|
qty_el = row.find("strong")
|
||||||
name_el = row.find("span", class_="name")
|
# Unit: first plain <span> (not one with a specific class like
|
||||||
extra_el = row.find("span", class_="extra")
|
# "ingredients-checkbox" etc.)
|
||||||
|
unit_el = None
|
||||||
|
for sp in row.find_all("span"):
|
||||||
|
if not sp.get("class"):
|
||||||
|
unit_el = sp
|
||||||
|
break
|
||||||
|
name_el = row.find("a", class_="ingredients-link")
|
||||||
|
# Extra info: <small>(darált)</small> or <span class="extra">
|
||||||
|
extra_el = row.find("small") or row.find("span", class_="extra")
|
||||||
|
|
||||||
if qty_el:
|
if qty_el:
|
||||||
parts.append(_text(qty_el))
|
parts.append(_text(qty_el))
|
||||||
@@ -79,12 +87,17 @@ def _parse_mindmegette(soup: BeautifulSoup, url: str) -> dict:
|
|||||||
if name_el:
|
if name_el:
|
||||||
parts.append(_text(name_el))
|
parts.append(_text(name_el))
|
||||||
if extra_el:
|
if extra_el:
|
||||||
parts.append(_text(extra_el))
|
extra = _text(extra_el)
|
||||||
|
if extra:
|
||||||
|
# Wrap in parens if not already
|
||||||
|
if not extra.startswith("("):
|
||||||
|
extra = f"({extra})"
|
||||||
|
parts.append(extra)
|
||||||
|
|
||||||
line = " ".join(p for p in parts if p)
|
line = " ".join(p for p in parts if p)
|
||||||
if not line:
|
if not line:
|
||||||
# Fallback: just grab the whole text of the row
|
# Fallback: grab whole row text with spaces between elements
|
||||||
line = _text(row)
|
line = row.get_text(separator=" ", strip=True)
|
||||||
if line:
|
if line:
|
||||||
ingredients.append(line)
|
ingredients.append(line)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user