diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2504b21..bcfccba 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
# Changelog
+## v0.8.1 (2026-02-24)
+
+### Fixed
+- Mindmegette.hu: support alternative page format (e.g. `/alapetelek/` pages) where ingredients are in a `<ul>` and instructions in an `<ol>` inside the wysiwyg box, instead of structured `div.ingredients` containers
+- Mindmegette.hu: title suffix stripping now handles both `|` and `-` separators
+
## v0.8.0 (2026-02-24)
### Added
diff --git a/app/scraper.py b/app/scraper.py
index a177514..fafe1ba 100644
--- a/app/scraper.py
+++ b/app/scraper.py
@@ -91,27 +91,26 @@ def supported_sites() -> list[dict]:
@_register("mindmegette")
def _parse_mindmegette(soup: BeautifulSoup, url: str) -> dict:
title = _og(soup, "og:title") or _text(soup.find("title"))
- # Strip " | Mindmegette.hu" suffix
+ # Strip " | Mindmegette.hu" or " - Mindmegette.hu" suffix
if title:
- title = re.sub(r"\s*\|\s*Mindmegette\.hu$", "", title).strip()
+ title = re.sub(r"\s*[-–|]\s*Mindmegette\.hu$", "", title).strip()
description = _og(soup, "og:description") or ""
image_url = _og(soup, "og:image")
# --- Ingredients ---
- # Multiple div.ingredients containers may exist (one per group).
- # Group title: A habaráshoz:
+ # Format A (regular /recept/ pages): div.ingredients containers with structured rows
+ # Format B (alt /alapetelek/ pages): h3 "Hozzávalók" → <ul><li> inside wysiwyg box
ingredients = []
+ wysiwyg = soup.find("mindmegette-wysiwyg-box")
+
for ing_container in soup.find_all("div", class_="ingredients"):
- # Check for a group title
group_el = ing_container.find("strong", class_="ingredients-group")
group_name = _text(group_el).rstrip(":").strip() if group_el else ""
if group_name:
ingredients.append({"group": group_name})
for row in ing_container.find_all("div", class_="ingredients-meta"):
- # Actual HTML: qty unit
- # name (extra)
qty_el = row.find("strong")
unit_el = None
for sp in row.find_all("span"):
@@ -127,26 +126,56 @@ def _parse_mindmegette(soup: BeautifulSoup, url: str) -> dict:
extra = _text(extra_el).strip("() ")
if not food:
- # Fallback: grab whole row text
food = row.get_text(separator=" ", strip=True)
if food:
ingredients.append({
- "quantity": qty,
- "unit": unit,
- "food": food,
- "extra": extra,
+ "quantity": qty, "unit": unit, "food": food, "extra": extra,
})
+ # Fallback: h3 "Hozzávalók" → <ul> inside wysiwyg box (alt page format)
+ if not ingredients and wysiwyg:
+ hozz_h3 = None
+ for h3 in wysiwyg.find_all("h3"):
+ if "hozzávalók" in h3.get_text(strip=True).lower():
+ hozz_h3 = h3
+ break
+ if hozz_h3:
+ ul = hozz_h3.find_next_sibling("ul")
+ if ul:
+ for li in ul.find_all("li"):
+ line = li.get_text(strip=True)
+ if not line:
+ continue
+ qty, unit, food = _parse_ingredient_line(line)
+ ingredients.append({
+ "quantity": qty, "unit": unit, "food": food, "extra": "",
+ })
+
# --- Instructions ---
instructions = []
- wysiwyg = soup.find("mindmegette-wysiwyg-box")
if wysiwyg:
- for li in wysiwyg.find_all("li"):
- txt = _text(li)
- if txt:
- instructions.append(txt)
- # Fallback: look for block-content divs
+ # Look for h3 "Elkészítés" → <ol> (alt format)
+ elk_h3 = None
+ for h3 in wysiwyg.find_all("h3"):
+ if "elkészítés" in h3.get_text(strip=True).lower():
+ elk_h3 = h3
+ break
+ if elk_h3:
+ ol = elk_h3.find_next_sibling("ol")
+ if ol:
+ for li in ol.find_all("li"):
+ txt = _text(li)
+ if txt:
+ instructions.append(txt)
+ # Regular format: instructions in block-content (no h3 header)
+ if not instructions:
+ for ol in wysiwyg.find_all("ol"):
+ for li in ol.find_all("li"):
+ txt = _text(li)
+ if txt:
+ instructions.append(txt)
+ # Fallback: look for block-content divs outside wysiwyg
if not instructions:
for div in soup.find_all("div", class_="block-content"):
ol = div.find("ol")