Extract ingredient comments from food field, add import-to-both button

- Global post-processing in scrape() extracts a trailing "(comment)" from ingredient food names into the extra/comment field. Works for all parsers.
- Added an "Importálás mindkettőbe" button on the single import page, shown when both Mealie and Tandoor are configured.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-24 18:27:58 +01:00
parent 4467265168
commit 2408984421
2 changed files with 80 additions and 4 deletions
@@ -55,12 +55,19 @@ def scrape(url: str) -> dict:
    soup = BeautifulSoup(resp.text, "lxml")

    host = _host(url)
+    result = None
    for substring, parser in _PARSERS:
        if substring in host:
-            return parser(soup, url)
+            result = parser(soup, url)
+            break

-    # Fallback: try generic schema.org / og-tag extraction
-    return _parse_generic(soup, url)
+    if result is None:
+        # Fallback: try generic schema.org / og-tag extraction
+        result = _parse_generic(soup, url)
+
+    # Post-process: extract parenthesized comments from food into extra
+    _extract_ingredient_comments(result)
+    return result


 def supported_sites() -> list[str]:
@@ -642,6 +649,20 @@ def _parse_generic(soup: BeautifulSoup, url: str) -> dict:
 # ---------------------------------------------------------------------------


+def _extract_ingredient_comments(data: dict):
+    """Move a trailing "(comment)" from an ingredient's food into its extra field.
+
+    Mutates the dicts in ``data["ingredients"]`` in place and returns None.
+    Entries that have a "group" key are skipped, and an entry whose "extra"
+    is already non-empty is left untouched so an existing comment is never
+    overwritten.  A missing or None "ingredients" value, non-dict entries
+    and non-string food values are ignored instead of raising.
+    """
+    # ``or []`` also covers an explicit ``"ingredients": None``.
+    for ing in data.get("ingredients") or []:
+        if not isinstance(ing, dict) or "group" in ing:
+            continue
+        food = ing.get("food")
+        if not isinstance(food, str) or not food or ing.get("extra"):
+            continue
+        # The pattern is anchored at the end of the string, so only the last
+        # parenthesized chunk is split off ("a (b) (c)" -> "a (b)" + "c");
+        # nested text such as "a (b (c))" does not match and is left as is.
+        m = re.match(r"^(.+?)\s*\(([^)]+)\)\s*$", food)
+        if m:
+            ing["food"] = m.group(1).strip()
+            ing["extra"] = m.group(2).strip()
+
+
 def _host(url: str) -> str:
    """Return the hostname parsed from *url*, or "" when it has none."""
    from urllib.parse import urlparse
    hostname = urlparse(url).hostname
    return hostname if hostname else ""