diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..f605991 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,7 @@ +.git +.gitignore +mindmegette-examples +*.md +__pycache__ +*.pyc +.env diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..991dfe1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +__pycache__/ +*.pyc +*.pyo +.env +*.egg-info/ +dist/ +build/ +.venv/ diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..d772e3a --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,15 @@ +# Changelog + +## v0.1.0 (2026-02-23) + +### Added +- Initial release +- Web UI with dark theme for recipe importing +- Mindmegette.hu scraper (ingredients, instructions, image, description) +- Generic fallback scraper using schema.org JSON-LD and OpenGraph tags +- Mealie API integration (create recipe, upload image) +- Editable preview: modify title, description, ingredients, and instructions before importing +- Settings page with Mealie connection configuration and test button +- Persistent configuration via JSON file in `/data` volume +- Docker image based on `python:3.12-slim` with Gunicorn +- Health check endpoint at `/health` diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..4f7c6da --- /dev/null +++ b/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.12-slim + +ARG VERSION=dev +ENV VERSION=${VERSION} +ENV PYTHONUNBUFFERED=1 +ENV DATA_DIR=/data + +WORKDIR /app + +# Install dependencies first (layer caching) +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application +COPY app/ app/ + +EXPOSE 8000 + +CMD ["gunicorn", "-b", "0.0.0.0:8000", "-w", "2", "--access-logfile", "-", "app.main:app"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..4de56fb --- /dev/null +++ b/README.md @@ -0,0 +1,132 @@ +# Recipe Importer + +Docker container for importing recipes from Hungarian websites into [Mealie](https://mealie.io/) (Tandoor support planned). 
+ +**Problem**: Mealie's built-in URL import cannot parse ingredients and instructions from Hungarian recipe sites like mindmegette.hu — it imports the title and image but shows "Could not detect ingredients / instructions". + +**Solution**: This container provides a web UI that scrapes Hungarian recipe pages with site-specific parsers, lets you review and edit the extracted data, then pushes it to Mealie via its REST API. + +## Architecture + +``` +┌─────────────────────────────────────────────────┐ +│ recipe-importer container (:8000) │ +│ │ +│ Flask + Gunicorn │ +│ ├── /settings → Configure Mealie connection │ +│ ├── /import → Paste URL, scrape, review │ +│ ├── /scrape → AJAX: parse recipe HTML │ +│ ├── /send → AJAX: push to Mealie API │ +│ └── /health → Health check │ +│ │ +│ Modules: │ +│ ├── app/config.py → JSON config persistence │ +│ ├── app/scraper.py → Site-specific parsers │ +│ └── app/mealie.py → Mealie REST API client │ +└───────────────────┬─────────────────────────────┘ + │ HTTP + ▼ + ┌──────────────────┐ + │ Mealie instance │ + │ POST /api/recipes│ + │ PATCH /api/... │ + │ PUT /api/.../img │ + └──────────────────┘ +``` + +## Supported Sites + +| Site | Ingredients | Instructions | Image | +|------|:-----------:|:------------:|:-----:| +| mindmegette.hu | Yes | Yes | Yes | +| *Other sites* | Fallback (schema.org JSON-LD) | Fallback (schema.org JSON-LD) | Yes (og:image) | + +### Mindmegette.hu Parser + +Extracts data from the Angular-rendered HTML: + +- **Title**: `og:title` meta tag, with ` | Mindmegette.hu` suffix stripped +- **Description**: `og:description` meta tag +- **Image**: `og:image` meta tag +- **Ingredients**: `div.ingredients` → `div.ingredients-meta` rows, each containing `span.quantity`, `span.unit`, `span.name`, `span.extra` +- **Instructions**: `mindmegette-wysiwyg-box` → `ol > li` elements + +### Generic Fallback Parser + +For unsupported sites, attempts extraction via: +1. 
Schema.org JSON-LD `@type: Recipe` blocks (`recipeIngredient`, `recipeInstructions`) +2. OpenGraph meta tags for title, description, image + +## Mealie API Integration + +The importer uses the Mealie REST API: + +1. **POST** `/api/recipes` — create a stub recipe (returns slug) +2. **PATCH** `/api/recipes/{slug}` — populate ingredients, instructions, description, orgURL +3. **PUT** `/api/recipes/{slug}/image` — upload the recipe image + +Authentication uses a long-lived API token (Bearer header), created in Mealie at *Profile → API Tokens*. + +## Configuration + +All settings are persisted to `/data/config.json` (mounted as a Docker volume). + +| Setting | Description | +|---------|-------------| +| `mealie_url` | Full URL to Mealie instance (e.g. `https://mealie.example.com`) | +| `mealie_api_key` | Mealie API token | + +## Deployment + +### Docker Compose + +```yaml +services: + recipe-importer: + image: gitea.dooplex.hu/admin/recipe-importer:0.1.0 + container_name: recipe-importer + restart: unless-stopped + ports: + - "8011:8000" + volumes: + - recipe-data:/data + environment: + - SECRET_KEY=change-me-in-production + +volumes: + recipe-data: +``` + +### Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `SECRET_KEY` | `recipe-importer-dev-key` | Flask session secret | +| `DATA_DIR` | `/data` | Persistent storage path | +| `VERSION` | `dev` | Shown in the UI navbar | + +## Building + +On the build server (192.168.0.180): + +```bash +cd ~/build/recipe-importer +./build.sh 0.1.0 --push +``` + +## Web UI + +The UI is in Hungarian and uses a dark theme. The workflow is: + +1. **Settings** (`/settings`) — Enter Mealie URL and API key, test connection +2. **Import** (`/import`) — Paste a recipe URL, click "Beolvasás" (Scrape) +3. **Review** — Edit the title, description, ingredients, instructions in the preview +4. 
"""Configuration management — persists Mealie connection settings to a JSON file."""

import json
import os
from pathlib import Path

# Directory that holds persistent state; overridable through the DATA_DIR env var.
DATA_DIR = Path(os.environ.get("DATA_DIR", "/data"))
CONFIG_FILE = DATA_DIR / "config.json"

# Keys that load() always returns, even before anything has been saved.
_DEFAULTS = {
    "mealie_url": "",
    "mealie_api_key": "",
}


def _ensure_dir():
    """Create DATA_DIR (including missing parents) if it does not exist yet."""
    DATA_DIR.mkdir(parents=True, exist_ok=True)


def load() -> dict:
    """Return the current config dict, merged with defaults.

    A config file that is missing, unreadable, not valid UTF-8, not valid
    JSON, or whose top-level JSON value is not an object is ignored and the
    defaults are returned instead of raising.
    """
    cfg = dict(_DEFAULTS)
    try:
        with open(CONFIG_FILE, "r", encoding="utf-8") as f:
            stored = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError is the common
        # base of json.JSONDecodeError and UnicodeDecodeError.
        return cfg

    # dict.update() raises on lists/strings/numbers, so only merge real objects.
    if isinstance(stored, dict):
        cfg.update(stored)
    return cfg


def save(cfg: dict):
    """Atomically persist *cfg* to disk.

    The data is written to a sibling ``.tmp`` file first and then moved over
    the real config file, so a failed write never truncates the existing
    config. If writing or moving fails the temporary file is removed and the
    original exception is re-raised.
    """
    _ensure_dir()
    tmp = CONFIG_FILE.with_suffix(".tmp")
    try:
        with open(tmp, "w", encoding="utf-8") as f:
            json.dump(cfg, f, indent=2, ensure_ascii=False)
        tmp.replace(CONFIG_FILE)
    except Exception:
        # Do not leave a half-written temp file behind in the data volume.
        tmp.unlink(missing_ok=True)
        raise
@app.route("/settings", methods=["GET", "POST"])
def settings():
    """Render the Mealie connection form, or store the submitted values.

    GET renders ``settings.html`` with the stored configuration. POST reads
    ``mealie_url`` (trimmed, trailing slashes removed) and ``mealie_api_key``
    (trimmed) from the form, saves them, flashes a confirmation and redirects
    back to this page.
    """
    current = config.load()

    # Anything that is not a form submission just shows the page.
    if request.method != "POST":
        return render_template("settings.html", cfg=current, version=VERSION)

    form = request.form
    submitted_url = form.get("mealie_url", "").strip().rstrip("/")
    submitted_key = form.get("mealie_api_key", "").strip()
    current["mealie_url"] = submitted_url
    current["mealie_api_key"] = submitted_key
    config.save(current)

    flash("Beállítások mentve.", "success")
    return redirect(url_for("settings"))
@app.route("/send", methods=["POST"])
def send_to_mealie():
    """AJAX — send edited recipe data to Mealie.

    Expects a JSON object in the request body with at least a non-empty
    string ``title``. Always answers with JSON:

    * ``{"ok": True, "slug": ..., "url": ...}`` when the recipe was created;
    * ``{"ok": False, "error": ...}`` when Mealie is not configured or the
      request body is unusable;
    * ``{"ok": False, "error": ..., "trace": ...}`` when the Mealie call
      raised.
    """
    cfg = config.load()
    if not cfg.get("mealie_url") or not cfg.get("mealie_api_key"):
        return jsonify({"ok": False, "error": "Mealie nincs beállítva."})

    payload = request.get_json(silent=True)
    # Valid JSON that is not an object (list, string, number) cannot describe
    # a recipe; reject it here instead of failing later with a TypeError.
    if not payload or not isinstance(payload, dict):
        return jsonify({"ok": False, "error": "Érvénytelen kérés."})

    # The title becomes the recipe name in Mealie; without it the client
    # would raise KeyError('title') or create a nameless stub.
    title = payload.get("title")
    if not isinstance(title, str) or not title.strip():
        return jsonify({"ok": False, "error": "Hiányzik a recept címe."})
    payload = {**payload, "title": title.strip()}

    try:
        client = MealieClient(cfg["mealie_url"], cfg["mealie_api_key"])
        slug = client.create_recipe(payload)
    except Exception as exc:
        # Route boundary: report the failure to the UI and keep a server-side
        # record. NOTE(review): the traceback is also returned to the caller;
        # kept so the response shape stays unchanged for the frontend.
        app.logger.exception("Mealie import failed")
        return jsonify({"ok": False, "error": str(exc), "trace": traceback.format_exc()})

    recipe_url = f"{cfg['mealie_url']}/g/home/r/{slug}"
    return jsonify({"ok": True, "slug": slug, "url": recipe_url})
def _build_payload(self, recipe: dict) -> dict:
    """Translate a scraper/UI recipe dict into the body sent in the PATCH request.

    *recipe* must contain ``title`` (``KeyError`` otherwise). ``description``,
    ``ingredients``, ``instructions`` and ``original_url`` are optional; a
    missing key and an explicit ``None`` are treated the same way.

    Ingredient and instruction entries that are not strings, or that are
    blank after trimming, are skipped so that empty rows left in the editable
    preview do not become empty ingredients/steps. Each instruction gets a
    fresh random hex id.
    """

    def clean(values) -> list[str]:
        # ``values`` comes from client JSON: tolerate null and junk entries.
        if not isinstance(values, (list, tuple)):
            return []
        return [v.strip() for v in values if isinstance(v, str) and v.strip()]

    ingredients = [
        {
            "note": note,
            "isFood": False,
            "disableAmount": True,
        }
        for note in clean(recipe.get("ingredients"))
    ]

    instructions = [
        {
            "id": uuid.uuid4().hex,
            "text": step,
        }
        for step in clean(recipe.get("instructions"))
    ]

    return {
        "name": recipe["title"],
        # ``or ""`` also maps an explicit null to an empty string.
        "description": recipe.get("description") or "",
        "recipeIngredient": ingredients,
        "recipeInstructions": instructions,
        "orgURL": recipe.get("original_url") or "",
        "recipeYield": "",
    }
def scrape(url: str) -> dict:
    """Download *url* and parse it into a recipe dict.

    Pages whose hostname contains ``mindmegette`` are handled by the
    dedicated parser; every other site goes through the generic fallback
    parser.

    The result has this shape::

        {
            "title": str,
            "description": str,
            "image_url": str | None,
            "ingredients": [str, ...],
            "instructions": [str, ...],
            "original_url": str,
        }

    ``raise_for_status()`` is called on the response, so HTTP error statuses
    propagate to the caller as ``requests`` exceptions.
    """
    response = requests.get(url, headers=_HEADERS, timeout=30)
    response.raise_for_status()
    # Decode with the detected encoding, falling back to UTF-8.
    response.encoding = response.apparent_encoding or "utf-8"
    document = BeautifulSoup(response.text, "lxml")

    # Site-specific parser when available, generic extraction otherwise.
    parser = _parse_mindmegette if "mindmegette" in _host(url) else _parse_generic
    return parser(document, url)
def _find_jsonld_recipe(node) -> dict | None:
    """Return the first schema.org ``Recipe`` object inside decoded JSON-LD, if any."""
    if isinstance(node, list):
        for child in node:
            found = _find_jsonld_recipe(child)
            if found is not None:
                return found
        return None
    if not isinstance(node, dict):
        return None

    types = node.get("@type")
    # ``@type`` may be a single string or a list of type names.
    if types == "Recipe" or (isinstance(types, list) and "Recipe" in types):
        return node
    # Some documents wrap their nodes in a top-level ``@graph`` array.
    return _find_jsonld_recipe(node.get("@graph"))


def _jsonld_strings(raw) -> list[str]:
    """Normalise a JSON-LD value into a list of non-blank, trimmed strings."""
    if isinstance(raw, str):
        raw = [raw]
    if not isinstance(raw, list):
        return []
    return [s.strip() for s in raw if isinstance(s, str) and s.strip()]


def _jsonld_instructions(raw) -> list[str]:
    """Flatten a ``recipeInstructions`` value into a list of plain step texts."""
    # A lone string or object is treated as a one-element list.
    if isinstance(raw, (str, dict)):
        raw = [raw]
    if not isinstance(raw, list):
        return []

    steps: list[str] = []
    for item in raw:
        if isinstance(item, str):
            text = item
        elif isinstance(item, dict):
            if "itemListElement" in item:
                # Section-like object: its steps live one level down.
                steps.extend(_jsonld_instructions(item["itemListElement"]))
                continue
            text = item.get("text") or item.get("name") or ""
        else:
            continue
        if isinstance(text, str) and text.strip():
            steps.append(text.strip())
    return steps


def _parse_generic(soup: BeautifulSoup, url: str) -> dict:
    """Best-effort parser for sites without a dedicated scraper.

    Title, description and image come from OpenGraph meta tags (the title
    falls back to ``<title>`` and then to a placeholder). Ingredients and
    instructions come from the first schema.org ``Recipe`` found in any
    ``application/ld+json`` script; both stay empty when none is found.
    Script blocks that are not valid JSON are skipped.

    Returns a dict with the keys ``title``, ``description``, ``image_url``,
    ``ingredients``, ``instructions`` and ``original_url``.
    """
    import json  # local import: this function is the only JSON consumer here

    title = _og(soup, "og:title") or _text(soup.find("title")) or "Ismeretlen recept"
    description = _og(soup, "og:description") or ""
    image_url = _og(soup, "og:image")

    ingredients: list[str] = []
    instructions: list[str] = []

    for script in soup.find_all("script", type="application/ld+json"):
        try:
            data = json.loads(script.string or "")
        except (json.JSONDecodeError, TypeError):
            continue

        recipe = _find_jsonld_recipe(data)
        if recipe is None:
            continue

        ingredients = _jsonld_strings(recipe.get("recipeIngredient"))
        instructions = _jsonld_instructions(recipe.get("recipeInstructions"))
        break

    return {
        "title": title,
        "description": description,
        "image_url": image_url,
        "ingredients": ingredients,
        "instructions": instructions,
        "original_url": url,
    }
def _text(el) -> str:
    """Return the text of *el* via ``get_text(strip=True)``, or ``""`` when *el* is ``None``."""
    return "" if el is None else el.get_text(strip=True)
+ + +