diff --git a/README.md b/README.md index 540d71a..9798b2d 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ Docker container for importing recipes from Hungarian websites into [Mealie](htt │ ├── /scrape → AJAX: parse recipe HTML │ │ ├── /send → AJAX: push to Mealie API │ │ ├── /send-tandoor → AJAX: push to Tandoor API │ +│ ├── /tags → AJAX: list tags from both │ │ └── /health → Health check │ │ │ │ Modules: │ @@ -37,10 +38,10 @@ Docker container for importing recipes from Hungarian websites into [Mealie](htt ## Supported Sites -| Site | Ingredients | Instructions | Image | -|------|:-----------:|:------------:|:-----:| -| mindmegette.hu | Yes | Yes | Yes | -| *Other sites* | Fallback (schema.org JSON-LD) | Fallback (schema.org JSON-LD) | Yes (og:image) | +| Site | Ingredients | Instructions | Image | Tags | +|------|:-----------:|:------------:|:-----:|:----:| +| mindmegette.hu | Yes | Yes | Yes | Yes | +| *Other sites* | Fallback (schema.org JSON-LD) | Fallback (schema.org JSON-LD) | Yes (og:image) | Fallback (schema.org keywords) | ### Mindmegette.hu Parser @@ -52,11 +53,12 @@ Extracts data from the Angular-rendered HTML: - **Ingredients**: `div.ingredients` → `div.ingredients-meta` rows, each containing `` (qty), `` (unit), `` (food), `` (extra) - **Ingredient groups**: Multiple `div.ingredients` containers; group title via `` - **Instructions**: `mindmegette-wysiwyg-box` → `ol > li` elements +- **Tags**: `a.tag` elements inside `div.desktop-wrapper` ### Generic Fallback Parser For unsupported sites, attempts extraction via: -1. Schema.org JSON-LD `@type: Recipe` blocks (`recipeIngredient`, `recipeInstructions`) +1. Schema.org JSON-LD `@type: Recipe` blocks (`recipeIngredient`, `recipeInstructions`, `keywords`) 2. OpenGraph meta tags for title, description, image ### Adding a New Site Parser @@ -90,6 +92,17 @@ The importer uses the Tandoor REST API: Authentication uses an API token (Bearer header), created in Tandoor at *Settings → API Browser → Auth Token*. 
+## Tag Management + +Tags are scraped from recipe pages and shown as editable chips in the UI. Users can: +- **Remove** scraped tags that are irrelevant +- **Search** existing tags from Mealie and Tandoor (fetched via the `GET /tags` endpoint) +- **Add** custom tags by typing and pressing Enter + +Tags are sent to both services on import: +- **Mealie**: Tags are created via `POST /api/organizers/tags` if they don't exist, then attached to the recipe in the PATCH payload +- **Tandoor**: Keywords are auto-created by including `keywords: [{"name": "..."}]` in the recipe POST + ## Configuration All settings are persisted to `/data/config.json` (mounted as a Docker volume). @@ -108,7 +121,7 @@ All settings are persisted to `/data/config.json` (mounted as a Docker volume). ```yaml services: recipe-importer: - image: gitea.dooplex.hu/admin/recipe-importer:0.1.9 + image: gitea.dooplex.hu/admin/recipe-importer:0.2.0 container_name: recipe-importer restart: unless-stopped ports: @@ -149,7 +162,7 @@ The UI is in Hungarian and uses a dark theme. The workflow is: 1. **Settings** (`/settings`) — Configure Mealie and/or Tandoor connection (URL + API key), test each connection 2. **Import** (`/import`) — Paste a recipe URL, click "Beolvasás" (Scrape) -3. **Review** — Edit structured ingredients (4-column: quantity, unit, food, note), add/remove ingredient groups, edit instructions +3. **Review** — Edit structured ingredients (4-column: quantity, unit, food, note), add/remove ingredient groups, edit instructions, manage tags (add/remove/search existing) 4. 
**Send** — Click "Importálás Mealie-be" and/or "Importálás Tandoor-ba" to push to your configured services ## Tech Stack diff --git a/app/main.py b/app/main.py index 9327bc0..1d7ab07 100644 --- a/app/main.py +++ b/app/main.py @@ -171,6 +171,29 @@ def send_to_tandoor(): return jsonify({"ok": False, "error": str(exc), "trace": traceback.format_exc()}) +@app.route("/tags", methods=["GET"]) +def list_all_tags(): + """Return existing tags from Mealie and Tandoor for autocomplete.""" + cfg = config.load() + mealie_tags = [] + tandoor_tags = [] + if cfg.get("mealie_url") and cfg.get("mealie_api_key"): + try: + client = MealieClient(cfg["mealie_url"], cfg["mealie_api_key"], + api_url=config.MEALIE_INTERNAL_URL) + mealie_tags = [t["name"] for t in client.list_tags()] + except Exception: + pass + if cfg.get("tandoor_url") and cfg.get("tandoor_api_key"): + try: + client = TandoorClient(cfg["tandoor_url"], cfg["tandoor_api_key"], + api_url=config.TANDOOR_INTERNAL_URL) + tandoor_tags = [t["name"] for t in client.list_keywords()] + except Exception: + pass + return jsonify({"mealie": mealie_tags, "tandoor": tandoor_tags}) + + # --------------------------------------------------------------------------- # Health # --------------------------------------------------------------------------- diff --git a/app/mealie.py b/app/mealie.py index f31419c..2fa937b 100644 --- a/app/mealie.py +++ b/app/mealie.py @@ -63,10 +63,22 @@ class MealieClient: } return None + def list_tags(self) -> list[dict]: + """Return all tags as [{name, slug, id}].""" + r = self.session.get( + f"{self.api_url}/api/organizers/tags", + params={"page": 1, "perPage": -1}, + timeout=10, + ) + if not r.ok: + return [] + return [{"name": t["name"], "slug": t["slug"], "id": t["id"]} + for t in r.json().get("items", [])] + def create_recipe(self, recipe: dict) -> str: """Create a recipe in Mealie from a scraper result dict. - *recipe* keys: title, description, image_url, ingredients, instructions, original_url. 
+ *recipe* keys: title, description, image_url, ingredients, instructions, tags, original_url. Returns the recipe slug. """ # Step 1: create stub @@ -80,6 +92,12 @@ class MealieClient: # Step 2: build full payload and PATCH payload = self._build_payload(recipe) + + # Step 2b: resolve tags (create if needed, get {id, name, slug}) + tag_names = recipe.get("tags", []) + if tag_names: + payload["tags"] = self._ensure_tags(tag_names) + r = self.session.patch( f"{self.api_url}/api/recipes/{slug}", json=payload, @@ -173,6 +191,27 @@ class MealieClient: return entry return None + def _ensure_tags(self, tag_names: list[str]) -> list[dict]: + """Create tags that don't exist yet, return [{id, name, slug}] for all.""" + existing = {t["name"].lower(): t for t in self.list_tags()} + result = [] + for name in tag_names: + key = name.lower() + if key in existing: + result.append(existing[key]) + else: + r = self.session.post( + f"{self.api_url}/api/organizers/tags", + json={"name": name}, + timeout=10, + ) + if r.ok: + t = r.json() + tag = {"id": t["id"], "name": t["name"], "slug": t["slug"]} + existing[key] = tag + result.append(tag) + return result + # ------------------------------------------------------------------ # Internal # ------------------------------------------------------------------ diff --git a/app/scraper.py b/app/scraper.py index 898fe57..4a0ae6d 100644 --- a/app/scraper.py +++ b/app/scraper.py @@ -42,6 +42,7 @@ def scrape(url: str) -> dict: "image_url": str | None, "ingredients": [{"quantity": str, "unit": str, "food": str, "extra": str}, ...], "instructions": [str, ...], + "tags": [str, ...], "original_url": str, } @@ -139,12 +140,22 @@ def _parse_mindmegette(soup: BeautifulSoup, url: str) -> dict: if txt: instructions.append(txt) + # --- Tags --- + tags = [] + tag_wrapper = soup.select_one("div.desktop-wrapper") + if tag_wrapper: + for a in tag_wrapper.select("a.tag"): + tag_text = a.get_text(strip=True) + if tag_text: + tags.append(tag_text) + return { 
"title": title or "Ismeretlen recept", "description": description, "image_url": image_url, "ingredients": ingredients, "instructions": instructions, + "tags": tags, "original_url": url, } @@ -161,6 +172,7 @@ def _parse_generic(soup: BeautifulSoup, url: str) -> dict: ingredients = [] instructions = [] + tags = [] # Try schema.org JSON-LD for script in soup.find_all("script", type="application/ld+json"): @@ -180,6 +192,12 @@ def _parse_generic(soup: BeautifulSoup, url: str) -> dict: instructions.append(item) elif isinstance(item, dict): instructions.append(item.get("text", "")) + # Extract keywords + kw = data.get("keywords", "") + if isinstance(kw, str): + tags = [k.strip() for k in kw.split(",") if k.strip()] + elif isinstance(kw, list): + tags = [str(k).strip() for k in kw if str(k).strip()] break except (json.JSONDecodeError, TypeError, AttributeError): continue @@ -190,6 +208,7 @@ def _parse_generic(soup: BeautifulSoup, url: str) -> dict: "image_url": image_url, "ingredients": ingredients, "instructions": instructions, + "tags": tags, "original_url": url, } diff --git a/app/tandoor.py b/app/tandoor.py index c2685cd..52565cc 100644 --- a/app/tandoor.py +++ b/app/tandoor.py @@ -45,6 +45,22 @@ class TandoorClient: return {"version": version} + def list_keywords(self) -> list[dict]: + """Return all keywords as [{name, id}].""" + results = [] + page_url = f"{self.api_url}/api/keyword/" + params = {"limit": 100, "format": "json"} + while page_url: + r = self.session.get(page_url, params=params, timeout=10) + if not r.ok: + break + data = r.json() + results.extend({"name": k["name"], "id": k["id"]} + for k in data.get("results", [])) + page_url = data.get("next") + params = {} # next URL already has params + return results + def find_duplicate(self, url: str, title: str = "") -> dict | None: """Check if a recipe with this source URL already exists.""" if not url and not title: @@ -145,10 +161,15 @@ class TandoorClient: "order": 0, }) + # Keywords (tags) + tag_names = 
recipe.get("tags", []) + keywords = [{"name": t} for t in tag_names] if tag_names else [] + return { "name": recipe["title"], "description": description, "source_url": original_url, + "keywords": keywords, "steps": steps, "servings": 1, } diff --git a/app/templates/import.html b/app/templates/import.html index 6bed04f..a7b9df6 100644 --- a/app/templates/import.html +++ b/app/templates/import.html @@ -123,6 +123,70 @@ } .add-btn:hover { border-color: var(--accent); color: var(--text); } + /* Tags */ + .tag-chips { + display: flex; + flex-wrap: wrap; + gap: 0.4rem; + min-height: 32px; + } + .tag-chip { + display: inline-flex; + align-items: center; + gap: 0.3rem; + background: var(--accent); + color: #fff; + padding: 0.25rem 0.5rem; + border-radius: 999px; + font-size: 0.85rem; + line-height: 1.2; + } + .tag-chip button { + background: none; + border: none; + color: rgba(255,255,255,0.7); + cursor: pointer; + font-size: 0.9rem; + padding: 0; + line-height: 1; + } + .tag-chip button:hover { color: #fff; } + .tag-search-wrap { + position: relative; + margin-top: 0.5rem; + } + .tag-search-wrap input { + margin-bottom: 0; + width: 100%; + } + .tag-dropdown { + position: absolute; + top: 100%; + left: 0; + right: 0; + background: var(--surface2); + border: 1px solid var(--border); + border-radius: var(--radius); + max-height: 200px; + overflow-y: auto; + z-index: 10; + display: none; + } + .tag-dropdown.open { display: block; } + .tag-dropdown-item { + padding: 0.4rem 0.6rem; + cursor: pointer; + font-size: 0.9rem; + display: flex; + justify-content: space-between; + } + .tag-dropdown-item:hover { background: var(--surface); } + .tag-dropdown-item .tag-source { + color: var(--text-dim); + font-size: 0.8rem; + } + .tag-dropdown-item.tag-add-new { color: var(--accent); font-style: italic; } + .result-card { display: none; } .result-card.visible { display: block; } @@ -179,6 +243,15 @@
+ + +
+
+ +
+
+
{% if has_mealie %} '; + chips.appendChild(chip); +} + +function getActiveTags() { + const tags = []; + document.querySelectorAll('#tagChips .tag-chip').forEach(el => tags.push(el.dataset.tag.toLowerCase())); + return tags; +} + +function onTagSearch() { + const input = document.getElementById('tagSearch'); + const dropdown = document.getElementById('tagDropdown'); + const q = input.value.trim().toLowerCase(); + + if (!q) { dropdown.classList.remove('open'); return; } + + const active = getActiveTags(); + // Merge tags from both sources, track origin + const seen = {}; + for (const t of existingTags.mealie || []) { + const k = t.toLowerCase(); + if (!seen[k]) seen[k] = { name: t, sources: [] }; + seen[k].sources.push('M'); + } + for (const t of existingTags.tandoor || []) { + const k = t.toLowerCase(); + if (!seen[k]) seen[k] = { name: t, sources: [] }; + seen[k].sources.push('T'); + } + + // Filter by query, exclude already-added + const matches = Object.values(seen) + .filter(e => e.name.toLowerCase().includes(q) && !active.includes(e.name.toLowerCase())) + .slice(0, 10); + + let html = ''; + for (const m of matches) { + const src = m.sources.join('+'); + html += '
' + + '' + escHtml(m.name) + '' + + '' + src + '
'; + } + + // "Add new" option if exact match not found + const exactExists = matches.some(m => m.name.toLowerCase() === q) || active.includes(q); + if (!exactExists && q) { + html += '
' + + '+ "' + escHtml(input.value.trim()) + '" hozzáadása
'; + } + + dropdown.innerHTML = html; + dropdown.classList.toggle('open', html.length > 0); +} + +function selectTag(name) { + addTagChip(name); + const input = document.getElementById('tagSearch'); + input.value = ''; + document.getElementById('tagDropdown').classList.remove('open'); + input.focus(); +} + +function onTagKeydown(e) { + if (e.key === 'Enter') { + e.preventDefault(); + const val = e.target.value.trim(); + if (val) { + addTagChip(val); + e.target.value = ''; + document.getElementById('tagDropdown').classList.remove('open'); + } + } else if (e.key === 'Escape') { + document.getElementById('tagDropdown').classList.remove('open'); + } +} + +// Close dropdown when clicking outside +document.addEventListener('click', function(e) { + if (!e.target.closest('.tag-search-wrap')) { + document.getElementById('tagDropdown').classList.remove('open'); + } +}); + +// Load existing tags on page load +loadExistingTags(); + function escHtml(s) { const d = document.createElement('div'); d.textContent = s;