From f5b00011d036d928b9a7f8444db887be041dbbb8 Mon Sep 17 00:00:00 2001 From: kisfenyo Date: Sun, 25 Jan 2026 21:17:40 +0100 Subject: [PATCH] libri fix --- calibre-system/cwa.yaml | 54 +++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/calibre-system/cwa.yaml b/calibre-system/cwa.yaml index a3b05be..77a37f6 100644 --- a/calibre-system/cwa.yaml +++ b/calibre-system/cwa.yaml @@ -1,6 +1,5 @@ --- # Calibre-Web-Automated - All-in-one eBook library solution -# Replaces Calibre + Calibre-web with automation features # Namespace apiVersion: v1 kind: Namespace @@ -460,7 +459,8 @@ data: BASE_URL = "https://www.libri.hu" BOOK_URL = BASE_URL + "/konyv" - SEARCH_URL = BASE_URL + "/talalati-lista" + # Detailed search URL format + SEARCH_URL = BASE_URL + "/talalati_lista/?reszletes=1&s_det=1&cim=" headers = { 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:130.0) Gecko/20100101 Firefox/130.0', @@ -482,8 +482,8 @@ data: query_title = query.strip() try: - # Libri.hu search URL format - search_url = f"{self.SEARCH_URL}?kereses={requests.utils.quote(query)}" + # Libri.hu detailed search URL - search by title + search_url = f"{self.SEARCH_URL}{requests.utils.quote(query)}" log.info(f"Libri.hu searching: {search_url}") response = self.session.get(search_url, timeout=15) @@ -527,14 +527,17 @@ data: """Parse search results page""" book_data = [] - # Try multiple possible XPath selectors for Libri's search results - book_links = root.xpath('//*[@id="book-list-result-items"]//h4[@class="book"]/a/@href') + # Libri.hu book URLs end with .html and have author.title pattern + # e.g., /konyv/orvos-toth_noemi.Orokolt-sors-514.html + all_links = root.xpath("//a[contains(@href, '/konyv/') and contains(@href, '.html')]/@href") - if not book_links: - # Alternative selector - book_links = root.xpath('//a[contains(@href, "/konyv/")]/@href') - # Filter to unique book URLs - book_links = list(set([l for l in book_links if '/konyv/' in l and '.html' in l])) + # Deduplicate and filter + seen = set() + book_links = [] + for href in all_links: + if href not in seen and '.html' in href: + seen.add(href) + book_links.append(href) for href in book_links[:10]: # Limit to 10 results if not href.startswith('http'): @@ -542,9 +545,20 @@ data: else: url = href - # Can't calculate preliminary relevance without title info from search page - # so use index-based scoring - book_data.append((url, len(book_data) * 10)) + # Extract title from URL for preliminary relevance + # URL format: /konyv/author_name.Book-Title-123.html + url_title = "" + if '.' in href: + parts = href.split('.') + if len(parts) >= 2: + # Get the title part (between first . and .html) + url_title = parts[1].replace('-', ' ').replace('_', ' ') + + relevance = calculate_relevance(query_title, query_author, url_title, []) + book_data.append((url, relevance)) + + # Sort by relevance + book_data.sort(key=lambda x: x[1]) log.info(f"Libri.hu found {len(book_data)} results") return book_data @@ -648,10 +662,8 @@ data: def _parse_libri_id(self, url: str) -> Optional[str]: try: - m = re.search(r'/konyv/(.*)\.html', url) - if m: - return m.group(1) - m = re.search(r'/konyv/([^/]+)', url) + # URL format: /konyv/author_name.Book-Title-123.html + m = re.search(r'/konyv/(.+)\.html', url) if m: return m.group(1) except: @@ -785,9 +797,7 @@ spec: app.kubernetes.io/instance: calibre app.kubernetes.io/name: calibre-web-automated annotations: - # Version checker pattern - CWA uses semantic versioning match-regex.version-checker.io/calibre-web-automated: '^V?[0-9]+\.[0-9]+\.[0-9]+$' - # Force rollout when ConfigMap changes configmap.reloader.stakater.com/reload: "calibre-custom-metadata-providers" spec: containers: @@ -848,7 +858,6 @@ spec: mountPath: /cwa-book-ingest - name: library mountPath: /calibre-library - # Hungarian metadata providers - name: custom-metadata-providers mountPath: /app/calibre-web-automated/cps/metadata_provider/moly_hu.py subPath: moly_hu.py @@ -873,7 +882,6 @@ spec: configMap: name: calibre-custom-metadata-providers --- -# Calibre-Web-Automated Service apiVersion: v1 kind: Service metadata: @@ -893,7 +901,6 @@ spec: app.kubernetes.io/instance: calibre app.kubernetes.io/name: calibre-web-automated --- -# Main Ingress (books.dooplex.hu) apiVersion: networking.k8s.io/v1 kind: Ingress metadata: @@ -947,7 +954,6 @@ spec: port: number: 8083 --- -# Config PVC apiVersion: v1 kind: PersistentVolumeClaim metadata: