Fix Libri.hu metadata provider: use detailed title search URL and parse .html book links

2026-01-25 21:17:40 +01:00
parent 5aaacbb753
commit f5b00011d0
1 changed files with 30 additions and 24 deletions
@@ -1,6 +1,5 @@
 ---
 # Calibre-Web-Automated - All-in-one eBook library solution
 # Replaces Calibre + Calibre-web with automation features
 # Namespace
 apiVersion: v1
 kind: Namespace
@@ -460,7 +459,8 @@ data:
        BASE_URL = "https://www.libri.hu"
        BOOK_URL = BASE_URL + "/konyv"
-        SEARCH_URL = BASE_URL + "/talalati-lista"
+        # Detailed search URL format
        SEARCH_URL = BASE_URL + "/talalati_lista/?reszletes=1&s_det=1&cim="
        headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:130.0) Gecko/20100101 Firefox/130.0',
@@ -482,8 +482,8 @@ data:
            query_title = query.strip()
            try:
-                # Libri.hu search URL format
+                # Libri.hu detailed search URL - search by title
-                search_url = f"{self.SEARCH_URL}?kereses={requests.utils.quote(query)}"
+                search_url = f"{self.SEARCH_URL}{requests.utils.quote(query)}"
                log.info(f"Libri.hu searching: {search_url}")
                response = self.session.get(search_url, timeout=15)
@@ -527,14 +527,17 @@ data:
            """Parse search results page"""
            book_data = []
-            # Try multiple possible XPath selectors for Libri's search results
+            # Libri.hu book URLs end with .html and have author.title pattern
-            book_links = root.xpath('//*[@id="book-list-result-items"]//h4[@class="book"]/a/@href')
+            # e.g., /konyv/orvos-toth_noemi.Orokolt-sors-514.html
            all_links = root.xpath("//a[contains(@href, '/konyv/') and contains(@href, '.html')]/@href")
-            if not book_links:
+            # Deduplicate and filter
-                # Alternative selector
+            seen = set()
-                book_links = root.xpath('//a[contains(@href, "/konyv/")]/@href')
+            book_links = []
-                # Filter to unique book URLs
+            for href in all_links:
-                book_links = list(set([l for l in book_links if '/konyv/' in l and '.html' in l]))
+                if href not in seen and '.html' in href:
                    seen.add(href)
                    book_links.append(href)
            for href in book_links[:10]:  # Limit to 10 results
                if not href.startswith('http'):
@@ -542,9 +545,20 @@ data:
                else:
                    url = href
-                # Can't calculate preliminary relevance without title info from search page
+                # Extract title from URL for preliminary relevance
-                # so use index-based scoring
+                # URL format: /konyv/author_name.Book-Title-123.html
-                book_data.append((url, len(book_data) * 10))
+                url_title = ""
                if '.' in href:
                    parts = href.split('.')
                    if len(parts) >= 2:
                        # Get the title part (between first . and .html)
                        url_title = parts[1].replace('-', ' ').replace('_', ' ')
                relevance = calculate_relevance(query_title, query_author, url_title, [])
                book_data.append((url, relevance))
            # Sort by relevance
            book_data.sort(key=lambda x: x[1])
            log.info(f"Libri.hu found {len(book_data)} results")
            return book_data
@@ -648,10 +662,8 @@ data:
        def _parse_libri_id(self, url: str) -> Optional[str]:
            try:
-                m = re.search(r'/konyv/(.*)\.html', url)
+                # URL format: /konyv/author_name.Book-Title-123.html
-                if m:
+                m = re.search(r'/konyv/(.+)\.html', url)
                    return m.group(1)
                m = re.search(r'/konyv/([^/]+)', url)
                if m:
                    return m.group(1)
            except:
@@ -785,9 +797,7 @@ spec:
        app.kubernetes.io/instance: calibre
        app.kubernetes.io/name: calibre-web-automated
      annotations:
        # Version checker pattern - CWA uses semantic versioning
        match-regex.version-checker.io/calibre-web-automated: '^V?[0-9]+\.[0-9]+\.[0-9]+$'
        # Force rollout when ConfigMap changes
        configmap.reloader.stakater.com/reload: "calibre-custom-metadata-providers"
    spec:
      containers:
@@ -848,7 +858,6 @@ spec:
              mountPath: /cwa-book-ingest
            - name: library
              mountPath: /calibre-library
            # Hungarian metadata providers
            - name: custom-metadata-providers
              mountPath: /app/calibre-web-automated/cps/metadata_provider/moly_hu.py
              subPath: moly_hu.py
@@ -873,7 +882,6 @@ spec:
          configMap:
            name: calibre-custom-metadata-providers
 ---
 # Calibre-Web-Automated Service
 apiVersion: v1
 kind: Service
 metadata:
@@ -893,7 +901,6 @@ spec:
    app.kubernetes.io/instance: calibre
    app.kubernetes.io/name: calibre-web-automated
 ---
 # Main Ingress (books.dooplex.hu)
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
@@ -947,7 +954,6 @@ spec:
                port:
                  number: 8083
 ---
 # Config PVC
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata: