libri fix

This commit is contained in:
2026-01-25 21:17:40 +01:00
parent 5aaacbb753
commit f5b00011d0
+30 -24
View File
@@ -1,6 +1,5 @@
--- ---
# Calibre-Web-Automated - All-in-one eBook library solution # Calibre-Web-Automated - All-in-one eBook library solution
# Replaces Calibre + Calibre-web with automation features
# Namespace # Namespace
apiVersion: v1 apiVersion: v1
kind: Namespace kind: Namespace
@@ -460,7 +459,8 @@ data:
BASE_URL = "https://www.libri.hu" BASE_URL = "https://www.libri.hu"
BOOK_URL = BASE_URL + "/konyv" BOOK_URL = BASE_URL + "/konyv"
SEARCH_URL = BASE_URL + "/talalati-lista" # Detailed search URL format
SEARCH_URL = BASE_URL + "/talalati_lista/?reszletes=1&s_det=1&cim="
headers = { headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:130.0) Gecko/20100101 Firefox/130.0', 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:130.0) Gecko/20100101 Firefox/130.0',
@@ -482,8 +482,8 @@ data:
query_title = query.strip() query_title = query.strip()
try: try:
# Libri.hu search URL format # Libri.hu detailed search URL - search by title
search_url = f"{self.SEARCH_URL}?kereses={requests.utils.quote(query)}" search_url = f"{self.SEARCH_URL}{requests.utils.quote(query)}"
log.info(f"Libri.hu searching: {search_url}") log.info(f"Libri.hu searching: {search_url}")
response = self.session.get(search_url, timeout=15) response = self.session.get(search_url, timeout=15)
@@ -527,14 +527,17 @@ data:
"""Parse search results page""" """Parse search results page"""
book_data = [] book_data = []
# Try multiple possible XPath selectors for Libri's search results # Libri.hu book URLs end with .html and have author.title pattern
book_links = root.xpath('//*[@id="book-list-result-items"]//h4[@class="book"]/a/@href') # e.g., /konyv/orvos-toth_noemi.Orokolt-sors-514.html
all_links = root.xpath("//a[contains(@href, '/konyv/') and contains(@href, '.html')]/@href")
if not book_links: # Deduplicate and filter
# Alternative selector seen = set()
book_links = root.xpath('//a[contains(@href, "/konyv/")]/@href') book_links = []
# Filter to unique book URLs for href in all_links:
book_links = list(set([l for l in book_links if '/konyv/' in l and '.html' in l])) if href not in seen and '.html' in href:
seen.add(href)
book_links.append(href)
for href in book_links[:10]: # Limit to 10 results for href in book_links[:10]: # Limit to 10 results
if not href.startswith('http'): if not href.startswith('http'):
@@ -542,9 +545,20 @@ data:
else: else:
url = href url = href
# Can't calculate preliminary relevance without title info from search page # Extract title from URL for preliminary relevance
# so use index-based scoring # URL format: /konyv/author_name.Book-Title-123.html
book_data.append((url, len(book_data) * 10)) url_title = ""
if '.' in href:
parts = href.split('.')
if len(parts) >= 2:
# Get the title part (between first . and .html)
url_title = parts[1].replace('-', ' ').replace('_', ' ')
relevance = calculate_relevance(query_title, query_author, url_title, [])
book_data.append((url, relevance))
# Sort by relevance
book_data.sort(key=lambda x: x[1])
log.info(f"Libri.hu found {len(book_data)} results") log.info(f"Libri.hu found {len(book_data)} results")
return book_data return book_data
@@ -648,10 +662,8 @@ data:
def _parse_libri_id(self, url: str) -> Optional[str]: def _parse_libri_id(self, url: str) -> Optional[str]:
try: try:
m = re.search(r'/konyv/(.*)\.html', url) # URL format: /konyv/author_name.Book-Title-123.html
if m: m = re.search(r'/konyv/(.+)\.html', url)
return m.group(1)
m = re.search(r'/konyv/([^/]+)', url)
if m: if m:
return m.group(1) return m.group(1)
except: except:
@@ -785,9 +797,7 @@ spec:
app.kubernetes.io/instance: calibre app.kubernetes.io/instance: calibre
app.kubernetes.io/name: calibre-web-automated app.kubernetes.io/name: calibre-web-automated
annotations: annotations:
# Version checker pattern - CWA uses semantic versioning
match-regex.version-checker.io/calibre-web-automated: '^V?[0-9]+\.[0-9]+\.[0-9]+$' match-regex.version-checker.io/calibre-web-automated: '^V?[0-9]+\.[0-9]+\.[0-9]+$'
# Force rollout when ConfigMap changes
configmap.reloader.stakater.com/reload: "calibre-custom-metadata-providers" configmap.reloader.stakater.com/reload: "calibre-custom-metadata-providers"
spec: spec:
containers: containers:
@@ -848,7 +858,6 @@ spec:
mountPath: /cwa-book-ingest mountPath: /cwa-book-ingest
- name: library - name: library
mountPath: /calibre-library mountPath: /calibre-library
# Hungarian metadata providers
- name: custom-metadata-providers - name: custom-metadata-providers
mountPath: /app/calibre-web-automated/cps/metadata_provider/moly_hu.py mountPath: /app/calibre-web-automated/cps/metadata_provider/moly_hu.py
subPath: moly_hu.py subPath: moly_hu.py
@@ -873,7 +882,6 @@ spec:
configMap: configMap:
name: calibre-custom-metadata-providers name: calibre-custom-metadata-providers
--- ---
# Calibre-Web-Automated Service
apiVersion: v1 apiVersion: v1
kind: Service kind: Service
metadata: metadata:
@@ -893,7 +901,6 @@ spec:
app.kubernetes.io/instance: calibre app.kubernetes.io/instance: calibre
app.kubernetes.io/name: calibre-web-automated app.kubernetes.io/name: calibre-web-automated
--- ---
# Main Ingress (books.dooplex.hu)
apiVersion: networking.k8s.io/v1 apiVersion: networking.k8s.io/v1
kind: Ingress kind: Ingress
metadata: metadata:
@@ -947,7 +954,6 @@ spec:
port: port:
number: 8083 number: 8083
--- ---
# Config PVC
apiVersion: v1 apiVersion: v1
kind: PersistentVolumeClaim kind: PersistentVolumeClaim
metadata: metadata: