Libri.hu metadata provider fix

This commit is contained in:
2026-01-25 21:17:40 +01:00
parent 5aaacbb753
commit f5b00011d0
+30 -24
View File
@@ -1,6 +1,5 @@
---
# Calibre-Web-Automated - All-in-one eBook library solution
# Replaces Calibre + Calibre-web with automation features
# Namespace
apiVersion: v1
kind: Namespace
@@ -460,7 +459,8 @@ data:
BASE_URL = "https://www.libri.hu"
BOOK_URL = BASE_URL + "/konyv"
SEARCH_URL = BASE_URL + "/talalati-lista"
# Detailed-search URL prefix; the URL-quoted title is appended directly after "cim="
SEARCH_URL = BASE_URL + "/talalati_lista/?reszletes=1&s_det=1&cim="
headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:130.0) Gecko/20100101 Firefox/130.0',
@@ -482,8 +482,8 @@ data:
query_title = query.strip()
try:
# Libri.hu search URL format
search_url = f"{self.SEARCH_URL}?kereses={requests.utils.quote(query)}"
# Libri.hu detailed search URL - search by title
search_url = f"{self.SEARCH_URL}{requests.utils.quote(query)}"
log.info(f"Libri.hu searching: {search_url}")
response = self.session.get(search_url, timeout=15)
@@ -527,14 +527,17 @@ data:
"""Parse search results page"""
book_data = []
# Try multiple possible XPath selectors for Libri's search results
book_links = root.xpath('//*[@id="book-list-result-items"]//h4[@class="book"]/a/@href')
# Libri.hu book URLs end with .html and follow an author.title pattern,
# e.g. /konyv/orvos-toth_noemi.Orokolt-sors-514.html
all_links = root.xpath("//a[contains(@href, '/konyv/') and contains(@href, '.html')]/@href")
if not book_links:
# Alternative selector
book_links = root.xpath('//a[contains(@href, "/konyv/")]/@href')
# Filter to unique book URLs
book_links = list(set([l for l in book_links if '/konyv/' in l and '.html' in l]))
# Deduplicate while preserving page order; keep only links containing '.html'
seen = set()
book_links = []
for href in all_links:
if href not in seen and '.html' in href:
seen.add(href)
book_links.append(href)
for href in book_links[:10]: # Limit to 10 results
if not href.startswith('http'):
@@ -542,9 +545,20 @@ data:
else:
url = href
# Can't calculate preliminary relevance without title info from search page
# so use index-based scoring
book_data.append((url, len(book_data) * 10))
# Extract title from URL for preliminary relevance
# URL format: /konyv/author_name.Book-Title-123.html
url_title = ""
if '.' in href:
parts = href.split('.')
if len(parts) >= 2:
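# Take the segment between the first and second '.' as the title.
# NOTE(review): this only works for relative hrefs; in an absolute URL the
# first '.' falls inside the hostname, so parts[1] would not be the title.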
url_title = parts[1].replace('-', ' ').replace('_', ' ')
relevance = calculate_relevance(query_title, query_author, url_title, [])
book_data.append((url, relevance))
# Sort ascending by relevance score
# (presumably lower = better match; confirm against calculate_relevance)
book_data.sort(key=lambda x: x[1])
log.info(f"Libri.hu found {len(book_data)} results")
return book_data
@@ -648,10 +662,8 @@ data:
def _parse_libri_id(self, url: str) -> Optional[str]:
try:
m = re.search(r'/konyv/(.*)\.html', url)
if m:
return m.group(1)
m = re.search(r'/konyv/([^/]+)', url)
# URL format: /konyv/author_name.Book-Title-123.html
m = re.search(r'/konyv/(.+)\.html', url)
if m:
return m.group(1)
except:
@@ -785,9 +797,7 @@ spec:
app.kubernetes.io/instance: calibre
app.kubernetes.io/name: calibre-web-automated
annotations:
# Version checker pattern - CWA uses semantic versioning
match-regex.version-checker.io/calibre-web-automated: '^V?[0-9]+\.[0-9]+\.[0-9]+$'
# Force rollout when ConfigMap changes
configmap.reloader.stakater.com/reload: "calibre-custom-metadata-providers"
spec:
containers:
@@ -848,7 +858,6 @@ spec:
mountPath: /cwa-book-ingest
- name: library
mountPath: /calibre-library
# Hungarian metadata providers
- name: custom-metadata-providers
mountPath: /app/calibre-web-automated/cps/metadata_provider/moly_hu.py
subPath: moly_hu.py
@@ -873,7 +882,6 @@ spec:
configMap:
name: calibre-custom-metadata-providers
---
# Calibre-Web-Automated Service
apiVersion: v1
kind: Service
metadata:
@@ -893,7 +901,6 @@ spec:
app.kubernetes.io/instance: calibre
app.kubernetes.io/name: calibre-web-automated
---
# Main Ingress (books.dooplex.hu)
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
@@ -947,7 +954,6 @@ spec:
port:
number: 8083
---
# Config PVC
apiVersion: v1
kind: PersistentVolumeClaim
metadata: