libri fix
This commit is contained in:
+30
-24
@@ -1,6 +1,5 @@
|
||||
---
|
||||
# Calibre-Web-Automated - All-in-one eBook library solution
|
||||
# Replaces Calibre + Calibre-web with automation features
|
||||
# Namespace
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
@@ -460,7 +459,8 @@ data:
|
||||
|
||||
BASE_URL = "https://www.libri.hu"
|
||||
BOOK_URL = BASE_URL + "/konyv"
|
||||
SEARCH_URL = BASE_URL + "/talalati-lista"
|
||||
# Detailed search URL format
|
||||
SEARCH_URL = BASE_URL + "/talalati_lista/?reszletes=1&s_det=1&cim="
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:130.0) Gecko/20100101 Firefox/130.0',
|
||||
@@ -482,8 +482,8 @@ data:
|
||||
query_title = query.strip()
|
||||
|
||||
try:
|
||||
# Libri.hu search URL format
|
||||
search_url = f"{self.SEARCH_URL}?kereses={requests.utils.quote(query)}"
|
||||
# Libri.hu detailed search URL - search by title
|
||||
search_url = f"{self.SEARCH_URL}{requests.utils.quote(query)}"
|
||||
log.info(f"Libri.hu searching: {search_url}")
|
||||
|
||||
response = self.session.get(search_url, timeout=15)
|
||||
@@ -527,14 +527,17 @@ data:
|
||||
"""Parse search results page"""
|
||||
book_data = []
|
||||
|
||||
# Try multiple possible XPath selectors for Libri's search results
|
||||
book_links = root.xpath('//*[@id="book-list-result-items"]//h4[@class="book"]/a/@href')
|
||||
# Libri.hu book URLs end with .html and follow an author.title pattern
|
||||
# e.g., /konyv/orvos-toth_noemi.Orokolt-sors-514.html
|
||||
all_links = root.xpath("//a[contains(@href, '/konyv/') and contains(@href, '.html')]/@href")
|
||||
|
||||
if not book_links:
|
||||
# Alternative selector
|
||||
book_links = root.xpath('//a[contains(@href, "/konyv/")]/@href')
|
||||
# Filter to unique book URLs
|
||||
book_links = list(set([l for l in book_links if '/konyv/' in l and '.html' in l]))
|
||||
# Deduplicate and filter
|
||||
seen = set()
|
||||
book_links = []
|
||||
for href in all_links:
|
||||
if href not in seen and '.html' in href:
|
||||
seen.add(href)
|
||||
book_links.append(href)
|
||||
|
||||
for href in book_links[:10]: # Limit to 10 results
|
||||
if not href.startswith('http'):
|
||||
@@ -542,9 +545,20 @@ data:
|
||||
else:
|
||||
url = href
|
||||
|
||||
# Can't calculate preliminary relevance without title info from search page
|
||||
# so use index-based scoring
|
||||
book_data.append((url, len(book_data) * 10))
|
||||
# Extract title from URL for preliminary relevance
|
||||
# URL format: /konyv/author_name.Book-Title-123.html
|
||||
url_title = ""
|
||||
if '.' in href:
|
||||
parts = href.split('.')
|
||||
if len(parts) >= 2:
|
||||
# Get the title part (the text between the first and second '.'; assumes a relative href with no dots before the author segment)
|
||||
url_title = parts[1].replace('-', ' ').replace('_', ' ')
|
||||
|
||||
relevance = calculate_relevance(query_title, query_author, url_title, [])
|
||||
book_data.append((url, relevance))
|
||||
|
||||
# Sort by relevance
|
||||
book_data.sort(key=lambda x: x[1])
|
||||
|
||||
log.info(f"Libri.hu found {len(book_data)} results")
|
||||
return book_data
|
||||
@@ -648,10 +662,8 @@ data:
|
||||
|
||||
def _parse_libri_id(self, url: str) -> Optional[str]:
|
||||
try:
|
||||
m = re.search(r'/konyv/(.*)\.html', url)
|
||||
if m:
|
||||
return m.group(1)
|
||||
m = re.search(r'/konyv/([^/]+)', url)
|
||||
# URL format: /konyv/author_name.Book-Title-123.html
|
||||
m = re.search(r'/konyv/(.+)\.html', url)
|
||||
if m:
|
||||
return m.group(1)
|
||||
except:
|
||||
@@ -785,9 +797,7 @@ spec:
|
||||
app.kubernetes.io/instance: calibre
|
||||
app.kubernetes.io/name: calibre-web-automated
|
||||
annotations:
|
||||
# Version checker pattern - CWA uses semantic versioning
|
||||
match-regex.version-checker.io/calibre-web-automated: '^V?[0-9]+\.[0-9]+\.[0-9]+$'
|
||||
# Force rollout when ConfigMap changes
|
||||
configmap.reloader.stakater.com/reload: "calibre-custom-metadata-providers"
|
||||
spec:
|
||||
containers:
|
||||
@@ -848,7 +858,6 @@ spec:
|
||||
mountPath: /cwa-book-ingest
|
||||
- name: library
|
||||
mountPath: /calibre-library
|
||||
# Hungarian metadata providers
|
||||
- name: custom-metadata-providers
|
||||
mountPath: /app/calibre-web-automated/cps/metadata_provider/moly_hu.py
|
||||
subPath: moly_hu.py
|
||||
@@ -873,7 +882,6 @@ spec:
|
||||
configMap:
|
||||
name: calibre-custom-metadata-providers
|
||||
---
|
||||
# Calibre-Web-Automated Service
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
@@ -893,7 +901,6 @@ spec:
|
||||
app.kubernetes.io/instance: calibre
|
||||
app.kubernetes.io/name: calibre-web-automated
|
||||
---
|
||||
# Main Ingress (books.dooplex.hu)
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
@@ -947,7 +954,6 @@ spec:
|
||||
port:
|
||||
number: 8083
|
||||
---
|
||||
# Config PVC
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
|
||||
Reference in New Issue
Block a user