This commit is contained in:
2026-01-25 21:22:34 +01:00
parent f5b00011d0
commit f1fabbf503
+17 -4
View File
@@ -217,7 +217,7 @@ data:
identifiers={"moly_hu": moly_id}, identifiers={"moly_hu": moly_id},
) )
match._relevance_score = calculate_relevance(query_title, query_author, title, authors) match._relevance_score = calculate_relevance(query_title, query_author, title, authors) + index
match.description = self._parse_description(root) match.description = self._parse_description(root)
match.cover = self._parse_cover(root) match.cover = self._parse_cover(root)
@@ -568,8 +568,21 @@ data:
response = self.session.get(url, timeout=15) response = self.session.get(url, timeout=15)
response.raise_for_status() response.raise_for_status()
# Parse with lxml # Libri.hu uses ISO-8859-2 (Latin-2) encoding for Hungarian
root = lh.document_fromstring(response.content.decode('utf-8', errors='replace')) # Try strict UTF-8 first, then fall back to Latin-2
content = response.content
try:
# First try UTF-8
text = content.decode('utf-8')
except UnicodeDecodeError:
try:
# Try Latin-2 (Hungarian)
text = content.decode('iso-8859-2')
except UnicodeDecodeError:
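# Fallback to Latin-1 with replacement (NOTE(review): likely unreachable, since Python's iso-8859-2 codec maps all 256 byte values and the decode above should never raise; confirm)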
text = content.decode('latin-1', errors='replace')
root = lh.document_fromstring(text)
# Parse book properties table # Parse book properties table
book_props = self._parse_book_properties(root) book_props = self._parse_book_properties(root)
@@ -595,7 +608,7 @@ data:
identifiers={"libri_hu": libri_id}, identifiers={"libri_hu": libri_id},
) )
match._relevance_score = calculate_relevance(query_title, query_author, title, authors) match._relevance_score = calculate_relevance(query_title, query_author, title, authors) + index
# ISBN # ISBN
isbn = book_props.get('ISBN', '').strip() isbn = book_props.get('ISBN', '').strip()