This commit is contained in:
2026-01-25 21:22:34 +01:00
parent f5b00011d0
commit f1fabbf503
+17 -4
View File
@@ -217,7 +217,7 @@ data:
identifiers={"moly_hu": moly_id},
)
match._relevance_score = calculate_relevance(query_title, query_author, title, authors)
match._relevance_score = calculate_relevance(query_title, query_author, title, authors) + index
match.description = self._parse_description(root)
match.cover = self._parse_cover(root)
@@ -568,8 +568,21 @@ data:
response = self.session.get(url, timeout=15)
response.raise_for_status()
# Parse with lxml
root = lh.document_fromstring(response.content.decode('utf-8', errors='replace'))
# Libri.hu may serve pages in ISO-8859-2 (Latin-2) rather than UTF-8.
# Decode strictly as UTF-8 first, then fall back to Latin-2 if that fails.
content = response.content
try:
# First try UTF-8
text = content.decode('utf-8')
except UnicodeDecodeError:
try:
# Try Latin-2 (Hungarian)
text = content.decode('iso-8859-2')
except UnicodeDecodeError:
# Defensive fallback to Latin-1 with replacement; ISO-8859-2 maps every byte value, so this branch is not expected to run
text = content.decode('latin-1', errors='replace')
root = lh.document_fromstring(text)
# Parse book properties table
book_props = self._parse_book_properties(root)
@@ -595,7 +608,7 @@ data:
identifiers={"libri_hu": libri_id},
)
match._relevance_score = calculate_relevance(query_title, query_author, title, authors)
match._relevance_score = calculate_relevance(query_title, query_author, title, authors) + index
# ISBN
isbn = book_props.get('ISBN', '').strip()