next
This commit is contained in:
+17
-4
@@ -217,7 +217,7 @@ data:
|
|||||||
identifiers={"moly_hu": moly_id},
|
identifiers={"moly_hu": moly_id},
|
||||||
)
|
)
|
||||||
|
|
||||||
match._relevance_score = calculate_relevance(query_title, query_author, title, authors)
|
match._relevance_score = calculate_relevance(query_title, query_author, title, authors) + index
|
||||||
|
|
||||||
match.description = self._parse_description(root)
|
match.description = self._parse_description(root)
|
||||||
match.cover = self._parse_cover(root)
|
match.cover = self._parse_cover(root)
|
||||||
@@ -568,8 +568,21 @@ data:
|
|||||||
response = self.session.get(url, timeout=15)
|
response = self.session.get(url, timeout=15)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
# Parse with lxml
|
# Libri.hu uses ISO-8859-2 (Latin-2) encoding for Hungarian
|
||||||
root = lh.document_fromstring(response.content.decode('utf-8', errors='replace'))
|
# Try to detect and decode properly
|
||||||
|
content = response.content
|
||||||
|
try:
|
||||||
|
# First try UTF-8
|
||||||
|
text = content.decode('utf-8')
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
try:
|
||||||
|
# Try Latin-2 (Hungarian)
|
||||||
|
text = content.decode('iso-8859-2')
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
# Fallback to Latin-1 with replacement
|
||||||
|
text = content.decode('latin-1', errors='replace')
|
||||||
|
|
||||||
|
root = lh.document_fromstring(text)
|
||||||
|
|
||||||
# Parse book properties table
|
# Parse book properties table
|
||||||
book_props = self._parse_book_properties(root)
|
book_props = self._parse_book_properties(root)
|
||||||
@@ -595,7 +608,7 @@ data:
|
|||||||
identifiers={"libri_hu": libri_id},
|
identifiers={"libri_hu": libri_id},
|
||||||
)
|
)
|
||||||
|
|
||||||
match._relevance_score = calculate_relevance(query_title, query_author, title, authors)
|
match._relevance_score = calculate_relevance(query_title, query_author, title, authors) + index
|
||||||
|
|
||||||
# ISBN
|
# ISBN
|
||||||
isbn = book_props.get('ISBN', '').strip()
|
isbn = book_props.get('ISBN', '').strip()
|
||||||
|
|||||||
Reference in New Issue
Block a user