diff --git a/calibre-system/cwa.yaml b/calibre-system/cwa.yaml
index 77a37f6..f776830 100644
--- a/calibre-system/cwa.yaml
+++ b/calibre-system/cwa.yaml
@@ -217,7 +217,8 @@ data:
                     identifiers={"moly_hu": moly_id},
                 )
 
-                match._relevance_score = calculate_relevance(query_title, query_author, title, authors)
+                # NOTE(review): `+ index` shifts the score by the result's position; confirm the ranking's sort direction.
+                match._relevance_score = calculate_relevance(query_title, query_author, title, authors) + index
                 match.description = self._parse_description(root)
                 match.cover = self._parse_cover(root)
 
@@ -568,8 +569,17 @@ data:
                 response = self.session.get(url, timeout=15)
                 response.raise_for_status()
 
-                # Parse with lxml
-                root = lh.document_fromstring(response.content.decode('utf-8', errors='replace'))
+                # Libri.hu uses ISO-8859-2 (Latin-2) encoding for Hungarian.
+                # Try strict UTF-8 first so pages that are already UTF-8 are not mangled.
+                content = response.content
+                try:
+                    text = content.decode('utf-8')
+                except UnicodeDecodeError:
+                    # ISO-8859-2 assigns a character to every byte value, so this
+                    # decode cannot raise and needs no further fallback.
+                    text = content.decode('iso-8859-2')
+
+                root = lh.document_fromstring(text)
 
                 # Parse book properties table
                 book_props = self._parse_book_properties(root)
@@ -595,7 +605,8 @@ data:
                     identifiers={"libri_hu": libri_id},
                 )
 
-                match._relevance_score = calculate_relevance(query_title, query_author, title, authors)
+                # NOTE(review): `+ index` shifts the score by the result's position; confirm the ranking's sort direction.
+                match._relevance_score = calculate_relevance(query_title, query_author, title, authors) + index
 
                 # ISBN
                 isbn = book_props.get('ISBN', '').strip()