From 5aaacbb753302b68e1a0b77139c0ff6e9944888a Mon Sep 17 00:00:00 2001 From: kisfenyo Date: Sun, 25 Jan 2026 20:59:41 +0100 Subject: [PATCH] added libri too --- calibre-system/cwa.yaml | 665 +++++++++++++++++++++++++++++++--------- 1 file changed, 518 insertions(+), 147 deletions(-) diff --git a/calibre-system/cwa.yaml b/calibre-system/cwa.yaml index d6ac1db..a3b05be 100644 --- a/calibre-system/cwa.yaml +++ b/calibre-system/cwa.yaml @@ -8,7 +8,7 @@ metadata: name: calibre-system --- # Custom Metadata Providers ConfigMap -# Contains moly.hu provider for Hungarian book metadata +# Contains Hungarian metadata providers: moly.hu and libri.hu apiVersion: v1 kind: ConfigMap metadata: @@ -24,20 +24,78 @@ data: # Based on Calibre plugin by Hokutya # Adapted for CWA # SPDX-License-Identifier: GPL-3.0-or-later - + import concurrent.futures import re import requests from lxml.html import fromstring - from typing import List, Optional - from datetime import datetime - + from typing import List, Optional, Tuple + from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata import cps.logger as logger - + log = logger.create() - - + + + def strip_accents(s: str) -> str: + """Remove accents from Hungarian text for comparison""" + if not s: + return "" + symbols = "öÖüÜóÓőŐúÚéÉáÁűŰíÍ" + replacements = "oOuUoOoOuUeEaAuUiI" + trans = str.maketrans(symbols, replacements) + return s.translate(trans).lower() + + + def normalize_title(title: str) -> str: + """Normalize title for comparison""" + if not title: + return "" + title = re.sub(r'\([^)]*\)', '', title) + title = re.sub(r'\[[^\]]*\]', '', title) + title = re.sub(r'[^\w\s]', ' ', title) + title = re.sub(r'\s+', ' ', title).strip() + return strip_accents(title) + + + def calculate_relevance(query_title: str, query_author: str, + result_title: str, result_authors: List[str]) -> int: + """Calculate relevance score (lower is better, 0 is exact match)""" + score = 500 + + norm_query_title = normalize_title(query_title) + norm_result_title = normalize_title(result_title) + + if norm_query_title == norm_result_title: + score -= 300 + elif norm_query_title in norm_result_title or norm_result_title in norm_query_title: + score -= 200 + elif any(word in norm_result_title for word in norm_query_title.split() if len(word) > 2): + score -= 100 + else: + score += 200 + + if query_author and result_authors: + norm_query_author = strip_accents(query_author) + result_authors_norm = [strip_accents(a) for a in result_authors] + + query_parts = norm_query_author.split() + reversed_author = f"{query_parts[-1]} {' '.join(query_parts[:-1])}" if len(query_parts) >= 2 else norm_query_author + + for author_norm in result_authors_norm: + if norm_query_author == author_norm or reversed_author == author_norm: + score -= 200 + break + elif norm_query_author in author_norm or author_norm in norm_query_author: + score -= 100 + break + elif any(part in author_norm for part in query_parts if len(part) > 2): + score -= 50 + break + + return max(0, score) + + class Moly_hu(Metadata): __name__ = "Moly.hu" __id__ = "moly_hu" @@ -50,43 +108,39 @@ data: 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:130.0) Gecko/20100101 Firefox/130.0', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'hu-HU,hu;q=0.9,en;q=0.8', - 'Accept-Encoding': 'gzip, deflate, br', } session = requests.Session() session.headers.update(headers) - + def search( self, query: str, generic_cover: str = "", locale: str = "hu" ) -> Optional[List[MetaRecord]]: - """Search moly.hu for books matching the query""" - if not self.active: return [] val = [] + query_author = "" + query_title = query.strip() try: - # Search for books search_url = self.SEARCH_URL + requests.utils.quote(query) log.info(f"Moly.hu searching: {search_url}") response = self.session.get(search_url, timeout=15) response.raise_for_status() - # Parse search results root = fromstring(response.text) - book_links = self._parse_search_results(root, query) + book_data = self._parse_search_results(root, query_title, query_author) - if not book_links: + if not book_data: log.info(f"Moly.hu: No results found for '{query}'") return [] - # Fetch details for each book (max 5) with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor: futures = { - executor.submit(self._get_book_details, link, idx): idx - for idx, link in enumerate(book_links[:5]) + executor.submit(self._get_book_details, url, idx, query_title, query_author): idx + for idx, (url, _) in enumerate(book_data[:5]) } for future in concurrent.futures.as_completed(futures, timeout=20): @@ -107,35 +161,42 @@ data: log.error_or_exception(f"Moly.hu search error: {e}") return [] - # Sort by relevance (order from search results) - val.sort(key=lambda x: x.source.id if hasattr(x, 'source') else 0) + val.sort(key=lambda x: getattr(x, '_relevance_score', 500)) return val - - def _parse_search_results(self, root, query: str) -> List[str]: - """Extract book URLs from search results page""" - results = root.xpath('//a[@class="book_selector"]/@href') - book_urls = [] + + def _parse_search_results(self, root, query_title: str, query_author: str) -> List[Tuple[str, int]]: + results = root.xpath('//a[@class="book_selector"]') + book_data = [] - for href in results: - if href and href not in book_urls: - book_urls.append(self.BASE_URL + href) + for result in results: + href = result.get('href') + if not href: + continue + + text = result.text_content().strip() if result.text_content() else "" + result_author = "" + result_title = text + if ':' in text: + parts = text.split(':', 1) + result_author = parts[0].strip() + result_title = parts[1].strip() + + relevance = calculate_relevance(query_title, query_author, result_title, [result_author]) + url = self.BASE_URL + href + book_data.append((url, relevance)) - log.info(f"Moly.hu found {len(book_urls)} results") - return book_urls - - def _get_book_details(self, url: str, index: int) -> Optional[MetaRecord]: - """Fetch and parse book details from a moly.hu book page""" + book_data.sort(key=lambda x: x[1]) + log.info(f"Moly.hu found {len(book_data)} results") + return book_data + + def _get_book_details(self, url: str, index: int, query_title: str, query_author: str) -> Optional[MetaRecord]: try: response = self.session.get(url, timeout=15) response.raise_for_status() - # Clean up HTML - raw = response.text - raw = raw.replace('', '').replace('', '') - + raw = response.text.replace('', '').replace('', '') root = fromstring(raw) - # Parse all fields title = self._parse_title(root) authors = self._parse_authors(root) @@ -157,7 +218,8 @@ data: identifiers={"moly_hu": moly_id}, ) - # Optional fields + match._relevance_score = calculate_relevance(query_title, query_author, title, authors) + match.description = self._parse_description(root) match.cover = self._parse_cover(root) match.publisher = self._parse_publisher(root) @@ -165,7 +227,6 @@ data: match.rating = self._parse_rating(root) match.tags = self._parse_tags(root) - # Series info series_info = self._parse_series(root) if series_info: match.series = series_info[0] @@ -174,7 +235,6 @@ data: except (ValueError, IndexError): match.series_index = 1 - # ISBN isbn = self._parse_isbn(root) if isbn: match.identifiers["isbn"] = isbn @@ -184,9 +244,8 @@ data: except Exception as e: log.warning(f"Moly.hu error fetching {url}: {e}") return None - + def _parse_moly_id(self, url: str) -> Optional[str]: - """Extract moly.hu book ID from URL""" try: m = re.search(r'/konyvek/(.*)', url) if m: @@ -194,45 +253,36 @@ data: except: pass return None - + def _parse_title(self, root) -> Optional[str]: - """Parse book title""" title_node = root.xpath('//*[@id="content"]//*[@class="fn"]/text()') if not title_node: title_node = root.xpath('//*[@id="content"]//*[@class="item"]/text()') if title_node: return title_node[0].strip().replace('\u200b', '') return None - + def _parse_authors(self, root) -> List[str]: - """Parse author names""" author_nodes = root.xpath('//*[@id="content"]//div[@class="authors"]/a/text()') if author_nodes: return [str(author).strip() for author in author_nodes] return [] - + def _parse_description(self, root) -> Optional[str]: - """Parse book description/comments""" - description_node = root.xpath( - '//*[@id="content"]//*[@class="text" and @id="full_description"]/p/text()' - ) + description_node = root.xpath('//*[@id="content"]//*[@class="text" and @id="full_description"]/p/text()') if not description_node: description_node = root.xpath('//*[@id="content"]//*[@class="text"]/p/text()') if not description_node: - description_node = root.xpath( - '//*[@id="content"]//*[@class="text shrinkable"]/p/text()' - ) + description_node = root.xpath('//*[@id="content"]//*[@class="text shrinkable"]/p/text()') if description_node: - # Clean up description desc = '\n'.join(description_node) desc = desc.replace('\n\n', '\n').replace('\n \n', '\n') desc = desc.replace('Vigyázat! Cselekményleírást tartalmaz.\n', '') return desc.strip() return None - + def _parse_cover(self, root) -> Optional[str]: - """Parse cover image URL""" cover_nodes = root.xpath('(//*[@class="coverbox"]//a/@href)[1]') if cover_nodes: cover_url = cover_nodes[0] @@ -240,7 +290,6 @@ data: cover_url = self.BASE_URL + cover_url return cover_url - # Fallback: try img src directly img_nodes = root.xpath('//*[@class="coverbox"]//img/@src') if img_nodes: img_url = img_nodes[0] @@ -248,32 +297,22 @@ data: img_url = self.BASE_URL + img_url return img_url return None - + def _parse_publisher(self, root) -> Optional[str]: - """Parse publisher name""" - publisher_node_1 = root.xpath( - '//*[@id="content"]//*[@class="items"]/div/div[1]/a/text()' - ) + publisher_node_1 = root.xpath('//*[@id="content"]//*[@class="items"]/div/div[1]/a/text()') if publisher_node_1 and publisher_node_1[0] == '+': - publisher_node = root.xpath( - '//*[@id="content"]//*[@class="items"]/div/div[2]/a/text()' - ) + publisher_node = root.xpath('//*[@id="content"]//*[@class="items"]/div/div[2]/a/text()') else: publisher_node = publisher_node_1 if publisher_node: return publisher_node[0].strip() return None - + def _parse_published_date(self, root) -> Optional[str]: - """Parse publication date (year)""" - publication_node_1 = root.xpath( - '//*[@id="content"]//*[@class="items"]/div/div[1]/text()' - ) + publication_node_1 = root.xpath('//*[@id="content"]//*[@class="items"]/div/div[1]/text()') if not publication_node_1: - publication_node = root.xpath( - '//*[@id="content"]//*[@class="items"]/div/div[2]/text()' - ) + publication_node = root.xpath('//*[@id="content"]//*[@class="items"]/div/div[2]/text()') else: publication_node = publication_node_1 @@ -282,35 +321,27 @@ data: if m: return m.group(1) return None - + def _parse_rating(self, root) -> int: - """Parse rating (converted to 0-5 scale)""" - rating_node = root.xpath( - '//*[@id="content"]//*[@class="rating"]//*[@class="like_count"]/text()' - ) + rating_node = root.xpath('//*[@id="content"]//*[@class="rating"]//*[@class="like_count"]/text()') if rating_node: try: - # Moly.hu uses percentage, convert to 0-5 scale percentage = float(rating_node[0].strip('%').strip()) return round(percentage * 0.05) except (ValueError, IndexError): pass return 0 - + def _parse_tags(self, root) -> List[str]: - """Parse tags/genres""" - # Genre tags (in brackets) tags_genre = root.xpath('//*[@id="book_tags"]//*[@class="tag genre"]/text()') tags_genre = [f"[{str(t).strip()}]" for t in tags_genre if str(t).strip()] - # Regular tags tags_regular = root.xpath('//*[@id="book_tags"]//*[@class="tag"]/text()') tags_regular = [str(t).strip() for t in tags_regular if str(t).strip()] return tags_genre + tags_regular - + def _parse_series(self, root) -> Optional[List[str]]: - """Parse series name and index""" series_node = root.xpath('//*[@id="content"]//*[@class="action"]/text()') if not series_node: @@ -319,7 +350,6 @@ data: series_text = series_node[0].strip('().') parts = series_text.rsplit(' ', 1) - # Check if it's actually edition info, not series if len(parts) > 1 and parts[1] == 'kiadás': return None @@ -329,28 +359,408 @@ data: return [parts[0], "1"] return None - + def _parse_isbn(self, root) -> Optional[str]: - """Parse ISBN""" - # Try first location - isbn_nodes = root.xpath( - '//*[@id="content"]//*[@class="items"]/div/div[2]/text()' - ) + isbn_nodes = root.xpath('//*[@id="content"]//*[@class="items"]/div/div[2]/text()') for value in isbn_nodes: m = re.search(r'(\d{13}|\d{10})', value) if m: return m.group(1) - # Try second location - isbn_nodes = root.xpath( - '//*[@id="content"]//*[@class="items"]/div/div[3]/text()' - ) + isbn_nodes = root.xpath('//*[@id="content"]//*[@class="items"]/div/div[3]/text()') for value in isbn_nodes: m = re.search(r'(\d{13}|\d{10})', value) if m: return m.group(1) return None + + libri_hu.py: | + # -*- coding: utf-8 -*- + # Calibre-Web Automated - Libri.hu Metadata Provider + # Based on Calibre plugin by Hoffer Csaba, Kloon & Hokutya + # Adapted for CWA + # SPDX-License-Identifier: GPL-3.0-or-later + + import concurrent.futures + import re + import requests + from lxml.html import fromstring, tostring + from lxml import html as lh + from typing import List, Optional, Tuple, Dict + + from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata + import cps.logger as logger + + log = logger.create() + + + def strip_accents(s: str) -> str: + """Remove accents from Hungarian text for comparison""" + if not s: + return "" + symbols = "öÖüÜóÓőŐúÚéÉáÁűŰíÍąĄćĆęĘłŁńŃśŚźŹżŻ" + replacements = "oOuUoOoOuUeEaAuUiIaAcCeElLnNsSzZzZ" + trans = str.maketrans(symbols, replacements) + return s.translate(trans).lower() + + + def normalize_title(title: str) -> str: + """Normalize title for comparison""" + if not title: + return "" + title = re.sub(r'\([^)]*\)', '', title) + title = re.sub(r'\[[^\]]*\]', '', title) + title = re.sub(r'[^\w\s]', ' ', title) + title = re.sub(r'\s+', ' ', title).strip() + return strip_accents(title) + + + def calculate_relevance(query_title: str, query_author: str, + result_title: str, result_authors: List[str]) -> int: + """Calculate relevance score (lower is better, 0 is exact match)""" + score = 500 + + norm_query_title = normalize_title(query_title) + norm_result_title = normalize_title(result_title) + + if norm_query_title == norm_result_title: + score -= 300 + elif norm_query_title in norm_result_title or norm_result_title in norm_query_title: + score -= 200 + elif any(word in norm_result_title for word in norm_query_title.split() if len(word) > 2): + score -= 100 + else: + score += 200 + + if query_author and result_authors: + norm_query_author = strip_accents(query_author) + result_authors_norm = [strip_accents(a) for a in result_authors] + + query_parts = norm_query_author.split() + reversed_author = f"{query_parts[-1]} {' '.join(query_parts[:-1])}" if len(query_parts) >= 2 else norm_query_author + + for author_norm in result_authors_norm: + if norm_query_author == author_norm or reversed_author == author_norm: + score -= 200 + break + elif norm_query_author in author_norm or author_norm in norm_query_author: + score -= 100 + break + elif any(part in author_norm for part in query_parts if len(part) > 2): + score -= 50 + break + + return max(0, score) + + + class Libri_hu(Metadata): + __name__ = "Libri.hu" + __id__ = "libri_hu" + + BASE_URL = "https://www.libri.hu" + BOOK_URL = BASE_URL + "/konyv" + SEARCH_URL = BASE_URL + "/talalati-lista" + + headers = { + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:130.0) Gecko/20100101 Firefox/130.0', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Language': 'hu-HU,hu;q=0.9,en;q=0.8', + } + + session = requests.Session() + session.headers.update(headers) + + def search( + self, query: str, generic_cover: str = "", locale: str = "hu" + ) -> Optional[List[MetaRecord]]: + if not self.active: + return [] + + val = [] + query_author = "" + query_title = query.strip() + + try: + # Libri.hu search URL format + search_url = f"{self.SEARCH_URL}?kereses={requests.utils.quote(query)}" + log.info(f"Libri.hu searching: {search_url}") + + response = self.session.get(search_url, timeout=15) + response.raise_for_status() + + root = fromstring(response.text) + book_data = self._parse_search_results(root, query_title, query_author) + + if not book_data: + log.info(f"Libri.hu: No results found for '{query}'") + return [] + + with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor: + futures = { + executor.submit(self._get_book_details, url, idx, query_title, query_author): idx + for idx, (url, _) in enumerate(book_data[:5]) + } + + for future in concurrent.futures.as_completed(futures, timeout=20): + try: + result = future.result() + if result: + val.append(result) + except Exception as e: + log.warning(f"Libri.hu worker error: {e}") + + except requests.exceptions.Timeout: + log.warning("Libri.hu search timed out") + return [] + except requests.exceptions.HTTPError as e: + log.error(f"Libri.hu HTTP error: {e}") + return [] + except Exception as e: + log.error_or_exception(f"Libri.hu search error: {e}") + return [] + + val.sort(key=lambda x: getattr(x, '_relevance_score', 500)) + return val + + def _parse_search_results(self, root, query_title: str, query_author: str) -> List[Tuple[str, int]]: + """Parse search results page""" + book_data = [] + + # Try multiple possible XPath selectors for Libri's search results + book_links = root.xpath('//*[@id="book-list-result-items"]//h4[@class="book"]/a/@href') + + if not book_links: + # Alternative selector + book_links = root.xpath('//a[contains(@href, "/konyv/")]/@href') + # Filter to unique book URLs + book_links = list(set([l for l in book_links if '/konyv/' in l and '.html' in l])) + + for href in book_links[:10]: # Limit to 10 results + if not href.startswith('http'): + url = self.BASE_URL + href + else: + url = href + + # Can't calculate preliminary relevance without title info from search page + # so use index-based scoring + book_data.append((url, len(book_data) * 10)) + + log.info(f"Libri.hu found {len(book_data)} results") + return book_data + + def _get_book_details(self, url: str, index: int, query_title: str, query_author: str) -> Optional[MetaRecord]: + try: + response = self.session.get(url, timeout=15) + response.raise_for_status() + + # Parse with lxml + root = lh.document_fromstring(response.content.decode('utf-8', errors='replace')) + + # Parse book properties table + book_props = self._parse_book_properties(root) + + title = self._parse_title(root) + authors = self._parse_authors(root) + + if not title: + return None + + libri_id = self._parse_libri_id(url) + + match = MetaRecord( + id=libri_id, + title=title, + authors=authors if authors else [""], + source=MetaSourceInfo( + id=self.__id__, + description="Libri.hu - Könyvesbolt", + link=self.BASE_URL + ), + url=url, + identifiers={"libri_hu": libri_id}, + ) + + match._relevance_score = calculate_relevance(query_title, query_author, title, authors) + + # ISBN + isbn = book_props.get('ISBN', '').strip() + if isbn: + match.identifiers["isbn"] = isbn + + # Publisher + publisher = book_props.get('Kiadó', '').strip() + if publisher: + match.publisher = publisher + + # Publication date + pub_year = book_props.get('Kiadás éve', '').strip() + if pub_year: + match.publishedDate = pub_year + + # Series + series = book_props.get('Sorozat', '').strip() + if series: + match.series = series + + # Language + lang = book_props.get('Nyelv', '').strip().lower() + if lang: + match.languages = [self._translate_language(lang)] + + # Description + match.description = self._parse_description(root) + + # Cover + match.cover = self._parse_cover(root) + + # Rating + match.rating = self._parse_rating(root) + + # Tags from breadcrumbs + match.tags = self._parse_tags(root) + + return match + + except Exception as e: + log.warning(f"Libri.hu error fetching {url}: {e}") + return None + + def _parse_book_properties(self, root) -> Dict[str, str]: + """Parse the book properties table""" + book_properties = {} + + # Try to find the properties table + tables = root.xpath('//*[@id="productPageMainItem"]//table') + if not tables: + tables = root.xpath('//table[contains(@class, "product")]') + + for table in tables: + for row in table.findall('.//tr'): + cells = row.findall('.//th') + row.findall('.//td') + if len(cells) >= 2: + key = cells[0].text_content().strip().rstrip(':') + value = cells[1].text_content().strip() + if key and value: + book_properties[key] = value + + return book_properties + + def _parse_libri_id(self, url: str) -> Optional[str]: + try: + m = re.search(r'/konyv/(.*)\.html', url) + if m: + return m.group(1) + m = re.search(r'/konyv/([^/]+)', url) + if m: + return m.group(1) + except: + pass + return None + + def _parse_title(self, root) -> Optional[str]: + # Try multiple selectors + selectors = [ + '//*[@id="productPageMainItem"]//*[@class="h2 mb-2"]/text()', + '//*[@id="productPageMainItem"]//h1/text()', + '//h1[@class="book-title"]/text()', + '//meta[@property="og:title"]/@content', + ] + + for selector in selectors: + nodes = root.xpath(selector) + if nodes: + title = nodes[0].strip() + if title: + # Check for subtitle + subtitle_nodes = root.xpath('//*[@id="productPageMainItem"]//*[@class="subtitle"]/text()') + if subtitle_nodes: + title = f"{title} – {subtitle_nodes[0].strip()}" + return title + return None + + def _parse_authors(self, root) -> List[str]: + selectors = [ + '//*[@id="productPageMainItem"]/div/div/div[2]/p[1]/a/text()', + '//*[@id="productPageMainItem"]//a[contains(@href, "/szerzo/")]/text()', + '//a[@class="author"]/text()', + ] + + for selector in selectors: + nodes = root.xpath(selector) + if nodes: + authors = [str(a).strip().replace('-', '') for a in nodes if str(a).strip()] + if authors: + return authors + return [] + + def _parse_description(self, root) -> Optional[str]: + selectors = [ + '//*[@id="product-description"]', + '//*[@class="description"]', + '//*[@itemprop="description"]', + ] + + for selector in selectors: + nodes = root.xpath(selector) + if nodes: + text = nodes[0].text_content().strip() + if text: + return text + return None + + def _parse_cover(self, root) -> Optional[str]: + selectors = [ + '//*[@property="og:image"]/@content', + '//*[@class="cover"]//img/@src', + '//*[@id="productPageMainItem"]//img/@src', + ] + + for selector in selectors: + nodes = root.xpath(selector) + if nodes: + url = nodes[0].strip() + if url: + if not url.startswith('http'): + url = self.BASE_URL + url + return url + return None + + def _parse_rating(self, root) -> int: + nodes = root.xpath('//*[@id="productPageMainItem"]//*[@itemprop="ratingValue"]/@content') + if nodes: + try: + rating = float(nodes[0].strip()) + return round(rating) + except: + pass + return 0 + + def _parse_tags(self, root) -> List[str]: + nodes = root.xpath('//*[@id="navigationBar"]//text()') + if nodes: + tags = [tag.strip().lower() for tag in nodes if tag.strip()] + # Filter out navigation elements + tags = [t for t in tags if t and t not in ['>', '/', 'főoldal', 'home']] + return tags + return [] + + def _translate_language(self, lang: str) -> str: + lang_map = { + 'magyar': 'hu', + 'angol': 'en', + 'amerikai': 'en', + 'német': 'de', + 'francia': 'fr', + 'olasz': 'it', + 'spanyol': 'es', + 'orosz': 'ru', + 'török': 'tr', + 'görög': 'el', + 'kínai': 'zh', + 'japán': 'ja', + } + return lang_map.get(lang.lower(), 'hu') --- # Calibre-Web-Automated Deployment apiVersion: apps/v1 @@ -377,7 +787,7 @@ spec: annotations: # Version checker pattern - CWA uses semantic versioning match-regex.version-checker.io/calibre-web-automated: '^V?[0-9]+\.[0-9]+\.[0-9]+$' - # Force rollout when ConfigMap changes (update this hash when modifying providers) + # Force rollout when ConfigMap changes configmap.reloader.stakater.com/reload: "calibre-custom-metadata-providers" spec: containers: @@ -391,13 +801,10 @@ spec: value: "1000" - name: TZ value: Europe/Budapest - # Use default port 8083 - name: CWA_PORT_OVERRIDE value: "8083" - # Disable WAL mode if on network share (set to true if using NFS) - name: NETWORK_SHARE_MODE value: "false" - # Number of proxies in chain (Cloudflare -> nginx-ingress -> app) - name: TRUSTED_PROXY_COUNT value: "2" ports: @@ -433,38 +840,35 @@ spec: port: http periodSeconds: 10 timeoutSeconds: 5 - # CWA can take time to initialize, especially first run failureThreshold: 60 volumeMounts: - # Config directory for app database, logs, processed books backup - name: config mountPath: /config - # Book ingest folder - files here are DELETED after processing - name: ingest mountPath: /cwa-book-ingest - # Calibre library - your existing library location - name: library mountPath: /calibre-library - # Custom metadata providers (moly.hu) + # Hungarian metadata providers - name: custom-metadata-providers mountPath: /app/calibre-web-automated/cps/metadata_provider/moly_hu.py subPath: moly_hu.py readOnly: true + - name: custom-metadata-providers + mountPath: /app/calibre-web-automated/cps/metadata_provider/libri_hu.py + subPath: libri_hu.py + readOnly: true volumes: - name: config persistentVolumeClaim: claimName: calibre-web-automated-config - # Ingest folder on hostPath for easy file dropping - name: ingest hostPath: path: /mnt/4_hdd/data/calibre-ingest type: DirectoryOrCreate - # Your existing Calibre library location - name: library hostPath: path: /mnt/4_hdd/data/calibre type: DirectoryOrCreate - # Custom metadata providers from ConfigMap - name: custom-metadata-providers configMap: name: calibre-custom-metadata-providers @@ -489,7 +893,7 @@ spec: app.kubernetes.io/instance: calibre app.kubernetes.io/name: calibre-web-automated --- -# Main Ingress (books.dooplex.hu - primary reading interface) +# Main Ingress (books.dooplex.hu) apiVersion: networking.k8s.io/v1 kind: Ingress metadata: @@ -505,7 +909,6 @@ metadata: nginx.ingress.kubernetes.io/proxy-read-timeout: "600" nginx.ingress.kubernetes.io/proxy-send-timeout: "600" nginx.ingress.kubernetes.io/ssl-redirect: "true" - # Forward auth headers for Authentik integration nginx.ingress.kubernetes.io/auth-response-headers: Set-Cookie,X-authentik-username,X-authentik-groups,X-authentik-email,X-authentik-name,X-authentik-uid nginx.ingress.kubernetes.io/auth-snippet: proxy_set_header X-Forwarded-Host $http_host; nginx.ingress.kubernetes.io/configuration-snippet: | @@ -544,7 +947,7 @@ spec: port: number: 8083 --- -# Config PVC - stores app.db, logs, processed_books backup +# Config PVC apiVersion: v1 kind: PersistentVolumeClaim metadata: @@ -561,36 +964,4 @@ spec: storageClassName: longhorn resources: requests: - # Larger than typical - stores backup of processed books by default - storage: 10Gi ---- -# Optional: Authentik integration for SSO -# Uncomment and configure if using Authentik proxy authentication -# apiVersion: networking.k8s.io/v1 -# kind: Ingress -# metadata: -# name: calibre-web-automated-auth -# namespace: calibre-system -# annotations: -# cert-manager.io/cluster-issuer: letsencrypt-prod -# nginx.ingress.kubernetes.io/auth-url: http://authentik-outpost-proxy.authentik-system.svc.cluster.local:9000/outpost.goauthentik.io/auth/nginx -# nginx.ingress.kubernetes.io/auth-signin: https://auth.dooplex.hu/outpost.goauthentik.io/start?rd=$escaped_request_uri -# nginx.ingress.kubernetes.io/auth-response-headers: Set-Cookie,X-authentik-username,X-authentik-groups,X-authentik-email,X-authentik-name,X-authentik-uid -# nginx.ingress.kubernetes.io/auth-snippet: proxy_set_header X-Forwarded-Host $http_host; -# spec: -# ingressClassName: nginx-internal -# tls: -# - hosts: -# - books.dooplex.hu -# secretName: calibre-web-automated-tls -# rules: -# - host: books.dooplex.hu -# http: -# paths: -# - path: / -# pathType: Prefix -# backend: -# service: -# name: calibre-web-automated -# port: -# number: 8083 \ No newline at end of file + storage: 10Gi \ No newline at end of file