diff --git a/src/api.py b/src/api.py index a3be558..864c804 100644 --- a/src/api.py +++ b/src/api.py @@ -682,6 +682,12 @@ async def refresh_meta(url: str, current_user: dict = Depends(get_current_user)) def _patch_meta_sync(manga: dict, chapters: list, chapters_total: int, pub_status: str) -> tuple[int, int]: updated = failed = 0 url = manga["url"] + summary = manga.get("description") or "" + tags_raw = manga.get("tags") or "" + try: + tags_str = ", ".join(json.loads(tags_raw)) if tags_raw else "" + except Exception: + tags_str = "" for ch in chapters: for fmt_col in ("output_cbz", "output_pdf", "output_epub"): fpath = ch.get(fmt_col) @@ -699,6 +705,8 @@ def _patch_meta_sync(manga: dict, chapters: list, chapters_total: int, pub_statu chapters_total=chapters_total, pub_status=pub_status, source_url=url, + summary=summary, + tags=tags_str, ) if patch_meta(p, meta): updated += 1 @@ -706,6 +714,43 @@ def _patch_meta_sync(manga: dict, chapters: list, chapters_total: int, pub_statu failed += 1 return updated, failed +def _refresh_cover_sync(manga: dict, manga_dir: Path) -> None: + """Скачивает или обновляет обложку через urllib (синхронно, для asyncio.to_thread).""" + import urllib.request as _urllib_req + import re as _re + + cover_url = manga.get("cover_url") or "" + if not cover_url: + return + + # Определяем Referer по URL обложки (MangaLib CDN — cdnlibs / mangalib) + if any(pat in cover_url for pat in ("mangalib", "cdnlibs", "imglib")): + referer = "https://mangalib.me/" + else: + from urllib.parse import urlparse as _up + parsed = _up(manga.get("url") or "") + referer = f"{parsed.scheme}://{parsed.netloc}/" if parsed.netloc else "https://readmanga.ru/" + + try: + req = _urllib_req.Request(cover_url, headers={ + "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/124.0.0.0", + "Referer": referer, + "Accept": "image/png,image/jpeg,image/webp,image/*,*/*", + }) + with _urllib_req.urlopen(req, timeout=30) as resp: + body = resp.read() + if len(body) < 500: + logger.warning("refresh_cover: слишком малый ответ ({} байт)", len(body)) + return + m = _re.search(r"\.(jpg|jpeg|png|webp)(\?|$)", cover_url, _re.IGNORECASE) + ext = ("." + (m.group(1).lower() if m else "jpg")).replace(".jpeg", ".jpg") + cover_path = manga_dir / f"cover{ext}" + cover_path.write_bytes(body) + logger.info("Обложка обновлена: {} ({} байт)", cover_path.name, len(body)) + except Exception as e: + logger.warning("refresh_cover error {}: {}", cover_url, e) + + async def _do_refresh_meta(url: str): db = StateDB() try: @@ -721,6 +766,14 @@ async def _do_refresh_meta(url: str): await ws_manager.broadcast({"type": "meta_refresh_started", "url": url}) updated, failed = await asyncio.to_thread(_patch_meta_sync, manga, chapters, chapters_total, pub_status) logger.info("refresh_meta {}: обновлено {}, ошибок {}", url, updated, failed) + + # Обновляем обложку если у манги формат cbz + manga_fmt = manga.get("format", "cbz") or "cbz" + if manga_fmt in ("cbz", "all") and manga.get("cover_url"): + manga_dir = _manga_folder(manga) + if manga_dir.exists(): + await asyncio.to_thread(_refresh_cover_sync, manga, manga_dir) + await ws_manager.broadcast({"type": "meta_refreshed", "url": url, "updated": updated, "failed": failed}) except Exception as e: diff --git a/src/exporter.py b/src/exporter.py index d4614f8..f17be23 100644 --- a/src/exporter.py +++ b/src/exporter.py @@ -26,6 +26,7 @@ class MangaMeta: language: str = "ru" summary: str = "" # Описание/синопсис серии genre: str = "" # Жанры через запятую (для ComicInfo Genre) + tags: str = "" # Теги через запятую (для ComicInfo Tags) series_group: str = "" # Группа/коллекция (для ComicInfo SeriesGroup) @@ -89,6 +90,7 @@ def _make_comic_info(meta: MangaMeta) -> str: add("Count", meta.chapters_total) add("Genre", meta.genre) + add("Tags", meta.tags) add("LanguageISO", meta.language) # Manga = YesAndRightToLeft — стандартная японская манга diff --git a/src/sources/base.py b/src/sources/base.py index 3fe1bbd..ccc5bdb 100644 --- a/src/sources/base.py +++ b/src/sources/base.py @@ -37,6 +37,8 @@ class MangaInfo: title_full: str = "" description: str = "" genres: list[str] = field(default_factory=list) + tags: list[str] = field(default_factory=list) + cover_url: str = "" # ────────────────────────────────────────────── diff --git a/src/sources/mangalib.py b/src/sources/mangalib.py index c9e08a2..acffc57 100644 --- a/src/sources/mangalib.py +++ b/src/sources/mangalib.py @@ -134,6 +134,18 @@ class MangalibSource: description = await _extract_description(page) genres = await _extract_genres(page) + # Получаем обложку, описание и теги из API + async with lock: + manga_meta_for_extras = dict(manga_api_data) + + cover_url, extra_description, tags = await _fetch_extra_meta( + page, manga_meta_for_extras, url, self.auth_token + ) + if extra_description: + description = extra_description + if not description: + description = await _extract_description(page) + async with lock: raw_chapters = list(chapters_api_data) @@ -154,6 +166,8 @@ class MangalibSource: title_full=title_full, description=description, genres=genres, + tags=tags, + cover_url=cover_url, ) # ────────────────────────────────────────────── @@ -611,6 +625,85 @@ async def _extract_genres(page: Page) -> list[str]: return [] +def _parse_summary_doc(doc) -> str: + """Конвертирует ProseMirror JSON-документ в plain text.""" + if not doc or not isinstance(doc, dict): + return "" + if doc.get("type") == "text": + return doc.get("text", "") + parts = [] + for node in doc.get("content", []): + text = _parse_summary_doc(node) + if text: + parts.append(text) + return " ".join(parts) + + +async def _fetch_extra_meta( + page: Page, + manga_api_data: dict, + manga_url: str, + auth_token: str | None, +) -> tuple[str, str, list[str]]: + """ + Возвращает (cover_url, description, tags) из уже полученных данных API или, + если нужных полей нет, делает явный supplementary-запрос к API. + """ + def _extract_from_data(data: dict) -> tuple[str, str, list[str]]: + cover_url = "" + cover_obj = data.get("cover") + if isinstance(cover_obj, dict): + cover_url = cover_obj.get("default") or cover_obj.get("thumbnail") or "" + + description = "" + summary = data.get("summary") + if summary: + if isinstance(summary, dict): + description = _parse_summary_doc(summary).strip() + elif isinstance(summary, str): + description = summary.strip() + + tags: list[str] = [] + for t in data.get("tags") or []: + name = (t.get("name") or t.get("label") or "").strip() + if name: + tags.append(name) + + return cover_url, description, tags + + cover_url, description, tags = _extract_from_data(manga_api_data) + + # Если хотя бы одного поля нет — делаем явный supplementary-запрос + if not cover_url or not description or not tags: + slug = _manga_slug_from_url(manga_url) + referer = _base_url(manga_url) + "/" + api_url = ( + f"https://api.cdnlibs.org/api/manga/{slug}" + "?fields[]=summary&fields[]=tags&fields[]=cover" + ) + try: + headers: dict = {"Referer": referer, "Accept": "application/json"} + if auth_token: + headers["Authorization"] = f"Bearer {auth_token}" + resp = await page.context.request.get(api_url, headers=headers) + if resp.ok: + body = await resp.body() + data = _json.loads(body).get("data", {}) + extra_cover, extra_desc, extra_tags = _extract_from_data(data) + if not cover_url: + cover_url = extra_cover + if not description: + description = extra_desc + if not tags: + tags = extra_tags + logger.debug("Supplementary API: cover={}, desc_len={}, tags={}", + bool(cover_url), len(description), len(tags)) + except Exception as e: + logger.debug("Supplementary API error: {}", e) + + return cover_url, description, tags + + async def _detect_server(page: Page, servers_list: list[str]) -> str: """Определяет CDN-сервер из img src на странице или из constants API.""" try: diff --git a/src/sources/readmanga.py b/src/sources/readmanga.py index e745b60..a6d1255 100644 --- a/src/sources/readmanga.py +++ b/src/sources/readmanga.py @@ -47,6 +47,8 @@ class ReadmangaSource: description = await _extract_description(page) genres = await _extract_genres(page) + tags = await _extract_tags(page) + cover_url = await _get_cover_url(page) await _expand_chapters(page) chapters = await _extract_chapters(page) @@ -63,6 +65,8 @@ class ReadmangaSource: title_full=title_full, description=description, genres=genres, + tags=tags, + cover_url=cover_url, ) # ────────────────────────────────────────────── @@ -474,6 +478,18 @@ async def _extract_description(page: Page) -> str: try: result = await page.evaluate(""" () => { + // Приоритетный селектор — новый сайт ReadManga + const crDesc = document.querySelector('.cr-description__content'); + if (crDesc) { + const parts = []; + crDesc.querySelectorAll('p, span, div').forEach(el => { + const t = el.textContent.trim(); + if (t) parts.push(t); + }); + if (parts.length) return parts.join(' '); + const t = crDesc.textContent.trim(); + if (t) return t; + } const selectors = [ '.manga-description', '.elem_descr .value', '#tab-description .description-text', '.description', @@ -491,6 +507,42 @@ async def _extract_description(page: Page) -> str: return "" +async def _extract_tags(page: Page) -> list[str]: + try: + result = await page.evaluate(""" + () => { + const crTags = document.querySelector('.cr-tags'); + if (crTags) { + const els = crTags.querySelectorAll('a, span, li'); + if (els.length) return Array.from(els).map(e => e.textContent.trim()).filter(Boolean); + const t = crTags.textContent.trim(); + if (t) return t.split(/[,;]/).map(s => s.trim()).filter(Boolean); + } + return []; + } + """) + return result or [] + except Exception: + return [] + + +async def _get_cover_url(page: Page) -> str: + try: + result = await page.evaluate(""" + () => { + const wrapper = document.querySelector('.cr-hero-poster-wrapper'); + if (wrapper) { + const img = wrapper.querySelector('img'); + if (img) return img.src || img.dataset.src || ''; + } + return ''; + } + """) + return (result or "").strip() + except Exception: + return "" + + async def _extract_genres(page: Page) -> list[str]: try: result = await page.evaluate(""" diff --git a/src/state.py b/src/state.py index 2c3a603..58abe0d 100644 --- a/src/state.py +++ b/src/state.py @@ -160,6 +160,9 @@ class StateDB: ("mangas", "added_by", "INTEGER REFERENCES users(id)"), ("mangas", "last_error", "TEXT"), ("users", "is_env_admin", "INTEGER NOT NULL DEFAULT 0"), + ("mangas", "description", "TEXT"), + ("mangas", "tags", "TEXT"), + ("mangas", "cover_url", "TEXT"), ] for table, col, typedef in migrations: try: @@ -370,11 +373,16 @@ class StateDB: def update_manga_info(self, url: str, title: str, chapters_total: int, title_ru: str = "", title_full: str = "", - pub_status: str = "unknown"): + pub_status: str = "unknown", + description: str = "", tags: str = "", + cover_url: str = ""): self.conn.execute(""" UPDATE mangas SET title=?, title_ru=?, title_full=?, pub_status=?, - chapters_total=?, updated_at=? WHERE url=? - """, (title, title_ru, title_full, pub_status, chapters_total, _now(), url)) + chapters_total=?, updated_at=?, + description=?, tags=?, cover_url=? + WHERE url=? + """, (title, title_ru, title_full, pub_status, chapters_total, _now(), + description or None, tags or None, cover_url or None, url)) self.conn.commit() def set_folder_name(self, url: str, folder_name: str): diff --git a/src/worker.py b/src/worker.py index aa63715..cb41226 100644 --- a/src/worker.py +++ b/src/worker.py @@ -91,14 +91,15 @@ async def download_manga( await emit({"type": "auth_required", "url": url, "source_slug": e.source_slug, "finished_at": finished_ts}) return - await info_page.close() if not manga: + await info_page.close() await db_call(db.update_manga_status, url, "failed") await emit({"type": "manga_failed", "url": url, "error": "Не удалось получить информацию о манге"}) return + import json as _json_mod await db_call( db.update_manga_info, url, @@ -107,6 +108,9 @@ async def download_manga( title_ru=manga.title_ru, title_full=manga.title_full, pub_status=manga.pub_status, + description=manga.description, + tags=_json_mod.dumps(manga.tags, ensure_ascii=False) if manga.tags else "", + cover_url=manga.cover_url, ) await emit({ "type": "manga_info", @@ -127,6 +131,12 @@ async def download_manga( manga_dir = output_dir / folder_name manga_dir.mkdir(parents=True, exist_ok=True) + # Скачиваем обложку для CBZ-формата (info_page ещё открыта — контекст браузера жив) + if manga.cover_url and fmt in ("cbz", "all"): + await _download_cover(manga.cover_url, manga_dir, url, info_page) + + await info_page.close() + for ch in manga.chapters: await db_call(db.upsert_chapter, url, ch.url, ch.title, ch.number, ch.volume) @@ -250,6 +260,7 @@ async def download_manga( source_url=url, summary=manga.description, genre=", ".join(manga.genres) if manga.genres else "", + tags=", ".join(manga.tags) if manga.tags else "", ) for f in formats: out_file = manga_dir / f"{ch_name}.{f}" @@ -350,6 +361,43 @@ async def download_manga( db.close() +def _cover_ext_from_url(url: str) -> str: + import re as _re + m = _re.search(r"\.(jpg|jpeg|png|webp)(\?|$)", url, _re.IGNORECASE) + if m: + ext = m.group(1).lower() + return ".jpg" if ext == "jpeg" else f".{ext}" + return ".jpg" + + +async def _download_cover(cover_url: str, manga_dir: Path, manga_url: str, page) -> Optional[Path]: + """Скачивает обложку в manga_dir/cover.{ext}. Использует существующий Playwright page.""" + from urllib.parse import urlparse as _urlparse + try: + parsed = _urlparse(manga_url) + referer = f"{parsed.scheme}://{parsed.netloc}/" + headers = { + "Accept": "image/png,image/jpeg,image/webp,image/*,*/*", + "Referer": referer, + } + response = await page.context.request.get(cover_url, headers=headers) + if not response.ok: + logger.warning("Обложка: HTTP {} для {}", response.status, cover_url) + return None + body = await response.body() + if len(body) < 500: + logger.warning("Обложка: слишком малый ответ ({} байт)", len(body)) + return None + ext = _cover_ext_from_url(cover_url) + cover_path = manga_dir / f"cover{ext}" + cover_path.write_bytes(body) + logger.info("Обложка сохранена: {} ({} байт)", cover_path.name, len(body)) + return cover_path + except Exception as e: + logger.warning("Ошибка скачивания обложки {}: {}", cover_url, e) + return None + + async def check_for_updates( url: str, on_event: Optional[Callable] = None, @@ -390,11 +438,12 @@ async def check_for_updates( async with BrowserManager(headless=True) as bm: _, page = await bm.new_page() manga = await source.get_manga_info(page, url) - await page.close() if not manga: + await page.close() return [] - # Обновляем pub_status и количество глав + import json as _json_mod + # Обновляем pub_status, количество глав и мета-поля await db_call( db.update_manga_info, url, @@ -403,8 +452,25 @@ async def check_for_updates( title_ru=manga.title_ru, title_full=manga.title_full, pub_status=manga.pub_status, + description=manga.description, + tags=_json_mod.dumps(manga.tags, ensure_ascii=False) if manga.tags else "", + cover_url=manga.cover_url, ) + # Обновляем обложку если манга сохраняется как cbz + manga_row = await db_call(db.get_manga, url) + manga_fmt = (manga_row or {}).get("format", "cbz") + if manga.cover_url and manga_fmt in ("cbz", "all"): + folder_name = ( + (manga_row.get("folder_name") if manga_row else None) + or safe_name(manga.title_ru or manga.title) + ) + manga_dir = OUTPUT_DIR / folder_name + if manga_dir.exists(): + await _download_cover(manga.cover_url, manga_dir, url, page) + + await page.close() + # Находим главы которых ещё нет в БД known = {ch["chapter_url"] for ch in await db_call(db.get_all_chapters, url)} new_chapters = [ch for ch in manga.chapters if ch.url not in known]