""" Воркер скачивания манги с поддержкой событий прогресса. """ import asyncio import os import re import tempfile from pathlib import Path from typing import Callable, Optional from loguru import logger from .browser import BrowserManager from .scraper import get_manga_info, get_chapter_images_and_download, Chapter from .exporter import export, MangaMeta from .state import StateDB OUTPUT_DIR = Path("/app/output") # Читаем из переменных окружения; можно переопределить в docker-compose CHAPTER_CONCURRENCY = int(os.getenv("CHAPTER_CONCURRENCY", "3")) def _safe_name(s: str) -> str: return re.sub(r'[^\w\s\-]', '', s).strip().replace(" ", "_")[:80] def _safe_chapter_name(ch: Chapter) -> str: vol = f"v{ch.volume:02d}_" if ch.volume else "" return f"{vol}ch{ch.number:06.1f}" async def download_manga( url: str, fmt: str = "cbz", output_dir: Path = OUTPUT_DIR, resume: bool = True, is_update: bool = False, on_event: Optional[Callable] = None, chapter_concurrency: int = CHAPTER_CONCURRENCY, ): """Скачать мангу. Главы обрабатываются параллельно (chapter_concurrency штук).""" async def emit(event: dict): if on_event: try: await on_event(event) except Exception as e: logger.debug("on_event error: {}", e) db = StateDB() db_lock = asyncio.Lock() # защита от параллельных записей в SQLite async def db_call(fn, *args, **kwargs): """Обёртка: все обращения к db идут через общий asyncio.Lock.""" async with db_lock: return fn(*args, **kwargs) try: await db_call(db.update_manga_status, url, "downloading") started_ts = await db_call(db.mark_started, url) await emit({"type": "manga_start", "url": url, "started_at": started_ts}) async with BrowserManager(headless=True) as bm: ctx, info_page = await bm.new_page() manga = await get_manga_info(info_page, url) await info_page.close() if not manga: await db_call(db.update_manga_status, url, "failed") await emit({"type": "manga_failed", "url": url, "error": "Не удалось получить информацию о манге"}) return await db_call( db.update_manga_info, url, title=manga.title_ru or manga.title, chapters_total=len(manga.chapters), title_ru=manga.title_ru, title_full=manga.title_full, pub_status=manga.pub_status, ) await emit({ "type": "manga_info", "url": url, "title": manga.title_ru or manga.title, "title_ru": manga.title_ru, "title_full": manga.title_full, "pub_status": manga.pub_status, "chapters_total": len(manga.chapters), }) # Используем кастомное имя папки из БД, если задано _db_manga = await db_call(db.get_manga, url) folder_name = ( (_db_manga.get("folder_name") if _db_manga else None) or _safe_name(manga.title_ru or manga.title) ) manga_dir = output_dir / folder_name manga_dir.mkdir(parents=True, exist_ok=True) for ch in manga.chapters: await db_call(db.upsert_chapter, url, ch.url, ch.title, ch.number, ch.volume) formats = ["cbz", "pdf", "epub"] if fmt == "all" else [fmt] # ── Разделяем главы: пропустить / скачать ──────────────────── to_skip = [] to_download = [] for ch in manga.chapters: if resume and (await db_call(db.chapter_status, ch.url)) == "done": to_skip.append(ch) else: to_download.append(ch) # Счётчик и блокировка для безопасного обновления из параллельных задач counter_lock = asyncio.Lock() # Начинаем с 0: to_skip-цикл сам доберёт до len(to_skip), # иначе sync_chapters_done() + len(to_skip) = двойной счёт chapters_done = 0 # Сообщаем о пропущенных главах (уже скачаны) for ch in to_skip: chapters_done += 1 await emit({ "type": "chapter_skipped", "url": url, "chapter_url": ch.url, "chapter_number": ch.number, "chapter_title": ch.title, "volume": ch.volume, "chapters_done": chapters_done, "chapters_total": len(manga.chapters), }) logger.info( "Параллельность: {} гл одновременно. Пропущено: {}, скачать: {}", chapter_concurrency, len(to_skip), len(to_download), ) # ── Семафор ограничивает одновременно открытые страницы ─────── sem = asyncio.Semaphore(chapter_concurrency) async def process_chapter(ch: Chapter) -> None: nonlocal chapters_done async with sem: # Повторная проверка (другая горутина могла скачать) if (await db_call(db.chapter_status, ch.url)) == "done": async with counter_lock: chapters_done += 1 done_snap = chapters_done await emit({ "type": "chapter_skipped", "url": url, "chapter_url": ch.url, "chapter_number": ch.number, "chapter_title": ch.title, "volume": ch.volume, "chapters_done": done_snap, "chapters_total": len(manga.chapters), }) return await emit({ "type": "chapter_start", "url": url, "chapter_url": ch.url, "chapter_title": ch.title, "chapter_number": ch.number, "volume": ch.volume, "chapters_done": chapters_done, "chapters_total": len(manga.chapters), }) ch_page = await ctx.new_page() try: with tempfile.TemporaryDirectory() as tmpdir: tmp_path = Path(tmpdir) pages_done_count = [0] async def on_page(page_idx: int, pages_total: int): pages_done_count[0] += 1 await db_call(db.update_chapter_pages, ch.url, pages_total, pages_done_count[0]) await emit({ "type": "page_done", "url": url, "chapter_url": ch.url, "page_idx": page_idx, "pages_done": pages_done_count[0], "pages_total": pages_total, }) image_paths = await get_chapter_images_and_download( ch_page, ch.url, dest_dir=tmp_path, manga_url=url, on_page=on_page, ) if not image_paths: logger.error( "Т{} Гл.{} '{}' — get_chapter_images вернул пустой список. " "URL: {}", ch.volume, ch.number, ch.title, ch.url, ) await db_call(db.mark_failed, ch.url) await emit({"type": "chapter_failed", "url": url, "chapter_url": ch.url}) return ch_name = _safe_chapter_name(ch) ch_meta = MangaMeta( series=manga.title_ru or manga.title, series_full=manga.title_full or "", chapter_title=ch.title, number=ch.number, volume=ch.volume, chapters_total=len(manga.chapters), pub_status=manga.pub_status, source_url=url, summary=manga.description, genre=", ".join(manga.genres) if manga.genres else "", ) for f in formats: out_file = manga_dir / f"{ch_name}.{f}" try: export(image_paths, out_file, f, meta=ch_meta) await db_call(db.mark_done, ch.url, f, str(out_file)) except Exception as e: logger.exception( "Ошибка экспорта Т{} Гл.{} → {} | {}: {}", ch.volume, ch.number, f, out_file.name, e, ) event_type = "auto_downloaded" if is_update else "downloaded" await db_call( db.add_history, manga_url=url, event_type=event_type, chapter_url=ch.url, chapter_title=ch.title, chapter_number=ch.number, volume=ch.volume, ) async with counter_lock: chapters_done += 1 done_snap = chapters_done await emit({ "type": "chapter_done", "url": url, "chapter_url": ch.url, "chapter_title": ch.title, "chapter_number": ch.number, "volume": ch.volume, "chapters_done": done_snap, "chapters_total": len(manga.chapters), }) except Exception as e: logger.exception( "Необработанное исключение в Т{} Гл.{} '{}' | {}: {}", ch.volume, ch.number, ch.title, ch.url, e, ) await db_call(db.mark_failed, ch.url) await emit({"type": "chapter_failed", "url": url, "chapter_url": ch.url, "error": str(e)}) finally: await ch_page.close() # ── Запускаем все задачи сразу; семафор дозирует параллельность ── tasks = [process_chapter(ch) for ch in to_download] results = await asyncio.gather(*tasks, return_exceptions=True) # Логируем неожиданные исключения из gather for ch, res in zip(to_download, results): if isinstance(res, Exception) and not isinstance(res, asyncio.CancelledError): logger.exception( "gather: необработанное исключение Т{} Гл.{} '{}': {}", ch.volume, ch.number, ch.title, res, ) real_done = await db_call(db.sync_chapters_done, url) await db_call(db.update_manga_status, url, "done") finished_ts = await db_call(db.mark_finished, url) await db_call(db.set_last_checked, url) await emit({ "type": "manga_done", "url": url, "chapters_done": real_done, "chapters_total": len(manga.chapters), "finished_at": finished_ts, }) await ctx.close() except asyncio.CancelledError: raise except Exception as e: logger.error("Manga worker error {}: {}", url, e) await db_call(db.update_manga_status, url, "failed") finished_ts = await db_call(db.mark_finished, url) await emit({"type": "manga_failed", "url": url, "error": str(e), "finished_at": finished_ts}) finally: db.close() async def check_for_updates( url: str, on_event: Optional[Callable] = None, ) -> list[str]: """ Проверяет наличие новых глав для манги. Возвращает список новых chapter_url. """ async def emit(event: dict): if on_event: try: await on_event(event) except Exception: pass db = StateDB() try: db.set_last_checked(url) db.add_history(manga_url=url, event_type="check_started") await emit({"type": "check_started", "url": url}) async with BrowserManager(headless=True) as bm: _, page = await bm.new_page() manga = await get_manga_info(page, url) await page.close() if not manga: return [] # Обновляем pub_status и количество глав db.update_manga_info( url, title=manga.title_ru or manga.title, chapters_total=len(manga.chapters), title_ru=manga.title_ru, title_full=manga.title_full, pub_status=manga.pub_status, ) # Находим главы которых ещё нет в БД known = {ch["chapter_url"] for ch in db.get_all_chapters(url)} new_chapters = [ch for ch in manga.chapters if ch.url not in known] for ch in new_chapters: db.upsert_chapter(url, ch.url, ch.title, ch.number, ch.volume) db.add_history( manga_url=url, event_type="new_chapter_found", chapter_url=ch.url, chapter_title=ch.title, chapter_number=ch.number, volume=ch.volume, ) await emit({ "type": "new_chapter_found", "url": url, "chapter_url": ch.url, "chapter_title": ch.title, "chapter_number": ch.number, }) db.add_history( manga_url=url, event_type="check_done", details=f"Найдено новых: {len(new_chapters)}", ) await emit({ "type": "check_done", "url": url, "new_chapters": len(new_chapters), }) return [ch.url for ch in new_chapters] finally: db.close()