From 419614d295948d54bda30e74b1e20b75c0790da8 Mon Sep 17 00:00:00 2001 From: StenFredd Date: Sat, 2 May 2026 20:15:36 +0300 Subject: [PATCH] upd --- src/sources/readmanga.py | 102 ++++++++++++++++++++++++++++----------- 1 file changed, 73 insertions(+), 29 deletions(-) diff --git a/src/sources/readmanga.py b/src/sources/readmanga.py index 74be341..e745b60 100644 --- a/src/sources/readmanga.py +++ b/src/sources/readmanga.py @@ -115,13 +115,22 @@ class ReadmangaSource: route_errors: dict[str, str] = {} route_statuses: dict[str, int] = {} lock = asyncio.Lock() + # Имена файлов из readerInit — заполняются после парсинга страницы. + # Позволяет перехватывать картинки с незнакомых CDN-доменов (например, при VPN). + expected_filenames: set[str] = set() async def route_handler(route, request): url = request.url base = _base(url) + fname = base.split("/")[-1] if not _is_manga_image(url): - await route.continue_() - return + # Fallback: домен не в cdn_patterns, но имя файла совпадает с readerInit — + # значит CDN сменился (VPN, балансировка). Перехватываем. + if not expected_filenames or fname not in expected_filenames: + await route.continue_() + return + logger.debug("[{}] CDN fallback: {} (unknown domain: {})", + ch_id, fname, url.split("/")[2]) if BANNER_RE.search(base): await route.continue_() return @@ -201,6 +210,8 @@ class ReadmangaSource: url_to_idx = {_base(u): i for i, u in enumerate(image_urls)} filename_to_idx = {_base(u).split("/")[-1]: i for i, u in enumerate(image_urls)} total = len(image_urls) + # Активируем CDN-fallback в route_handler: теперь он знает ожидаемые имена файлов + expected_filenames.update(filename_to_idx.keys()) def _count_matched() -> int: count = 0 @@ -236,40 +247,73 @@ class ReadmangaSource: await asyncio.sleep(3) - # Retry timeout через JS fetch + async def _js_fetch(url: str) -> bytes | None: + """Скачивает изображение через JS fetch в контексте браузера.""" + try: + data_b64 = await page.evaluate("""async (url) => { + try { + const r = await fetch(url, {credentials: 'include'}); + if (!r.ok) return null; + const buf = await r.arrayBuffer(); + const bytes = new Uint8Array(buf); + let bin = ''; + for (let b of bytes) bin += String.fromCharCode(b); + return btoa(bin); + } catch(e) { return null; } + }""", url) + if data_b64: + body = base64.b64decode(data_b64) + return body if len(body) > 500 else None + except Exception: + pass + return None + + # Retry 1: timeout-ошибки через JS fetch async with lock: timeout_bases = [u for u, e in route_errors.items() if "timeout" in e.lower() and u not in captured] if timeout_bases: logger.info("[{}] Retry {} страниц с timeout...", ch_id, len(timeout_bases)) for retry_base in timeout_bases: - if retry_base in captured: - continue + async with lock: + if retry_base in captured: + continue fname = retry_base.split("/")[-1] - try: - data_b64 = await page.evaluate("""async (url) => { - try { - const r = await fetch(url, {credentials: 'include'}); - if (!r.ok) return null; - const buf = await r.arrayBuffer(); - const bytes = new Uint8Array(buf); - let bin = ''; - for (let b of bytes) bin += String.fromCharCode(b); - return btoa(bin); - } catch(e) { return null; } - }""", retry_base) - if data_b64: - body = base64.b64decode(data_b64) - if len(body) > 500: - async with lock: - captured[retry_base] = body - logger.info("[{}] Retry OK: {} ({} байт)", ch_id, fname, len(body)) - else: - logger.warning("[{}] Retry вернул {} байт — игнорируем", ch_id, len(body)) - else: - logger.warning("[{}] Retry null для '{}'", ch_id, fname) - except Exception as e2: - logger.warning("[{}] Retry JS ошибка '{}': {}", ch_id, fname, e2) + body = await _js_fetch(retry_base) + if body: + async with lock: + captured[retry_base] = body + logger.info("[{}] Retry OK: {} ({} байт)", ch_id, fname, len(body)) + else: + logger.warning("[{}] Retry null для '{}'", ch_id, fname) + + # Retry 2: не_перехваченные — CDN-домен сменился (VPN, балансировка). + # Браузер их загрузил, но route_handler не захватил байты. + # Берём URL напрямую из readerInit и достаём через JS fetch. + async with lock: + captured_fnames = {b.split("/")[-1] for b in captured} + unperceived = [ + _base(u) for u in image_urls + if _base(u).split("/")[-1] not in captured_fnames + and _base(u) not in route_errors + and _base(u) not in route_statuses + ] + if unperceived: + logger.info("[{}] JS retry для {} не_перехваченных (CDN-домен?)..", + ch_id, len(unperceived)) + for retry_base in unperceived: + async with lock: + if retry_base.split("/")[-1] in captured_fnames: + continue + fname = retry_base.split("/")[-1] + body = await _js_fetch(retry_base) + if body: + async with lock: + captured[retry_base] = body + captured_fnames.add(fname) + logger.info("[{}] CDN retry OK: {} ({} байт)", ch_id, fname, len(body)) + else: + logger.warning("[{}] CDN retry null для '{}'", ch_id, fname) await page.unroute("**/*", route_handler)