This commit is contained in:
2026-05-02 20:15:36 +03:00
parent fcd1dfb74c
commit 419614d295

View File

@@ -115,13 +115,22 @@ class ReadmangaSource:
route_errors: dict[str, str] = {}
route_statuses: dict[str, int] = {}
lock = asyncio.Lock()
# File names taken from readerInit — populated once the page has been parsed.
# Lets the handler intercept images served from unfamiliar CDN domains (e.g. behind a VPN).
expected_filenames: set[str] = set()
async def route_handler(route, request):
url = request.url
base = _base(url)
fname = base.split("/")[-1]
if not _is_manga_image(url):
await route.continue_()
return
# Fallback: the domain is not in cdn_patterns, but the file name matches one from readerInit —
# so the CDN has changed (VPN, load balancing). Intercept the request.
if not expected_filenames or fname not in expected_filenames:
await route.continue_()
return
logger.debug("[{}] CDN fallback: {} (unknown domain: {})",
ch_id, fname, url.split("/")[2])
if BANNER_RE.search(base):
await route.continue_()
return
@@ -201,6 +210,8 @@ class ReadmangaSource:
url_to_idx = {_base(u): i for i, u in enumerate(image_urls)}
filename_to_idx = {_base(u).split("/")[-1]: i for i, u in enumerate(image_urls)}
total = len(image_urls)
# Enable the CDN fallback in route_handler: it now knows the expected file names.
expected_filenames.update(filename_to_idx.keys())
def _count_matched() -> int:
count = 0
@@ -236,40 +247,73 @@ class ReadmangaSource:
await asyncio.sleep(3)
# Retry timed-out pages via JS fetch
async def _js_fetch(url: str) -> bytes | None:
    """Download an image with a JS ``fetch`` evaluated in the browser page.

    The script runs on ``page``, requests ``url`` with
    ``credentials: 'include'`` and hands the response body back as a
    base64 string, which is decoded here.

    Args:
        url: Address of the image to download.

    Returns:
        The decoded bytes, or ``None`` when the script returned nothing
        (non-OK response or an error inside the script), when the body is
        500 bytes or shorter, or when evaluation/decoding raised.
    """
    try:
        # The JS source below is kept verbatim; the binary body is turned
        # into a base64 string so it can cross the JS -> Python boundary.
        data_b64 = await page.evaluate("""async (url) => {
try {
const r = await fetch(url, {credentials: 'include'});
if (!r.ok) return null;
const buf = await r.arrayBuffer();
const bytes = new Uint8Array(buf);
let bin = '';
for (let b of bytes) bin += String.fromCharCode(b);
return btoa(bin);
} catch(e) { return null; }
}""", url)
        if data_b64:
            body = base64.b64decode(data_b64)
            # Bodies of 500 bytes or fewer are rejected, same as before.
            return body if len(body) > 500 else None
    except Exception as exc:
        # Still best effort — callers treat None as "retry failed" — but the
        # cause is recorded instead of being discarded silently.
        logger.debug("[{}] JS fetch failed for '{}': {}", ch_id, url, exc)
    return None
# Retry 1: pages that failed with a timeout, re-fetched via JS fetch
async with lock:
timeout_bases = [u for u, e in route_errors.items()
if "timeout" in e.lower() and u not in captured]
if timeout_bases:
logger.info("[{}] Retry {} страниц с timeout...", ch_id, len(timeout_bases))
for retry_base in timeout_bases:
if retry_base in captured:
continue
async with lock:
if retry_base in captured:
continue
fname = retry_base.split("/")[-1]
try:
data_b64 = await page.evaluate("""async (url) => {
try {
const r = await fetch(url, {credentials: 'include'});
if (!r.ok) return null;
const buf = await r.arrayBuffer();
const bytes = new Uint8Array(buf);
let bin = '';
for (let b of bytes) bin += String.fromCharCode(b);
return btoa(bin);
} catch(e) { return null; }
}""", retry_base)
if data_b64:
body = base64.b64decode(data_b64)
if len(body) > 500:
async with lock:
captured[retry_base] = body
logger.info("[{}] Retry OK: {} ({} байт)", ch_id, fname, len(body))
else:
logger.warning("[{}] Retry вернул {} байт — игнорируем", ch_id, len(body))
else:
logger.warning("[{}] Retry null для '{}'", ch_id, fname)
except Exception as e2:
logger.warning("[{}] Retry JS ошибка '{}': {}", ch_id, fname, e2)
body = await _js_fetch(retry_base)
if body:
async with lock:
captured[retry_base] = body
logger.info("[{}] Retry OK: {} ({} байт)", ch_id, fname, len(body))
else:
logger.warning("[{}] Retry null для '{}'", ch_id, fname)
# Retry 2: pages that were not intercepted — the CDN domain changed (VPN, load balancing).
# The browser loaded them, but route_handler did not capture the bytes.
# Take the URLs directly from readerInit and download them via JS fetch.
async with lock:
captured_fnames = {b.split("/")[-1] for b in captured}
unperceived = [
_base(u) for u in image_urls
if _base(u).split("/")[-1] not in captured_fnames
and _base(u) not in route_errors
and _base(u) not in route_statuses
]
if unperceived:
logger.info("[{}] JS retry для {} не_перехваченных (CDN-домен?)..",
ch_id, len(unperceived))
for retry_base in unperceived:
async with lock:
if retry_base.split("/")[-1] in captured_fnames:
continue
fname = retry_base.split("/")[-1]
body = await _js_fetch(retry_base)
if body:
async with lock:
captured[retry_base] = body
captured_fnames.add(fname)
logger.info("[{}] CDN retry OK: {} ({} байт)", ch_id, fname, len(body))
else:
logger.warning("[{}] CDN retry null для '{}'", ch_id, fname)
await page.unroute("**/*", route_handler)