upd
This commit is contained in:
@@ -115,13 +115,22 @@ class ReadmangaSource:
|
||||
route_errors: dict[str, str] = {}
|
||||
route_statuses: dict[str, int] = {}
|
||||
lock = asyncio.Lock()
|
||||
# Имена файлов из readerInit — заполняются после парсинга страницы.
|
||||
# Позволяет перехватывать картинки с незнакомых CDN-доменов (например, при VPN).
|
||||
expected_filenames: set[str] = set()
|
||||
|
||||
async def route_handler(route, request):
|
||||
url = request.url
|
||||
base = _base(url)
|
||||
fname = base.split("/")[-1]
|
||||
if not _is_manga_image(url):
|
||||
await route.continue_()
|
||||
return
|
||||
# Fallback: домен не в cdn_patterns, но имя файла совпадает с readerInit —
|
||||
# значит CDN сменился (VPN, балансировка). Перехватываем.
|
||||
if not expected_filenames or fname not in expected_filenames:
|
||||
await route.continue_()
|
||||
return
|
||||
logger.debug("[{}] CDN fallback: {} (unknown domain: {})",
|
||||
ch_id, fname, url.split("/")[2])
|
||||
if BANNER_RE.search(base):
|
||||
await route.continue_()
|
||||
return
|
||||
@@ -201,6 +210,8 @@ class ReadmangaSource:
|
||||
url_to_idx = {_base(u): i for i, u in enumerate(image_urls)}
|
||||
filename_to_idx = {_base(u).split("/")[-1]: i for i, u in enumerate(image_urls)}
|
||||
total = len(image_urls)
|
||||
# Активируем CDN-fallback в route_handler: теперь он знает ожидаемые имена файлов
|
||||
expected_filenames.update(filename_to_idx.keys())
|
||||
|
||||
def _count_matched() -> int:
|
||||
count = 0
|
||||
@@ -236,40 +247,73 @@ class ReadmangaSource:
|
||||
|
||||
await asyncio.sleep(3)
|
||||
|
||||
# Retry timeout через JS fetch
|
||||
async def _js_fetch(url: str) -> bytes | None:
    """Download an image through a JS ``fetch`` run inside the browser page.

    The script passed to ``page.evaluate`` fetches *url* with
    ``credentials: 'include'``, base64-encodes the response bytes and hands
    the string back to Python, where it is decoded. ``page``, ``logger`` and
    ``ch_id`` are taken from the enclosing scope.

    Args:
        url: Address of the image to download.

    Returns:
        The decoded response body when it is longer than 500 bytes;
        ``None`` when the response is not OK, the script itself fails,
        the payload is 500 bytes or shorter, or any exception is raised
        while evaluating or decoding.
    """
    try:
        data_b64 = await page.evaluate("""async (url) => {
            try {
                const r = await fetch(url, {credentials: 'include'});
                if (!r.ok) return null;
                const buf = await r.arrayBuffer();
                const bytes = new Uint8Array(buf);
                let bin = '';
                for (let b of bytes) bin += String.fromCharCode(b);
                return btoa(bin);
            } catch(e) { return null; }
        }""", url)
        if data_b64:
            body = base64.b64decode(data_b64)
            # Bodies of 500 bytes or fewer are rejected rather than returned.
            return body if len(body) > 500 else None
    except Exception as exc:
        # Still best-effort (callers only see None), but record the reason
        # instead of discarding it silently.
        logger.warning("[{}] JS fetch ошибка '{}': {}",
                       ch_id, url.split("/")[-1], exc)
    return None
|
||||
|
||||
# Retry 1: timeout-ошибки через JS fetch
|
||||
async with lock:
|
||||
timeout_bases = [u for u, e in route_errors.items()
|
||||
if "timeout" in e.lower() and u not in captured]
|
||||
if timeout_bases:
|
||||
logger.info("[{}] Retry {} страниц с timeout...", ch_id, len(timeout_bases))
|
||||
for retry_base in timeout_bases:
|
||||
if retry_base in captured:
|
||||
continue
|
||||
async with lock:
|
||||
if retry_base in captured:
|
||||
continue
|
||||
fname = retry_base.split("/")[-1]
|
||||
try:
|
||||
data_b64 = await page.evaluate("""async (url) => {
|
||||
try {
|
||||
const r = await fetch(url, {credentials: 'include'});
|
||||
if (!r.ok) return null;
|
||||
const buf = await r.arrayBuffer();
|
||||
const bytes = new Uint8Array(buf);
|
||||
let bin = '';
|
||||
for (let b of bytes) bin += String.fromCharCode(b);
|
||||
return btoa(bin);
|
||||
} catch(e) { return null; }
|
||||
}""", retry_base)
|
||||
if data_b64:
|
||||
body = base64.b64decode(data_b64)
|
||||
if len(body) > 500:
|
||||
async with lock:
|
||||
captured[retry_base] = body
|
||||
logger.info("[{}] Retry OK: {} ({} байт)", ch_id, fname, len(body))
|
||||
else:
|
||||
logger.warning("[{}] Retry вернул {} байт — игнорируем", ch_id, len(body))
|
||||
else:
|
||||
logger.warning("[{}] Retry null для '{}'", ch_id, fname)
|
||||
except Exception as e2:
|
||||
logger.warning("[{}] Retry JS ошибка '{}': {}", ch_id, fname, e2)
|
||||
body = await _js_fetch(retry_base)
|
||||
if body:
|
||||
async with lock:
|
||||
captured[retry_base] = body
|
||||
logger.info("[{}] Retry OK: {} ({} байт)", ch_id, fname, len(body))
|
||||
else:
|
||||
logger.warning("[{}] Retry null для '{}'", ch_id, fname)
|
||||
|
||||
# Retry 2: не_перехваченные — CDN-домен сменился (VPN, балансировка).
|
||||
# Браузер их загрузил, но route_handler не захватил байты.
|
||||
# Берём URL напрямую из readerInit и достаём через JS fetch.
|
||||
async with lock:
|
||||
captured_fnames = {b.split("/")[-1] for b in captured}
|
||||
unperceived = [
|
||||
_base(u) for u in image_urls
|
||||
if _base(u).split("/")[-1] not in captured_fnames
|
||||
and _base(u) not in route_errors
|
||||
and _base(u) not in route_statuses
|
||||
]
|
||||
if unperceived:
|
||||
logger.info("[{}] JS retry для {} не_перехваченных (CDN-домен?)..",
|
||||
ch_id, len(unperceived))
|
||||
for retry_base in unperceived:
|
||||
async with lock:
|
||||
if retry_base.split("/")[-1] in captured_fnames:
|
||||
continue
|
||||
fname = retry_base.split("/")[-1]
|
||||
body = await _js_fetch(retry_base)
|
||||
if body:
|
||||
async with lock:
|
||||
captured[retry_base] = body
|
||||
captured_fnames.add(fname)
|
||||
logger.info("[{}] CDN retry OK: {} ({} байт)", ch_id, fname, len(body))
|
||||
else:
|
||||
logger.warning("[{}] CDN retry null для '{}'", ch_id, fname)
|
||||
|
||||
await page.unroute("**/*", route_handler)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user