upd
This commit is contained in:
@@ -115,13 +115,22 @@ class ReadmangaSource:
|
|||||||
route_errors: dict[str, str] = {}
|
route_errors: dict[str, str] = {}
|
||||||
route_statuses: dict[str, int] = {}
|
route_statuses: dict[str, int] = {}
|
||||||
lock = asyncio.Lock()
|
lock = asyncio.Lock()
|
||||||
|
# Имена файлов из readerInit — заполняются после парсинга страницы.
|
||||||
|
# Позволяет перехватывать картинки с незнакомых CDN-доменов (например, при VPN).
|
||||||
|
expected_filenames: set[str] = set()
|
||||||
|
|
||||||
async def route_handler(route, request):
|
async def route_handler(route, request):
|
||||||
url = request.url
|
url = request.url
|
||||||
base = _base(url)
|
base = _base(url)
|
||||||
|
fname = base.split("/")[-1]
|
||||||
if not _is_manga_image(url):
|
if not _is_manga_image(url):
|
||||||
|
# Fallback: домен не в cdn_patterns, но имя файла совпадает с readerInit —
|
||||||
|
# значит CDN сменился (VPN, балансировка). Перехватываем.
|
||||||
|
if not expected_filenames or fname not in expected_filenames:
|
||||||
await route.continue_()
|
await route.continue_()
|
||||||
return
|
return
|
||||||
|
logger.debug("[{}] CDN fallback: {} (unknown domain: {})",
|
||||||
|
ch_id, fname, url.split("/")[2])
|
||||||
if BANNER_RE.search(base):
|
if BANNER_RE.search(base):
|
||||||
await route.continue_()
|
await route.continue_()
|
||||||
return
|
return
|
||||||
@@ -201,6 +210,8 @@ class ReadmangaSource:
|
|||||||
url_to_idx = {_base(u): i for i, u in enumerate(image_urls)}
|
url_to_idx = {_base(u): i for i, u in enumerate(image_urls)}
|
||||||
filename_to_idx = {_base(u).split("/")[-1]: i for i, u in enumerate(image_urls)}
|
filename_to_idx = {_base(u).split("/")[-1]: i for i, u in enumerate(image_urls)}
|
||||||
total = len(image_urls)
|
total = len(image_urls)
|
||||||
|
# Активируем CDN-fallback в route_handler: теперь он знает ожидаемые имена файлов
|
||||||
|
expected_filenames.update(filename_to_idx.keys())
|
||||||
|
|
||||||
def _count_matched() -> int:
|
def _count_matched() -> int:
|
||||||
count = 0
|
count = 0
|
||||||
@@ -236,16 +247,8 @@ class ReadmangaSource:
|
|||||||
|
|
||||||
await asyncio.sleep(3)
|
await asyncio.sleep(3)
|
||||||
|
|
||||||
# Retry timeout через JS fetch
|
async def _js_fetch(url: str) -> bytes | None:
|
||||||
async with lock:
|
"""Скачивает изображение через JS fetch в контексте браузера."""
|
||||||
timeout_bases = [u for u, e in route_errors.items()
|
|
||||||
if "timeout" in e.lower() and u not in captured]
|
|
||||||
if timeout_bases:
|
|
||||||
logger.info("[{}] Retry {} страниц с timeout...", ch_id, len(timeout_bases))
|
|
||||||
for retry_base in timeout_bases:
|
|
||||||
if retry_base in captured:
|
|
||||||
continue
|
|
||||||
fname = retry_base.split("/")[-1]
|
|
||||||
try:
|
try:
|
||||||
data_b64 = await page.evaluate("""async (url) => {
|
data_b64 = await page.evaluate("""async (url) => {
|
||||||
try {
|
try {
|
||||||
@@ -257,19 +260,60 @@ class ReadmangaSource:
|
|||||||
for (let b of bytes) bin += String.fromCharCode(b);
|
for (let b of bytes) bin += String.fromCharCode(b);
|
||||||
return btoa(bin);
|
return btoa(bin);
|
||||||
} catch(e) { return null; }
|
} catch(e) { return null; }
|
||||||
}""", retry_base)
|
}""", url)
|
||||||
if data_b64:
|
if data_b64:
|
||||||
body = base64.b64decode(data_b64)
|
body = base64.b64decode(data_b64)
|
||||||
if len(body) > 500:
|
return body if len(body) > 500 else None
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Retry 1: timeout-ошибки через JS fetch
|
||||||
|
async with lock:
|
||||||
|
timeout_bases = [u for u, e in route_errors.items()
|
||||||
|
if "timeout" in e.lower() and u not in captured]
|
||||||
|
if timeout_bases:
|
||||||
|
logger.info("[{}] Retry {} страниц с timeout...", ch_id, len(timeout_bases))
|
||||||
|
for retry_base in timeout_bases:
|
||||||
|
async with lock:
|
||||||
|
if retry_base in captured:
|
||||||
|
continue
|
||||||
|
fname = retry_base.split("/")[-1]
|
||||||
|
body = await _js_fetch(retry_base)
|
||||||
|
if body:
|
||||||
async with lock:
|
async with lock:
|
||||||
captured[retry_base] = body
|
captured[retry_base] = body
|
||||||
logger.info("[{}] Retry OK: {} ({} байт)", ch_id, fname, len(body))
|
logger.info("[{}] Retry OK: {} ({} байт)", ch_id, fname, len(body))
|
||||||
else:
|
|
||||||
logger.warning("[{}] Retry вернул {} байт — игнорируем", ch_id, len(body))
|
|
||||||
else:
|
else:
|
||||||
logger.warning("[{}] Retry null для '{}'", ch_id, fname)
|
logger.warning("[{}] Retry null для '{}'", ch_id, fname)
|
||||||
except Exception as e2:
|
|
||||||
logger.warning("[{}] Retry JS ошибка '{}': {}", ch_id, fname, e2)
|
# Retry 2: не_перехваченные — CDN-домен сменился (VPN, балансировка).
|
||||||
|
# Браузер их загрузил, но route_handler не захватил байты.
|
||||||
|
# Берём URL напрямую из readerInit и достаём через JS fetch.
|
||||||
|
async with lock:
|
||||||
|
captured_fnames = {b.split("/")[-1] for b in captured}
|
||||||
|
unperceived = [
|
||||||
|
_base(u) for u in image_urls
|
||||||
|
if _base(u).split("/")[-1] not in captured_fnames
|
||||||
|
and _base(u) not in route_errors
|
||||||
|
and _base(u) not in route_statuses
|
||||||
|
]
|
||||||
|
if unperceived:
|
||||||
|
logger.info("[{}] JS retry для {} не_перехваченных (CDN-домен?)..",
|
||||||
|
ch_id, len(unperceived))
|
||||||
|
for retry_base in unperceived:
|
||||||
|
async with lock:
|
||||||
|
if retry_base.split("/")[-1] in captured_fnames:
|
||||||
|
continue
|
||||||
|
fname = retry_base.split("/")[-1]
|
||||||
|
body = await _js_fetch(retry_base)
|
||||||
|
if body:
|
||||||
|
async with lock:
|
||||||
|
captured[retry_base] = body
|
||||||
|
captured_fnames.add(fname)
|
||||||
|
logger.info("[{}] CDN retry OK: {} ({} байт)", ch_id, fname, len(body))
|
||||||
|
else:
|
||||||
|
logger.warning("[{}] CDN retry null для '{}'", ch_id, fname)
|
||||||
|
|
||||||
await page.unroute("**/*", route_handler)
|
await page.unroute("**/*", route_handler)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user