This commit is contained in:
2026-05-03 13:37:21 +03:00
parent 2cb244d973
commit bb6f2d67d8
7 changed files with 282 additions and 6 deletions

View File

@@ -682,6 +682,12 @@ async def refresh_meta(url: str, current_user: dict = Depends(get_current_user))
def _patch_meta_sync(manga: dict, chapters: list, chapters_total: int, pub_status: str) -> tuple[int, int]: def _patch_meta_sync(manga: dict, chapters: list, chapters_total: int, pub_status: str) -> tuple[int, int]:
updated = failed = 0 updated = failed = 0
url = manga["url"] url = manga["url"]
summary = manga.get("description") or ""
tags_raw = manga.get("tags") or ""
try:
tags_str = ", ".join(json.loads(tags_raw)) if tags_raw else ""
except Exception:
tags_str = ""
for ch in chapters: for ch in chapters:
for fmt_col in ("output_cbz", "output_pdf", "output_epub"): for fmt_col in ("output_cbz", "output_pdf", "output_epub"):
fpath = ch.get(fmt_col) fpath = ch.get(fmt_col)
@@ -699,6 +705,8 @@ def _patch_meta_sync(manga: dict, chapters: list, chapters_total: int, pub_statu
chapters_total=chapters_total, chapters_total=chapters_total,
pub_status=pub_status, pub_status=pub_status,
source_url=url, source_url=url,
summary=summary,
tags=tags_str,
) )
if patch_meta(p, meta): if patch_meta(p, meta):
updated += 1 updated += 1
@@ -706,6 +714,43 @@ def _patch_meta_sync(manga: dict, chapters: list, chapters_total: int, pub_statu
failed += 1 failed += 1
return updated, failed return updated, failed
def _refresh_cover_sync(manga: dict, manga_dir: Path) -> None:
    """Download (or refresh) the manga cover with urllib and save it as ``cover.<ext>``.

    The function is synchronous on purpose: the caller runs it through
    ``asyncio.to_thread``. It is best-effort: every failure is logged as a
    warning and swallowed, nothing is raised to the caller.

    Args:
        manga: Manga row; the ``cover_url`` and ``url`` keys are read.
        manga_dir: Directory that receives the cover file.
    """
    import re as _re
    import urllib.request as _urllib_req
    from urllib.parse import urlparse as _up

    cover_url = manga.get("cover_url") or ""
    if not cover_url:
        return
    try:
        # urlopen() also serves file:, ftp: and data: URLs. The cover URL is
        # scraped from a remote page, so only plain HTTP(S) is accepted here;
        # otherwise a crafted value could copy a local file into the library.
        if _up(cover_url).scheme.lower() not in ("http", "https"):
            raise ValueError("unsupported URL scheme")

        # Choose the Referer from the cover URL (MangaLib CDN: cdnlibs / mangalib).
        if any(pat in cover_url for pat in ("mangalib", "cdnlibs", "imglib")):
            referer = "https://mangalib.me/"
        else:
            parsed = _up(manga.get("url") or "")
            referer = f"{parsed.scheme}://{parsed.netloc}/" if parsed.netloc else "https://readmanga.ru/"

        req = _urllib_req.Request(cover_url, headers={
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/124.0.0.0",
            "Referer": referer,
            "Accept": "image/png,image/jpeg,image/webp,image/*,*/*",
        })
        with _urllib_req.urlopen(req, timeout=30) as resp:
            body = resp.read()
        # Responses under 500 bytes are rejected instead of overwriting the cover.
        if len(body) < 500:
            logger.warning("refresh_cover: слишком малый ответ ({} байт)", len(body))
            return
        # The extension comes from the URL; ".jpeg" is normalised to ".jpg"
        # and anything unrecognised defaults to ".jpg".
        m = _re.search(r"\.(jpg|jpeg|png|webp)(\?|$)", cover_url, _re.IGNORECASE)
        ext = ("." + (m.group(1).lower() if m else "jpg")).replace(".jpeg", ".jpg")
        cover_path = manga_dir / f"cover{ext}"
        cover_path.write_bytes(body)
        logger.info("Обложка обновлена: {} ({} байт)", cover_path.name, len(body))
    except Exception as e:
        logger.warning("refresh_cover error {}: {}", cover_url, e)
async def _do_refresh_meta(url: str): async def _do_refresh_meta(url: str):
db = StateDB() db = StateDB()
try: try:
@@ -721,6 +766,14 @@ async def _do_refresh_meta(url: str):
await ws_manager.broadcast({"type": "meta_refresh_started", "url": url}) await ws_manager.broadcast({"type": "meta_refresh_started", "url": url})
updated, failed = await asyncio.to_thread(_patch_meta_sync, manga, chapters, chapters_total, pub_status) updated, failed = await asyncio.to_thread(_patch_meta_sync, manga, chapters, chapters_total, pub_status)
logger.info("refresh_meta {}: обновлено {}, ошибок {}", url, updated, failed) logger.info("refresh_meta {}: обновлено {}, ошибок {}", url, updated, failed)
# Обновляем обложку если у манги формат cbz
manga_fmt = manga.get("format", "cbz") or "cbz"
if manga_fmt in ("cbz", "all") and manga.get("cover_url"):
manga_dir = _manga_folder(manga)
if manga_dir.exists():
await asyncio.to_thread(_refresh_cover_sync, manga, manga_dir)
await ws_manager.broadcast({"type": "meta_refreshed", "url": url, await ws_manager.broadcast({"type": "meta_refreshed", "url": url,
"updated": updated, "failed": failed}) "updated": updated, "failed": failed})
except Exception as e: except Exception as e:

View File

@@ -26,6 +26,7 @@ class MangaMeta:
language: str = "ru" language: str = "ru"
summary: str = "" # Описание/синопсис серии summary: str = "" # Описание/синопсис серии
genre: str = "" # Жанры через запятую (для ComicInfo Genre) genre: str = "" # Жанры через запятую (для ComicInfo Genre)
tags: str = "" # Теги через запятую (для ComicInfo Tags)
series_group: str = "" # Группа/коллекция (для ComicInfo SeriesGroup) series_group: str = "" # Группа/коллекция (для ComicInfo SeriesGroup)
@@ -89,6 +90,7 @@ def _make_comic_info(meta: MangaMeta) -> str:
add("Count", meta.chapters_total) add("Count", meta.chapters_total)
add("Genre", meta.genre) add("Genre", meta.genre)
add("Tags", meta.tags)
add("LanguageISO", meta.language) add("LanguageISO", meta.language)
# Manga = YesAndRightToLeft — стандартная японская манга # Manga = YesAndRightToLeft — стандартная японская манга

View File

@@ -37,6 +37,8 @@ class MangaInfo:
title_full: str = "" title_full: str = ""
description: str = "" description: str = ""
genres: list[str] = field(default_factory=list) genres: list[str] = field(default_factory=list)
tags: list[str] = field(default_factory=list)
cover_url: str = ""
# ────────────────────────────────────────────── # ──────────────────────────────────────────────

View File

@@ -134,6 +134,18 @@ class MangalibSource:
description = await _extract_description(page) description = await _extract_description(page)
genres = await _extract_genres(page) genres = await _extract_genres(page)
# Получаем обложку, описание и теги из API
async with lock:
manga_meta_for_extras = dict(manga_api_data)
cover_url, extra_description, tags = await _fetch_extra_meta(
page, manga_meta_for_extras, url, self.auth_token
)
if extra_description:
description = extra_description
if not description:
description = await _extract_description(page)
async with lock: async with lock:
raw_chapters = list(chapters_api_data) raw_chapters = list(chapters_api_data)
@@ -154,6 +166,8 @@ class MangalibSource:
title_full=title_full, title_full=title_full,
description=description, description=description,
genres=genres, genres=genres,
tags=tags,
cover_url=cover_url,
) )
# ────────────────────────────────────────────── # ──────────────────────────────────────────────
@@ -611,6 +625,85 @@ async def _extract_genres(page: Page) -> list[str]:
return [] return []
def _parse_summary_doc(doc) -> str:
    """Convert a ProseMirror JSON document (or any sub-node) to plain text.

    Inline children of one text block (text nodes, hard breaks) are
    concatenated as-is, because adjacent text nodes differ only by their marks
    and joining them with a space would split words. Block-level children
    (paragraphs, headings, ...) are separated by a single space.

    Args:
        doc: Parsed JSON node. Anything that is not a non-empty dict yields "".

    Returns:
        The plain-text content, or "" when there is none.
    """
    if not doc or not isinstance(doc, dict):
        return ""

    node_type = doc.get("type")
    if node_type == "text":
        text = doc.get("text")
        return text if isinstance(text, str) else ""
    # Both spellings are in use: "hard_break" (prosemirror-schema-basic) and
    # "hardBreak" (Tiptap). A line break becomes a word separator.
    if node_type in ("hard_break", "hardBreak"):
        return " "

    children = doc.get("content")
    if not isinstance(children, list):
        # Missing or null "content": nothing to render.
        return ""

    inline_types = ("text", "hard_break", "hardBreak")
    has_inline = any(
        isinstance(child, dict) and child.get("type") in inline_types
        for child in children
    )
    parts = [_parse_summary_doc(child) for child in children]
    if has_inline:
        return "".join(parts)
    return " ".join(s for s in (p.strip() for p in parts) if s)
async def _fetch_extra_meta(
    page: Page,
    manga_api_data: dict,
    manga_url: str,
    auth_token: str | None,
) -> tuple[str, str, list[str]]:
    """Return ``(cover_url, description, tags)`` for a manga.

    The values are taken from the already captured API payload. If any of the
    three is missing, one supplementary API request is made and only the
    missing fields are filled in from it. A failed supplementary request is
    logged at debug level and ignored.

    Args:
        page: Playwright page whose browser context performs the request.
        manga_api_data: Manga payload captured earlier (may lack fields).
        manga_url: Manga page URL; used for the slug and the Referer header.
        auth_token: Optional bearer token for the API.

    Returns:
        ``(cover_url, description, tags)``; missing values are ``""`` / ``[]``.
    """

    def _extract_from_data(data) -> tuple[str, str, list[str]]:
        # The payload comes from a remote API: tolerate null / non-dict
        # values instead of raising AttributeError.
        if not isinstance(data, dict):
            return "", "", []

        cover_url = ""
        cover_obj = data.get("cover")
        if isinstance(cover_obj, dict):
            cover_url = cover_obj.get("default") or cover_obj.get("thumbnail") or ""
            if not isinstance(cover_url, str):
                cover_url = ""

        description = ""
        summary = data.get("summary")
        if isinstance(summary, dict):
            description = _parse_summary_doc(summary).strip()
        elif isinstance(summary, str):
            description = summary.strip()

        tags: list[str] = []
        raw_tags = data.get("tags")
        for t in raw_tags if isinstance(raw_tags, list) else []:
            # Tags are normally objects with "name"/"label"; plain strings are
            # accepted too, anything else is skipped.
            if isinstance(t, dict):
                name = t.get("name") or t.get("label") or ""
            elif isinstance(t, str):
                name = t
            else:
                continue
            name = name.strip() if isinstance(name, str) else ""
            if name:
                tags.append(name)

        return cover_url, description, tags

    cover_url, description, tags = _extract_from_data(manga_api_data)

    # If at least one field is missing, make an explicit supplementary request.
    if not cover_url or not description or not tags:
        slug = _manga_slug_from_url(manga_url)
        referer = _base_url(manga_url) + "/"
        api_url = (
            f"https://api.cdnlibs.org/api/manga/{slug}"
            "?fields[]=summary&fields[]=tags&fields[]=cover"
        )
        try:
            headers: dict = {"Referer": referer, "Accept": "application/json"}
            if auth_token:
                headers["Authorization"] = f"Bearer {auth_token}"
            resp = await page.context.request.get(api_url, headers=headers)
            if resp.ok:
                body = await resp.body()
                payload = _json.loads(body)
                data = payload.get("data") if isinstance(payload, dict) else None
                extra_cover, extra_desc, extra_tags = _extract_from_data(data)
                # Only fill the gaps; values already present are kept.
                if not cover_url:
                    cover_url = extra_cover
                if not description:
                    description = extra_desc
                if not tags:
                    tags = extra_tags
                logger.debug("Supplementary API: cover={}, desc_len={}, tags={}",
                             bool(cover_url), len(description), len(tags))
        except Exception as e:
            logger.debug("Supplementary API error: {}", e)

    return cover_url, description, tags
async def _detect_server(page: Page, servers_list: list[str]) -> str: async def _detect_server(page: Page, servers_list: list[str]) -> str:
"""Определяет CDN-сервер из img src на странице или из constants API.""" """Определяет CDN-сервер из img src на странице или из constants API."""
try: try:

View File

@@ -47,6 +47,8 @@ class ReadmangaSource:
description = await _extract_description(page) description = await _extract_description(page)
genres = await _extract_genres(page) genres = await _extract_genres(page)
tags = await _extract_tags(page)
cover_url = await _get_cover_url(page)
await _expand_chapters(page) await _expand_chapters(page)
chapters = await _extract_chapters(page) chapters = await _extract_chapters(page)
@@ -63,6 +65,8 @@ class ReadmangaSource:
title_full=title_full, title_full=title_full,
description=description, description=description,
genres=genres, genres=genres,
tags=tags,
cover_url=cover_url,
) )
# ────────────────────────────────────────────── # ──────────────────────────────────────────────
@@ -474,6 +478,18 @@ async def _extract_description(page: Page) -> str:
try: try:
result = await page.evaluate(""" result = await page.evaluate("""
() => { () => {
// Приоритетный селектор — новый сайт ReadManga
const crDesc = document.querySelector('.cr-description__content');
if (crDesc) {
const parts = [];
crDesc.querySelectorAll('p, span, div').forEach(el => {
const t = el.textContent.trim();
if (t) parts.push(t);
});
if (parts.length) return parts.join(' ');
const t = crDesc.textContent.trim();
if (t) return t;
}
const selectors = [ const selectors = [
'.manga-description', '.elem_descr .value', '.manga-description', '.elem_descr .value',
'#tab-description .description-text', '.description', '#tab-description .description-text', '.description',
@@ -491,6 +507,42 @@ async def _extract_description(page: Page) -> str:
return "" return ""
async def _extract_tags(page: Page) -> list[str]:
    """Read the tag names from the ``.cr-tags`` element of the page.

    The in-page selector ``a, span, li`` also matches nested elements (for
    example ``li > a > span``), so one tag may be reported several times.
    The result is therefore de-duplicated while keeping the page order.

    Args:
        page: Playwright page with the manga description loaded.

    Returns:
        Unique, stripped tag names; ``[]`` when nothing is found or the
        in-page script fails.
    """
    try:
        result = await page.evaluate("""
            () => {
                const crTags = document.querySelector('.cr-tags');
                if (crTags) {
                    const els = crTags.querySelectorAll('a, span, li');
                    if (els.length) return Array.from(els).map(e => e.textContent.trim()).filter(Boolean);
                    const t = crTags.textContent.trim();
                    if (t) return t.split(/[,;]/).map(s => s.trim()).filter(Boolean);
                }
                return [];
            }
        """)
    except Exception:
        # Best-effort scrape: a missing element or a closed page means "no tags".
        return []

    if not isinstance(result, list):
        return []
    stripped = (item.strip() for item in result if isinstance(item, str))
    # dict.fromkeys keeps first-seen order while dropping duplicates.
    return list(dict.fromkeys(name for name in stripped if name))
async def _get_cover_url(page: Page) -> str:
    """Return the absolute http(s) URL of the cover image, or "" if unavailable.

    The image is looked up inside ``.cr-hero-poster-wrapper``. Both ``src`` and
    ``data-src`` are considered, and the first one that resolves to an
    http(s) URL wins. This way a ``data:``/``blob:`` placeholder in ``src``
    (as used by lazy loaders) does not hide the real URL in ``data-src`` and
    is never returned as a cover URL.

    Args:
        page: Playwright page with the manga page loaded.

    Returns:
        The cover URL, or "" when not found or the in-page script fails.
    """
    try:
        result = await page.evaluate("""
            () => {
                const wrapper = document.querySelector('.cr-hero-poster-wrapper');
                const img = wrapper ? wrapper.querySelector('img') : null;
                if (!img) return '';
                for (const raw of [img.src, img.dataset.src]) {
                    if (!raw) continue;
                    try {
                        const abs = new URL(raw, document.baseURI).href;
                        if (/^https?:/i.test(abs)) return abs;
                    } catch (e) {
                        // not a parsable URL, try the next candidate
                    }
                }
                return '';
            }
        """)
    except Exception:
        # Best-effort scrape: any evaluation failure means "no cover".
        return ""

    url = result.strip() if isinstance(result, str) else ""
    # Re-check on the Python side: only http(s) URLs can be downloaded later.
    return url if url.lower().startswith(("http://", "https://")) else ""
async def _extract_genres(page: Page) -> list[str]: async def _extract_genres(page: Page) -> list[str]:
try: try:
result = await page.evaluate(""" result = await page.evaluate("""

View File

@@ -160,6 +160,9 @@ class StateDB:
("mangas", "added_by", "INTEGER REFERENCES users(id)"), ("mangas", "added_by", "INTEGER REFERENCES users(id)"),
("mangas", "last_error", "TEXT"), ("mangas", "last_error", "TEXT"),
("users", "is_env_admin", "INTEGER NOT NULL DEFAULT 0"), ("users", "is_env_admin", "INTEGER NOT NULL DEFAULT 0"),
("mangas", "description", "TEXT"),
("mangas", "tags", "TEXT"),
("mangas", "cover_url", "TEXT"),
] ]
for table, col, typedef in migrations: for table, col, typedef in migrations:
try: try:
@@ -370,11 +373,16 @@ class StateDB:
def update_manga_info(self, url: str, title: str, chapters_total: int, def update_manga_info(self, url: str, title: str, chapters_total: int,
title_ru: str = "", title_full: str = "", title_ru: str = "", title_full: str = "",
pub_status: str = "unknown"): pub_status: str = "unknown",
description: str = "", tags: str = "",
cover_url: str = ""):
self.conn.execute(""" self.conn.execute("""
UPDATE mangas SET title=?, title_ru=?, title_full=?, pub_status=?, UPDATE mangas SET title=?, title_ru=?, title_full=?, pub_status=?,
chapters_total=?, updated_at=? WHERE url=? chapters_total=?, updated_at=?,
""", (title, title_ru, title_full, pub_status, chapters_total, _now(), url)) description=?, tags=?, cover_url=?
WHERE url=?
""", (title, title_ru, title_full, pub_status, chapters_total, _now(),
description or None, tags or None, cover_url or None, url))
self.conn.commit() self.conn.commit()
def set_folder_name(self, url: str, folder_name: str): def set_folder_name(self, url: str, folder_name: str):

View File

@@ -91,14 +91,15 @@ async def download_manga(
await emit({"type": "auth_required", "url": url, await emit({"type": "auth_required", "url": url,
"source_slug": e.source_slug, "finished_at": finished_ts}) "source_slug": e.source_slug, "finished_at": finished_ts})
return return
await info_page.close()
if not manga: if not manga:
await info_page.close()
await db_call(db.update_manga_status, url, "failed") await db_call(db.update_manga_status, url, "failed")
await emit({"type": "manga_failed", "url": url, await emit({"type": "manga_failed", "url": url,
"error": "Не удалось получить информацию о манге"}) "error": "Не удалось получить информацию о манге"})
return return
import json as _json_mod
await db_call( await db_call(
db.update_manga_info, db.update_manga_info,
url, url,
@@ -107,6 +108,9 @@ async def download_manga(
title_ru=manga.title_ru, title_ru=manga.title_ru,
title_full=manga.title_full, title_full=manga.title_full,
pub_status=manga.pub_status, pub_status=manga.pub_status,
description=manga.description,
tags=_json_mod.dumps(manga.tags, ensure_ascii=False) if manga.tags else "",
cover_url=manga.cover_url,
) )
await emit({ await emit({
"type": "manga_info", "type": "manga_info",
@@ -127,6 +131,12 @@ async def download_manga(
manga_dir = output_dir / folder_name manga_dir = output_dir / folder_name
manga_dir.mkdir(parents=True, exist_ok=True) manga_dir.mkdir(parents=True, exist_ok=True)
# Скачиваем обложку для CBZ-формата (info_page ещё открыта — контекст браузера жив)
if manga.cover_url and fmt in ("cbz", "all"):
await _download_cover(manga.cover_url, manga_dir, url, info_page)
await info_page.close()
for ch in manga.chapters: for ch in manga.chapters:
await db_call(db.upsert_chapter, url, ch.url, ch.title, ch.number, ch.volume) await db_call(db.upsert_chapter, url, ch.url, ch.title, ch.number, ch.volume)
@@ -250,6 +260,7 @@ async def download_manga(
source_url=url, source_url=url,
summary=manga.description, summary=manga.description,
genre=", ".join(manga.genres) if manga.genres else "", genre=", ".join(manga.genres) if manga.genres else "",
tags=", ".join(manga.tags) if manga.tags else "",
) )
for f in formats: for f in formats:
out_file = manga_dir / f"{ch_name}.{f}" out_file = manga_dir / f"{ch_name}.{f}"
@@ -350,6 +361,43 @@ async def download_manga(
db.close() db.close()
def _cover_ext_from_url(url: str) -> str:
    """Return the file extension (with leading dot) to use for a cover URL.

    Recognised extensions are jpg, jpeg, png and webp (case-insensitive);
    ``jpeg`` is normalised to ``.jpg``. The URL *path* is inspected first so
    that a query string or ``#fragment`` cannot hide the extension. If the
    path has no recognised suffix, the whole URL is searched as before.
    ``.jpg`` is the default.

    Args:
        url: Cover image URL.

    Returns:
        One of ``".jpg"``, ``".png"``, ``".webp"``.
    """
    import re as _re
    from urllib.parse import urlparse as _urlparse

    known = {"jpg": ".jpg", "jpeg": ".jpg", "png": ".png", "webp": ".webp"}
    if not url:
        return ".jpg"

    try:
        path = _urlparse(url).path
    except ValueError:
        # Malformed URL (e.g. broken IPv6 literal): fall through to the regex.
        path = ""
    _, dot, suffix = path.rpartition(".")
    if dot and suffix.lower() in known:
        return known[suffix.lower()]

    # Fallback: extension right before the query string or at the very end.
    m = _re.search(r"\.(jpg|jpeg|png|webp)(\?|$)", url, _re.IGNORECASE)
    if m:
        return known[m.group(1).lower()]
    return ".jpg"
async def _download_cover(cover_url: str, manga_dir: Path, manga_url: str, page) -> Optional[Path]:
    """Fetch the cover through the page's browser context and store it in ``manga_dir``.

    The request is sent with a Referer built from the scheme and host of
    ``manga_url``. The file is written as ``cover<ext>``, with the extension
    chosen by ``_cover_ext_from_url``. Non-OK responses and bodies under
    500 bytes are rejected.

    Args:
        cover_url: URL of the cover image.
        manga_dir: Destination directory.
        manga_url: Manga page URL (source of the Referer header).
        page: Existing Playwright page; its context performs the request.

    Returns:
        Path of the saved cover, or ``None`` on any failure (logged as a warning).
    """
    from urllib.parse import urlparse as _urlparse

    try:
        origin = _urlparse(manga_url)
        response = await page.context.request.get(
            cover_url,
            headers={
                "Accept": "image/png,image/jpeg,image/webp,image/*,*/*",
                "Referer": f"{origin.scheme}://{origin.netloc}/",
            },
        )
        if not response.ok:
            logger.warning("Обложка: HTTP {} для {}", response.status, cover_url)
            return None

        payload = await response.body()
        size = len(payload)
        if size < 500:
            logger.warning("Обложка: слишком малый ответ ({} байт)", size)
            return None

        target = manga_dir / f"cover{_cover_ext_from_url(cover_url)}"
        target.write_bytes(payload)
        logger.info("Обложка сохранена: {} ({} байт)", target.name, size)
        return target
    except Exception as exc:
        logger.warning("Ошибка скачивания обложки {}: {}", cover_url, exc)
        return None
async def check_for_updates( async def check_for_updates(
url: str, url: str,
on_event: Optional[Callable] = None, on_event: Optional[Callable] = None,
@@ -390,11 +438,12 @@ async def check_for_updates(
async with BrowserManager(headless=True) as bm: async with BrowserManager(headless=True) as bm:
_, page = await bm.new_page() _, page = await bm.new_page()
manga = await source.get_manga_info(page, url) manga = await source.get_manga_info(page, url)
await page.close()
if not manga: if not manga:
await page.close()
return [] return []
# Обновляем pub_status и количество глав import json as _json_mod
# Обновляем pub_status, количество глав и мета-поля
await db_call( await db_call(
db.update_manga_info, db.update_manga_info,
url, url,
@@ -403,8 +452,25 @@ async def check_for_updates(
title_ru=manga.title_ru, title_ru=manga.title_ru,
title_full=manga.title_full, title_full=manga.title_full,
pub_status=manga.pub_status, pub_status=manga.pub_status,
description=manga.description,
tags=_json_mod.dumps(manga.tags, ensure_ascii=False) if manga.tags else "",
cover_url=manga.cover_url,
) )
# Обновляем обложку если манга сохраняется как cbz
manga_row = await db_call(db.get_manga, url)
manga_fmt = (manga_row or {}).get("format", "cbz")
if manga.cover_url and manga_fmt in ("cbz", "all"):
folder_name = (
(manga_row.get("folder_name") if manga_row else None)
or safe_name(manga.title_ru or manga.title)
)
manga_dir = OUTPUT_DIR / folder_name
if manga_dir.exists():
await _download_cover(manga.cover_url, manga_dir, url, page)
await page.close()
# Находим главы которых ещё нет в БД # Находим главы которых ещё нет в БД
known = {ch["chapter_url"] for ch in await db_call(db.get_all_chapters, url)} known = {ch["chapter_url"] for ch in await db_call(db.get_all_chapters, url)}
new_chapters = [ch for ch in manga.chapters if ch.url not in known] new_chapters = [ch for ch in manga.chapters if ch.url not in known]