upd
This commit is contained in:
53
src/api.py
53
src/api.py
@@ -682,6 +682,12 @@ async def refresh_meta(url: str, current_user: dict = Depends(get_current_user))
|
||||
def _patch_meta_sync(manga: dict, chapters: list, chapters_total: int, pub_status: str) -> tuple[int, int]:
|
||||
updated = failed = 0
|
||||
url = manga["url"]
|
||||
summary = manga.get("description") or ""
|
||||
tags_raw = manga.get("tags") or ""
|
||||
try:
|
||||
tags_str = ", ".join(json.loads(tags_raw)) if tags_raw else ""
|
||||
except Exception:
|
||||
tags_str = ""
|
||||
for ch in chapters:
|
||||
for fmt_col in ("output_cbz", "output_pdf", "output_epub"):
|
||||
fpath = ch.get(fmt_col)
|
||||
@@ -699,6 +705,8 @@ def _patch_meta_sync(manga: dict, chapters: list, chapters_total: int, pub_statu
|
||||
chapters_total=chapters_total,
|
||||
pub_status=pub_status,
|
||||
source_url=url,
|
||||
summary=summary,
|
||||
tags=tags_str,
|
||||
)
|
||||
if patch_meta(p, meta):
|
||||
updated += 1
|
||||
@@ -706,6 +714,43 @@ def _patch_meta_sync(manga: dict, chapters: list, chapters_total: int, pub_statu
|
||||
failed += 1
|
||||
return updated, failed
|
||||
|
||||
def _refresh_cover_sync(manga: dict, manga_dir: Path) -> None:
    """Download the manga cover and store it as ``manga_dir/cover.<ext>``.

    Uses blocking ``urllib`` on purpose so the call can be handed to
    ``asyncio.to_thread``. The operation is best-effort: every failure is
    logged as a warning and swallowed, nothing is raised to the caller.

    Args:
        manga: Manga record; ``cover_url`` is read, and ``url`` is used to
            derive the Referer header for non-MangaLib covers.
        manga_dir: Directory the cover file is written into.
    """
    import re as _re
    import urllib.request as _urllib_req
    from urllib.parse import urlparse as _up

    cover_url = manga.get("cover_url") or ""
    if not cover_url:
        return

    try:
        # urlopen() also serves file:// and ftp:// URLs. cover_url comes from
        # scraped data, so only plain web schemes are allowed through.
        if _up(cover_url).scheme.lower() not in ("http", "https"):
            logger.warning("refresh_cover: unsupported URL scheme: {}", cover_url)
            return

        # Pick the Referer from the cover host: MangaLib CDN hosts get the
        # MangaLib origin, everything else gets the origin of the manga page.
        if any(pat in cover_url for pat in ("mangalib", "cdnlibs", "imglib")):
            referer = "https://mangalib.me/"
        else:
            parsed = _up(manga.get("url") or "")
            if parsed.netloc:
                referer = f"{parsed.scheme}://{parsed.netloc}/"
            else:
                referer = "https://readmanga.ru/"

        req = _urllib_req.Request(cover_url, headers={
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/124.0.0.0",
            "Referer": referer,
            "Accept": "image/png,image/jpeg,image/webp,image/*,*/*",
        })
        with _urllib_req.urlopen(req, timeout=30) as resp:
            body = resp.read()

        # Responses under 500 bytes are treated as "not an image" and dropped.
        if len(body) < 500:
            logger.warning("refresh_cover: слишком малый ответ ({} байт)", len(body))
            return

        # The extension may be followed by a query string or a fragment.
        m = _re.search(r"\.(jpg|jpeg|png|webp)(\?|#|$)", cover_url, _re.IGNORECASE)
        ext = ("." + (m.group(1).lower() if m else "jpg")).replace(".jpeg", ".jpg")
        cover_path = manga_dir / f"cover{ext}"
        cover_path.write_bytes(body)
        logger.info("Обложка обновлена: {} ({} байт)", cover_path.name, len(body))
    except Exception as e:
        logger.warning("refresh_cover error {}: {}", cover_url, e)
|
||||
|
||||
|
||||
async def _do_refresh_meta(url: str):
|
||||
db = StateDB()
|
||||
try:
|
||||
@@ -721,6 +766,14 @@ async def _do_refresh_meta(url: str):
|
||||
await ws_manager.broadcast({"type": "meta_refresh_started", "url": url})
|
||||
updated, failed = await asyncio.to_thread(_patch_meta_sync, manga, chapters, chapters_total, pub_status)
|
||||
logger.info("refresh_meta {}: обновлено {}, ошибок {}", url, updated, failed)
|
||||
|
||||
# Refresh the cover when the manga is stored as CBZ (format "cbz" or "all")
|
||||
manga_fmt = manga.get("format", "cbz") or "cbz"
|
||||
if manga_fmt in ("cbz", "all") and manga.get("cover_url"):
|
||||
manga_dir = _manga_folder(manga)
|
||||
if manga_dir.exists():
|
||||
await asyncio.to_thread(_refresh_cover_sync, manga, manga_dir)
|
||||
|
||||
await ws_manager.broadcast({"type": "meta_refreshed", "url": url,
|
||||
"updated": updated, "failed": failed})
|
||||
except Exception as e:
|
||||
|
||||
@@ -26,6 +26,7 @@ class MangaMeta:
|
||||
language: str = "ru"
|
||||
summary: str = "" # Описание/синопсис серии
|
||||
genre: str = "" # Жанры через запятую (для ComicInfo Genre)
|
||||
tags: str = "" # Теги через запятую (для ComicInfo Tags)
|
||||
series_group: str = "" # Группа/коллекция (для ComicInfo SeriesGroup)
|
||||
|
||||
|
||||
@@ -89,6 +90,7 @@ def _make_comic_info(meta: MangaMeta) -> str:
|
||||
add("Count", meta.chapters_total)
|
||||
|
||||
add("Genre", meta.genre)
|
||||
add("Tags", meta.tags)
|
||||
add("LanguageISO", meta.language)
|
||||
|
||||
# Manga = YesAndRightToLeft — стандартная японская манга
|
||||
|
||||
@@ -37,6 +37,8 @@ class MangaInfo:
|
||||
title_full: str = ""
|
||||
description: str = ""
|
||||
genres: list[str] = field(default_factory=list)
|
||||
tags: list[str] = field(default_factory=list)
|
||||
cover_url: str = ""
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
|
||||
@@ -134,6 +134,18 @@ class MangalibSource:
|
||||
description = await _extract_description(page)
|
||||
genres = await _extract_genres(page)
|
||||
|
||||
# Получаем обложку, описание и теги из API
|
||||
async with lock:
|
||||
manga_meta_for_extras = dict(manga_api_data)
|
||||
|
||||
cover_url, extra_description, tags = await _fetch_extra_meta(
|
||||
page, manga_meta_for_extras, url, self.auth_token
|
||||
)
|
||||
if extra_description:
|
||||
description = extra_description
|
||||
if not description:
|
||||
description = await _extract_description(page)
|
||||
|
||||
async with lock:
|
||||
raw_chapters = list(chapters_api_data)
|
||||
|
||||
@@ -154,6 +166,8 @@ class MangalibSource:
|
||||
title_full=title_full,
|
||||
description=description,
|
||||
genres=genres,
|
||||
tags=tags,
|
||||
cover_url=cover_url,
|
||||
)
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
@@ -611,6 +625,85 @@ async def _extract_genres(page: Page) -> list[str]:
|
||||
return []
|
||||
|
||||
|
||||
def _parse_summary_doc(doc) -> str:
    """Flatten a ProseMirror-style JSON document node into plain text.

    A node whose ``type`` is ``"text"`` yields its ``text`` value. Any other
    node yields the non-empty texts of its ``content`` children, joined by a
    single space. The function recurses over the whole tree.

    Args:
        doc: A node as decoded from JSON. Anything that is not a non-empty
            dict yields an empty string.

    Returns:
        The collected text (not stripped), or ``""`` when there is none.
    """
    if not doc or not isinstance(doc, dict):
        return ""

    if doc.get("type") == "text":
        text = doc.get("text")
        # A JSON null or non-string "text" must not leak out of a -> str function.
        return text if isinstance(text, str) else ""

    children = doc.get("content")
    if not isinstance(children, list):
        # '"content": null' (or no key at all) simply means "no children".
        return ""

    rendered = (_parse_summary_doc(child) for child in children)
    return " ".join(piece for piece in rendered if piece)
|
||||
|
||||
|
||||
async def _fetch_extra_meta(
    page: Page,
    manga_api_data: dict,
    manga_url: str,
    auth_token: str | None,
) -> tuple[str, str, list[str]]:
    """Return ``(cover_url, description, tags)`` for a manga.

    The three fields are first read from ``manga_api_data``. If any of them
    is still empty, one supplementary request is sent to the manga API and
    only the missing fields are filled in from its response. A failure of
    that request is logged at debug level and otherwise ignored.

    Args:
        page: Page whose ``context.request`` is used for the supplementary
            HTTP request.
        manga_api_data: API payload that was already captured for the manga.
        manga_url: URL of the manga page; source of the slug and the Referer.
        auth_token: Optional bearer token added to the supplementary request.

    Returns:
        Tuple of cover URL, plain-text description and list of tag names;
        each element is empty when the value could not be obtained.
    """

    def _extract_from_data(data) -> tuple[str, str, list[str]]:
        """Pull cover URL, description and tag names out of one API payload."""
        if not isinstance(data, dict):
            return "", "", []

        cover_url = ""
        cover_obj = data.get("cover")
        if isinstance(cover_obj, dict):
            cover_url = cover_obj.get("default") or cover_obj.get("thumbnail") or ""

        description = ""
        summary = data.get("summary")
        if isinstance(summary, dict):
            description = _parse_summary_doc(summary).strip()
        elif isinstance(summary, str):
            description = summary.strip()

        tags: list[str] = []
        raw_tags = data.get("tags")
        for entry in raw_tags if isinstance(raw_tags, (list, tuple)) else ():
            # Tag entries are usually objects, but tolerate plain strings and
            # skip anything else instead of raising AttributeError.
            if isinstance(entry, dict):
                name = entry.get("name") or entry.get("label") or ""
            else:
                name = entry
            if isinstance(name, str) and name.strip():
                tags.append(name.strip())

        return cover_url, description, tags

    cover_url, description, tags = _extract_from_data(manga_api_data)

    # If at least one field is missing, ask the API for it explicitly.
    if not (cover_url and description and tags):
        slug = _manga_slug_from_url(manga_url)
        referer = _base_url(manga_url) + "/"
        api_url = (
            f"https://api.cdnlibs.org/api/manga/{slug}"
            "?fields[]=summary&fields[]=tags&fields[]=cover"
        )
        try:
            headers: dict = {"Referer": referer, "Accept": "application/json"}
            if auth_token:
                headers["Authorization"] = f"Bearer {auth_token}"
            resp = await page.context.request.get(api_url, headers=headers)
            if resp.ok:
                payload = _json.loads(await resp.body())
                # '"data": null' must not abort the merge below.
                data = payload.get("data") if isinstance(payload, dict) else None
                extra_cover, extra_desc, extra_tags = _extract_from_data(data)
                # Values already found in manga_api_data always win.
                cover_url = cover_url or extra_cover
                description = description or extra_desc
                tags = tags or extra_tags
                logger.debug("Supplementary API: cover={}, desc_len={}, tags={}",
                             bool(cover_url), len(description), len(tags))
        except Exception as e:
            logger.debug("Supplementary API error: {}", e)

    return cover_url, description, tags
|
||||
|
||||
|
||||
async def _detect_server(page: Page, servers_list: list[str]) -> str:
|
||||
"""Определяет CDN-сервер из img src на странице или из constants API."""
|
||||
try:
|
||||
|
||||
@@ -47,6 +47,8 @@ class ReadmangaSource:
|
||||
|
||||
description = await _extract_description(page)
|
||||
genres = await _extract_genres(page)
|
||||
tags = await _extract_tags(page)
|
||||
cover_url = await _get_cover_url(page)
|
||||
|
||||
await _expand_chapters(page)
|
||||
chapters = await _extract_chapters(page)
|
||||
@@ -63,6 +65,8 @@ class ReadmangaSource:
|
||||
title_full=title_full,
|
||||
description=description,
|
||||
genres=genres,
|
||||
tags=tags,
|
||||
cover_url=cover_url,
|
||||
)
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
@@ -474,6 +478,18 @@ async def _extract_description(page: Page) -> str:
|
||||
try:
|
||||
result = await page.evaluate("""
|
||||
() => {
|
||||
// Приоритетный селектор — новый сайт ReadManga
|
||||
const crDesc = document.querySelector('.cr-description__content');
|
||||
if (crDesc) {
|
||||
const parts = [];
|
||||
crDesc.querySelectorAll('p, span, div').forEach(el => {
|
||||
const t = el.textContent.trim();
|
||||
if (t) parts.push(t);
|
||||
});
|
||||
if (parts.length) return parts.join(' ');
|
||||
const t = crDesc.textContent.trim();
|
||||
if (t) return t;
|
||||
}
|
||||
const selectors = [
|
||||
'.manga-description', '.elem_descr .value',
|
||||
'#tab-description .description-text', '.description',
|
||||
@@ -491,6 +507,42 @@ async def _extract_description(page: Page) -> str:
|
||||
return ""
|
||||
|
||||
|
||||
async def _extract_tags(page: Page) -> list[str]:
    """Collect tag names from the ``.cr-tags`` element of the page.

    The in-page script returns the trimmed text of every ``a``, ``span`` and
    ``li`` inside ``.cr-tags``. When there are no such elements, it returns
    the element's own text split on ``,`` / ``;``. Because that selector also
    matches nested elements, the same tag text can come back more than once;
    duplicates are removed here while keeping first-seen order.

    Args:
        page: Page to evaluate the script on.

    Returns:
        Unique, non-empty tag names; an empty list when nothing is found or
        the evaluation fails.
    """
    try:
        result = await page.evaluate("""
            () => {
                const crTags = document.querySelector('.cr-tags');
                if (crTags) {
                    const els = crTags.querySelectorAll('a, span, li');
                    if (els.length) return Array.from(els).map(e => e.textContent.trim()).filter(Boolean);
                    const t = crTags.textContent.trim();
                    if (t) return t.split(/[,;]/).map(s => s.trim()).filter(Boolean);
                }
                return [];
            }
        """)
    except Exception:
        return []

    if not isinstance(result, list):
        return []
    # dict.fromkeys keeps insertion order, so this is an order-preserving dedupe.
    return list(dict.fromkeys(tag for tag in result if isinstance(tag, str) and tag))
|
||||
|
||||
|
||||
async def _get_cover_url(page: Page) -> str:
    """Return the poster image URL found on the page, or ``""``.

    The in-page script looks for an ``img`` inside ``.cr-hero-poster-wrapper``
    and returns its ``src``, falling back to ``data-src``. Any error during
    evaluation results in an empty string.
    """
    try:
        found = await page.evaluate("""
            () => {
                const wrapper = document.querySelector('.cr-hero-poster-wrapper');
                if (wrapper) {
                    const img = wrapper.querySelector('img');
                    if (img) return img.src || img.dataset.src || '';
                }
                return '';
            }
        """)
        if not found:
            return ""
        return found.strip()
    except Exception:
        return ""
|
||||
|
||||
|
||||
async def _extract_genres(page: Page) -> list[str]:
|
||||
try:
|
||||
result = await page.evaluate("""
|
||||
|
||||
14
src/state.py
14
src/state.py
@@ -160,6 +160,9 @@ class StateDB:
|
||||
("mangas", "added_by", "INTEGER REFERENCES users(id)"),
|
||||
("mangas", "last_error", "TEXT"),
|
||||
("users", "is_env_admin", "INTEGER NOT NULL DEFAULT 0"),
|
||||
("mangas", "description", "TEXT"),
|
||||
("mangas", "tags", "TEXT"),
|
||||
("mangas", "cover_url", "TEXT"),
|
||||
]
|
||||
for table, col, typedef in migrations:
|
||||
try:
|
||||
@@ -370,11 +373,16 @@ class StateDB:
|
||||
|
||||
def update_manga_info(self, url: str, title: str, chapters_total: int,
                      title_ru: str = "", title_full: str = "",
                      pub_status: str = "unknown",
                      description: str = "", tags: str = "",
                      cover_url: str = ""):
    """Update the stored metadata of the manga identified by ``url``.

    Titles, publication status, chapter count and ``updated_at`` are always
    overwritten. ``description``, ``tags`` and ``cover_url`` are written only
    when a non-empty value is passed: an empty value keeps whatever is already
    stored, so a scrape that failed to find them does not erase earlier data.

    Args:
        url: Manga URL; selects the row in ``mangas``.
        title: Main title.
        chapters_total: Total number of chapters.
        title_ru: Russian title.
        title_full: Full title.
        pub_status: Publication status string.
        description: Series description; empty keeps the stored value.
        tags: Tags as stored text (callers in this change pass a JSON-encoded
            list); empty keeps the stored value.
        cover_url: Cover image URL; empty keeps the stored value.
    """
    self.conn.execute("""
        UPDATE mangas SET title=?, title_ru=?, title_full=?, pub_status=?,
            chapters_total=?, updated_at=?,
            description=COALESCE(?, description),
            tags=COALESCE(?, tags),
            cover_url=COALESCE(?, cover_url)
        WHERE url=?
    """, (title, title_ru, title_full, pub_status, chapters_total, _now(),
          # Empty strings are bound as NULL so COALESCE falls back to the column.
          description or None, tags or None, cover_url or None, url))
    self.conn.commit()
|
||||
|
||||
def set_folder_name(self, url: str, folder_name: str):
|
||||
|
||||
@@ -91,14 +91,15 @@ async def download_manga(
|
||||
await emit({"type": "auth_required", "url": url,
|
||||
"source_slug": e.source_slug, "finished_at": finished_ts})
|
||||
return
|
||||
await info_page.close()
|
||||
|
||||
if not manga:
|
||||
await info_page.close()
|
||||
await db_call(db.update_manga_status, url, "failed")
|
||||
await emit({"type": "manga_failed", "url": url,
|
||||
"error": "Не удалось получить информацию о манге"})
|
||||
return
|
||||
|
||||
import json as _json_mod
|
||||
await db_call(
|
||||
db.update_manga_info,
|
||||
url,
|
||||
@@ -107,6 +108,9 @@ async def download_manga(
|
||||
title_ru=manga.title_ru,
|
||||
title_full=manga.title_full,
|
||||
pub_status=manga.pub_status,
|
||||
description=manga.description,
|
||||
tags=_json_mod.dumps(manga.tags, ensure_ascii=False) if manga.tags else "",
|
||||
cover_url=manga.cover_url,
|
||||
)
|
||||
await emit({
|
||||
"type": "manga_info",
|
||||
@@ -127,6 +131,12 @@ async def download_manga(
|
||||
manga_dir = output_dir / folder_name
|
||||
manga_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Скачиваем обложку для CBZ-формата (info_page ещё открыта — контекст браузера жив)
|
||||
if manga.cover_url and fmt in ("cbz", "all"):
|
||||
await _download_cover(manga.cover_url, manga_dir, url, info_page)
|
||||
|
||||
await info_page.close()
|
||||
|
||||
for ch in manga.chapters:
|
||||
await db_call(db.upsert_chapter, url, ch.url, ch.title, ch.number, ch.volume)
|
||||
|
||||
@@ -250,6 +260,7 @@ async def download_manga(
|
||||
source_url=url,
|
||||
summary=manga.description,
|
||||
genre=", ".join(manga.genres) if manga.genres else "",
|
||||
tags=", ".join(manga.tags) if manga.tags else "",
|
||||
)
|
||||
for f in formats:
|
||||
out_file = manga_dir / f"{ch_name}.{f}"
|
||||
@@ -350,6 +361,43 @@ async def download_manga(
|
||||
db.close()
|
||||
|
||||
|
||||
def _cover_ext_from_url(url: str) -> str:
    """Return the cover file extension (with leading dot) inferred from ``url``.

    Recognises ``jpg``, ``jpeg``, ``png`` and ``webp`` (case-insensitive) when
    the extension is followed by a query string, a fragment or the end of the
    URL. ``jpeg`` is normalised to ``.jpg``.

    Args:
        url: Cover image URL.

    Returns:
        ``".jpg"``, ``".png"`` or ``".webp"``; ``".jpg"`` when no known
        extension is found.
    """
    import re as _re

    # The lookahead accepts '?' (query), '#' (fragment) or end of string.
    found = _re.search(r"\.(jpe?g|png|webp)(?=[?#]|$)", url, _re.IGNORECASE)
    if found is None:
        return ".jpg"
    ext = found.group(1).lower()
    return ".jpg" if ext == "jpeg" else f".{ext}"
|
||||
|
||||
|
||||
async def _download_cover(cover_url: str, manga_dir: Path, manga_url: str, page) -> Optional[Path]:
    """Download the cover into ``manga_dir/cover.<ext>`` via the page's request context.

    The request carries an image ``Accept`` header and a ``Referer`` built
    from the origin of ``manga_url``. Non-OK responses and bodies shorter
    than 500 bytes are rejected. All errors are logged as warnings.

    Args:
        cover_url: URL of the cover image.
        manga_dir: Directory the cover file is written into.
        manga_url: URL of the manga page, used for the Referer header.
        page: Object exposing ``context.request.get`` (an existing Playwright
            page, per the original docstring).

    Returns:
        Path of the written file, or ``None`` when nothing was saved.
    """
    from urllib.parse import urlparse as _urlparse

    try:
        origin = _urlparse(manga_url)
        response = await page.context.request.get(
            cover_url,
            headers={
                "Accept": "image/png,image/jpeg,image/webp,image/*,*/*",
                "Referer": f"{origin.scheme}://{origin.netloc}/",
            },
        )
        if not response.ok:
            logger.warning("Обложка: HTTP {} для {}", response.status, cover_url)
            return None

        payload = await response.body()
        size = len(payload)
        if size < 500:
            logger.warning("Обложка: слишком малый ответ ({} байт)", size)
            return None

        target = manga_dir / ("cover" + _cover_ext_from_url(cover_url))
        target.write_bytes(payload)
        logger.info("Обложка сохранена: {} ({} байт)", target.name, size)
        return target
    except Exception as exc:
        logger.warning("Ошибка скачивания обложки {}: {}", cover_url, exc)
        return None
|
||||
|
||||
|
||||
async def check_for_updates(
|
||||
url: str,
|
||||
on_event: Optional[Callable] = None,
|
||||
@@ -390,11 +438,12 @@ async def check_for_updates(
|
||||
async with BrowserManager(headless=True) as bm:
|
||||
_, page = await bm.new_page()
|
||||
manga = await source.get_manga_info(page, url)
|
||||
await page.close()
|
||||
if not manga:
|
||||
await page.close()
|
||||
return []
|
||||
|
||||
# Обновляем pub_status и количество глав
|
||||
import json as _json_mod
|
||||
# Обновляем pub_status, количество глав и мета-поля
|
||||
await db_call(
|
||||
db.update_manga_info,
|
||||
url,
|
||||
@@ -403,8 +452,25 @@ async def check_for_updates(
|
||||
title_ru=manga.title_ru,
|
||||
title_full=manga.title_full,
|
||||
pub_status=manga.pub_status,
|
||||
description=manga.description,
|
||||
tags=_json_mod.dumps(manga.tags, ensure_ascii=False) if manga.tags else "",
|
||||
cover_url=manga.cover_url,
|
||||
)
|
||||
|
||||
# Refresh the cover when the manga is stored as CBZ (format "cbz" or "all")
|
||||
manga_row = await db_call(db.get_manga, url)
|
||||
manga_fmt = (manga_row or {}).get("format", "cbz")
|
||||
if manga.cover_url and manga_fmt in ("cbz", "all"):
|
||||
folder_name = (
|
||||
(manga_row.get("folder_name") if manga_row else None)
|
||||
or safe_name(manga.title_ru or manga.title)
|
||||
)
|
||||
manga_dir = OUTPUT_DIR / folder_name
|
||||
if manga_dir.exists():
|
||||
await _download_cover(manga.cover_url, manga_dir, url, page)
|
||||
|
||||
await page.close()
|
||||
|
||||
# Находим главы которых ещё нет в БД
|
||||
known = {ch["chapter_url"] for ch in await db_call(db.get_all_chapters, url)}
|
||||
new_chapters = [ch for ch in manga.chapters if ch.url not in known]
|
||||
|
||||
Reference in New Issue
Block a user