This commit is contained in:
2026-04-29 02:07:21 +03:00
parent ba6bfc5ed3
commit 0aa057c991
14 changed files with 4257 additions and 139 deletions

779
src/api.py Normal file
View File

@@ -0,0 +1,779 @@
"""
FastAPI веб-сервер: REST API + WebSocket для мониторинга загрузок манги.
"""
import asyncio
import os
import re
from pathlib import Path
from typing import List, Optional
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, HTTPException
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from pydantic import BaseModel
from loguru import logger
from .state import StateDB
from .worker import download_manga, check_for_updates
from .exporter import patch_meta, MangaMeta
# Root directory for downloaded files; per-manga folders are resolved under it.
OUTPUT_DIR = Path("/app/output")
# Directory with the frontend files; mounted as static content when it exists.
FRONTEND_DIR = Path("/app/frontend")
# Application object on which every route in this module is registered.
app = FastAPI(title="Manga Downloader API")
# ── WebSocket менеджер ────────────────────────
class ConnectionManager:
    """Registry of open WebSocket connections with fan-out delivery."""

    def __init__(self):
        self.active: set[WebSocket] = set()

    async def connect(self, ws: WebSocket):
        """Accept the handshake and start tracking the socket."""
        await ws.accept()
        self.active.add(ws)

    def disconnect(self, ws: WebSocket):
        """Stop tracking the socket; sockets that are not tracked are ignored."""
        self.active.discard(ws)

    async def broadcast(self, data: dict):
        """Send ``data`` as JSON to every tracked socket.

        Sockets whose send raises are removed from the registry afterwards.
        """
        failed = []
        # Iterate over a snapshot: the set may change while a send is awaited.
        for client in tuple(self.active):
            try:
                await client.send_json(data)
            except Exception:
                failed.append(client)
        self.active.difference_update(failed)
# Shared manager through which this module broadcasts events to WebSocket clients.
ws_manager = ConnectionManager()
# ── Download queue ───────────────────────────
# Jobs are dicts with "url" and optional "fmt", "is_update" and "resume" keys.
download_queue: asyncio.Queue = asyncio.Queue()
# url → asyncio.Task of the download currently in progress
active_tasks: dict[str, asyncio.Task] = {}
async def queue_worker():
    """Supervise the queue consumer: run it and restart it 5 s after a crash."""
    restart_delay = 5
    while True:
        try:
            await _queue_worker_loop()
            continue
        except Exception as exc:
            logger.error("queue_worker упал, перезапускаю через 5 сек: {}", exc)
        # Only reached after a failure: back off before restarting the loop.
        await asyncio.sleep(restart_delay)
async def _queue_worker_loop():
    """Consume jobs from ``download_queue`` one at a time, forever.

    A job is a dict with ``url`` and optional ``fmt`` (default ``"cbz"``),
    ``is_update`` (default ``False``) and ``resume`` (default ``True``).
    Jobs whose manga was stopped or deleted while waiting are dropped.
    The running download is registered in ``active_tasks`` so other handlers
    can cancel it; the entry is removed when the download ends.
    """
    while True:
        job = await download_queue.get()
        url = job["url"]
        fmt = job.get("fmt", "cbz")

        # Re-read the record: it may have been stopped or deleted while queued.
        db = StateDB()
        try:
            record = db.get_manga(url)
        finally:
            db.close()
        if not record:
            # Queued mangas can be deleted; do not download what no longer exists.
            logger.info("Воркер: пропускаю удалённую {}", url)
            download_queue.task_done()
            continue
        if record["status"] == "stopped":
            logger.info("Воркер: пропускаю остановленную {}", url)
            download_queue.task_done()
            continue

        logger.info("Воркер: начинаю скачивать {}", url)
        dl_task = asyncio.create_task(download_manga(
            url=url,
            fmt=fmt,
            is_update=job.get("is_update", False),
            resume=job.get("resume", True),
            on_event=ws_manager.broadcast,
        ))
        active_tasks[url] = dl_task
        try:
            await dl_task
        except asyncio.CancelledError:
            logger.info("Воркер: загрузка прервана: {}", url)
            _db = StateDB()
            try:
                current_status = _db.get_manga(url)
                # A "queued" status means the download was preempted and put
                # back into the queue; do not overwrite it with "stopped".
                if current_status and current_status["status"] != "queued":
                    _db.update_manga_status(url, "stopped")
                    await ws_manager.broadcast({"type": "manga_stopped", "url": url})
                else:
                    await ws_manager.broadcast({"type": "manga_queued", "url": url, "format": fmt})
            finally:
                _db.close()
        except Exception as e:
            logger.error("Воркер ошибка {}: {}", url, e)
        finally:
            active_tasks.pop(url, None)
            download_queue.task_done()
@app.on_event("startup")
async def startup_event():
    """Start the background tasks and re-queue unfinished downloads.

    Every manga stored as "queued" or "downloading" is reset to "queued" and
    put back into ``download_queue`` with its stored format.
    """
    # The event loop only keeps weak references to tasks, so hold strong ones
    # for the lifetime of the application to keep them from being collected.
    app.state.background_tasks = [
        asyncio.create_task(queue_worker()),
        asyncio.create_task(update_scheduler()),
    ]
    # Restore the queue from the database (unfinished jobs).
    db = StateDB()
    try:
        for manga in db.get_all_mangas():
            if manga["status"] in ("queued", "downloading"):
                db.update_manga_status(manga["url"], "queued")
                await download_queue.put({"url": manga["url"], "fmt": manga["format"]})
                logger.info("Восстановлено из очереди: {}", manga["url"])
    finally:
        db.close()
async def update_scheduler():
    """Periodically run the auto-update check.

    The interval is read from ``UPDATE_INTERVAL_HOURS`` (hours, default 6).
    A value that is not a positive finite number falls back to the default.
    The first run happens 5 minutes after start-up. A failing run is logged
    and does not stop the following runs.
    """
    default_hours = 6.0
    raw = os.getenv("UPDATE_INTERVAL_HOURS", "6")
    try:
        interval_hours = float(raw)
    except ValueError:
        interval_hours = float("nan")
    # Rejects NaN, infinities, zero and negatives (zero would busy-loop).
    if not 0 < interval_hours < float("inf"):
        logger.warning(
            "Некорректный UPDATE_INTERVAL_HOURS={!r}, использую {} ч", raw, default_hours
        )
        interval_hours = default_hours
    interval_sec = interval_hours * 3600
    logger.info("Планировщик обновлений: каждые {} ч", interval_hours)
    # First run: 5 minutes after start-up.
    await asyncio.sleep(300)
    while True:
        try:
            await _run_auto_updates()
        except Exception as e:
            # Keep the scheduler alive; the next cycle retries.
            logger.error("Планировщик обновлений: ошибка цикла: {}", e)
        await asyncio.sleep(interval_sec)
async def _run_auto_updates():
    """Check every manga returned by ``db.get_autos()`` for new chapters.

    When new chapters are found, an update job is enqueued and a
    "manga_queued" event is broadcast. Errors for one manga are logged and
    do not interrupt the others.
    """
    db = StateDB()
    try:
        candidates = db.get_autos()
    finally:
        db.close()
    if not candidates:
        return
    logger.info("Авто-обновление: проверяем {} манг", len(candidates))
    for manga in candidates:
        url, fmt = manga["url"], manga.get("format", "cbz")
        try:
            fresh = await check_for_updates(url, on_event=ws_manager.broadcast)
            if not fresh:
                continue
            logger.info("Новых глав для {}: {}", url, len(fresh))
            # Flip the status only when the manga is not already in progress.
            state = StateDB()
            try:
                row = state.get_manga(url)
                busy = not row or row["status"] in ("downloading", "queued")
                if not busy:
                    state.update_manga_status(url, "queued")
            finally:
                state.close()
            # Enqueue with the is_update flag.
            await download_queue.put({"url": url, "fmt": fmt, "is_update": True})
            await ws_manager.broadcast({
                "type": "manga_queued",
                "url": url,
                "format": fmt,
                "reason": "auto_update",
            })
        except Exception as exc:
            logger.error("Ошибка авто-обновления {}: {}", url, exc)
# ── Вспомогательные функции ───────────────────
def _dir_size(path: Path) -> int:
"""Размер директории в байтах."""
if not path.exists():
return 0
return sum(f.stat().st_size for f in path.rglob("*") if f.is_file())
def _format_size(bytes_val: int) -> str:
for unit in ("Б", "КБ", "МБ", "ГБ"):
if bytes_val < 1024:
return f"{bytes_val:.1f} {unit}"
bytes_val /= 1024
return f"{bytes_val:.1f} ТБ"
def _enrich_manga(m: dict, db: StateDB) -> dict:
    """Return the manga row extended with counters from the chapters table.

    Adds ``chapters_done``, ``size_bytes``, ``size_human``, ``queue_position``
    (always ``None`` here; callers fill it in), ``is_active``,
    ``errors_count`` (failed plus partially downloaded chapters),
    ``started_at`` and ``finished_at``.
    """
    title = m.get("title") or ""
    safe_title = re.sub(r'[^\w\s\-]', '', title).strip().replace(" ", "_")[:80]
    # An empty folder name would resolve to OUTPUT_DIR itself and report the
    # size of the whole library for a manga that has no title yet.
    size_bytes = _dir_size(OUTPUT_DIR / safe_title) if safe_title else 0
    ch_done_count = db.conn.execute(
        "SELECT COUNT(*) FROM chapters WHERE manga_url=? AND status='done'",
        (m["url"],)
    ).fetchone()[0]
    ch_failed = db.conn.execute(
        "SELECT COUNT(*) FROM chapters WHERE manga_url=? AND status='failed'",
        (m["url"],)
    ).fetchone()[0]
    # "Partial": marked done, but fewer pages were saved than expected.
    ch_partial = db.conn.execute(
        """SELECT COUNT(*) FROM chapters
WHERE manga_url=? AND status='done'
AND pages_total > 0 AND pages_done < pages_total""",
        (m["url"],)
    ).fetchone()[0]
    return {
        **m,
        "chapters_done": ch_done_count,
        "size_bytes": size_bytes,
        "size_human": _format_size(size_bytes),
        "queue_position": None,
        "is_active": m["url"] in active_tasks,
        "errors_count": ch_failed + ch_partial,
        "started_at": m.get("started_at"),
        "finished_at": m.get("finished_at"),
    }
def _manga_detail(manga: dict, db: StateDB) -> dict:
    """Build the detailed view of one manga.

    Returns the manga row extended with its chapters, the files found in its
    output folder, size information, aggregate statistics and a list of
    problem chapters (failed first, then partial, each ordered by number).
    """
    url = manga["url"]
    chapters = db.get_all_chapters(url)

    # Resolve the manga folder. An empty name would resolve to OUTPUT_DIR
    # itself, so a manga without a usable title has no folder.
    title = manga.get("title") or ""
    safe_title = re.sub(r'[^\w\s\-]', '', title).strip().replace(" ", "_")[:80]
    manga_dir = OUTPUT_DIR / safe_title if safe_title else None
    size_bytes = _dir_size(manga_dir) if manga_dir is not None else 0

    # Files directly inside the manga folder.
    files = []
    if manga_dir is not None and manga_dir.is_dir():
        for f in sorted(manga_dir.iterdir()):
            if f.is_file():
                file_size = f.stat().st_size
                files.append({
                    "name": f.name,
                    "size": file_size,
                    "size_human": _format_size(file_size),
                })

    def pages(chapter: dict, key: str) -> int:
        # A present-but-NULL column yields None; treat it as zero.
        return chapter.get(key) or 0

    # ── Statistics ───────────────────────────
    ch_done = [c for c in chapters if c["status"] == "done"]
    ch_failed = [c for c in chapters if c["status"] == "failed"]
    ch_pending = [c for c in chapters if c["status"] == "pending"]
    total_pages_downloaded = sum(pages(c, "pages_done") for c in chapters)
    total_pages_expected = sum(
        total for total in (pages(c, "pages_total") for c in chapters) if total > 0
    )
    # Partially downloaded: done, but pages_done < pages_total.
    ch_partial = [
        c for c in ch_done
        if pages(c, "pages_total") > 0 and pages(c, "pages_done") < pages(c, "pages_total")
    ]
    # Pages missing across the partial chapters.
    pages_missing = sum(pages(c, "pages_total") - pages(c, "pages_done") for c in ch_partial)

    errors = [
        {**c, "error_type": "failed", "error_label": "Глава не загружена"}
        for c in ch_failed
    ]
    for c in ch_partial:
        missing = pages(c, "pages_total") - pages(c, "pages_done")
        errors.append({**c, "error_type": "partial",
                       "error_label": f"Частичная загрузка: пропущено {missing} стр."})
    # Failed first, then partial; by chapter number inside each group.
    errors.sort(key=lambda c: (0 if c["error_type"] == "failed" else 1, c.get("number") or 0))

    stats = {
        "chapters_done": len(ch_done),
        "chapters_failed": len(ch_failed),
        "chapters_pending": len(ch_pending),
        "chapters_partial": len(ch_partial),
        "total_pages_downloaded": total_pages_downloaded,
        "total_pages_expected": total_pages_expected,
        "pages_missing": pages_missing,
        "errors_count": len(errors),
    }
    return {
        **manga,
        "chapters": chapters,
        "files": files,
        "size_bytes": size_bytes,
        "size_human": _format_size(size_bytes),
        "files_count": len(files),
        "stats": stats,
        "errors": errors,
    }
# ── REST API ──────────────────────────────────
class AddMangaRequest(BaseModel):
    # Request body of POST /api/queue.
    # Manga page URLs to enqueue; the handler strips them and skips blanks.
    urls: List[str]
    # Export format stored with each manga and passed on to the download job.
    format: str = "cbz"
@app.get("/api/mangas")
async def list_mangas():
    """Return every manga, enriched with counters and its queue position."""
    db = StateDB()
    try:
        enriched = [_enrich_manga(row, db) for row in db.get_all_mangas()]
        # 1-based position of each queued URL (the last occurrence wins).
        position_by_url = {
            job["url"]: place
            for place, job in enumerate(download_queue._queue, start=1)  # type: ignore
        }
        for entry in enriched:
            place = position_by_url.get(entry["url"])
            if place is not None:
                entry["queue_position"] = place
        return enriched
    finally:
        db.close()
@app.get("/api/mangas/detail")
async def manga_detail(url: str):
    """Return the detailed view of one manga; 404 when the URL is unknown."""
    db = StateDB()
    try:
        record = db.get_manga(url)
        if record:
            return _manga_detail(record, db)
        raise HTTPException(status_code=404, detail="Манга не найдена")
    finally:
        db.close()
@app.post("/api/queue")
async def add_to_queue(body: AddMangaRequest):
    """Register the given URLs and enqueue those that are new.

    Returns ``{"added": [...], "skipped": [...]}``; URLs for which
    ``db.add_manga`` reports an existing record are listed as skipped.
    """
    added, skipped = [], []
    db = StateDB()
    try:
        for raw_url in body.urls:
            url = raw_url.strip()
            if not url:
                continue
            if not db.add_manga(url, body.format):
                skipped.append(url)
                continue
            await download_queue.put({"url": url, "fmt": body.format})
            added.append(url)
            await ws_manager.broadcast({
                "type": "manga_queued",
                "url": url,
                "format": body.format,
            })
            # Fetch the title and chapter count in the background.
            asyncio.create_task(_fetch_preview(url))
    finally:
        db.close()
    return {"added": added, "skipped": skipped}
async def _fetch_preview(url: str):
    """Fetch the title and chapter count right after a manga is added.

    Stores the result through ``db.update_manga_info`` and broadcasts a
    "manga_preview" event. Any failure is logged as a warning.
    """
    try:
        from .browser import BrowserManager
        from .scraper import get_manga_info
        async with BrowserManager(headless=True) as bm:
            _, page = await bm.new_page()
            manga = await get_manga_info(page, url)
            if not manga:
                return
            display_title = manga.title_ru or manga.title
            chapter_count = len(manga.chapters)
            db = StateDB()
            try:
                db.update_manga_info(
                    url,
                    title=display_title,
                    chapters_total=chapter_count,
                    title_ru=manga.title_ru,
                    title_full=manga.title_full,
                    pub_status=manga.pub_status,
                )
            finally:
                db.close()
            await ws_manager.broadcast({
                "type": "manga_preview",
                "url": url,
                "title": display_title,
                "title_ru": manga.title_ru,
                "title_full": manga.title_full,
                "pub_status": manga.pub_status,
                "chapters_total": chapter_count,
            })
            logger.info("Предпросмотр готов: {} ({} глав)", display_title, chapter_count)
    except Exception as exc:
        logger.warning("Ошибка предпросмотра {}: {}", url, exc)
@app.post("/api/mangas/auto_update")
async def toggle_auto_update(url: str, enabled: bool):
    """Enable or disable auto-update for a manga; 404 when the URL is unknown."""
    db = StateDB()
    try:
        if not db.get_manga(url):
            raise HTTPException(status_code=404, detail="Манга не найдена")
        db.set_auto_update(url, enabled)
        event = {
            "type": "auto_update_changed",
            "url": url,
            "auto_update": enabled,
        }
        await ws_manager.broadcast(event)
        return {"ok": True, "auto_update": enabled}
    finally:
        db.close()
@app.post("/api/mangas/check_now")
async def check_now(url: str):
    """Start an immediate new-chapter check for one manga in the background."""
    db = StateDB()
    try:
        known = db.get_manga(url)
    finally:
        db.close()
    if not known:
        raise HTTPException(status_code=404, detail="Манга не найдена")
    asyncio.create_task(_check_and_queue(url))
    return {"ok": True}
async def _check_and_queue(url: str):
    """Background task behind ``check_now``: check for new chapters and enqueue.

    When new chapters are found an update job is enqueued. The stored status
    is set to "queued" only if the manga is not already queued or
    downloading, so an active download keeps its status. Failures are logged
    here because nothing awaits this task.
    """
    try:
        db = StateDB()
        try:
            manga = db.get_manga(url)
            fmt = manga["format"] if manga else "cbz"
        finally:
            db.close()
        new = await check_for_updates(url, on_event=ws_manager.broadcast)
        if not new:
            return
        db2 = StateDB()
        try:
            current = db2.get_manga(url)
            if not current:
                # The manga was deleted while the check was running.
                return
            # Overwriting "downloading" with "queued" would make the worker
            # treat a later stop of that download as a preemption.
            if current["status"] not in ("downloading", "queued"):
                db2.update_manga_status(url, "queued")
        finally:
            db2.close()
        await download_queue.put({"url": url, "fmt": fmt, "is_update": True})
    except Exception as e:
        logger.error("Ошибка проверки обновлений {}: {}", url, e)
@app.get("/api/news")
async def get_news(limit: int = 100):
    """Return the latest 'downloaded' / 'auto_downloaded' history rows (News tab)."""
    query = """
SELECT h.*, m.title as manga_title, m.title_ru
FROM history h LEFT JOIN mangas m ON h.manga_url = m.url
WHERE h.event_type IN ('downloaded', 'auto_downloaded')
ORDER BY h.created_at DESC LIMIT ?
"""
    db = StateDB()
    try:
        rows = db.conn.execute(query, (limit,)).fetchall()
        return [dict(row) for row in rows]
    finally:
        db.close()
@app.get("/api/history")
async def get_history(limit: int = 100, manga_url: str = ""):
    """Return history rows as produced by ``db.get_history`` for the given filters."""
    db = StateDB()
    try:
        rows = db.get_history(limit=limit, manga_url=manga_url)
    finally:
        db.close()
    return rows
@app.post("/api/mangas/prioritize")
async def prioritize_manga(url: str):
    """Move a manga to the front of the queue, preempting the active download.

    The preempted download is marked "queued", re-inserted right after the
    target and then cancelled. Returns early when the target is already
    downloading; responds with 404 when the URL is unknown.

    The queue is rebuilt through ``get_nowait`` / ``task_done`` /
    ``put_nowait`` so that a worker waiting in ``get()`` is woken up and the
    queue's unfinished-task counter stays consistent.
    """
    db = StateDB()
    try:
        manga = db.get_manga(url)
        if not manga:
            raise HTTPException(status_code=404, detail="Манга не найдена")
        if manga["status"] == "downloading" and url in active_tasks:
            return {"ok": True, "message": "Уже загружается"}
        fmt = manga["format"] or "cbz"

        # 1. Database work first, so nothing can fail while the queue is
        #    temporarily drained below.
        current_url = next(iter(active_tasks), None)
        preempt = bool(current_url) and current_url != url
        preempted_job = None
        if preempt:
            cur_manga = db.get_manga(current_url)
            cur_fmt = cur_manga["format"] if cur_manga else "cbz"
            # Mark as queued: the worker sees this and does not set "stopped".
            db.update_manga_status(current_url, "queued")
            preempted_job = {"url": current_url, "fmt": cur_fmt}
        db.update_manga_status(url, "queued")

        # 2. Drain the queue. There is no await until it is refilled, so no
        #    other coroutine can observe the intermediate state.
        waiting = []
        target_job = None
        while True:
            try:
                job = download_queue.get_nowait()
            except asyncio.QueueEmpty:
                break
            download_queue.task_done()
            if job["url"] != url:
                waiting.append(job)
            elif target_job is None:
                # Reuse the queued job so its is_update/resume flags survive.
                target_job = job

        # 3. Refill: target first, then the preempted download, then the rest.
        reordered = [target_job or {"url": url, "fmt": fmt}]
        if preempted_job is not None:
            reordered.append(preempted_job)
        reordered.extend(waiting)
        for job in reordered:
            download_queue.put_nowait(job)

        # 4. Cancel the active download; the worker then picks up the target.
        if preempt:
            task = active_tasks.get(current_url)
            if task and not task.done():
                task.cancel()

        logger.info("Приоритет: {} → начало очереди (вытеснен: {})", url, current_url)
        await ws_manager.broadcast({
            "type": "manga_prioritized",
            "url": url,
            "preempted_url": current_url,
        })
        return {"ok": True}
    finally:
        db.close()
@app.post("/api/mangas/retry_errors")
async def retry_errors(url: str):
    """Reset failed and partially downloaded chapters of a manga to 'pending'."""
    db = StateDB()

    def sql_now():
        # Timestamp as produced by the database itself.
        return db.conn.execute("SELECT datetime('now')").fetchone()[0]

    try:
        if not db.get_manga(url):
            raise HTTPException(status_code=404, detail="Манга не найдена")
        # Failed chapters.
        db.conn.execute(
            "UPDATE chapters SET status='pending', pages_done=0, pages_total=0, updated_at=? WHERE manga_url=? AND status='failed'",
            (sql_now(), url)
        )
        # Partial chapters: marked done, but fewer pages saved than expected.
        db.conn.execute(
            """UPDATE chapters SET status='pending', pages_done=0, pages_total=0, updated_at=?
WHERE manga_url=? AND status='done' AND pages_total > 0 AND pages_done < pages_total""",
            (sql_now(), url)
        )
        db.conn.commit()
        return {"ok": True}
    finally:
        db.close()
@app.post("/api/mangas/refresh_meta")
async def refresh_meta(url: str):
    """Start a background refresh of the metadata in already downloaded files.

    Responds with 404 for an unknown URL and 400 while the manga is being
    downloaded.
    """
    db = StateDB()
    try:
        record = db.get_manga(url)
        if not record:
            raise HTTPException(status_code=404, detail="Манга не найдена")
        in_progress = record["status"] == "downloading" and url in active_tasks
        if in_progress:
            raise HTTPException(status_code=400, detail="Манга сейчас загружается")
    finally:
        db.close()
    asyncio.create_task(_do_refresh_meta(url))
    return {"ok": True}
async def _do_refresh_meta(url: str):
    """Background task: re-apply metadata to every downloaded file of a manga.

    For each chapter, the paths stored in ``output_cbz``, ``output_pdf`` and
    ``output_epub`` are patched when the file exists. ``patch_meta`` runs in
    a worker thread so the file rewriting does not block the event loop.
    A "meta_refreshed" event with the counters is broadcast at the end.
    """
    try:
        # Read everything needed up front and release the connection before
        # the (potentially long) file patching starts.
        db = StateDB()
        try:
            manga = db.get_manga(url)
            if not manga:
                return
            chapters = db.get_all_chapters(url)
        finally:
            db.close()

        chapters_total = len(chapters)
        pub_status = manga.get("pub_status", "unknown") or "unknown"
        updated = failed = 0
        for ch in chapters:
            for fmt_col in ("output_cbz", "output_pdf", "output_epub"):
                fpath = ch.get(fmt_col)
                if not fpath:
                    continue
                p = Path(fpath)
                if not p.exists():
                    continue
                meta = MangaMeta(
                    series=manga.get("title_ru") or manga.get("title") or "",
                    series_full=manga.get("title_full") or "",
                    chapter_title=ch.get("title") or "",
                    number=float(ch.get("number") or 0),
                    volume=int(ch.get("volume") or 0),
                    chapters_total=chapters_total,
                    pub_status=pub_status,
                    source_url=url,
                )
                if await asyncio.to_thread(patch_meta, p, meta):
                    updated += 1
                else:
                    failed += 1
        logger.info("refresh_meta {}: обновлено {}, ошибок {}", url, updated, failed)
        await ws_manager.broadcast({
            "type": "meta_refreshed",
            "url": url,
            "updated": updated,
            "failed": failed,
        })
    except Exception as e:
        logger.error("_do_refresh_meta {}: {}", url, e)
@app.post("/api/mangas/force_redownload")
async def force_redownload(url: str):
    """Reset every chapter to 'pending' and enqueue the manga with resume disabled.

    Responds with 404 for an unknown URL and 400 while the manga is being
    downloaded.
    """
    db = StateDB()
    try:
        record = db.get_manga(url)
        if not record:
            raise HTTPException(status_code=404, detail="Манга не найдена")
        if record["status"] == "downloading" and url in active_tasks:
            raise HTTPException(status_code=400, detail="Сначала остановите загрузку")
        # Reset all chapters.
        timestamp = db.conn.execute("SELECT datetime('now')").fetchone()[0]
        db.conn.execute(
            "UPDATE chapters SET status='pending', pages_done=0, pages_total=0, updated_at=? WHERE manga_url=?",
            (timestamp, url)
        )
        db.conn.commit()
        # resume=False is passed on to the download job.
        fmt = record["format"]
        db.update_manga_status(url, "queued")
        await download_queue.put({"url": url, "fmt": fmt, "resume": False})
        await ws_manager.broadcast({"type": "manga_queued", "url": url, "format": fmt})
        return {"ok": True}
    finally:
        db.close()
@app.post("/api/mangas/stop")
async def stop_manga(url: str):
    """Stop the download of a manga; 404 when the URL is unknown."""
    db = StateDB()
    try:
        if not db.get_manga(url):
            raise HTTPException(status_code=404, detail="Манга не найдена")
        running = active_tasks.get(url)
        if not running or running.done():
            # Nothing is running for this URL: just mark it as stopped.
            db.update_manga_status(url, "stopped")
            await ws_manager.broadcast({"type": "manga_stopped", "url": url})
        else:
            # The worker updates the status once it sees the cancellation.
            running.cancel()
        return {"ok": True}
    finally:
        db.close()
@app.post("/api/mangas/resume")
async def resume_manga(url: str):
    """Put a manga back into the download queue.

    Responds with 404 for an unknown URL and 400 while the manga is being
    downloaded.
    """
    db = StateDB()
    try:
        record = db.get_manga(url)
        if not record:
            raise HTTPException(status_code=404, detail="Манга не найдена")
        already_running = record["status"] == "downloading" and url in active_tasks
        if already_running:
            raise HTTPException(status_code=400, detail="Манга уже загружается")
        fmt = record["format"]
        db.update_manga_status(url, "queued")
        await download_queue.put({"url": url, "fmt": fmt})
        await ws_manager.broadcast({"type": "manga_queued", "url": url, "format": fmt})
        return {"ok": True}
    finally:
        db.close()
@app.delete("/api/mangas")
async def delete_manga(url: str, delete_files: bool = False):
    """Delete a manga with its chapters and history, optionally with its files.

    Responds with 404 for an unknown URL and 400 while the manga is being
    downloaded. Returns ``{"ok": True, "deleted_size": <bytes removed>}``.
    """
    db = StateDB()
    try:
        manga = db.get_manga(url)
        if not manga:
            raise HTTPException(status_code=404, detail="Манга не найдена")
        if manga["status"] == "downloading" and url in active_tasks:
            raise HTTPException(status_code=400, detail="Нельзя удалить активную загрузку")
        deleted_size = 0
        if delete_files:
            title = manga.get("title") or ""
            safe_title = re.sub(r'[^\w\s\-]', '', title).strip().replace(" ", "_")[:80]
            if not safe_title:
                # An empty name resolves to OUTPUT_DIR itself; removing it
                # would wipe every downloaded manga.
                logger.warning("Удаление файлов пропущено: пустое имя папки для {}", url)
            else:
                manga_dir = OUTPUT_DIR / safe_title
                if manga_dir.is_dir():
                    deleted_size = _dir_size(manga_dir)
                    import shutil
                    shutil.rmtree(str(manga_dir))
                    logger.info("Удалена папка: {} ({} байт)", manga_dir, deleted_size)
        db.conn.execute("DELETE FROM chapters WHERE manga_url=?", (url,))
        db.conn.execute("DELETE FROM history WHERE manga_url=?", (url,))
        db.conn.execute("DELETE FROM mangas WHERE url=?", (url,))
        db.conn.commit()
        return {"ok": True, "deleted_size": deleted_size}
    finally:
        db.close()
@app.get("/api/stats")
async def global_stats():
    """Return library-wide counters: mangas per status, queue length, disk usage."""
    db = StateDB()
    try:
        mangas = db.get_all_mangas()
        total_size = _dir_size(OUTPUT_DIR)
        # Single pass over the rows instead of one scan per status.
        tally = dict.fromkeys(("done", "downloading", "queued", "failed", "stopped"), 0)
        for row in mangas:
            status = row["status"]
            if status in tally:
                tally[status] += 1
        return {
            "mangas_total": len(mangas),
            "mangas_done": tally["done"],
            "mangas_downloading": tally["downloading"],
            "mangas_queued": tally["queued"],
            "mangas_failed": tally["failed"],
            "mangas_stopped": tally["stopped"],
            "queue_size": download_queue.qsize(),
            "total_size_bytes": total_size,
            "total_size_human": _format_size(total_size),
        }
    finally:
        db.close()
# ── WebSocket ─────────────────────────────────
@app.websocket("/ws")
async def websocket_endpoint(ws: WebSocket):
    """Serve one WebSocket client.

    Sends a "snapshot" message with all enriched mangas, then answers every
    "ping" text frame with a "pong" message until the connection ends.
    """
    await ws_manager.connect(ws)
    try:
        # Initial state snapshot.
        db = StateDB()
        try:
            snapshot = [_enrich_manga(row, db) for row in db.get_all_mangas()]
            # 1-based position of each queued URL (the last occurrence wins).
            positions = {
                job["url"]: place
                for place, job in enumerate(download_queue._queue, start=1)  # type: ignore
            }
            for item in snapshot:
                if item["url"] in positions:
                    item["queue_position"] = positions[item["url"]]
            await ws.send_json({"type": "snapshot", "mangas": snapshot})
        finally:
            db.close()
        # Keep the connection open and answer pings.
        while True:
            if await ws.receive_text() == "ping":
                await ws.send_json({"type": "pong"})
    except Exception:
        # Covers WebSocketDisconnect as well as any other failure.
        ws_manager.disconnect(ws)
# ── Static files (frontend) ───────────────────
# Mounted at "/" only when the frontend directory is present.
if FRONTEND_DIR.exists():
    app.mount("/", StaticFiles(directory=str(FRONTEND_DIR), html=True), name="frontend")

View File

@@ -59,23 +59,26 @@ def cli(ctx, verbose):
help="Папка для сохранения", show_default=True)
@click.option("--resume/--no-resume", default=True,
help="Пропускать уже скачанные главы")
@click.option("--force", "-F", is_flag=True, default=False,
help="Игнорировать состояние и скачать заново, перезаписывая файлы")
@click.option("--concurrency", default=4, show_default=True,
help="Параллельных загрузок изображений")
@click.pass_context
def download(ctx, url, fmt, chapters, output, resume, concurrency):
def download(ctx, url, fmt, chapters, output, resume, force, concurrency):
"""Скачать мангу по URL страницы."""
asyncio.run(_download(
url=url,
fmt=fmt,
chapters_filter=chapters,
output_dir=Path(output),
resume=resume,
resume=resume and not force,
force=force,
concurrency=concurrency,
verbose=ctx.obj.get("verbose", False),
))
async def _download(url, fmt, chapters_filter, output_dir, resume, concurrency, verbose):
async def _download(url, fmt, chapters_filter, output_dir, resume, force, concurrency, verbose):
db = StateDB()
async with BrowserManager(headless=True) as bm:
@@ -106,8 +109,10 @@ async def _download(url, fmt, chapters_filter, output_dir, resume, concurrency,
for ch in chapters:
pbar.set_description(f"Глава {ch.number}: {ch.title[:30]}")
# Проверяем статус (resume)
if resume and db.chapter_status(ch.url) == "done":
# Проверяем статус (resume / force)
if force:
db.reset_chapter(ch.url)
elif resume and db.chapter_status(ch.url) == "done":
logger.info("Пропускаем (уже скачана): {}", ch.title)
pbar.update(1)
continue
@@ -116,7 +121,7 @@ async def _download(url, fmt, chapters_filter, output_dir, resume, concurrency,
bm=bm, ctx=ctx, ch=ch,
manga_url=url,
manga_dir=manga_dir, formats=formats,
concurrency=concurrency, db=db,
concurrency=concurrency, db=db, force=force,
)
pbar.update(1)
@@ -126,7 +131,7 @@ async def _download(url, fmt, chapters_filter, output_dir, resume, concurrency,
async def _process_chapter(bm, ctx, ch: Chapter, manga_url: str, manga_dir: Path,
formats: list, concurrency: int, db: StateDB):
formats: list, concurrency: int, db: StateDB, force: bool = False):
# Новая страница для каждой главы (чистый контекст)
ch_page = await ctx.new_page()
@@ -147,6 +152,10 @@ async def _process_chapter(bm, ctx, ch: Chapter, manga_url: str, manga_dir: Path
for fmt in formats:
out_file = manga_dir / f"{ch_name}.{fmt}"
# При --force удаляем старый файл перед перезаписью
if force and out_file.exists():
out_file.unlink()
logger.debug("Удалён старый файл: {}", out_file.name)
try:
export(image_paths, out_file, fmt, manga_dir.name, ch.title)
db.mark_done(ch.url, fmt, str(out_file))
@@ -243,3 +252,10 @@ if __name__ == "__main__":

View File

@@ -1,104 +1,278 @@
"""
Экспорт в CBZ, PDF, EPUB.
Экспорт в CBZ, PDF, EPUB с поддержкой метаданных для Komga.
"""
import zipfile
import xml.etree.ElementTree as ET
from dataclasses import dataclass
from pathlib import Path
from typing import Literal
from typing import Literal, Optional
from loguru import logger
ExportFormat = Literal["cbz", "pdf", "epub"]
@dataclass
class MangaMeta:
    """Manga and chapter metadata to embed into exported files."""
    series: str = ""  # Series name (title_ru)
    series_full: str = ""  # Full title
    chapter_title: str = ""  # Chapter title
    number: float = 0.0  # Chapter number
    volume: int = 0  # Volume
    chapters_total: int = 0  # Total chapters in the series (used for completed series)
    pub_status: str = "unknown"  # completed / ongoing / unknown
    source_url: str = ""  # Source URL
    language: str = "ru"
    summary: str = ""  # Series description / synopsis
    genre: str = ""  # Comma-separated genres (ComicInfo Genre)
    series_group: str = ""  # Group / collection (ComicInfo SeriesGroup)
def export(
image_paths: list[Path],
output_path: Path,
fmt: ExportFormat,
title: str = "Manga",
chapter: str = "",
meta: Optional[MangaMeta] = None,
):
# Строим meta из legacy-аргументов если не передан явно
if meta is None:
meta = MangaMeta(series=title, chapter_title=chapter)
output_path.parent.mkdir(parents=True, exist_ok=True)
logger.info("Экспортирую {} страниц → {} ({})", len(image_paths), output_path.name, fmt)
if fmt == "cbz":
_export_cbz(image_paths, output_path)
_export_cbz(image_paths, output_path, meta)
elif fmt == "pdf":
_export_pdf(image_paths, output_path)
_export_pdf(image_paths, output_path, meta)
elif fmt == "epub":
_export_epub(image_paths, output_path, title, chapter)
_export_epub(image_paths, output_path, meta)
else:
raise ValueError(f"Неизвестный формат: {fmt}")
logger.info("Сохранено: {}", output_path)
# ── CBZ ───────────────────────────────────────
# ── CBZ + ComicInfo.xml ───────────────────────
def _export_cbz(images: list[Path], out: Path):
def _make_comic_info(meta: MangaMeta) -> str:
"""Генерирует ComicInfo.xml по спецификации Anansi v2.1 (Komga-совместимый)."""
root = ET.Element("ComicInfo")
root.set("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
root.set("xsi:noNamespaceSchemaLocation",
"https://raw.githubusercontent.com/anansi-project/comicinfo/main/schema/v2.1/ComicInfo.xsd")
def add(tag: str, value):
if value is None:
return
s = str(value).strip()
if s:
ET.SubElement(root, tag).text = s
add("Series", meta.series)
add("Title", meta.chapter_title)
add("Summary", meta.summary)
# Номер главы: целое если без дроби, иначе float
if meta.number:
num_str = str(int(meta.number)) if meta.number == int(meta.number) else str(meta.number)
add("Number", num_str)
if meta.volume:
add("Volume", meta.volume)
# Count — только для завершённых серий
if meta.pub_status == "completed" and meta.chapters_total:
add("Count", meta.chapters_total)
add("Genre", meta.genre)
add("LanguageISO", meta.language)
# Manga = YesAndRightToLeft — стандартная японская манга
ET.SubElement(root, "Manga").text = "YesAndRightToLeft"
if meta.source_url:
add("Web", meta.source_url)
# SeriesGroup — Komga создаёт коллекцию с этим именем
if meta.series_group:
add("SeriesGroup", meta.series_group)
ET.indent(root, space=" ")
return '<?xml version="1.0" encoding="utf-8"?>\n' + ET.tostring(root, encoding="unicode")
def _export_cbz(images: list[Path], out: Path, meta: MangaMeta):
    """Write a CBZ archive: ComicInfo.xml first, then the pages in order.

    Pages are stored under zero-padded sequential names that keep each
    source file's suffix.
    """
    with zipfile.ZipFile(out, "w", compression=zipfile.ZIP_DEFLATED) as archive:
        # ComicInfo.xml goes to the archive root, ahead of the images.
        archive.writestr("ComicInfo.xml", _make_comic_info(meta))
        for index, page in enumerate(images):
            archive.write(page, f"{index:04d}{page.suffix}")
# ── PDF ───────────────────────────────────────
def _export_pdf(images: list[Path], out: Path):
def _export_pdf(images: list[Path], out: Path, meta: MangaMeta):
try:
import img2pdf
with open(out, "wb") as f:
f.write(img2pdf.convert([str(p) for p in images]))
pdf_bytes = img2pdf.convert([str(p) for p in images])
out.write_bytes(pdf_bytes)
except Exception as e:
logger.warning("img2pdf не сработал ({}), использую Pillow", e)
_export_pdf_pillow(images, out)
# Записываем метаданные поверх готового PDF через pypdf
_patch_pdf_meta(out, meta)
def _export_pdf_pillow(images: list[Path], out: Path):
from PIL import Image
pil_images = []
for p in images:
img = Image.open(p).convert("RGB")
pil_images.append(img)
pil_images = [Image.open(p).convert("RGB") for p in images]
if pil_images:
pil_images[0].save(
out,
save_all=True,
append_images=pil_images[1:],
format="PDF",
)
pil_images[0].save(out, save_all=True, append_images=pil_images[1:], format="PDF")
def _patch_pdf_meta(pdf_path: Path, meta: MangaMeta):
    """Rewrite the PDF in place with /Info fields and, when possible, XMP metadata.

    Best effort: a missing pypdf is logged at debug level, any other failure
    as a warning, and the file is then left as it was. A failure of the XMP
    step alone does not prevent the /Info fields from being written.
    """
    try:
        from pypdf import PdfReader, PdfWriter
        import io
        reader = PdfReader(str(pdf_path))
        writer = PdfWriter()
        writer.append(reader)
        ch_num = int(meta.number) if meta.number == int(meta.number) else meta.number
        full_title = (f"{meta.series} — Том {meta.volume}, Глава {ch_num}"
                      if meta.volume else f"{meta.series} — Глава {ch_num}")
        if meta.chapter_title:
            full_title += f": {meta.chapter_title}"
        # Standard PDF /Info fields.
        writer.add_metadata({
            "/Title": full_title,
            "/Subject": meta.series_full or meta.series,
            "/Creator": "Manga Downloader",
            "/Producer": "Manga Downloader",
        })
        # XMP metadata (Dublin Core + PDF). Isolated so that a failure here
        # does not discard the /Info fields set above.
        # NOTE(review): confirm that the installed pypdf's PdfWriter provides
        # add_metadata_xmp; if it does not, this step fails on every call.
        try:
            xmp = _build_xmp(meta, full_title)
            writer.add_metadata_xmp(xmp.encode("utf-8"))
        except Exception as e:
            logger.warning("Ошибка записи XMP-метаданных: {}", e)
        # Serialize fully in memory before touching the file on disk.
        buf = io.BytesIO()
        writer.write(buf)
        pdf_path.write_bytes(buf.getvalue())
    except ImportError:
        logger.debug("pypdf не установлен — PDF-метаданные пропущены")
    except Exception as e:
        logger.warning("Ошибка записи PDF-метаданных: {}", e)
def _build_xmp(meta: MangaMeta, full_title: str) -> str:
    """Return an XMP packet (Dublin Core + PDF namespaces) for the given metadata.

    All interpolated values are XML-escaped.
    """
    return f"""<?xpacket begin='\ufeff' id='W5M0MpCehiHzreSzNTczkc9d'?>
<x:xmpmeta xmlns:x='adobe:ns:meta/'>
<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
<rdf:Description rdf:about=''
xmlns:dc='http://purl.org/dc/elements/1.1/'
xmlns:pdf='http://ns.adobe.com/pdf/1.3/'
xmlns:xmp='http://ns.adobe.com/xap/1.0/'>
<dc:title><rdf:Alt><rdf:li xml:lang='x-default'>{_xe(full_title)}</rdf:li></rdf:Alt></dc:title>
<dc:description><rdf:Alt><rdf:li xml:lang='x-default'>{_xe(meta.series_full or meta.series)}</rdf:li></rdf:Alt></dc:description>
<dc:language><rdf:Bag><rdf:li>{_xe(str(meta.language))}</rdf:li></rdf:Bag></dc:language>
<dc:source>{_xe(meta.source_url)}</dc:source>
<pdf:Producer>Manga Downloader</pdf:Producer>
</rdf:Description>
</rdf:RDF>
</x:xmpmeta>
<?xpacket end='w'?>"""
def _xe(s: str) -> str:
"""Экранирование для XML."""
return (s.replace("&", "&amp;").replace("<", "&lt;")
.replace(">", "&gt;").replace('"', "&quot;"))
# ── EPUB ──────────────────────────────────────
def _export_epub(images: list[Path], out: Path, title: str, chapter: str):
def _export_epub(images: list[Path], out: Path, meta: MangaMeta):
from ebooklib import epub
from PIL import Image
import base64
ch_num = int(meta.number) if meta.number == int(meta.number) else meta.number
full_title = (f"{meta.series} — Том {meta.volume}, Глава {ch_num}"
if meta.volume else f"{meta.series} — Глава {ch_num}")
if meta.chapter_title:
full_title += f": {meta.chapter_title}"
book = epub.EpubBook()
book.set_identifier(f"manga-{title}-{chapter}".replace(" ", "-"))
book.set_title(f"{title}{chapter}" if chapter else title)
book.set_language("ru")
book.set_identifier(
f"manga-{meta.series}-v{meta.volume}-ch{meta.number}".replace(" ", "-")
)
book.set_title(full_title)
book.set_language(meta.language)
# Dublin Core — серия как subject
if meta.series:
book.add_metadata("DC", "subject", meta.series)
if meta.summary:
book.add_metadata("DC", "description", meta.summary)
elif meta.series_full:
book.add_metadata("DC", "description", meta.series_full)
if meta.source_url:
book.add_metadata("DC", "source", meta.source_url)
# Calibre-совместимые метаданные серии (читает Komga и большинство читалок)
book.add_metadata(None, "meta", "", {
"name": "calibre:series",
"content": meta.series,
})
book.add_metadata(None, "meta", "", {
"name": "calibre:series_index",
"content": str(float(meta.number)),
})
# EPUB3 belongs-to-collection (официальный стандарт, Komga ≥ 0.157)
book.add_metadata(None, "meta", meta.series, {
"property": "belongs-to-collection",
"id": "series-id",
})
book.add_metadata(None, "meta", "series", {
"refines": "#series-id",
"property": "collection-type",
})
book.add_metadata(None, "meta", str(float(meta.number)), {
"refines": "#series-id",
"property": "group-position",
})
# Если серия завершена — указываем общее количество томов
if meta.pub_status == "completed" and meta.chapters_total:
book.add_metadata("DC", "relation",
f"chapters_total:{meta.chapters_total}")
spine = ["nav"]
toc = []
for i, img_path in enumerate(images):
# Добавляем изображение в книгу
with open(img_path, "rb") as f:
img_data = f.read()
img_data = img_path.read_bytes()
img_name = f"images/page_{i:04d}{img_path.suffix}"
epub_img = epub.EpubImage()
epub_img.file_name = img_name
epub_img.media_type = _mime(img_path.suffix)
epub_img.content = img_data
book.add_item(epub_img)
# HTML-страница для каждого изображения
page_html = epub.EpubHtml(
title=f"Страница {i + 1}",
file_name=f"page_{i:04d}.xhtml",
lang="ru",
lang=meta.language,
)
page_html.content = (
f'<html><body style="margin:0;padding:0;">'
@@ -125,3 +299,110 @@ def _mime(ext: str) -> str:
".webp": "image/webp",
}.get(ext.lower(), "image/jpeg")
# ── Обновление метаданных в существующих файлах ──
def patch_meta(file_path: Path, meta: MangaMeta) -> bool:
    """Refresh the metadata of an existing output file without re-downloading.

    The handler is chosen by the lower-cased file suffix (.cbz / .pdf / .epub).

    Returns:
        True on success; False for an unknown suffix or when the handler
        raises (the error is logged, not propagated).
    """
    ext = file_path.suffix.lower()
    try:
        handlers = {
            ".cbz": _patch_cbz_meta,
            ".pdf": _patch_pdf_meta,
            ".epub": _patch_epub_meta,
        }
        handler = handlers.get(ext)
        if handler is None:
            logger.warning("patch_meta: неизвестный формат {}", ext)
            return False
        handler(file_path, meta)
    except Exception as exc:
        logger.error("patch_meta {}: {}", file_path.name, exc)
        return False
    return True
def _patch_cbz_meta(cbz_path: Path, meta: MangaMeta):
    """Replace (or add) ComicInfo.xml inside an existing CBZ archive.

    The archive is rebuilt into a temporary sibling file which is then moved
    over the original. If anything fails, the temporary file is removed and
    the exception is re-raised.
    """
    import shutil

    staging = cbz_path.with_suffix(".tmp.cbz")
    try:
        with zipfile.ZipFile(cbz_path, "r") as src:
            with zipfile.ZipFile(staging, "w", compression=zipfile.ZIP_DEFLATED) as dst:
                # The fresh ComicInfo.xml goes in first.
                dst.writestr("ComicInfo.xml", _make_comic_info(meta))
                # Copy everything else, dropping any previous ComicInfo.xml.
                for entry in src.infolist():
                    if entry.filename.lower() == "comicinfo.xml":
                        continue
                    dst.writestr(entry, src.read(entry.filename))
        shutil.move(str(staging), str(cbz_path))
    except Exception:
        if staging.exists():
            staging.unlink()
        raise
def _patch_epub_meta(epub_path: Path, meta: MangaMeta):
    """Rewrite an existing EPUB with refreshed OPF metadata.

    Every archive entry is copied into a temporary sibling file. The entry
    named by the ``full-path`` attribute in META-INF/container.xml (if found)
    is passed through ``_inject_opf_meta`` on the way. On success the
    temporary file is moved over *epub_path*; on any exception it is removed
    and the exception is re-raised.
    """
    import shutil
    import re as _re

    staging = epub_path.with_suffix(".tmp.epub")
    try:
        with zipfile.ZipFile(epub_path, "r") as src:
            with zipfile.ZipFile(staging, "w", compression=zipfile.ZIP_DEFLATED) as dst:
                # Locate the OPF package document inside the EPUB.
                package_doc = None
                if "META-INF/container.xml" in src.namelist():
                    container = src.read("META-INF/container.xml").decode("utf-8")
                    found = _re.search(r'full-path=["\']([^"\']+\.opf)["\']', container)
                    if found:
                        package_doc = found.group(1)

                # Entries are written in their original order using their
                # original ZipInfo objects.
                for entry in src.infolist():
                    payload = src.read(entry.filename)
                    if package_doc and entry.filename == package_doc:
                        payload = _inject_opf_meta(payload.decode("utf-8"), meta).encode("utf-8")
                    dst.writestr(entry, payload)
        shutil.move(str(staging), str(epub_path))
    except Exception:
        if staging.exists():
            staging.unlink()
        raise
def _inject_opf_meta(opf: str, meta: MangaMeta) -> str:
    """Insert or replace series metadata in an OPF document string.

    Removes existing ``calibre:series*`` and EPUB3 ``belongs-to-collection``
    metas (plus their ``collection-type`` / ``group-position`` refinements and
    anything refining ``#series-id``), then appends fresh ones right before
    ``</metadata>``.

    Args:
        opf: Full text of the OPF package document.
        meta: Chapter metadata; ``series`` and ``number`` are read.

    Returns:
        The updated OPF text. If the document has no literal ``</metadata>``
        tag, only the removal step takes effect.
    """
    import re as _re

    # Match a whole stale <meta> element in either form:
    #   <meta name="calibre:series" content="..."/>            (self-closing)
    #   <meta property="belongs-to-collection" ...>text</meta>  (paired)
    # The paired form must be removed together with its text and closing tag,
    # otherwise "text</meta>" is left behind and the OPF is no longer
    # well-formed. '/' is allowed inside attribute values, so series names
    # such as "Fate/Zero" are matched too.
    stale_meta = _re.compile(
        r'<meta\b[^>]*?'
        r'(?:calibre:series|belongs-to-collection|collection-type|group-position'
        r'|refines=["\']#series-id["\'])'
        r'[^>]*?(?:/>|>(?:[^<]*</meta\s*>)?)',
        _re.IGNORECASE,
    )
    opf = stale_meta.sub('', opf)

    series = _xe(meta.series)
    position = float(meta.number)
    new_meta = (
        f'\n <meta name="calibre:series" content="{series}"/>'
        f'\n <meta name="calibre:series_index" content="{position}"/>'
        f'\n <meta property="belongs-to-collection" id="series-id">{series}</meta>'
        f'\n <meta refines="#series-id" property="collection-type">series</meta>'
        f'\n <meta refines="#series-id" property="group-position">{position}</meta>'
    )
    return opf.replace("</metadata>", new_meta + "\n </metadata>")

View File

@@ -3,6 +3,7 @@
"""
import asyncio
import re
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional
@@ -30,6 +31,11 @@ class MangaInfo:
title: str
url: str
chapters: list[Chapter] = field(default_factory=list)
pub_status: str = "unknown" # completed / ongoing / unknown
title_ru: str = "" # Только русский тайтл (для папки)
title_full: str = "" # Полный тайтл как на странице
description: str = "" # Описание/синопсис
genres: list[str] = field(default_factory=list) # Жанры
# ──────────────────────────────────────────────
@@ -43,9 +49,21 @@ async def get_manga_info(page: Page, url: str) -> Optional[MangaInfo]:
if not ok:
return None
title = await page.title()
title = re.sub(r"\s*[-|].*$", "", title).strip()
logger.info("Манга: {}", title)
title_full = await page.title()
title_full = re.sub(r"\s*[-|].*$", "", title_full).strip()
# Пробуем взять русский тайтл напрямую из DOM
title_ru = await _extract_ru_title_from_dom(page)
if not title_ru:
title_ru = _parse_ru_title(title_full)
logger.info("Манга: {} | ru: {}", title_full, title_ru)
pub_status = await _extract_pub_status(page)
logger.info("Статус выпуска: {}", pub_status)
description = await _extract_description(page)
genres = await _extract_genres(page)
await _expand_chapters(page)
chapters = await _extract_chapters(page)
@@ -53,7 +71,162 @@ async def get_manga_info(page: Page, url: str) -> Optional[MangaInfo]:
chapters = await _extract_chapters_alt(page)
logger.info("Найдено глав: {}", len(chapters))
return MangaInfo(title=title, url=url, chapters=chapters)
return MangaInfo(
title=title_ru or title_full,
url=url,
chapters=chapters,
pub_status=pub_status,
title_ru=title_ru,
title_full=title_full,
description=description,
genres=genres,
)
async def _extract_ru_title_from_dom(page: Page) -> str:
    """Look up the manga's Russian title in the page DOM.

    Runs a script in the page that returns the trimmed text of the first
    element with non-empty text among a fixed list of selectors, or '' when
    none match. Any exception yields ''.
    """
    script = """
() => {
// readmanga: основной тайтл в span.name внутри .names
const selectors = [
'.names .name',
'h1.manga-title',
'h1 .name',
'.name-block .name',
];
for (const sel of selectors) {
const el = document.querySelector(sel);
if (el && el.textContent.trim()) return el.textContent.trim();
}
return '';
}
"""
    try:
        found = await page.evaluate(script)
        return (found or "").strip()
    except Exception:
        return ""
def _parse_ru_title(full_title: str) -> str:
"""Извлекает русский тайтл из полной строки тайтла.
Примеры:
'Манга Режим — АД. Хардкорный геймер ... (Hellmode)''Режим — АД. Хардкорный геймер ...'
'Манга Магическая битва (Sorcery Fight) Гэгэ онлайн''Магическая битва'
'Авантюрист Monster Eater Adventurer''Авантюрист'
"""
t = full_title.strip()
# Убираем префикс "Манга "
t = re.sub(r'^Манга\s+', '', t).strip()
# Берём только до первой скобки (начало английского тайтла)
t = re.split(r'\s*[\(\[]', t)[0].strip()
# Убираем суффикс " онлайн"
t = re.sub(r'\s+онлайн\s*$', '', t, flags=re.IGNORECASE).strip()
# Обрезаем хвост из латинских слов.
# Правило: стоп только на токене содержащем латиницу (a-zA-Z).
# Пунктуация между кириллическими словами (—, , ., :, !) — сохраняем.
words = t.split()
result = []
for w in words:
if re.search(r'[а-яёА-ЯЁ]', w):
result.append(w)
elif re.search(r'[a-zA-Z]', w):
# Первое латинское слово после кириллических — обрезаем здесь
if result:
break
else:
# Чисто пунктуационный токен (—, , ., :, …)
# Добавляем только если уже есть кириллические слова (связка внутри)
if result:
result.append(w)
# Убираем висячую пунктуацию в конце (если последнее слово — не кириллица)
while result and not re.search(r'[а-яёА-ЯЁ]', result[-1]):
result.pop()
if result:
t = ' '.join(result)
return t
async def _extract_pub_status(page: Page) -> str:
    """Detect the publication status: 'completed', 'ongoing' or 'unknown'.

    Runs a script in the page that inspects a list of status selectors and
    then falls back to scanning the whole body text. Any exception, or an
    empty result, yields 'unknown'.
    """
    script = """
() => {
// readmanga хранит статус в .elem_status .value или похожих блоках
const statusSelectors = [
'.elem_status .value',
'.manga-info .status',
'[class*="status"] .value',
'.property .status',
];
for (const sel of statusSelectors) {
const el = document.querySelector(sel);
if (el) {
const t = el.textContent.toLowerCase();
if (t.includes('завершён') || t.includes('завершен') || t.includes('complete')) return 'completed';
if (t.includes('продолжает') || t.includes('ongoing')) return 'ongoing';
}
}
// Fallback: сканируем весь текст страницы
const bodyText = document.body ? document.body.innerText.toLowerCase() : '';
if (bodyText.includes('выпуск завершён') || bodyText.includes('выпуск завершен')) return 'completed';
if (bodyText.includes('продолжается')) return 'ongoing';
return 'unknown';
}
"""
    try:
        status = await page.evaluate(script)
        return status or "unknown"
    except Exception:
        return "unknown"
async def _extract_description(page: Page) -> str:
    """Extract the manga description/synopsis from the page.

    Returns the trimmed text of the first selector with non-empty text,
    truncated to 2000 characters; '' when nothing matches or on any exception.
    """
    script = """
() => {
const selectors = [
'.manga-description',
'.elem_descr .value',
'#tab-description .description-text',
'.description',
'[itemprop="description"]',
];
for (const sel of selectors) {
const el = document.querySelector(sel);
if (el && el.textContent.trim()) return el.textContent.trim();
}
return '';
}
"""
    try:
        text = await page.evaluate(script)
        cleaned = (text or "").strip()
        return cleaned[:2000]  # cap at 2000 characters
    except Exception:
        return ""
async def _extract_genres(page: Page) -> list[str]:
    """Extract the list of genres from the page.

    Uses the first selector that matches at least one element and returns the
    non-empty trimmed texts of its matches; [] when nothing matches or on any
    exception.
    """
    script = """
() => {
const selectors = [
'.elem_genre .value a',
'.genres a',
'[itemprop="genre"]',
'.genre-list a',
];
for (const sel of selectors) {
const els = document.querySelectorAll(sel);
if (els.length) return Array.from(els).map(e => e.textContent.trim()).filter(Boolean);
}
return [];
}
"""
    try:
        genres = await page.evaluate(script)
        return genres or []
    except Exception:
        return []
async def _navigate(page: Page, url: str, retries: int = 3,
@@ -218,6 +391,7 @@ async def get_chapter_images_and_download(
chapter_url: str,
dest_dir: Path,
manga_url: str | None = None,
on_page: object = None,
) -> list[Path]:
"""
1. Открывает страницу главы (устанавливает DDoS-Guard cookies для CDN).
@@ -225,8 +399,11 @@ async def get_chapter_images_and_download(
3. Перехватывает img-запросы через page.route() + route.fetch()
(браузерный стек — правильные Sec-Fetch-* заголовки, cookies).
4. Пролистывает читалку клавишей ArrowRight чтобы загрузить все страницы.
5. Retry для страниц с timeout через JS fetch.
"""
logger.info("Загружаем главу: {}", chapter_url)
t_start = time.monotonic()
ch_id = chapter_url.split("/")[-1] # короткий идентификатор для логов
logger.info("[{}] Загружаем главу: {}", ch_id, chapter_url)
from urllib.parse import urlparse
parsed = urlparse(chapter_url)
@@ -240,22 +417,20 @@ async def get_chapter_images_and_download(
def _base(u: str) -> str:
return u.split("?")[0]
# CDN домены которые хостят изображения манги (не статику сайта)
CDN_RE = re.compile(r"(?<!\bstatic\b)(^|[./])one-way\.work|staticfa\.|cdnmanga|reimg", re.I)
IMG_RE = re.compile(r"\.(jpg|jpeg|png|webp)(\?|$)", re.I)
# Баннеры/рекламные изображения — игнорируем без логирования
BANNER_RE = re.compile(r"466_p\.|570_p\.|banner|advert", re.I)
# Более точный фильтр: только image-хосты, не resrmr/статика
def _is_manga_image(url: str) -> bool:
base = _base(url)
if not IMG_RE.search(base):
if not re.search(r"\.(jpg|jpeg|png|webp)(\?|$)", base, re.I):
return False
# Исключаем статику сайта (логотипы, иконки, шрифты)
if "resrmr." in url or "/static/" in url:
return False
# Принимаем image CDN
return bool(re.search(r"one-way\.work|staticfa\.|rm\.one-way|cdnmanga|reimg", url, re.I))
captured: dict[str, bytes] = {} # base_url → bytes
captured: dict[str, bytes] = {} # base_url → bytes
route_errors: dict[str, str] = {} # base_url → текст ошибки
route_statuses: dict[str, int] = {} # base_url → HTTP status (не 200/206)
lock = asyncio.Lock()
async def route_handler(route, request):
@@ -264,23 +439,47 @@ async def get_chapter_images_and_download(
if not _is_manga_image(url):
await route.continue_()
return
# Уже есть — пропускаем
if BANNER_RE.search(base):
await route.continue_()
return
async with lock:
already = base in captured
if already:
await route.continue_()
return
fname = base.split("/")[-1]
try:
response = await route.fetch()
status = response.status
body = await response.body()
if body and len(body) > 500 and response.status in (200, 206):
if body and len(body) > 500 and status in (200, 206):
async with lock:
if base not in captured:
captured[base] = body
logger.debug("{}: {} байт", base.split("/")[-1], len(body))
logger.debug("[{}] {}: {} байт", ch_id, fname, len(body))
if on_page:
try:
asyncio.ensure_future(on_page(0, 0))
except Exception:
pass
else:
async with lock:
route_statuses[base] = status
if status not in (200, 206):
logger.warning("[{}] CDN HTTP {} для '{}' | {}",
ch_id, status, fname, base[-70:])
else:
logger.warning("[{}] Слишком мал ответ ({} байт) для '{}'",
ch_id, len(body), fname)
await route.fulfill(response=response)
except Exception as e:
logger.debug("route.fetch {}: {}", base[-40:], e)
err = str(e)
async with lock:
route_errors[base] = err
is_timeout = "timeout" in err.lower()
level = logger.warning if is_timeout else logger.warning
level("[{}] route.fetch {} '{}': {}",
ch_id, "timeout" if is_timeout else "ошибка", fname, err[:150])
try:
await route.continue_()
except Exception:
@@ -292,7 +491,7 @@ async def get_chapter_images_and_download(
ok = await _navigate(page, load_url, referer=referer)
if not ok:
await page.unroute("**/*", route_handler)
logger.error("Не удалось открыть главу: {}", chapter_url)
logger.error("[{}] Не удалось открыть главу после всех retry: {}", ch_id, chapter_url)
return []
# 2. Ждём readerInit
@@ -302,63 +501,165 @@ async def get_chapter_images_and_download(
".some(s => s.textContent.includes('readerInit'))",
timeout=15_000,
)
except Exception:
logger.debug("readerInit не появился за 15с")
except Exception as e:
logger.warning("[{}] readerInit не появился за 15с ({}). "
"Продолжаем через DOM-fallback.", ch_id, str(e)[:80])
# 3. Извлекаем список URL
image_urls = await _extract_images_from_js(page)
if not image_urls:
logger.debug("[{}] JS readerInit не дал URL, пробуем DOM-парсинг", ch_id)
image_urls = await _extract_images_from_dom(page)
if not image_urls:
await page.unroute("**/*", route_handler)
logger.error("Список изображений пуст: {}", chapter_url)
try:
page_info = await page.evaluate("() => document.title + ' | ' + location.href")
except Exception:
page_info = "?"
logger.error("[{}] Список изображений пуст. Текущая страница: {}", ch_id, page_info)
return []
logger.info("Найдено изображений: {}", len(image_urls))
logger.info("[{}] Найдено изображений: {}", ch_id, len(image_urls))
url_to_idx = {_base(u): i for i, u in enumerate(image_urls)}
filename_to_idx = {_base(u).split("/")[-1]: i for i, u in enumerate(image_urls)}
total = len(image_urls)
# 4. Пролистываем читалку — reader грузит страницы по мере листания
def _count_matched() -> int:
count = 0
for base_url in captured:
if base_url in url_to_idx or base_url.split("/")[-1] in filename_to_idx:
count += 1
return count
# 4. Пролистываем читалку
await asyncio.sleep(1)
for i in range(total + 10):
async with lock:
done = len(captured)
stall_count = 0
prev_done = -1
for i in range(total + 20):
done = _count_matched()
if done >= total:
break
try:
await page.keyboard.press("ArrowRight")
await asyncio.sleep(0.5)
except Exception:
except Exception as e:
logger.warning("[{}] Ошибка листания на шаге {}: {}", ch_id, i + 1, e)
break
if i % 20 == 19:
async with lock:
done = len(captured)
logger.debug("Пролистано {}, загружено: {}/{}", i + 1, done, total)
done = _count_matched()
logger.debug("[{}] Пролистано {}, загружено: {}/{}", ch_id, i + 1, done, total)
if done == prev_done:
stall_count += 1
if stall_count >= 3:
logger.warning("[{}] Прогресс завис ({}/{}) после {} листаний — прерываем",
ch_id, done, total, i + 1)
break
else:
stall_count = 0
prev_done = done
# Финальное ожидание
await asyncio.sleep(3)
# 5. Retry для страниц с timeout через браузерный JS fetch
async with lock:
timeout_bases = [u for u, e in route_errors.items()
if "timeout" in e.lower() and u not in captured]
if timeout_bases:
logger.info("[{}] Retry {} страниц с timeout через JS fetch...",
ch_id, len(timeout_bases))
for retry_base in timeout_bases:
if retry_base in captured:
continue
fname = retry_base.split("/")[-1]
try:
data_b64 = await page.evaluate("""async (url) => {
try {
const r = await fetch(url, {credentials: 'include'});
if (!r.ok) return null;
const buf = await r.arrayBuffer();
const bytes = new Uint8Array(buf);
let bin = '';
for (let b of bytes) bin += String.fromCharCode(b);
return btoa(bin);
} catch(e) { return null; }
}""", retry_base)
if data_b64:
import base64
body = base64.b64decode(data_b64)
if len(body) > 500:
async with lock:
captured[retry_base] = body
logger.info("[{}] Retry OK: {} ({} байт)", ch_id, fname, len(body))
else:
logger.warning("[{}] Retry вернул {} байт для '{}' — игнорируем",
ch_id, len(body), fname)
else:
logger.warning("[{}] Retry вернул null для '{}' | {}",
ch_id, fname, retry_base[-70:])
except Exception as e2:
logger.warning("[{}] Retry JS ошибка для '{}': {}", ch_id, fname, e2)
await page.unroute("**/*", route_handler)
async with lock:
done = len(captured)
logger.info("Перехвачено: {}/{}", done, total)
done = _count_matched()
elapsed = time.monotonic() - t_start
logger.info("[{}] Перехвачено: {}/{} за {:.1f}с", ch_id, done, total, elapsed)
# 6. Сохраняем в правильном порядке
filename_to_idx = {_base(u).split("/")[-1]: i for i, u in enumerate(image_urls)}
# 5. Сохраняем в правильном порядке
paths: dict[int, Path] = {}
unmatched_other: list[str] = []
for base_url, body in captured.items():
if base_url not in url_to_idx:
idx = url_to_idx.get(base_url)
if idx is None:
fname = base_url.split("/")[-1]
idx = filename_to_idx.get(fname)
if idx is None:
if not BANNER_RE.search(base_url):
unmatched_other.append(base_url.split("/")[-1])
continue
idx = url_to_idx[base_url]
ext = _get_ext(base_url)
p = dest_dir / f"{idx:04d}{ext}"
p.write_bytes(body)
paths[idx] = p
missing = total - len(paths)
if missing:
logger.warning("Не загружено страниц: {}", missing)
if unmatched_other:
logger.debug("[{}] Перехвачено, но не совпало с readerInit ({}): {}",
ch_id, len(unmatched_other), unmatched_other)
# 7. Итоговый отчёт по пропущенным страницам
missing_idxs = [i for i in range(total) if i not in paths]
if missing_idxs:
missing_files = [_base(image_urls[i]).split("/")[-1] for i in missing_idxs]
missing_full = [_base(image_urls[i]) for i in missing_idxs]
timeout_miss = [missing_files[j] for j, i in enumerate(missing_idxs)
if missing_full[j] in route_errors
and "timeout" in route_errors[missing_full[j]].lower()]
http_miss = [f"{missing_files[j]}(HTTP {route_statuses.get(missing_full[j], '?')})"
for j, i in enumerate(missing_idxs)
if missing_full[j] in route_statuses]
unrcv = [missing_files[j] for j, i in enumerate(missing_idxs)
if missing_full[j] not in route_errors
and missing_full[j] not in route_statuses]
reasons = []
if timeout_miss:
reasons.append(f"timeout×{len(timeout_miss)}: {timeout_miss}")
if http_miss:
reasons.append(f"HTTP-err×{len(http_miss)}: {http_miss}")
if unrcv:
reasons.append(f"не_перехвачено×{len(unrcv)}: {unrcv}")
logger.warning(
"[{}] Пропущено {}/{} стр. | №: {} | причины: {}",
ch_id, len(missing_idxs), total,
[i + 1 for i in missing_idxs],
" | ".join(reasons) if reasons else "неизвестно",
)
logger.debug("[{}] Полные URL пропущенных: {}", ch_id, missing_full)
return [paths[i] for i in sorted(paths.keys())]

View File

@@ -13,10 +13,31 @@ DB_PATH = Path("/app/state/progress.db")
class StateDB:
def __init__(self, db_path: Path = DB_PATH):
    """Open (creating if needed) the SQLite state database at *db_path*.

    The parent directory is created when missing, rows are returned as
    ``sqlite3.Row`` and the schema is created/migrated via ``_init``.
    """
    db_path.parent.mkdir(parents=True, exist_ok=True)
    # Open exactly one connection. check_same_thread=False lifts sqlite3's
    # restriction that a connection may only be used by its creating thread.
    self.conn = sqlite3.connect(str(db_path), check_same_thread=False)
    self.conn.row_factory = sqlite3.Row
    self._init()
def _init(self):
self.conn.execute("""
CREATE TABLE IF NOT EXISTS mangas (
id INTEGER PRIMARY KEY AUTOINCREMENT,
url TEXT UNIQUE,
title TEXT,
title_ru TEXT,
title_full TEXT,
pub_status TEXT DEFAULT 'unknown',
auto_update INTEGER DEFAULT 0,
last_checked_at TEXT,
status TEXT DEFAULT 'queued',
format TEXT DEFAULT 'cbz',
chapters_total INTEGER DEFAULT 0,
chapters_done INTEGER DEFAULT 0,
added_at TEXT,
updated_at TEXT,
started_at TEXT,
finished_at TEXT
)
""")
self.conn.execute("""
CREATE TABLE IF NOT EXISTS chapters (
id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -26,14 +47,137 @@ class StateDB:
number REAL,
volume INTEGER,
status TEXT DEFAULT 'pending',
pages_total INTEGER DEFAULT 0,
pages_done INTEGER DEFAULT 0,
output_cbz TEXT,
output_pdf TEXT,
output_epub TEXT,
updated_at TEXT
)
""")
self.conn.execute("""
CREATE TABLE IF NOT EXISTS history (
id INTEGER PRIMARY KEY AUTOINCREMENT,
manga_url TEXT NOT NULL,
event_type TEXT NOT NULL,
chapter_url TEXT,
chapter_title TEXT,
chapter_number REAL,
volume INTEGER,
details TEXT,
created_at TEXT
)
""")
# Migrate old DB: add missing columns
migrations = [
("chapters", "pages_total", "INTEGER DEFAULT 0"),
("chapters", "pages_done", "INTEGER DEFAULT 0"),
("mangas", "title_ru", "TEXT"),
("mangas", "title_full", "TEXT"),
("mangas", "pub_status", "TEXT DEFAULT 'unknown'"),
("mangas", "auto_update", "INTEGER DEFAULT 0"),
("mangas", "last_checked_at", "TEXT"),
("mangas", "started_at", "TEXT"),
("mangas", "finished_at", "TEXT"),
]
for table, col, typedef in migrations:
try:
self.conn.execute(f"ALTER TABLE {table} ADD COLUMN {col} {typedef}")
except Exception:
pass
self.conn.commit()
# ── Mangas ────────────────────────────────────
def add_manga(self, url: str, fmt: str = "cbz") -> bool:
    """Queue a manga for download.

    Returns:
        True if a new row was inserted, False if *url* is already present.
    """
    existing = self.conn.execute(
        "SELECT id FROM mangas WHERE url=?", (url,)
    ).fetchone()
    if existing is not None:
        return False

    insert_sql = """
INSERT INTO mangas (url, format, status, added_at, updated_at)
VALUES (?, ?, 'queued', ?, ?)
"""
    self.conn.execute(insert_sql, (url, fmt, _now(), _now()))
    self.conn.commit()
    return True
def update_manga_info(self, url: str, title: str, chapters_total: int,
                      title_ru: str = "", title_full: str = "",
                      pub_status: str = "unknown"):
    """Store scraped titles, publication status and chapter count for *url*."""
    sql = """
UPDATE mangas SET title=?, title_ru=?, title_full=?, pub_status=?,
chapters_total=?, updated_at=? WHERE url=?
"""
    params = (title, title_ru, title_full, pub_status, chapters_total, _now(), url)
    self.conn.execute(sql, params)
    self.conn.commit()
def set_auto_update(self, url: str, enabled: bool):
    """Turn auto-update on or off for *url* (stored as 1/0)."""
    flag = 1 if enabled else 0
    sql = """
UPDATE mangas SET auto_update=?, updated_at=? WHERE url=?
"""
    self.conn.execute(sql, (flag, _now(), url))
    self.conn.commit()
def set_last_checked(self, url: str):
    """Stamp *url* with the current time as its last update check."""
    sql = """
UPDATE mangas SET last_checked_at=?, updated_at=? WHERE url=?
"""
    params = (_now(), _now(), url)
    self.conn.execute(sql, params)
    self.conn.commit()
def update_manga_status(self, url: str, status: str):
    """Set the download status of *url* and bump ``updated_at``."""
    sql = """
UPDATE mangas SET status=?, updated_at=? WHERE url=?
"""
    self.conn.execute(sql, (status, _now(), url))
    self.conn.commit()
def mark_started(self, url: str) -> str:
    """Record the download start time for *url* and clear ``finished_at``.

    Returns:
        The timestamp that was written.
    """
    started = _now()
    sql = """
UPDATE mangas SET started_at=?, finished_at=NULL, updated_at=? WHERE url=?
"""
    self.conn.execute(sql, (started, started, url))
    self.conn.commit()
    return started
def mark_finished(self, url: str) -> str:
    """Record the download finish time for *url*.

    Returns:
        The timestamp that was written.
    """
    finished = _now()
    sql = """
UPDATE mangas SET finished_at=?, updated_at=? WHERE url=?
"""
    self.conn.execute(sql, (finished, finished, url))
    self.conn.commit()
    return finished
def sync_chapters_done(self, url: str):
    """Recompute ``chapters_done`` for *url* from the chapters table.

    Returns:
        The number of chapters of this manga whose status is 'done'.
    """
    row = self.conn.execute(
        "SELECT COUNT(*) FROM chapters WHERE manga_url=? AND status='done'", (url,)
    ).fetchone()
    done = row[0]
    self.conn.execute(
        "UPDATE mangas SET chapters_done=?, updated_at=? WHERE url=?",
        (done, _now(), url),
    )
    self.conn.commit()
    return done
def increment_manga_chapters_done(self, url: str):
    """Do nothing; retained so existing callers keep working."""
    # Kept for compatibility, but not used by the worker.
    pass
def get_manga(self, url: str) -> Optional[dict]:
    """Return the mangas row for *url* as a dict, or None if absent."""
    row = self.conn.execute("SELECT * FROM mangas WHERE url=?", (url,)).fetchone()
    if not row:
        return None
    return dict(row)
def get_all_mangas(self) -> list[dict]:
    """Return every mangas row as a dict, most recently added first."""
    rows = self.conn.execute("SELECT * FROM mangas ORDER BY added_at DESC").fetchall()
    return list(map(dict, rows))
def get_manga_format(self, url: str) -> str:
    """Return the stored export format for *url*, or 'cbz' if it is unknown."""
    row = self.conn.execute("SELECT format FROM mangas WHERE url=?", (url,)).fetchone()
    if not row:
        return "cbz"
    return row["format"]
# ── Chapters ──────────────────────────────────
def upsert_chapter(self, manga_url: str, chapter_url: str,
title: str = "", number: float = 0, volume: int = 0):
self.conn.execute("""
@@ -46,6 +190,14 @@ class StateDB:
""", (manga_url, chapter_url, title, number, volume, _now()))
self.conn.commit()
def reset_chapter(self, chapter_url: str):
    """Return a chapter to 'pending', zeroing page counters and clearing outputs."""
    sql = """
UPDATE chapters SET status='pending', pages_total=0, pages_done=0,
output_cbz=NULL, output_pdf=NULL, output_epub=NULL, updated_at=?
WHERE chapter_url=?
"""
    self.conn.execute(sql, (_now(), chapter_url))
    self.conn.commit()
def mark_done(self, chapter_url: str, fmt: str, output_path: str):
col = f"output_{fmt}"
self.conn.execute(f"""
@@ -60,6 +212,12 @@ class StateDB:
""", (_now(), chapter_url))
self.conn.commit()
def update_chapter_pages(self, chapter_url: str, pages_total: int, pages_done: int):
    """Store the page progress counters for a chapter."""
    sql = """
UPDATE chapters SET pages_total=?, pages_done=?, updated_at=? WHERE chapter_url=?
"""
    params = (pages_total, pages_done, _now(), chapter_url)
    self.conn.execute(sql, params)
    self.conn.commit()
def get_pending(self, manga_url: str) -> list[dict]:
cur = self.conn.execute("""
SELECT chapter_url, title, number, volume
@@ -67,21 +225,64 @@ class StateDB:
WHERE manga_url=? AND status != 'done'
ORDER BY volume, number
""", (manga_url,))
cols = [d[0] for d in cur.description]
return [dict(zip(cols, row)) for row in cur.fetchall()]
return [dict(r) for r in cur.fetchall()]
def get_all_chapters(self, manga_url: str) -> list[dict]:
    """Return all chapters of *manga_url* as dicts, ordered by volume then number."""
    cur = self.conn.execute("""
SELECT * FROM chapters WHERE manga_url=? ORDER BY volume, number
""", (manga_url,))
    # Rows come back as sqlite3.Row, so dict(row) yields column-name keys.
    return [dict(r) for r in cur.fetchall()]
def chapter_status(self, chapter_url: str) -> Optional[str]:
    """Return the stored status of a chapter, or None if it is not in the DB."""
    cur = self.conn.execute(
        "SELECT status FROM chapters WHERE chapter_url=?", (chapter_url,))
    row = cur.fetchone()
    # Single return, using name-based access like the other accessors.
    return row["status"] if row else None
def get_all(self, manga_url: str) -> list[dict]:
    """Alias for ``get_all_chapters``."""
    chapters = self.get_all_chapters(manga_url)
    return chapters
# ── History ───────────────────────────────────
def add_history(self, manga_url: str, event_type: str,
                chapter_url: str = "", chapter_title: str = "",
                chapter_number: float = 0, volume: int = 0,
                details: str = ""):
    """Append an event to the history table.

    event_type: downloaded | auto_downloaded | new_chapter_found |
    check_started | check_done
    """
    sql = """
INSERT INTO history
(manga_url, event_type, chapter_url, chapter_title, chapter_number,
volume, details, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
"""
    record = (manga_url, event_type, chapter_url, chapter_title,
              chapter_number, volume, details, _now())
    self.conn.execute(sql, record)
    self.conn.commit()
def get_history(self, limit: int = 200, manga_url: str = "") -> list[dict]:
    """Return up to *limit* history events, newest first.

    Each event is joined with its manga's ``title`` (as ``manga_title``) and
    ``title_ru``. A non-empty *manga_url* restricts the result to that manga.
    """
    if manga_url:
        sql = """
SELECT h.*, m.title as manga_title, m.title_ru
FROM history h LEFT JOIN mangas m ON h.manga_url = m.url
WHERE h.manga_url=? ORDER BY h.created_at DESC LIMIT ?
"""
        params = (manga_url, limit)
    else:
        sql = """
SELECT h.*, m.title as manga_title, m.title_ru
FROM history h LEFT JOIN mangas m ON h.manga_url = m.url
ORDER BY h.created_at DESC LIMIT ?
"""
        params = (limit,)
    rows = self.conn.execute(sql, params).fetchall()
    return [dict(row) for row in rows]
def get_autos(self) -> list[dict]:
    """Return mangas with auto-update enabled that are not currently downloading."""
    sql = """
SELECT * FROM mangas
WHERE auto_update=1 AND status NOT IN ('downloading')
"""
    rows = self.conn.execute(sql).fetchall()
    return list(map(dict, rows))
def close(self):
    """Close the underlying SQLite connection."""
    connection = self.conn
    connection.close()

380
src/worker.py Normal file
View File

@@ -0,0 +1,380 @@
"""
Воркер скачивания манги с поддержкой событий прогресса.
"""
import asyncio
import os
import re
import tempfile
from pathlib import Path
from typing import Callable, Optional
from loguru import logger
from .browser import BrowserManager
from .scraper import get_manga_info, get_chapter_images_and_download, Chapter
from .exporter import export, MangaMeta
from .state import StateDB
OUTPUT_DIR = Path("/app/output")
# Читаем из переменных окружения; можно переопределить в docker-compose
CHAPTER_CONCURRENCY = int(os.getenv("CHAPTER_CONCURRENCY", "3"))
def _safe_name(s: str) -> str:
return re.sub(r'[^\w\s\-]', '', s).strip().replace(" ", "_")[:80]
def _safe_chapter_name(ch: Chapter) -> str:
    """Build a sortable chapter file stem such as 'v01_ch0012.5'.

    The 'vNN_' prefix is included only when the chapter has a truthy volume;
    the chapter number is zero-padded to width 6 with one decimal place.
    """
    prefix = ""
    if ch.volume:
        prefix = f"v{ch.volume:02d}_"
    return prefix + f"ch{ch.number:06.1f}"
async def download_manga(
    url: str,
    fmt: str = "cbz",
    output_dir: Path = OUTPUT_DIR,
    resume: bool = True,
    is_update: bool = False,
    on_event: Optional[Callable] = None,
    chapter_concurrency: int = CHAPTER_CONCURRENCY,
):
    """Download a manga, processing up to ``chapter_concurrency`` chapters at once.

    Args:
        url: Manga page URL; also the key passed to every StateDB call made here.
        fmt: Export format handed to ``export``; ``"all"`` expands to cbz, pdf
            and epub.
        output_dir: Root directory; files are written to a per-manga subfolder
            named after the sanitized title.
        resume: When true, chapters whose stored status is ``"done"`` are
            skipped instead of downloaded again.
        is_update: Selects the history event type (``"auto_downloaded"``
            instead of ``"downloaded"``).
        on_event: Optional async callback awaited with every progress event
            dict. Errors raised by it are logged at debug level and ignored.
        chapter_concurrency: Maximum number of chapters in flight. Values
            below 1 are treated as 1.

    Returns:
        None. Exceptions other than cancellation are caught, recorded as a
        ``"failed"`` manga status and reported through a ``manga_failed``
        event; ``asyncio.CancelledError`` is re-raised.
    """

    async def emit(event: dict):
        # Progress reporting is best-effort: a broken listener must not abort
        # the download.
        if on_event:
            try:
                await on_event(event)
            except Exception as e:
                logger.debug("on_event error: {}", e)

    db = StateDB()
    db_lock = asyncio.Lock()  # guards against concurrent writes to SQLite

    async def db_call(fn, *args, **kwargs):
        """Wrapper: every db access goes through the shared asyncio.Lock."""
        async with db_lock:
            return fn(*args, **kwargs)

    try:
        await db_call(db.update_manga_status, url, "downloading")
        started_ts = await db_call(db.mark_started, url)
        await emit({"type": "manga_start", "url": url, "started_at": started_ts})

        async with BrowserManager(headless=True) as bm:
            ctx, info_page = await bm.new_page()
            manga = await get_manga_info(info_page, url)
            await info_page.close()

            if not manga:
                await db_call(db.update_manga_status, url, "failed")
                # Record the finish time here as well, so both failure paths
                # of this function report the same fields.
                finished_ts = await db_call(db.mark_finished, url)
                await emit({"type": "manga_failed", "url": url,
                            "error": "Не удалось получить информацию о манге",
                            "finished_at": finished_ts})
                return

            await db_call(
                db.update_manga_info,
                url,
                title=manga.title_ru or manga.title,
                chapters_total=len(manga.chapters),
                title_ru=manga.title_ru,
                title_full=manga.title_full,
                pub_status=manga.pub_status,
            )
            await emit({
                "type": "manga_info",
                "url": url,
                "title": manga.title_ru or manga.title,
                "title_ru": manga.title_ru,
                "title_full": manga.title_full,
                "pub_status": manga.pub_status,
                "chapters_total": len(manga.chapters),
            })

            folder_name = _safe_name(manga.title_ru or manga.title)
            manga_dir = output_dir / folder_name
            manga_dir.mkdir(parents=True, exist_ok=True)

            for ch in manga.chapters:
                await db_call(db.upsert_chapter, url, ch.url, ch.title, ch.number, ch.volume)

            formats = ["cbz", "pdf", "epub"] if fmt == "all" else [fmt]

            # ── Split chapters: skip / download ──────────────────────────
            to_skip = []
            to_download = []
            for ch in manga.chapters:
                if resume and (await db_call(db.chapter_status, ch.url)) == "done":
                    to_skip.append(ch)
                else:
                    to_download.append(ch)

            # Counter and lock for safe updates from the parallel chapter tasks.
            counter_lock = asyncio.Lock()
            # Start from 0: the to_skip loop below brings it up to len(to_skip);
            # otherwise sync_chapters_done() + len(to_skip) would double-count.
            chapters_done = 0

            # Report chapters that are skipped because they are already downloaded.
            for ch in to_skip:
                chapters_done += 1
                await emit({
                    "type": "chapter_skipped",
                    "url": url,
                    "chapter_url": ch.url,
                    "chapter_number": ch.number,
                    "chapter_title": ch.title,
                    "volume": ch.volume,
                    "chapters_done": chapters_done,
                    "chapters_total": len(manga.chapters),
                })

            # A semaphore of 0 would block every chapter forever and a negative
            # value raises ValueError, so never go below one slot.
            concurrency = max(1, chapter_concurrency)
            logger.info(
                "Параллельность: {} гл одновременно. Пропущено: {}, скачать: {}",
                concurrency, len(to_skip), len(to_download),
            )

            # ── The semaphore limits the number of simultaneously open pages ──
            sem = asyncio.Semaphore(concurrency)

            async def process_chapter(ch: Chapter) -> None:
                nonlocal chapters_done
                async with sem:
                    # Re-check: another coroutine may have downloaded it meanwhile.
                    if (await db_call(db.chapter_status, ch.url)) == "done":
                        async with counter_lock:
                            chapters_done += 1
                            done_snap = chapters_done
                        await emit({
                            "type": "chapter_skipped",
                            "url": url,
                            "chapter_url": ch.url,
                            "chapter_number": ch.number,
                            "chapter_title": ch.title,
                            "volume": ch.volume,
                            "chapters_done": done_snap,
                            "chapters_total": len(manga.chapters),
                        })
                        return

                    await emit({
                        "type": "chapter_start",
                        "url": url,
                        "chapter_url": ch.url,
                        "chapter_title": ch.title,
                        "chapter_number": ch.number,
                        "volume": ch.volume,
                        "chapters_done": chapters_done,
                        "chapters_total": len(manga.chapters),
                    })

                    # Opened inside the try so that a failure to create the page
                    # is recorded as a failed chapter like any other error.
                    ch_page = None
                    try:
                        ch_page = await ctx.new_page()
                        with tempfile.TemporaryDirectory() as tmpdir:
                            tmp_path = Path(tmpdir)
                            pages_done_count = [0]

                            async def on_page(page_idx: int, pages_total: int):
                                pages_done_count[0] += 1
                                await db_call(db.update_chapter_pages,
                                              ch.url, pages_total, pages_done_count[0])
                                await emit({
                                    "type": "page_done",
                                    "url": url,
                                    "chapter_url": ch.url,
                                    "page_idx": page_idx,
                                    "pages_done": pages_done_count[0],
                                    "pages_total": pages_total,
                                })

                            image_paths = await get_chapter_images_and_download(
                                ch_page, ch.url,
                                dest_dir=tmp_path,
                                manga_url=url,
                                on_page=on_page,
                            )

                            if not image_paths:
                                logger.error(
                                    "Т{} Гл.{} '{}' — get_chapter_images вернул пустой список. "
                                    "URL: {}",
                                    ch.volume, ch.number, ch.title, ch.url,
                                )
                                await db_call(db.mark_failed, ch.url)
                                await emit({"type": "chapter_failed", "url": url,
                                            "chapter_url": ch.url})
                                return

                            ch_name = _safe_chapter_name(ch)
                            ch_meta = MangaMeta(
                                series=manga.title_ru or manga.title,
                                series_full=manga.title_full or "",
                                chapter_title=ch.title,
                                number=ch.number,
                                volume=ch.volume,
                                chapters_total=len(manga.chapters),
                                pub_status=manga.pub_status,
                                source_url=url,
                                summary=manga.description,
                                genre=", ".join(manga.genres) if manga.genres else "",
                            )

                            # One failing format must not prevent the others, so
                            # each export is attempted independently.
                            exported = 0
                            for f in formats:
                                out_file = manga_dir / f"{ch_name}.{f}"
                                try:
                                    export(image_paths, out_file, f, meta=ch_meta)
                                    await db_call(db.mark_done, ch.url, f, str(out_file))
                                    exported += 1
                                except Exception as e:
                                    logger.exception(
                                        "Ошибка экспорта Т{} Гл.{}{} | {}: {}",
                                        ch.volume, ch.number, f, out_file.name, e,
                                    )

                            if not exported:
                                # Nothing was written: do not count or announce
                                # the chapter as downloaded.
                                await db_call(db.mark_failed, ch.url)
                                await emit({"type": "chapter_failed", "url": url,
                                            "chapter_url": ch.url,
                                            "error": "Не удалось экспортировать ни в один формат"})
                                return

                            event_type = "auto_downloaded" if is_update else "downloaded"
                            await db_call(
                                db.add_history,
                                manga_url=url,
                                event_type=event_type,
                                chapter_url=ch.url,
                                chapter_title=ch.title,
                                chapter_number=ch.number,
                                volume=ch.volume,
                            )

                            async with counter_lock:
                                chapters_done += 1
                                done_snap = chapters_done

                            await emit({
                                "type": "chapter_done",
                                "url": url,
                                "chapter_url": ch.url,
                                "chapter_title": ch.title,
                                "chapter_number": ch.number,
                                "volume": ch.volume,
                                "chapters_done": done_snap,
                                "chapters_total": len(manga.chapters),
                            })

                    except Exception as e:
                        logger.exception(
                            "Необработанное исключение в Т{} Гл.{} '{}' | {}: {}",
                            ch.volume, ch.number, ch.title, ch.url, e,
                        )
                        await db_call(db.mark_failed, ch.url)
                        await emit({"type": "chapter_failed", "url": url,
                                    "chapter_url": ch.url, "error": str(e)})
                    finally:
                        if ch_page is not None:
                            await ch_page.close()

            # ── Start every task at once; the semaphore meters the parallelism ──
            tasks = [process_chapter(ch) for ch in to_download]
            results = await asyncio.gather(*tasks, return_exceptions=True)

            # Log unexpected exceptions returned by gather. We are outside any
            # except block here, so the exception object is attached explicitly;
            # logger.exception() would find no active exception to report.
            for ch, res in zip(to_download, results):
                if isinstance(res, Exception) and not isinstance(res, asyncio.CancelledError):
                    logger.opt(exception=res).error(
                        "gather: необработанное исключение Т{} Гл.{} '{}': {}",
                        ch.volume, ch.number, ch.title, res,
                    )

            real_done = await db_call(db.sync_chapters_done, url)
            await db_call(db.update_manga_status, url, "done")
            finished_ts = await db_call(db.mark_finished, url)
            await db_call(db.set_last_checked, url)
            await emit({
                "type": "manga_done",
                "url": url,
                "chapters_done": real_done,
                "chapters_total": len(manga.chapters),
                "finished_at": finished_ts,
            })

            await ctx.close()

    except asyncio.CancelledError:
        # Cancellation is not a failure; let the caller handle it.
        raise
    except Exception as e:
        logger.error("Manga worker error {}: {}", url, e)
        await db_call(db.update_manga_status, url, "failed")
        finished_ts = await db_call(db.mark_finished, url)
        await emit({"type": "manga_failed", "url": url, "error": str(e), "finished_at": finished_ts})
    finally:
        db.close()
async def check_for_updates(
    url: str,
    on_event: Optional[Callable] = None,
) -> list[str]:
    """Check a manga for chapters that are not yet recorded in the database.

    Records the check in history, re-reads the manga info, stores every
    chapter whose URL is not yet known and reports each one through
    ``on_event``.

    Args:
        url: Manga page URL; also the key passed to every StateDB call made here.
        on_event: Optional async callback awaited with every event dict.
            Errors raised by it are logged at debug level and ignored.

    Returns:
        The URLs of the newly found chapters, or an empty list when the manga
        info could not be obtained.
    """

    async def emit(event: dict):
        # Best-effort notification; log instead of silently dropping the error
        # so a broken listener can still be diagnosed.
        if on_event:
            try:
                await on_event(event)
            except Exception as e:
                logger.debug("on_event error: {}", e)

    db = StateDB()
    try:
        db.set_last_checked(url)
        db.add_history(manga_url=url, event_type="check_started")
        await emit({"type": "check_started", "url": url})

        # The browser is only needed for the scrape, so it is released before
        # the database work below.
        async with BrowserManager(headless=True) as bm:
            _, page = await bm.new_page()
            manga = await get_manga_info(page, url)
            await page.close()

        if not manga:
            logger.warning("check_for_updates: no manga info for {}", url)
            return []

        # Refresh pub_status and the chapter count.
        db.update_manga_info(
            url,
            title=manga.title_ru or manga.title,
            chapters_total=len(manga.chapters),
            title_ru=manga.title_ru,
            title_full=manga.title_full,
            pub_status=manga.pub_status,
        )

        # Find the chapters that are not in the database yet.
        known = {ch["chapter_url"] for ch in db.get_all_chapters(url)}
        new_chapters = [ch for ch in manga.chapters if ch.url not in known]

        for ch in new_chapters:
            db.upsert_chapter(url, ch.url, ch.title, ch.number, ch.volume)
            db.add_history(
                manga_url=url,
                event_type="new_chapter_found",
                chapter_url=ch.url,
                chapter_title=ch.title,
                chapter_number=ch.number,
                volume=ch.volume,
            )
            await emit({
                "type": "new_chapter_found",
                "url": url,
                "chapter_url": ch.url,
                "chapter_title": ch.title,
                "chapter_number": ch.number,
                # Included so the event carries the same chapter fields as the
                # history row written just above.
                "volume": ch.volume,
            })

        db.add_history(
            manga_url=url,
            event_type="check_done",
            details=f"Найдено новых: {len(new_chapters)}",
        )
        await emit({
            "type": "check_done",
            "url": url,
            "new_chapters": len(new_chapters),
        })

        return [ch.url for ch in new_chapters]
    finally:
        db.close()