Base app
This commit is contained in:
779
src/api.py
Normal file
779
src/api.py
Normal file
@@ -0,0 +1,779 @@
|
||||
"""
|
||||
FastAPI веб-сервер: REST API + WebSocket для мониторинга загрузок манги.
|
||||
"""
|
||||
import asyncio
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
|
||||
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, HTTPException
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.responses import FileResponse
|
||||
from pydantic import BaseModel
|
||||
from loguru import logger
|
||||
|
||||
from .state import StateDB
|
||||
from .worker import download_manga, check_for_updates
|
||||
from .exporter import patch_meta, MangaMeta
|
||||
|
||||
OUTPUT_DIR = Path("/app/output")
|
||||
FRONTEND_DIR = Path("/app/frontend")
|
||||
|
||||
app = FastAPI(title="Manga Downloader API")
|
||||
|
||||
# ── WebSocket менеджер ────────────────────────
|
||||
|
||||
class ConnectionManager:
    """Registry of open WebSocket clients with fan-out broadcasting."""

    def __init__(self):
        # Sockets that were accepted and have not been dropped yet.
        self.active: set[WebSocket] = set()

    async def connect(self, ws: WebSocket):
        """Accept the handshake and start tracking the socket."""
        await ws.accept()
        self.active.add(ws)

    def disconnect(self, ws: WebSocket):
        """Stop tracking the socket; an unknown socket is ignored."""
        self.active.discard(ws)

    async def broadcast(self, data: dict):
        """Send ``data`` as JSON to every tracked socket.

        Sockets whose send raises are removed from the registry afterwards.
        """
        failed = set()
        # Iterate over a snapshot: the set can change while a send is awaited.
        for client in tuple(self.active):
            try:
                await client.send_json(data)
            except Exception:
                failed.add(client)
        self.active.difference_update(failed)
|
||||
|
||||
|
||||
ws_manager = ConnectionManager()
|
||||
|
||||
# ── Очередь загрузки ─────────────────────────
|
||||
|
||||
download_queue: asyncio.Queue = asyncio.Queue()
|
||||
|
||||
# url → asyncio.Task текущей загрузки
|
||||
active_tasks: dict[str, asyncio.Task] = {}
|
||||
|
||||
|
||||
async def queue_worker():
    """Process the download queue sequentially, restarting the loop after a crash.

    Any exception escaping ``_queue_worker_loop`` is logged, then the loop is
    started again after a five second pause.
    """
    restart_delay = 5
    while True:
        try:
            await _queue_worker_loop()
        except Exception as exc:
            logger.error("queue_worker упал, перезапускаю через 5 сек: {}", exc)
            await asyncio.sleep(restart_delay)
|
||||
|
||||
|
||||
async def _queue_worker_loop():
    """Take jobs from ``download_queue`` one at a time and run each download.

    A job is a dict with ``url`` and optional ``fmt``, ``is_update`` and
    ``resume`` keys. The running download task is published in
    ``active_tasks`` so that API handlers can cancel it.
    """
    while True:
        job = await download_queue.get()
        url = job["url"]
        fmt = job.get("fmt", "cbz")

        # The manga may have been stopped while its job was waiting in the queue.
        db = StateDB()
        try:
            row = db.get_manga(url)
            skip = bool(row and row["status"] == "stopped")
            if skip:
                logger.info("Воркер: пропускаю остановленную {}", url)
        finally:
            db.close()

        if skip:
            download_queue.task_done()
            continue

        logger.info("Воркер: начинаю скачивать {}", url)
        dl_task = asyncio.create_task(
            download_manga(
                url=url,
                fmt=fmt,
                is_update=job.get("is_update", False),
                resume=job.get("resume", True),
                on_event=ws_manager.broadcast,
            )
        )
        active_tasks[url] = dl_task
        try:
            await dl_task
        except asyncio.CancelledError:
            logger.info("Воркер: загрузка прервана: {}", url)
            state = StateDB()
            try:
                current = state.get_manga(url)
                # A "queued" status means the prioritize handler already put
                # this manga back in the queue, so it must not become "stopped".
                if not current or current["status"] == "queued":
                    await ws_manager.broadcast(
                        {"type": "manga_queued", "url": url, "format": fmt}
                    )
                else:
                    state.update_manga_status(url, "stopped")
                    await ws_manager.broadcast({"type": "manga_stopped", "url": url})
            finally:
                state.close()
        except Exception as exc:
            logger.error("Воркер ошибка {}: {}", url, exc)
        finally:
            active_tasks.pop(url, None)
            download_queue.task_done()
|
||||
|
||||
|
||||
@app.on_event("startup")
async def startup_event():
    """Start the background workers and re-enqueue unfinished downloads.

    Every manga whose stored status is "queued" or "downloading" is reset to
    "queued" and put back on ``download_queue`` with its stored format.
    """
    # The event loop keeps only weak references to tasks, so a task nobody
    # references can be garbage-collected mid-execution. Holding them on
    # app.state keeps both workers alive for the lifetime of the application.
    app.state.background_tasks = [
        asyncio.create_task(queue_worker()),
        asyncio.create_task(update_scheduler()),
    ]

    # Restore the queue from the database (unfinished jobs).
    db = StateDB()
    try:
        for manga in db.get_all_mangas():
            if manga["status"] not in ("queued", "downloading"):
                continue
            db.update_manga_status(manga["url"], "queued")
            await download_queue.put({"url": manga["url"], "fmt": manga["format"]})
            logger.info("Восстановлено из очереди: {}", manga["url"])
    finally:
        db.close()
|
||||
|
||||
|
||||
async def update_scheduler():
    """Periodically check for new chapters of mangas with auto_update enabled.

    The period is read from the ``UPDATE_INTERVAL_HOURS`` environment variable
    (default 6). A value that is not a number, or is not positive, falls back
    to the default instead of crashing the task or spinning without a pause.
    The first check runs five minutes after startup.
    """
    default_hours = 6.0
    raw_interval = os.getenv("UPDATE_INTERVAL_HOURS", "6")
    try:
        interval_hours = float(raw_interval)
    except ValueError:
        interval_hours = default_hours
        logger.warning(
            "UPDATE_INTERVAL_HOURS={!r} некорректен, использую {} ч",
            raw_interval, default_hours,
        )
    if not interval_hours > 0:  # also rejects NaN
        logger.warning(
            "UPDATE_INTERVAL_HOURS={!r} должен быть больше нуля, использую {} ч",
            raw_interval, default_hours,
        )
        interval_hours = default_hours

    interval_sec = interval_hours * 3600
    logger.info("Планировщик обновлений: каждые {} ч", interval_hours)

    # First run happens five minutes after startup.
    await asyncio.sleep(300)
    while True:
        try:
            await _run_auto_updates()
        except Exception as exc:
            # One failed cycle must not stop all future checks.
            # CancelledError is not an Exception subclass, so shutdown still works.
            logger.error("Планировщик обновлений: ошибка цикла проверки: {}", exc)
        await asyncio.sleep(interval_sec)
|
||||
|
||||
|
||||
async def _run_auto_updates():
    """Check every auto-update manga for new chapters and enqueue those that have some.

    A failure while handling one manga is logged and does not stop the others.
    """
    db = StateDB()
    try:
        candidates = db.get_autos()
    finally:
        db.close()

    if not candidates:
        return

    logger.info("Авто-обновление: проверяем {} манг", len(candidates))
    for manga in candidates:
        url = manga["url"]
        fmt = manga.get("format", "cbz")
        try:
            new_chapters = await check_for_updates(url, on_event=ws_manager.broadcast)
            if not new_chapters:
                continue

            logger.info("Новых глав для {}: {}", url, len(new_chapters))
            # Mark as queued unless the manga is already queued or downloading.
            state = StateDB()
            try:
                row = state.get_manga(url)
                if row and row["status"] not in ("downloading", "queued"):
                    state.update_manga_status(url, "queued")
            finally:
                state.close()

            # Enqueue with the is_update flag set.
            await download_queue.put({"url": url, "fmt": fmt, "is_update": True})
            event = {
                "type": "manga_queued",
                "url": url,
                "format": fmt,
                "reason": "auto_update",
            }
            await ws_manager.broadcast(event)
        except Exception as exc:
            logger.error("Ошибка авто-обновления {}: {}", url, exc)
|
||||
|
||||
|
||||
# ── Вспомогательные функции ───────────────────
|
||||
|
||||
def _dir_size(path: Path) -> int:
|
||||
"""Размер директории в байтах."""
|
||||
if not path.exists():
|
||||
return 0
|
||||
return sum(f.stat().st_size for f in path.rglob("*") if f.is_file())
|
||||
|
||||
|
||||
def _format_size(bytes_val: int) -> str:
|
||||
for unit in ("Б", "КБ", "МБ", "ГБ"):
|
||||
if bytes_val < 1024:
|
||||
return f"{bytes_val:.1f} {unit}"
|
||||
bytes_val /= 1024
|
||||
return f"{bytes_val:.1f} ТБ"
|
||||
|
||||
|
||||
def _enrich_manga(m: dict, db: StateDB) -> dict:
    """Extend a manga row with counters from the chapters table and disk usage.

    Args:
        m: manga row as a dict; ``url`` is required, ``title`` is optional.
        db: open state database whose ``conn`` is used for the counts.

    Returns:
        A new dict with all keys of ``m`` plus ``chapters_done``,
        ``size_bytes``, ``size_human``, ``queue_position`` (always ``None``
        here; callers fill it in), ``is_active``, ``errors_count``,
        ``started_at`` and ``finished_at``.
    """
    title = m.get("title") or ""
    safe_title = re.sub(r'[^\w\s\-]', '', title).strip().replace(" ", "_")[:80]
    # With an empty name OUTPUT_DIR / "" is OUTPUT_DIR itself, which would
    # report the size of the whole library for a manga that has no title yet.
    size_bytes = _dir_size(OUTPUT_DIR / safe_title) if safe_title else 0

    url_param = (m["url"],)
    ch_done_count = db.conn.execute(
        "SELECT COUNT(*) FROM chapters WHERE manga_url=? AND status='done'",
        url_param,
    ).fetchone()[0]
    ch_failed = db.conn.execute(
        "SELECT COUNT(*) FROM chapters WHERE manga_url=? AND status='failed'",
        url_param,
    ).fetchone()[0]
    # "Partial": marked done although fewer pages were saved than expected.
    ch_partial = db.conn.execute(
        """SELECT COUNT(*) FROM chapters
           WHERE manga_url=? AND status='done'
           AND pages_total > 0 AND pages_done < pages_total""",
        url_param,
    ).fetchone()[0]

    return {
        **m,
        "chapters_done": ch_done_count,
        "size_bytes": size_bytes,
        "size_human": _format_size(size_bytes),
        "queue_position": None,
        "is_active": m["url"] in active_tasks,
        "errors_count": ch_failed + ch_partial,
        "started_at": m.get("started_at"),
        "finished_at": m.get("finished_at"),
    }
|
||||
|
||||
|
||||
def _manga_detail(manga: dict, db: StateDB) -> dict:
    """Build the detailed view of one manga: chapters, files, statistics, errors.

    Args:
        manga: manga row as a dict; ``url`` is required, ``title`` is optional.
        db: open state database used to load the chapters.

    Returns:
        A new dict with all keys of ``manga`` plus ``chapters``, ``files``,
        ``size_bytes``, ``size_human``, ``files_count``, ``stats`` and
        ``errors`` (failed chapters first, then partial ones, each group
        ordered by chapter number).
    """
    url = manga["url"]
    chapters = db.get_all_chapters(url)

    def pages(chapter: dict, key: str) -> int:
        # Treat a missing or NULL counter as zero so the sums cannot fail.
        return chapter.get(key) or 0

    # Directory of this manga. With an empty name OUTPUT_DIR / "" is
    # OUTPUT_DIR itself, so a manga without a title has no directory at all.
    title = manga.get("title") or ""
    safe_title = re.sub(r'[^\w\s\-]', '', title).strip().replace(" ", "_")[:80]
    manga_dir = OUTPUT_DIR / safe_title if safe_title else None
    size_bytes = _dir_size(manga_dir) if manga_dir is not None else 0

    # Files directly inside the manga directory.
    files = []
    if manga_dir is not None and manga_dir.exists():
        for f in sorted(manga_dir.iterdir()):
            if f.is_file():
                file_size = f.stat().st_size
                files.append({
                    "name": f.name,
                    "size": file_size,
                    "size_human": _format_size(file_size),
                })

    # Statistics.
    ch_done = [c for c in chapters if c["status"] == "done"]
    ch_failed = [c for c in chapters if c["status"] == "failed"]
    ch_pending = [c for c in chapters if c["status"] == "pending"]

    total_pages_downloaded = sum(pages(c, "pages_done") for c in chapters)
    total_pages_expected = sum(
        pages(c, "pages_total") for c in chapters if pages(c, "pages_total") > 0
    )

    # Partial chapters: marked done, but pages_done < pages_total.
    ch_partial = [
        c for c in ch_done
        if pages(c, "pages_total") > 0
        and pages(c, "pages_done") < pages(c, "pages_total")
    ]
    # Number of pages missing from the partial chapters.
    pages_missing = sum(
        pages(c, "pages_total") - pages(c, "pages_done") for c in ch_partial
    )

    errors = [
        {**c, "error_type": "failed", "error_label": "Глава не загружена"}
        for c in ch_failed
    ]
    for c in ch_partial:
        missing = pages(c, "pages_total") - pages(c, "pages_done")
        errors.append({**c, "error_type": "partial",
                       "error_label": f"Частичная загрузка: пропущено {missing} стр."})
    # Failed chapters first, then partial ones; by chapter number inside each group.
    errors.sort(
        key=lambda c: (0 if c["error_type"] == "failed" else 1, c.get("number") or 0)
    )

    stats = {
        "chapters_done": len(ch_done),
        "chapters_failed": len(ch_failed),
        "chapters_pending": len(ch_pending),
        "chapters_partial": len(ch_partial),
        "total_pages_downloaded": total_pages_downloaded,
        "total_pages_expected": total_pages_expected,
        "pages_missing": pages_missing,
        "errors_count": len(errors),
    }

    return {
        **manga,
        "chapters": chapters,
        "files": files,
        "size_bytes": size_bytes,
        "size_human": _format_size(size_bytes),
        "files_count": len(files),
        "stats": stats,
        "errors": errors,
    }
|
||||
|
||||
|
||||
# ── REST API ──────────────────────────────────
|
||||
|
||||
class AddMangaRequest(BaseModel):
    # Request body of POST /api/queue.
    # urls: manga page URLs; the handler strips each one and skips blank entries.
    urls: List[str]
    # format: stored with the manga and passed to the queue job as "fmt".
    format: str = "cbz"
|
||||
|
||||
|
||||
@app.get("/api/mangas")
async def list_mangas():
    """Return every manga enriched with counters, size and queue position."""
    db = StateDB()
    try:
        result = [_enrich_manga(row, db) for row in db.get_all_mangas()]
        # 1-based position of each URL in the pending queue; for a URL queued
        # more than once the last occurrence wins.
        positions = {
            job["url"]: place
            for place, job in enumerate(download_queue._queue, start=1)  # type: ignore
        }
        for entry in result:
            if entry["url"] in positions:
                entry["queue_position"] = positions[entry["url"]]
        return result
    finally:
        db.close()
|
||||
|
||||
|
||||
@app.get("/api/mangas/detail")
async def manga_detail(url: str):
    """Return the detailed view of one manga, or 404 when the URL is unknown."""
    db = StateDB()
    try:
        manga = db.get_manga(url)
        if manga:
            return _manga_detail(manga, db)
        raise HTTPException(status_code=404, detail="Манга не найдена")
    finally:
        db.close()
|
||||
|
||||
|
||||
@app.post("/api/queue")
async def add_to_queue(body: AddMangaRequest):
    """Register new mangas and enqueue them for download.

    Blank URLs are ignored. URLs the database reports as already present are
    returned under ``skipped``; newly added ones under ``added``.
    """
    added, skipped = [], []
    db = StateDB()
    try:
        for raw_url in body.urls:
            url = raw_url.strip()
            if not url:
                continue
            if not db.add_manga(url, body.format):
                skipped.append(url)
                continue

            await download_queue.put({"url": url, "fmt": body.format})
            added.append(url)
            await ws_manager.broadcast(
                {"type": "manga_queued", "url": url, "format": body.format}
            )
            # Fetch title and chapter count in the background right away.
            asyncio.create_task(_fetch_preview(url))
    finally:
        db.close()
    return {"added": added, "skipped": skipped}
|
||||
|
||||
|
||||
async def _fetch_preview(url: str):
    """Fetch title and chapter count right after a manga is added.

    The result is stored in the database and broadcast as a ``manga_preview``
    event. Any failure is logged as a warning and otherwise ignored.
    """
    try:
        from .browser import BrowserManager
        from .scraper import get_manga_info

        async with BrowserManager(headless=True) as bm:
            _, page = await bm.new_page()
            manga = await get_manga_info(page, url)
        if not manga:
            return

        display_title = manga.title_ru or manga.title
        chapter_count = len(manga.chapters)

        db = StateDB()
        try:
            db.update_manga_info(
                url,
                title=display_title,
                chapters_total=chapter_count,
                title_ru=manga.title_ru,
                title_full=manga.title_full,
                pub_status=manga.pub_status,
            )
        finally:
            db.close()

        await ws_manager.broadcast({
            "type": "manga_preview",
            "url": url,
            "title": display_title,
            "title_ru": manga.title_ru,
            "title_full": manga.title_full,
            "pub_status": manga.pub_status,
            "chapters_total": chapter_count,
        })
        logger.info("Предпросмотр готов: {} ({} глав)", display_title, chapter_count)
    except Exception as exc:
        logger.warning("Ошибка предпросмотра {}: {}", url, exc)
|
||||
|
||||
|
||||
@app.post("/api/mangas/auto_update")
async def toggle_auto_update(url: str, enabled: bool):
    """Enable or disable auto-update for a manga and notify connected clients."""
    db = StateDB()
    try:
        if not db.get_manga(url):
            raise HTTPException(status_code=404, detail="Манга не найдена")
        db.set_auto_update(url, enabled)
        notice = {"type": "auto_update_changed", "url": url, "auto_update": enabled}
        await ws_manager.broadcast(notice)
        return {"ok": True, "auto_update": enabled}
    finally:
        db.close()
|
||||
|
||||
|
||||
@app.post("/api/mangas/check_now")
async def check_now(url: str):
    """Start an immediate background check for new chapters of one manga."""
    db = StateDB()
    try:
        known = db.get_manga(url)
        if not known:
            raise HTTPException(status_code=404, detail="Манга не найдена")
    finally:
        db.close()
    # The check runs in the background; the response does not wait for it.
    asyncio.create_task(_check_and_queue(url))
    return {"ok": True}
|
||||
|
||||
|
||||
async def _check_and_queue(url: str):
    """Check one manga for new chapters and enqueue an update job if any exist.

    Runs as a fire-and-forget task, so every failure is logged here; otherwise
    the exception would never be reported. The status is switched to "queued"
    only when the manga is neither queued nor downloading: overwriting the
    status of an active download would make the queue worker treat a later
    stop request as a re-queue.
    """
    try:
        db = StateDB()
        try:
            manga = db.get_manga(url)
            fmt = manga["format"] if manga else "cbz"
        finally:
            db.close()

        new = await check_for_updates(url, on_event=ws_manager.broadcast)
        if not new:
            return

        db2 = StateDB()
        try:
            row = db2.get_manga(url)
            if row and row["status"] not in ("downloading", "queued"):
                db2.update_manga_status(url, "queued")
        finally:
            db2.close()

        await download_queue.put({"url": url, "fmt": fmt, "is_update": True})
        # Tell connected clients, as the scheduled auto-update path does.
        await ws_manager.broadcast({"type": "manga_queued", "url": url, "format": fmt})
    except Exception as exc:
        logger.error("Ошибка проверки обновлений {}: {}", url, exc)
|
||||
|
||||
|
||||
@app.get("/api/news")
async def get_news(limit: int = 100):
    """Return the newest downloaded and auto-downloaded history events (News tab)."""
    query = """
        SELECT h.*, m.title as manga_title, m.title_ru
        FROM history h LEFT JOIN mangas m ON h.manga_url = m.url
        WHERE h.event_type IN ('downloaded', 'auto_downloaded')
        ORDER BY h.created_at DESC LIMIT ?
    """
    db = StateDB()
    try:
        rows = db.conn.execute(query, (limit,)).fetchall()
        return [dict(row) for row in rows]
    finally:
        db.close()
|
||||
|
||||
|
||||
@app.get("/api/history")
async def get_history(limit: int = 100, manga_url: str = ""):
    """Return history events from the state database for the given limit and manga URL."""
    db = StateDB()
    try:
        events = db.get_history(limit=limit, manga_url=manga_url)
        return events
    finally:
        db.close()
|
||||
|
||||
|
||||
@app.post("/api/mangas/prioritize")
async def prioritize_manga(url: str):
    """Move a manga to the head of the queue, preempting the active download.

    The preempted download is cancelled and re-queued directly behind the
    prioritized manga. The queue is rebuilt only through the public
    ``asyncio.Queue`` API: writing to its internal deque neither wakes a
    worker blocked in ``get()`` nor updates the unfinished-task counter, so a
    later ``task_done()`` in the worker would raise ``ValueError``.

    Raises:
        HTTPException: 404 when the manga is unknown.
    """
    db = StateDB()
    try:
        manga = db.get_manga(url)
        if not manga:
            raise HTTPException(status_code=404, detail="Манга не найдена")
        if manga["status"] == "downloading" and url in active_tasks:
            return {"ok": True, "message": "Уже загружается"}

        fmt = manga["format"] or "cbz"
        head = [{"url": url, "fmt": fmt}]

        # The currently active download, if it is a different manga.
        current_url = next(iter(active_tasks), None)
        preempt = bool(current_url) and current_url != url
        if preempt:
            cur_manga = db.get_manga(current_url)
            cur_fmt = cur_manga["format"] if cur_manga else "cbz"
            # Mark as queued: the worker sees this after the cancellation and
            # does not switch the manga to "stopped".
            db.update_manga_status(current_url, "queued")
            head.append({"url": current_url, "fmt": cur_fmt})
        db.update_manga_status(url, "queued")

        # Rebuild the queue as: target, preempted download, everything else.
        # There is no await between draining and refilling, so the worker
        # cannot observe a half-built queue.
        remaining = []
        while True:
            try:
                job = download_queue.get_nowait()
            except asyncio.QueueEmpty:
                break
            download_queue.task_done()  # balanced by put_nowait below
            if job["url"] != url:
                remaining.append(job)
        for job in head + remaining:
            download_queue.put_nowait(job)

        if preempt:
            # Cancel the running task; the worker then takes the next job (the target).
            task = active_tasks.get(current_url)
            if task and not task.done():
                task.cancel()

        logger.info("Приоритет: {} → начало очереди (вытеснен: {})", url, current_url)
        await ws_manager.broadcast({
            "type": "manga_prioritized",
            "url": url,
            "preempted_url": current_url,
        })
        return {"ok": True}
    finally:
        db.close()
|
||||
|
||||
|
||||
@app.post("/api/mangas/retry_errors")
async def retry_errors(url: str):
    """Reset failed and partially downloaded chapters to pending for a retry."""
    db = StateDB()
    try:
        if not db.get_manga(url):
            raise HTTPException(status_code=404, detail="Манга не найдена")

        conn = db.conn
        now = conn.execute("SELECT datetime('now')").fetchone()[0]
        params = (now, url)
        # Failed chapters.
        conn.execute(
            "UPDATE chapters SET status='pending', pages_done=0, pages_total=0, updated_at=? WHERE manga_url=? AND status='failed'",
            params,
        )
        # Partial chapters: marked done, but fewer pages saved than expected.
        conn.execute(
            """UPDATE chapters SET status='pending', pages_done=0, pages_total=0, updated_at=?
               WHERE manga_url=? AND status='done' AND pages_total > 0 AND pages_done < pages_total""",
            params,
        )
        conn.commit()
        return {"ok": True}
    finally:
        db.close()
|
||||
|
||||
|
||||
@app.post("/api/mangas/refresh_meta")
async def refresh_meta(url: str):
    """Start a background refresh of the metadata in already downloaded files.

    Responds 404 for an unknown manga and 400 while the manga is downloading.
    """
    db = StateDB()
    try:
        manga = db.get_manga(url)
        if not manga:
            raise HTTPException(status_code=404, detail="Манга не найдена")
        busy = manga["status"] == "downloading" and url in active_tasks
        if busy:
            raise HTTPException(status_code=400, detail="Манга сейчас загружается")
    finally:
        db.close()
    asyncio.create_task(_do_refresh_meta(url))
    return {"ok": True}
|
||||
|
||||
|
||||
async def _do_refresh_meta(url: str):
    """Background task: rewrite metadata in every existing output file of a manga.

    For each chapter the stored CBZ, PDF and EPUB paths are checked; files
    that exist are passed to ``patch_meta``. The numbers of updated and
    failed files are logged and broadcast as a ``meta_refreshed`` event.
    """
    path_columns = ("output_cbz", "output_pdf", "output_epub")
    db = StateDB()
    try:
        manga = db.get_manga(url)
        if not manga:
            return
        chapters = db.get_all_chapters(url)
        chapters_total = len(chapters)
        pub_status = manga.get("pub_status") or "unknown"

        updated = 0
        failed = 0
        for ch in chapters:
            for column in path_columns:
                stored = ch.get(column)
                if not stored:
                    continue
                target = Path(stored)
                if not target.exists():
                    continue
                meta = MangaMeta(
                    series=manga.get("title_ru") or manga.get("title") or "",
                    series_full=manga.get("title_full") or "",
                    chapter_title=ch.get("title") or "",
                    number=float(ch.get("number") or 0),
                    volume=int(ch.get("volume") or 0),
                    chapters_total=chapters_total,
                    pub_status=pub_status,
                    source_url=url,
                )
                if patch_meta(target, meta):
                    updated += 1
                else:
                    failed += 1

        logger.info("refresh_meta {}: обновлено {}, ошибок {}", url, updated, failed)
        summary = {
            "type": "meta_refreshed",
            "url": url,
            "updated": updated,
            "failed": failed,
        }
        await ws_manager.broadcast(summary)
    except Exception as exc:
        logger.error("_do_refresh_meta {}: {}", url, exc)
    finally:
        db.close()
|
||||
|
||||
|
||||
@app.post("/api/mangas/force_redownload")
async def force_redownload(url: str):
    """Reset all chapters to pending and enqueue the manga for a full re-download.

    Responds 404 for an unknown manga and 400 while the manga is downloading.
    """
    db = StateDB()
    try:
        manga = db.get_manga(url)
        if not manga:
            raise HTTPException(status_code=404, detail="Манга не найдена")
        if url in active_tasks and manga["status"] == "downloading":
            raise HTTPException(status_code=400, detail="Сначала остановите загрузку")

        # Reset every chapter of this manga to pending.
        now = db.conn.execute("SELECT datetime('now')").fetchone()[0]
        db.conn.execute(
            "UPDATE chapters SET status='pending', pages_done=0, pages_total=0, updated_at=? WHERE manga_url=?",
            (now, url),
        )
        db.conn.commit()

        # Enqueue with resume=False so everything is downloaded again.
        fmt = manga["format"]
        db.update_manga_status(url, "queued")
        await download_queue.put({"url": url, "fmt": fmt, "resume": False})
        await ws_manager.broadcast({"type": "manga_queued", "url": url, "format": fmt})
        return {"ok": True}
    finally:
        db.close()
|
||||
|
||||
|
||||
@app.post("/api/mangas/stop")
async def stop_manga(url: str):
    """Stop the download of a manga, whether it is running or still queued."""
    db = StateDB()
    try:
        if not db.get_manga(url):
            raise HTTPException(status_code=404, detail="Манга не найдена")

        running = active_tasks.get(url)
        if not running or running.done():
            # Not started yet: mark as stopped here; the worker skips stopped
            # mangas when it takes their job from the queue.
            db.update_manga_status(url, "stopped")
            await ws_manager.broadcast({"type": "manga_stopped", "url": url})
        else:
            # Active download: the worker updates the status after the cancellation.
            running.cancel()

        return {"ok": True}
    finally:
        db.close()
|
||||
|
||||
|
||||
@app.post("/api/mangas/resume")
async def resume_manga(url: str):
    """Put a stopped or failed manga back on the download queue.

    Responds 404 for an unknown manga and 400 while the manga is downloading.
    """
    db = StateDB()
    try:
        manga = db.get_manga(url)
        if not manga:
            raise HTTPException(status_code=404, detail="Манга не найдена")
        if url in active_tasks and manga["status"] == "downloading":
            raise HTTPException(status_code=400, detail="Манга уже загружается")

        fmt = manga["format"]
        db.update_manga_status(url, "queued")
        await download_queue.put({"url": url, "fmt": fmt})
        await ws_manager.broadcast({"type": "manga_queued", "url": url, "format": fmt})
        return {"ok": True}
    finally:
        db.close()
|
||||
|
||||
|
||||
@app.delete("/api/mangas")
async def delete_manga(url: str, delete_files: bool = False):
    """Delete a manga with its chapters and history, optionally with its files.

    Args:
        url: URL that identifies the manga.
        delete_files: also remove the manga's directory under ``OUTPUT_DIR``.

    Returns:
        ``{"ok": True, "deleted_size": <bytes removed from disk>}``.

    Raises:
        HTTPException: 404 for an unknown manga, 400 while it is downloading.
    """
    import shutil

    db = StateDB()
    try:
        manga = db.get_manga(url)
        if not manga:
            raise HTTPException(status_code=404, detail="Манга не найдена")
        if manga["status"] == "downloading" and url in active_tasks:
            raise HTTPException(status_code=400, detail="Нельзя удалить активную загрузку")

        deleted_size = 0
        if delete_files:
            title = manga.get("title") or ""
            safe_title = re.sub(r'[^\w\s\-]', '', title).strip().replace(" ", "_")[:80]
            manga_dir = OUTPUT_DIR / safe_title
            # An empty name makes OUTPUT_DIR / "" equal OUTPUT_DIR itself;
            # removing that would wipe every downloaded manga, so a manga
            # without a usable title never has files deleted.
            if safe_title and manga_dir != OUTPUT_DIR and manga_dir.is_dir():
                deleted_size = _dir_size(manga_dir)
                shutil.rmtree(str(manga_dir))
                logger.info("Удалена папка: {} ({} байт)", manga_dir, deleted_size)

        db.conn.execute("DELETE FROM chapters WHERE manga_url=?", (url,))
        db.conn.execute("DELETE FROM history WHERE manga_url=?", (url,))
        db.conn.execute("DELETE FROM mangas WHERE url=?", (url,))
        db.conn.commit()
        return {"ok": True, "deleted_size": deleted_size}
    finally:
        db.close()
|
||||
|
||||
|
||||
@app.get("/api/stats")
async def global_stats():
    """Return library-wide counters: mangas per status, queue length, disk usage."""
    db = StateDB()
    try:
        mangas = db.get_all_mangas()
        total_size = _dir_size(OUTPUT_DIR)

        # Count mangas per status in a single pass.
        per_status = {}
        for row in mangas:
            status = row["status"]
            per_status[status] = per_status.get(status, 0) + 1

        return {
            "mangas_total": len(mangas),
            "mangas_done": per_status.get("done", 0),
            "mangas_downloading": per_status.get("downloading", 0),
            "mangas_queued": per_status.get("queued", 0),
            "mangas_failed": per_status.get("failed", 0),
            "mangas_stopped": per_status.get("stopped", 0),
            "queue_size": download_queue.qsize(),
            "total_size_bytes": total_size,
            "total_size_human": _format_size(total_size),
        }
    finally:
        db.close()
|
||||
|
||||
|
||||
# ── WebSocket ─────────────────────────────────
|
||||
|
||||
@app.websocket("/ws")
async def websocket_endpoint(ws: WebSocket):
    """Serve one WebSocket client: send a state snapshot, then answer pings.

    The socket is unregistered when the client disconnects or any other
    exception ends the session.
    """
    await ws_manager.connect(ws)
    try:
        # Initial snapshot of the current state.
        db = StateDB()
        try:
            enriched = [_enrich_manga(row, db) for row in db.get_all_mangas()]
            # 1-based queue positions; the last occurrence of a URL wins.
            positions = {
                job["url"]: place
                for place, job in enumerate(download_queue._queue, start=1)  # type: ignore
            }
            for entry in enriched:
                if entry["url"] in positions:
                    entry["queue_position"] = positions[entry["url"]]
            await ws.send_json({"type": "snapshot", "mangas": enriched})
        finally:
            db.close()

        # Keep the connection open and reply to client pings.
        while True:
            message = await ws.receive_text()
            if message == "ping":
                await ws.send_json({"type": "pong"})
    except Exception:
        # WebSocketDisconnect is an Exception subclass, so this covers both
        # a normal disconnect and any other failure.
        ws_manager.disconnect(ws)
|
||||
|
||||
|
||||
# ── Статические файлы (фронтенд) ──────────────
|
||||
|
||||
# Serve the frontend from "/" when its directory exists. The mount comes after
# all API routes so that it does not shadow them.
if FRONTEND_DIR.exists():
    app.mount(
        "/",
        StaticFiles(directory=str(FRONTEND_DIR), html=True),
        name="frontend",
    )
|
||||
|
||||
30
src/cli.py
30
src/cli.py
@@ -59,23 +59,26 @@ def cli(ctx, verbose):
|
||||
help="Папка для сохранения", show_default=True)
|
||||
@click.option("--resume/--no-resume", default=True,
|
||||
help="Пропускать уже скачанные главы")
|
||||
@click.option("--force", "-F", is_flag=True, default=False,
|
||||
help="Игнорировать состояние и скачать заново, перезаписывая файлы")
|
||||
@click.option("--concurrency", default=4, show_default=True,
|
||||
help="Параллельных загрузок изображений")
|
||||
@click.pass_context
|
||||
def download(ctx, url, fmt, chapters, output, resume, concurrency):
|
||||
def download(ctx, url, fmt, chapters, output, resume, force, concurrency):
|
||||
"""Скачать мангу по URL страницы."""
|
||||
asyncio.run(_download(
|
||||
url=url,
|
||||
fmt=fmt,
|
||||
chapters_filter=chapters,
|
||||
output_dir=Path(output),
|
||||
resume=resume,
|
||||
resume=resume and not force,
|
||||
force=force,
|
||||
concurrency=concurrency,
|
||||
verbose=ctx.obj.get("verbose", False),
|
||||
))
|
||||
|
||||
|
||||
async def _download(url, fmt, chapters_filter, output_dir, resume, concurrency, verbose):
|
||||
async def _download(url, fmt, chapters_filter, output_dir, resume, force, concurrency, verbose):
|
||||
db = StateDB()
|
||||
|
||||
async with BrowserManager(headless=True) as bm:
|
||||
@@ -106,8 +109,10 @@ async def _download(url, fmt, chapters_filter, output_dir, resume, concurrency,
|
||||
for ch in chapters:
|
||||
pbar.set_description(f"Глава {ch.number}: {ch.title[:30]}")
|
||||
|
||||
# Проверяем статус (resume)
|
||||
if resume and db.chapter_status(ch.url) == "done":
|
||||
# Проверяем статус (resume / force)
|
||||
if force:
|
||||
db.reset_chapter(ch.url)
|
||||
elif resume and db.chapter_status(ch.url) == "done":
|
||||
logger.info("Пропускаем (уже скачана): {}", ch.title)
|
||||
pbar.update(1)
|
||||
continue
|
||||
@@ -116,7 +121,7 @@ async def _download(url, fmt, chapters_filter, output_dir, resume, concurrency,
|
||||
bm=bm, ctx=ctx, ch=ch,
|
||||
manga_url=url,
|
||||
manga_dir=manga_dir, formats=formats,
|
||||
concurrency=concurrency, db=db,
|
||||
concurrency=concurrency, db=db, force=force,
|
||||
)
|
||||
pbar.update(1)
|
||||
|
||||
@@ -126,7 +131,7 @@ async def _download(url, fmt, chapters_filter, output_dir, resume, concurrency,
|
||||
|
||||
|
||||
async def _process_chapter(bm, ctx, ch: Chapter, manga_url: str, manga_dir: Path,
|
||||
formats: list, concurrency: int, db: StateDB):
|
||||
formats: list, concurrency: int, db: StateDB, force: bool = False):
|
||||
# Новая страница для каждой главы (чистый контекст)
|
||||
ch_page = await ctx.new_page()
|
||||
|
||||
@@ -147,6 +152,10 @@ async def _process_chapter(bm, ctx, ch: Chapter, manga_url: str, manga_dir: Path
|
||||
|
||||
for fmt in formats:
|
||||
out_file = manga_dir / f"{ch_name}.{fmt}"
|
||||
# При --force удаляем старый файл перед перезаписью
|
||||
if force and out_file.exists():
|
||||
out_file.unlink()
|
||||
logger.debug("Удалён старый файл: {}", out_file.name)
|
||||
try:
|
||||
export(image_paths, out_file, fmt, manga_dir.name, ch.title)
|
||||
db.mark_done(ch.url, fmt, str(out_file))
|
||||
@@ -243,3 +252,10 @@ if __name__ == "__main__":
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
345
src/exporter.py
345
src/exporter.py
@@ -1,104 +1,278 @@
|
||||
"""
|
||||
Экспорт в CBZ, PDF, EPUB.
|
||||
Экспорт в CBZ, PDF, EPUB с поддержкой метаданных для Komga.
|
||||
"""
|
||||
import zipfile
|
||||
import xml.etree.ElementTree as ET
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Literal
|
||||
from typing import Literal, Optional
|
||||
|
||||
from loguru import logger
|
||||
|
||||
ExportFormat = Literal["cbz", "pdf", "epub"]
|
||||
|
||||
|
||||
@dataclass
class MangaMeta:
    """Manga and chapter metadata to embed into exported files."""
    series: str = ""              # Series name (title_ru)
    series_full: str = ""         # Full title
    chapter_title: str = ""       # Chapter title
    number: float = 0.0           # Chapter number
    volume: int = 0               # Volume
    chapters_total: int = 0       # Total chapters in the series (for completed)
    pub_status: str = "unknown"   # completed / ongoing / unknown
    source_url: str = ""          # Source URL
    language: str = "ru"
    summary: str = ""             # Series description / synopsis
    genre: str = ""               # Comma-separated genres (ComicInfo Genre)
    series_group: str = ""        # Group / collection (ComicInfo SeriesGroup)
|
||||
|
||||
|
||||
def export(
    image_paths: list[Path],
    output_path: Path,
    fmt: ExportFormat,
    title: str = "Manga",
    chapter: str = "",
    meta: Optional[MangaMeta] = None,
):
    """Export a chapter's page images to ``output_path`` in the given format.

    Args:
        image_paths: Page images, in reading order.
        output_path: Destination file; its parent directory is created.
        fmt: One of ``"cbz"``, ``"pdf"``, ``"epub"``.
        title: Legacy series title, used only when ``meta`` is omitted.
        chapter: Legacy chapter title, used only when ``meta`` is omitted.
        meta: Metadata to embed; built from ``title``/``chapter`` if ``None``.

    Raises:
        ValueError: If ``fmt`` is not a supported format.
    """
    # Build meta from the legacy arguments when it is not passed explicitly.
    if meta is None:
        meta = MangaMeta(series=title, chapter_title=chapter)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    logger.info("Экспортирую {} страниц → {} ({})", len(image_paths), output_path.name, fmt)

    if fmt == "cbz":
        _export_cbz(image_paths, output_path, meta)
    elif fmt == "pdf":
        _export_pdf(image_paths, output_path, meta)
    elif fmt == "epub":
        _export_epub(image_paths, output_path, meta)
    else:
        raise ValueError(f"Неизвестный формат: {fmt}")

    logger.info("Сохранено: {}", output_path)
|
||||
|
||||
|
||||
# ── CBZ ───────────────────────────────────────
|
||||
# ── CBZ + ComicInfo.xml ───────────────────────
|
||||
|
||||
def _export_cbz(images: list[Path], out: Path):
|
||||
def _make_comic_info(meta: MangaMeta) -> str:
|
||||
"""Генерирует ComicInfo.xml по спецификации Anansi v2.1 (Komga-совместимый)."""
|
||||
root = ET.Element("ComicInfo")
|
||||
root.set("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
|
||||
root.set("xsi:noNamespaceSchemaLocation",
|
||||
"https://raw.githubusercontent.com/anansi-project/comicinfo/main/schema/v2.1/ComicInfo.xsd")
|
||||
|
||||
def add(tag: str, value):
|
||||
if value is None:
|
||||
return
|
||||
s = str(value).strip()
|
||||
if s:
|
||||
ET.SubElement(root, tag).text = s
|
||||
|
||||
add("Series", meta.series)
|
||||
add("Title", meta.chapter_title)
|
||||
add("Summary", meta.summary)
|
||||
|
||||
# Номер главы: целое если без дроби, иначе float
|
||||
if meta.number:
|
||||
num_str = str(int(meta.number)) if meta.number == int(meta.number) else str(meta.number)
|
||||
add("Number", num_str)
|
||||
|
||||
if meta.volume:
|
||||
add("Volume", meta.volume)
|
||||
|
||||
# Count — только для завершённых серий
|
||||
if meta.pub_status == "completed" and meta.chapters_total:
|
||||
add("Count", meta.chapters_total)
|
||||
|
||||
add("Genre", meta.genre)
|
||||
add("LanguageISO", meta.language)
|
||||
|
||||
# Manga = YesAndRightToLeft — стандартная японская манга
|
||||
ET.SubElement(root, "Manga").text = "YesAndRightToLeft"
|
||||
|
||||
if meta.source_url:
|
||||
add("Web", meta.source_url)
|
||||
|
||||
# SeriesGroup — Komga создаёт коллекцию с этим именем
|
||||
if meta.series_group:
|
||||
add("SeriesGroup", meta.series_group)
|
||||
|
||||
ET.indent(root, space=" ")
|
||||
return '<?xml version="1.0" encoding="utf-8"?>\n' + ET.tostring(root, encoding="unicode")
|
||||
|
||||
|
||||
def _export_cbz(images: list[Path], out: Path, meta: MangaMeta):
    """Write ``images`` into a deflated CBZ archive at ``out``.

    ComicInfo.xml is written first, at the archive root; pages follow as
    zero-padded sequential names that keep each image's own extension.
    """
    with zipfile.ZipFile(out, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        zf.writestr("ComicInfo.xml", _make_comic_info(meta))
        for i, img in enumerate(images):
            zf.write(img, f"{i:04d}{img.suffix}")
|
||||
|
||||
|
||||
# ── PDF ───────────────────────────────────────
|
||||
|
||||
def _export_pdf(images: list[Path], out: Path, meta: MangaMeta):
    """Write ``images`` as a PDF at ``out`` and then embed metadata.

    img2pdf is tried first; on any failure (including the package not
    being installed) the Pillow-based exporter is used instead.
    """
    try:
        import img2pdf

        pdf_bytes = img2pdf.convert([str(p) for p in images])
        out.write_bytes(pdf_bytes)
    except Exception as e:
        # Deliberate best-effort: any img2pdf problem falls back to Pillow.
        logger.warning("img2pdf не сработал ({}), использую Pillow", e)
        _export_pdf_pillow(images, out)

    # Metadata is written on top of the finished PDF.
    _patch_pdf_meta(out, meta)
|
||||
|
||||
|
||||
def _export_pdf_pillow(images: list[Path], out: Path):
    """Fallback PDF writer: save ``images`` as a multi-page PDF via Pillow.

    Nothing is written when ``images`` is empty.
    """
    from PIL import Image

    pil_images = []
    for p in images:
        # convert() returns an independent RGB image, so the source file
        # handle can be closed right away instead of staying open.
        with Image.open(p) as src:
            pil_images.append(src.convert("RGB"))

    if pil_images:
        pil_images[0].save(out, save_all=True, append_images=pil_images[1:], format="PDF")
|
||||
|
||||
|
||||
def _patch_pdf_meta(pdf_path: Path, meta: MangaMeta):
    """Rewrite the PDF at ``pdf_path`` with /Info and XMP metadata via pypdf.

    Best-effort: a missing pypdf is logged at debug level and any other
    failure at warning level; nothing is raised to the caller.
    """
    try:
        import io

        from pypdf import PdfReader, PdfWriter

        reader = PdfReader(str(pdf_path))
        writer = PdfWriter()
        writer.append(reader)

        ch_num = int(meta.number) if meta.number == int(meta.number) else meta.number
        if meta.volume:
            full_title = f"{meta.series} — Том {meta.volume}, Глава {ch_num}"
        else:
            full_title = f"{meta.series} — Глава {ch_num}"
        if meta.chapter_title:
            full_title += f": {meta.chapter_title}"

        # Standard PDF /Info fields.
        writer.add_metadata({
            "/Title": full_title,
            "/Subject": meta.series_full or meta.series,
            "/Creator": "Manga Downloader",
            "/Producer": "Manga Downloader",
        })

        # XMP packet (Dublin Core + PDF). Kept in its own try so that a
        # failure here no longer discards the /Info fields set above.
        # NOTE(review): confirm `add_metadata_xmp` exists in the installed
        # pypdf version; if it does not, this step always fails.
        try:
            xmp = _build_xmp(meta, full_title)
            writer.add_metadata_xmp(xmp.encode("utf-8"))
        except Exception as e:
            logger.warning("Ошибка записи XMP-метаданных: {}", e)

        # Serialize fully in memory before touching the file on disk.
        buf = io.BytesIO()
        writer.write(buf)
        pdf_path.write_bytes(buf.getvalue())

    except ImportError:
        logger.debug("pypdf не установлен — PDF-метаданные пропущены")
    except Exception as e:
        logger.warning("Ошибка записи PDF-метаданных: {}", e)
|
||||
|
||||
|
||||
def _build_xmp(meta: MangaMeta, full_title: str) -> str:
    """Return an XMP packet (as text) describing ``meta``.

    The packet carries dc:title, dc:description, dc:language, dc:source
    and pdf:Producer. Every interpolated value is XML-escaped.
    """
    # The language code is escaped like every other field so an unexpected
    # value cannot break the packet.
    return f"""<?xpacket begin='\ufeff' id='W5M0MpCehiHzreSzNTczkc9d'?>
<x:xmpmeta xmlns:x='adobe:ns:meta/'>
<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
<rdf:Description rdf:about=''
xmlns:dc='http://purl.org/dc/elements/1.1/'
xmlns:pdf='http://ns.adobe.com/pdf/1.3/'
xmlns:xmp='http://ns.adobe.com/xap/1.0/'>
<dc:title><rdf:Alt><rdf:li xml:lang='x-default'>{_xe(full_title)}</rdf:li></rdf:Alt></dc:title>
<dc:description><rdf:Alt><rdf:li xml:lang='x-default'>{_xe(meta.series_full or meta.series)}</rdf:li></rdf:Alt></dc:description>
<dc:language><rdf:Bag><rdf:li>{_xe(meta.language)}</rdf:li></rdf:Bag></dc:language>
<dc:source>{_xe(meta.source_url)}</dc:source>
<pdf:Producer>Manga Downloader</pdf:Producer>
</rdf:Description>
</rdf:RDF>
</x:xmpmeta>
<?xpacket end='w'?>"""
|
||||
|
||||
|
||||
def _xe(s: str) -> str:
|
||||
"""Экранирование для XML."""
|
||||
return (s.replace("&", "&").replace("<", "<")
|
||||
.replace(">", ">").replace('"', """))
|
||||
|
||||
|
||||
# ── EPUB ──────────────────────────────────────
|
||||
|
||||
def _export_epub(images: list[Path], out: Path, title: str, chapter: str):
|
||||
def _export_epub(images: list[Path], out: Path, meta: MangaMeta):
|
||||
from ebooklib import epub
|
||||
from PIL import Image
|
||||
import base64
|
||||
|
||||
ch_num = int(meta.number) if meta.number == int(meta.number) else meta.number
|
||||
full_title = (f"{meta.series} — Том {meta.volume}, Глава {ch_num}"
|
||||
if meta.volume else f"{meta.series} — Глава {ch_num}")
|
||||
if meta.chapter_title:
|
||||
full_title += f": {meta.chapter_title}"
|
||||
|
||||
book = epub.EpubBook()
|
||||
book.set_identifier(f"manga-{title}-{chapter}".replace(" ", "-"))
|
||||
book.set_title(f"{title} — {chapter}" if chapter else title)
|
||||
book.set_language("ru")
|
||||
book.set_identifier(
|
||||
f"manga-{meta.series}-v{meta.volume}-ch{meta.number}".replace(" ", "-")
|
||||
)
|
||||
book.set_title(full_title)
|
||||
book.set_language(meta.language)
|
||||
|
||||
# Dublin Core — серия как subject
|
||||
if meta.series:
|
||||
book.add_metadata("DC", "subject", meta.series)
|
||||
if meta.summary:
|
||||
book.add_metadata("DC", "description", meta.summary)
|
||||
elif meta.series_full:
|
||||
book.add_metadata("DC", "description", meta.series_full)
|
||||
if meta.source_url:
|
||||
book.add_metadata("DC", "source", meta.source_url)
|
||||
|
||||
# Calibre-совместимые метаданные серии (читает Komga и большинство читалок)
|
||||
book.add_metadata(None, "meta", "", {
|
||||
"name": "calibre:series",
|
||||
"content": meta.series,
|
||||
})
|
||||
book.add_metadata(None, "meta", "", {
|
||||
"name": "calibre:series_index",
|
||||
"content": str(float(meta.number)),
|
||||
})
|
||||
|
||||
# EPUB3 belongs-to-collection (официальный стандарт, Komga ≥ 0.157)
|
||||
book.add_metadata(None, "meta", meta.series, {
|
||||
"property": "belongs-to-collection",
|
||||
"id": "series-id",
|
||||
})
|
||||
book.add_metadata(None, "meta", "series", {
|
||||
"refines": "#series-id",
|
||||
"property": "collection-type",
|
||||
})
|
||||
book.add_metadata(None, "meta", str(float(meta.number)), {
|
||||
"refines": "#series-id",
|
||||
"property": "group-position",
|
||||
})
|
||||
|
||||
# Если серия завершена — указываем общее количество томов
|
||||
if meta.pub_status == "completed" and meta.chapters_total:
|
||||
book.add_metadata("DC", "relation",
|
||||
f"chapters_total:{meta.chapters_total}")
|
||||
|
||||
spine = ["nav"]
|
||||
toc = []
|
||||
|
||||
for i, img_path in enumerate(images):
|
||||
# Добавляем изображение в книгу
|
||||
with open(img_path, "rb") as f:
|
||||
img_data = f.read()
|
||||
|
||||
img_data = img_path.read_bytes()
|
||||
img_name = f"images/page_{i:04d}{img_path.suffix}"
|
||||
|
||||
epub_img = epub.EpubImage()
|
||||
epub_img.file_name = img_name
|
||||
epub_img.media_type = _mime(img_path.suffix)
|
||||
epub_img.content = img_data
|
||||
book.add_item(epub_img)
|
||||
|
||||
# HTML-страница для каждого изображения
|
||||
page_html = epub.EpubHtml(
|
||||
title=f"Страница {i + 1}",
|
||||
file_name=f"page_{i:04d}.xhtml",
|
||||
lang="ru",
|
||||
lang=meta.language,
|
||||
)
|
||||
page_html.content = (
|
||||
f'<html><body style="margin:0;padding:0;">'
|
||||
@@ -125,3 +299,110 @@ def _mime(ext: str) -> str:
|
||||
".webp": "image/webp",
|
||||
}.get(ext.lower(), "image/jpeg")
|
||||
|
||||
|
||||
# ── Обновление метаданных в существующих файлах ──
|
||||
|
||||
def patch_meta(file_path: Path, meta: MangaMeta) -> bool:
    """Update metadata inside an already exported file, without re-downloading.

    The handler is chosen by the lower-cased file extension
    (``.cbz`` / ``.pdf`` / ``.epub``).

    Returns:
        True on success; False for an unknown extension or when the
        handler raised (the error is logged, not propagated).
    """
    suffix = file_path.suffix.lower()
    try:
        if suffix == ".cbz":
            _patch_cbz_meta(file_path, meta)
        elif suffix == ".pdf":
            _patch_pdf_meta(file_path, meta)
        elif suffix == ".epub":
            _patch_epub_meta(file_path, meta)
        else:
            logger.warning("patch_meta: неизвестный формат {}", suffix)
            return False
        return True
    except Exception as e:
        logger.error("patch_meta {}: {}", file_path.name, e)
        return False
|
||||
|
||||
|
||||
def _patch_cbz_meta(cbz_path: Path, meta: MangaMeta):
    """Replace (or add) ComicInfo.xml in an existing CBZ.

    The archive is rebuilt into a sibling ``*.tmp.cbz`` file which then
    replaces the original. On any error the temp file is removed and the
    exception is re-raised.
    """
    import shutil

    tmp = cbz_path.with_suffix(".tmp.cbz")
    try:
        with zipfile.ZipFile(cbz_path, "r") as zin, \
                zipfile.ZipFile(tmp, "w", compression=zipfile.ZIP_DEFLATED) as zout:
            # Fresh ComicInfo.xml goes first.
            zout.writestr("ComicInfo.xml", _make_comic_info(meta))
            # Copy everything else, skipping a root-level ComicInfo.xml
            # (compared case-insensitively).
            for item in zin.infolist():
                if item.filename.lower() != "comicinfo.xml":
                    # Read via the ZipInfo so the exact entry is copied even
                    # if the archive contains duplicate names.
                    zout.writestr(item, zin.read(item))
        shutil.move(str(tmp), str(cbz_path))
    except Exception:
        tmp.unlink(missing_ok=True)
        raise
|
||||
|
||||
|
||||
def _patch_epub_meta(epub_path: Path, meta: MangaMeta):
    """Update the OPF metadata inside an existing EPUB.

    The OPF path is read from ``META-INF/container.xml``. Every entry is
    copied into a sibling ``*.tmp.epub`` in its original order, with the
    OPF passed through ``_inject_opf_meta``; the temp file then replaces
    the original. If no OPF path is found the archive is copied unchanged.
    On any error the temp file is removed and the exception is re-raised.
    """
    import shutil
    import re as _re

    tmp = epub_path.with_suffix(".tmp.epub")
    try:
        with zipfile.ZipFile(epub_path, "r") as zin, \
                zipfile.ZipFile(tmp, "w", compression=zipfile.ZIP_DEFLATED) as zout:

            # Locate the OPF file inside the EPUB.
            opf_path = None
            if "META-INF/container.xml" in zin.namelist():
                container_xml = zin.read("META-INF/container.xml").decode("utf-8")
                m = _re.search(r'full-path=["\']([^"\']+\.opf)["\']', container_xml)
                if m:
                    opf_path = m.group(1)

            for item in zin.infolist():
                data = zin.read(item)
                if opf_path and item.filename == opf_path:
                    data = _inject_opf_meta(data.decode("utf-8"), meta).encode("utf-8")
                # Passing the original ZipInfo reuses that entry's own settings.
                zout.writestr(item, data)

        shutil.move(str(tmp), str(epub_path))
    except Exception:
        tmp.unlink(missing_ok=True)
        raise
|
||||
|
||||
|
||||
def _inject_opf_meta(opf: str, meta: MangaMeta) -> str:
    """Insert or replace the series metadata tags in an OPF document.

    Existing ``calibre:series*``, ``belongs-to-collection``,
    ``collection-type``, ``group-position`` and ``refines="#series-id"``
    ``<meta>`` tags are removed, then fresh ones are inserted right before
    ``</metadata>``. If the document has no ``</metadata>`` tag nothing is
    inserted and a warning is logged.
    """
    import re as _re

    # Match the WHOLE element, either self-closing (<meta .../>) or with a
    # text body (<meta ...>text</meta>). The previous pattern removed only
    # the opening tag of the second form, leaving "text</meta>" behind, and
    # failed on attribute values containing "/". The body is limited to
    # [^<]* so a match can never run into a following element.
    stale_meta = _re.compile(
        r'<meta\b[^>]*?'
        r'(?:calibre:series|belongs-to-collection|collection-type|group-position'
        r'|refines=["\']#series-id["\'])'
        r'[^>]*?(?:/>|>[^<]*</meta\s*>)',
        _re.IGNORECASE,
    )
    opf = stale_meta.sub('', opf)

    new_meta = (
        f'\n <meta name="calibre:series" content="{_xe(meta.series)}"/>'
        f'\n <meta name="calibre:series_index" content="{float(meta.number)}"/>'
        f'\n <meta property="belongs-to-collection" id="series-id">{_xe(meta.series)}</meta>'
        f'\n <meta refines="#series-id" property="collection-type">series</meta>'
        f'\n <meta refines="#series-id" property="group-position">{float(meta.number)}</meta>'
    )
    if "</metadata>" not in opf:
        logger.warning("OPF без </metadata> — метаданные серии не добавлены")
        return opf
    return opf.replace("</metadata>", new_meta + "\n </metadata>")
|
||||
|
||||
|
||||
|
||||
383
src/scraper.py
383
src/scraper.py
@@ -3,6 +3,7 @@
|
||||
"""
|
||||
import asyncio
|
||||
import re
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
@@ -30,6 +31,11 @@ class MangaInfo:
|
||||
title: str
|
||||
url: str
|
||||
chapters: list[Chapter] = field(default_factory=list)
|
||||
pub_status: str = "unknown" # completed / ongoing / unknown
|
||||
title_ru: str = "" # Только русский тайтл (для папки)
|
||||
title_full: str = "" # Полный тайтл как на странице
|
||||
description: str = "" # Описание/синопсис
|
||||
genres: list[str] = field(default_factory=list) # Жанры
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
@@ -43,9 +49,21 @@ async def get_manga_info(page: Page, url: str) -> Optional[MangaInfo]:
|
||||
if not ok:
|
||||
return None
|
||||
|
||||
title = await page.title()
|
||||
title = re.sub(r"\s*[-–|].*$", "", title).strip()
|
||||
logger.info("Манга: {}", title)
|
||||
title_full = await page.title()
|
||||
title_full = re.sub(r"\s*[-–|].*$", "", title_full).strip()
|
||||
|
||||
# Пробуем взять русский тайтл напрямую из DOM
|
||||
title_ru = await _extract_ru_title_from_dom(page)
|
||||
if not title_ru:
|
||||
title_ru = _parse_ru_title(title_full)
|
||||
|
||||
logger.info("Манга: {} | ru: {}", title_full, title_ru)
|
||||
|
||||
pub_status = await _extract_pub_status(page)
|
||||
logger.info("Статус выпуска: {}", pub_status)
|
||||
|
||||
description = await _extract_description(page)
|
||||
genres = await _extract_genres(page)
|
||||
|
||||
await _expand_chapters(page)
|
||||
chapters = await _extract_chapters(page)
|
||||
@@ -53,7 +71,162 @@ async def get_manga_info(page: Page, url: str) -> Optional[MangaInfo]:
|
||||
chapters = await _extract_chapters_alt(page)
|
||||
|
||||
logger.info("Найдено глав: {}", len(chapters))
|
||||
return MangaInfo(title=title, url=url, chapters=chapters)
|
||||
return MangaInfo(
|
||||
title=title_ru or title_full,
|
||||
url=url,
|
||||
chapters=chapters,
|
||||
pub_status=pub_status,
|
||||
title_ru=title_ru,
|
||||
title_full=title_full,
|
||||
description=description,
|
||||
genres=genres,
|
||||
)
|
||||
|
||||
|
||||
async def _extract_ru_title_from_dom(page: Page) -> str:
    """Return the text of the first matching title element on the page.

    A fixed list of CSS selectors is tried in order inside the page.
    Returns an empty string when nothing matches or evaluation fails.
    """
    try:
        result = await page.evaluate("""
            () => {
                // readmanga: основной тайтл в span.name внутри .names
                const selectors = [
                    '.names .name',
                    'h1.manga-title',
                    'h1 .name',
                    '.name-block .name',
                ];
                for (const sel of selectors) {
                    const el = document.querySelector(sel);
                    if (el && el.textContent.trim()) return el.textContent.trim();
                }
                return '';
            }
        """)
        return (result or "").strip()
    except Exception as e:
        # Best-effort: the caller falls back to parsing the page title.
        logger.debug("Не удалось извлечь русский тайтл из DOM: {}", e)
        return ""
|
||||
|
||||
|
||||
def _parse_ru_title(full_title: str) -> str:
|
||||
"""Извлекает русский тайтл из полной строки тайтла.
|
||||
|
||||
Примеры:
|
||||
'Манга Режим — АД. Хардкорный геймер ... (Hellmode)' → 'Режим — АД. Хардкорный геймер ...'
|
||||
'Манга Магическая битва (Sorcery Fight) Гэгэ онлайн' → 'Магическая битва'
|
||||
'Авантюрист Monster Eater Adventurer' → 'Авантюрист'
|
||||
"""
|
||||
t = full_title.strip()
|
||||
# Убираем префикс "Манга "
|
||||
t = re.sub(r'^Манга\s+', '', t).strip()
|
||||
# Берём только до первой скобки (начало английского тайтла)
|
||||
t = re.split(r'\s*[\(\[]', t)[0].strip()
|
||||
# Убираем суффикс " онлайн"
|
||||
t = re.sub(r'\s+онлайн\s*$', '', t, flags=re.IGNORECASE).strip()
|
||||
|
||||
# Обрезаем хвост из латинских слов.
|
||||
# Правило: стоп только на токене содержащем латиницу (a-zA-Z).
|
||||
# Пунктуация между кириллическими словами (—, –, ., :, !) — сохраняем.
|
||||
words = t.split()
|
||||
result = []
|
||||
for w in words:
|
||||
if re.search(r'[а-яёА-ЯЁ]', w):
|
||||
result.append(w)
|
||||
elif re.search(r'[a-zA-Z]', w):
|
||||
# Первое латинское слово после кириллических — обрезаем здесь
|
||||
if result:
|
||||
break
|
||||
else:
|
||||
# Чисто пунктуационный токен (—, –, ., :, …)
|
||||
# Добавляем только если уже есть кириллические слова (связка внутри)
|
||||
if result:
|
||||
result.append(w)
|
||||
|
||||
# Убираем висячую пунктуацию в конце (если последнее слово — не кириллица)
|
||||
while result and not re.search(r'[а-яёА-ЯЁ]', result[-1]):
|
||||
result.pop()
|
||||
|
||||
if result:
|
||||
t = ' '.join(result)
|
||||
return t
|
||||
|
||||
|
||||
async def _extract_pub_status(page: Page) -> str:
    """Return the publication status: ``completed`` / ``ongoing`` / ``unknown``.

    Status elements are checked first via a list of selectors; if none
    yields a recognised value, the whole body text is scanned for known
    phrases. Returns ``"unknown"`` when evaluation fails.
    """
    try:
        result = await page.evaluate("""
            () => {
                // readmanga хранит статус в .elem_status .value или похожих блоках
                const statusSelectors = [
                    '.elem_status .value',
                    '.manga-info .status',
                    '[class*="status"] .value',
                    '.property .status',
                ];
                for (const sel of statusSelectors) {
                    const el = document.querySelector(sel);
                    if (el) {
                        const t = el.textContent.toLowerCase();
                        if (t.includes('завершён') || t.includes('завершен') || t.includes('complete')) return 'completed';
                        if (t.includes('продолжает') || t.includes('ongoing')) return 'ongoing';
                    }
                }
                // Fallback: сканируем весь текст страницы
                const bodyText = document.body ? document.body.innerText.toLowerCase() : '';
                if (bodyText.includes('выпуск завершён') || bodyText.includes('выпуск завершен')) return 'completed';
                if (bodyText.includes('продолжается')) return 'ongoing';
                return 'unknown';
            }
        """)
        return result or "unknown"
    except Exception as e:
        # Best-effort: status is optional metadata.
        logger.debug("Не удалось определить статус выпуска: {}", e)
        return "unknown"
|
||||
|
||||
|
||||
async def _extract_description(page: Page) -> str:
    """Return the manga description / synopsis, truncated to 2000 characters.

    A fixed list of CSS selectors is tried in order inside the page.
    Returns an empty string when nothing matches or evaluation fails.
    """
    try:
        result = await page.evaluate("""
            () => {
                const selectors = [
                    '.manga-description',
                    '.elem_descr .value',
                    '#tab-description .description-text',
                    '.description',
                    '[itemprop="description"]',
                ];
                for (const sel of selectors) {
                    const el = document.querySelector(sel);
                    if (el && el.textContent.trim()) return el.textContent.trim();
                }
                return '';
            }
        """)
        return (result or "").strip()[:2000]  # cap at 2000 characters
    except Exception as e:
        # Best-effort: description is optional metadata.
        logger.debug("Не удалось извлечь описание: {}", e)
        return ""
|
||||
|
||||
|
||||
async def _extract_genres(page: Page) -> list[str]:
    """Return the list of genre names found on the page.

    The first selector that matches any element wins; blank entries are
    dropped. Returns an empty list when nothing matches or evaluation fails.
    """
    try:
        result = await page.evaluate("""
            () => {
                const selectors = [
                    '.elem_genre .value a',
                    '.genres a',
                    '[itemprop="genre"]',
                    '.genre-list a',
                ];
                for (const sel of selectors) {
                    const els = document.querySelectorAll(sel);
                    if (els.length) return Array.from(els).map(e => e.textContent.trim()).filter(Boolean);
                }
                return [];
            }
        """)
        return result or []
    except Exception as e:
        # Best-effort: genres are optional metadata.
        logger.debug("Не удалось извлечь жанры: {}", e)
        return []
|
||||
|
||||
|
||||
async def _navigate(page: Page, url: str, retries: int = 3,
|
||||
@@ -218,6 +391,7 @@ async def get_chapter_images_and_download(
|
||||
chapter_url: str,
|
||||
dest_dir: Path,
|
||||
manga_url: str | None = None,
|
||||
on_page: object = None,
|
||||
) -> list[Path]:
|
||||
"""
|
||||
1. Открывает страницу главы (устанавливает DDoS-Guard cookies для CDN).
|
||||
@@ -225,8 +399,11 @@ async def get_chapter_images_and_download(
|
||||
3. Перехватывает img-запросы через page.route() + route.fetch()
|
||||
(браузерный стек — правильные Sec-Fetch-* заголовки, cookies).
|
||||
4. Пролистывает читалку клавишей ArrowRight чтобы загрузить все страницы.
|
||||
5. Retry для страниц с timeout через JS fetch.
|
||||
"""
|
||||
logger.info("Загружаем главу: {}", chapter_url)
|
||||
t_start = time.monotonic()
|
||||
ch_id = chapter_url.split("/")[-1] # короткий идентификатор для логов
|
||||
logger.info("[{}] Загружаем главу: {}", ch_id, chapter_url)
|
||||
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(chapter_url)
|
||||
@@ -240,22 +417,20 @@ async def get_chapter_images_and_download(
|
||||
def _base(u: str) -> str:
|
||||
return u.split("?")[0]
|
||||
|
||||
# CDN домены которые хостят изображения манги (не статику сайта)
|
||||
CDN_RE = re.compile(r"(?<!\bstatic\b)(^|[./])one-way\.work|staticfa\.|cdnmanga|reimg", re.I)
|
||||
IMG_RE = re.compile(r"\.(jpg|jpeg|png|webp)(\?|$)", re.I)
|
||||
# Баннеры/рекламные изображения — игнорируем без логирования
|
||||
BANNER_RE = re.compile(r"466_p\.|570_p\.|banner|advert", re.I)
|
||||
|
||||
# Более точный фильтр: только image-хосты, не resrmr/статика
|
||||
def _is_manga_image(url: str) -> bool:
|
||||
base = _base(url)
|
||||
if not IMG_RE.search(base):
|
||||
if not re.search(r"\.(jpg|jpeg|png|webp)(\?|$)", base, re.I):
|
||||
return False
|
||||
# Исключаем статику сайта (логотипы, иконки, шрифты)
|
||||
if "resrmr." in url or "/static/" in url:
|
||||
return False
|
||||
# Принимаем image CDN
|
||||
return bool(re.search(r"one-way\.work|staticfa\.|rm\.one-way|cdnmanga|reimg", url, re.I))
|
||||
|
||||
captured: dict[str, bytes] = {} # base_url → bytes
|
||||
captured: dict[str, bytes] = {} # base_url → bytes
|
||||
route_errors: dict[str, str] = {} # base_url → текст ошибки
|
||||
route_statuses: dict[str, int] = {} # base_url → HTTP status (не 200/206)
|
||||
lock = asyncio.Lock()
|
||||
|
||||
async def route_handler(route, request):
|
||||
@@ -264,23 +439,47 @@ async def get_chapter_images_and_download(
|
||||
if not _is_manga_image(url):
|
||||
await route.continue_()
|
||||
return
|
||||
# Уже есть — пропускаем
|
||||
if BANNER_RE.search(base):
|
||||
await route.continue_()
|
||||
return
|
||||
async with lock:
|
||||
already = base in captured
|
||||
if already:
|
||||
await route.continue_()
|
||||
return
|
||||
fname = base.split("/")[-1]
|
||||
try:
|
||||
response = await route.fetch()
|
||||
status = response.status
|
||||
body = await response.body()
|
||||
if body and len(body) > 500 and response.status in (200, 206):
|
||||
if body and len(body) > 500 and status in (200, 206):
|
||||
async with lock:
|
||||
if base not in captured:
|
||||
captured[base] = body
|
||||
logger.debug("✓ {}: {} байт", base.split("/")[-1], len(body))
|
||||
logger.debug("[{}] ✓ {}: {} байт", ch_id, fname, len(body))
|
||||
if on_page:
|
||||
try:
|
||||
asyncio.ensure_future(on_page(0, 0))
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
async with lock:
|
||||
route_statuses[base] = status
|
||||
if status not in (200, 206):
|
||||
logger.warning("[{}] CDN HTTP {} для '{}' | {}",
|
||||
ch_id, status, fname, base[-70:])
|
||||
else:
|
||||
logger.warning("[{}] Слишком мал ответ ({} байт) для '{}'",
|
||||
ch_id, len(body), fname)
|
||||
await route.fulfill(response=response)
|
||||
except Exception as e:
|
||||
logger.debug("route.fetch {}: {}", base[-40:], e)
|
||||
err = str(e)
|
||||
async with lock:
|
||||
route_errors[base] = err
|
||||
is_timeout = "timeout" in err.lower()
|
||||
level = logger.warning if is_timeout else logger.warning
|
||||
level("[{}] route.fetch {} '{}': {}",
|
||||
ch_id, "timeout" if is_timeout else "ошибка", fname, err[:150])
|
||||
try:
|
||||
await route.continue_()
|
||||
except Exception:
|
||||
@@ -292,7 +491,7 @@ async def get_chapter_images_and_download(
|
||||
ok = await _navigate(page, load_url, referer=referer)
|
||||
if not ok:
|
||||
await page.unroute("**/*", route_handler)
|
||||
logger.error("Не удалось открыть главу: {}", chapter_url)
|
||||
logger.error("[{}] Не удалось открыть главу после всех retry: {}", ch_id, chapter_url)
|
||||
return []
|
||||
|
||||
# 2. Ждём readerInit
|
||||
@@ -302,63 +501,165 @@ async def get_chapter_images_and_download(
|
||||
".some(s => s.textContent.includes('readerInit'))",
|
||||
timeout=15_000,
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("readerInit не появился за 15с")
|
||||
except Exception as e:
|
||||
logger.warning("[{}] readerInit не появился за 15с ({}). "
|
||||
"Продолжаем через DOM-fallback.", ch_id, str(e)[:80])
|
||||
|
||||
# 3. Извлекаем список URL
|
||||
image_urls = await _extract_images_from_js(page)
|
||||
if not image_urls:
|
||||
logger.debug("[{}] JS readerInit не дал URL, пробуем DOM-парсинг", ch_id)
|
||||
image_urls = await _extract_images_from_dom(page)
|
||||
if not image_urls:
|
||||
await page.unroute("**/*", route_handler)
|
||||
logger.error("Список изображений пуст: {}", chapter_url)
|
||||
try:
|
||||
page_info = await page.evaluate("() => document.title + ' | ' + location.href")
|
||||
except Exception:
|
||||
page_info = "?"
|
||||
logger.error("[{}] Список изображений пуст. Текущая страница: {}", ch_id, page_info)
|
||||
return []
|
||||
|
||||
logger.info("Найдено изображений: {}", len(image_urls))
|
||||
logger.info("[{}] Найдено изображений: {}", ch_id, len(image_urls))
|
||||
url_to_idx = {_base(u): i for i, u in enumerate(image_urls)}
|
||||
filename_to_idx = {_base(u).split("/")[-1]: i for i, u in enumerate(image_urls)}
|
||||
total = len(image_urls)
|
||||
|
||||
# 4. Пролистываем читалку — reader грузит страницы по мере листания
|
||||
def _count_matched() -> int:
|
||||
count = 0
|
||||
for base_url in captured:
|
||||
if base_url in url_to_idx or base_url.split("/")[-1] in filename_to_idx:
|
||||
count += 1
|
||||
return count
|
||||
|
||||
# 4. Пролистываем читалку
|
||||
await asyncio.sleep(1)
|
||||
for i in range(total + 10):
|
||||
async with lock:
|
||||
done = len(captured)
|
||||
stall_count = 0
|
||||
prev_done = -1
|
||||
for i in range(total + 20):
|
||||
done = _count_matched()
|
||||
if done >= total:
|
||||
break
|
||||
try:
|
||||
await page.keyboard.press("ArrowRight")
|
||||
await asyncio.sleep(0.5)
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
logger.warning("[{}] Ошибка листания на шаге {}: {}", ch_id, i + 1, e)
|
||||
break
|
||||
if i % 20 == 19:
|
||||
async with lock:
|
||||
done = len(captured)
|
||||
logger.debug("Пролистано {}, загружено: {}/{}", i + 1, done, total)
|
||||
done = _count_matched()
|
||||
logger.debug("[{}] Пролистано {}, загружено: {}/{}", ch_id, i + 1, done, total)
|
||||
if done == prev_done:
|
||||
stall_count += 1
|
||||
if stall_count >= 3:
|
||||
logger.warning("[{}] Прогресс завис ({}/{}) после {} листаний — прерываем",
|
||||
ch_id, done, total, i + 1)
|
||||
break
|
||||
else:
|
||||
stall_count = 0
|
||||
prev_done = done
|
||||
|
||||
# Финальное ожидание
|
||||
await asyncio.sleep(3)
|
||||
|
||||
# 5. Retry для страниц с timeout через браузерный JS fetch
|
||||
async with lock:
|
||||
timeout_bases = [u for u, e in route_errors.items()
|
||||
if "timeout" in e.lower() and u not in captured]
|
||||
if timeout_bases:
|
||||
logger.info("[{}] Retry {} страниц с timeout через JS fetch...",
|
||||
ch_id, len(timeout_bases))
|
||||
for retry_base in timeout_bases:
|
||||
if retry_base in captured:
|
||||
continue
|
||||
fname = retry_base.split("/")[-1]
|
||||
try:
|
||||
data_b64 = await page.evaluate("""async (url) => {
|
||||
try {
|
||||
const r = await fetch(url, {credentials: 'include'});
|
||||
if (!r.ok) return null;
|
||||
const buf = await r.arrayBuffer();
|
||||
const bytes = new Uint8Array(buf);
|
||||
let bin = '';
|
||||
for (let b of bytes) bin += String.fromCharCode(b);
|
||||
return btoa(bin);
|
||||
} catch(e) { return null; }
|
||||
}""", retry_base)
|
||||
if data_b64:
|
||||
import base64
|
||||
body = base64.b64decode(data_b64)
|
||||
if len(body) > 500:
|
||||
async with lock:
|
||||
captured[retry_base] = body
|
||||
logger.info("[{}] Retry OK: {} ({} байт)", ch_id, fname, len(body))
|
||||
else:
|
||||
logger.warning("[{}] Retry вернул {} байт для '{}' — игнорируем",
|
||||
ch_id, len(body), fname)
|
||||
else:
|
||||
logger.warning("[{}] Retry вернул null для '{}' | {}",
|
||||
ch_id, fname, retry_base[-70:])
|
||||
except Exception as e2:
|
||||
logger.warning("[{}] Retry JS ошибка для '{}': {}", ch_id, fname, e2)
|
||||
|
||||
await page.unroute("**/*", route_handler)
|
||||
|
||||
async with lock:
|
||||
done = len(captured)
|
||||
logger.info("Перехвачено: {}/{}", done, total)
|
||||
done = _count_matched()
|
||||
elapsed = time.monotonic() - t_start
|
||||
logger.info("[{}] Перехвачено: {}/{} за {:.1f}с", ch_id, done, total, elapsed)
|
||||
|
||||
# 6. Сохраняем в правильном порядке
|
||||
filename_to_idx = {_base(u).split("/")[-1]: i for i, u in enumerate(image_urls)}
|
||||
|
||||
# 5. Сохраняем в правильном порядке
|
||||
paths: dict[int, Path] = {}
|
||||
unmatched_other: list[str] = []
|
||||
for base_url, body in captured.items():
|
||||
if base_url not in url_to_idx:
|
||||
idx = url_to_idx.get(base_url)
|
||||
if idx is None:
|
||||
fname = base_url.split("/")[-1]
|
||||
idx = filename_to_idx.get(fname)
|
||||
if idx is None:
|
||||
if not BANNER_RE.search(base_url):
|
||||
unmatched_other.append(base_url.split("/")[-1])
|
||||
continue
|
||||
idx = url_to_idx[base_url]
|
||||
ext = _get_ext(base_url)
|
||||
p = dest_dir / f"{idx:04d}{ext}"
|
||||
p.write_bytes(body)
|
||||
paths[idx] = p
|
||||
|
||||
missing = total - len(paths)
|
||||
if missing:
|
||||
logger.warning("Не загружено страниц: {}", missing)
|
||||
if unmatched_other:
|
||||
logger.debug("[{}] Перехвачено, но не совпало с readerInit ({}): {}",
|
||||
ch_id, len(unmatched_other), unmatched_other)
|
||||
|
||||
# 7. Итоговый отчёт по пропущенным страницам
|
||||
missing_idxs = [i for i in range(total) if i not in paths]
|
||||
if missing_idxs:
|
||||
missing_files = [_base(image_urls[i]).split("/")[-1] for i in missing_idxs]
|
||||
missing_full = [_base(image_urls[i]) for i in missing_idxs]
|
||||
|
||||
timeout_miss = [missing_files[j] for j, i in enumerate(missing_idxs)
|
||||
if missing_full[j] in route_errors
|
||||
and "timeout" in route_errors[missing_full[j]].lower()]
|
||||
http_miss = [f"{missing_files[j]}(HTTP {route_statuses.get(missing_full[j], '?')})"
|
||||
for j, i in enumerate(missing_idxs)
|
||||
if missing_full[j] in route_statuses]
|
||||
unrcv = [missing_files[j] for j, i in enumerate(missing_idxs)
|
||||
if missing_full[j] not in route_errors
|
||||
and missing_full[j] not in route_statuses]
|
||||
|
||||
reasons = []
|
||||
if timeout_miss:
|
||||
reasons.append(f"timeout×{len(timeout_miss)}: {timeout_miss}")
|
||||
if http_miss:
|
||||
reasons.append(f"HTTP-err×{len(http_miss)}: {http_miss}")
|
||||
if unrcv:
|
||||
reasons.append(f"не_перехвачено×{len(unrcv)}: {unrcv}")
|
||||
|
||||
logger.warning(
|
||||
"[{}] Пропущено {}/{} стр. | №: {} | причины: {}",
|
||||
ch_id, len(missing_idxs), total,
|
||||
[i + 1 for i in missing_idxs],
|
||||
" | ".join(reasons) if reasons else "неизвестно",
|
||||
)
|
||||
logger.debug("[{}] Полные URL пропущенных: {}", ch_id, missing_full)
|
||||
|
||||
return [paths[i] for i in sorted(paths.keys())]
|
||||
|
||||
|
||||
|
||||
|
||||
215
src/state.py
215
src/state.py
@@ -13,10 +13,31 @@ DB_PATH = Path("/app/state/progress.db")
|
||||
class StateDB:
|
||||
def __init__(self, db_path: Path = DB_PATH):
    """Open the SQLite state database at *db_path* and ensure the schema exists.

    The parent directory is created when missing. Rows are returned as
    ``sqlite3.Row`` so callers can read columns by name.

    Args:
        db_path: Location of the database file.
    """
    db_path.parent.mkdir(parents=True, exist_ok=True)
    # Open exactly one connection. check_same_thread=False lets the
    # connection be used from a thread other than the one that created it.
    self.conn = sqlite3.connect(str(db_path), check_same_thread=False)
    self.conn.row_factory = sqlite3.Row
    self._init()
|
||||
|
||||
def _init(self):
|
||||
self.conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS mangas (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
url TEXT UNIQUE,
|
||||
title TEXT,
|
||||
title_ru TEXT,
|
||||
title_full TEXT,
|
||||
pub_status TEXT DEFAULT 'unknown',
|
||||
auto_update INTEGER DEFAULT 0,
|
||||
last_checked_at TEXT,
|
||||
status TEXT DEFAULT 'queued',
|
||||
format TEXT DEFAULT 'cbz',
|
||||
chapters_total INTEGER DEFAULT 0,
|
||||
chapters_done INTEGER DEFAULT 0,
|
||||
added_at TEXT,
|
||||
updated_at TEXT,
|
||||
started_at TEXT,
|
||||
finished_at TEXT
|
||||
)
|
||||
""")
|
||||
self.conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS chapters (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
@@ -26,14 +47,137 @@ class StateDB:
|
||||
number REAL,
|
||||
volume INTEGER,
|
||||
status TEXT DEFAULT 'pending',
|
||||
pages_total INTEGER DEFAULT 0,
|
||||
pages_done INTEGER DEFAULT 0,
|
||||
output_cbz TEXT,
|
||||
output_pdf TEXT,
|
||||
output_epub TEXT,
|
||||
updated_at TEXT
|
||||
)
|
||||
""")
|
||||
self.conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS history (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
manga_url TEXT NOT NULL,
|
||||
event_type TEXT NOT NULL,
|
||||
chapter_url TEXT,
|
||||
chapter_title TEXT,
|
||||
chapter_number REAL,
|
||||
volume INTEGER,
|
||||
details TEXT,
|
||||
created_at TEXT
|
||||
)
|
||||
""")
|
||||
# Migrate old DB: add missing columns
|
||||
migrations = [
|
||||
("chapters", "pages_total", "INTEGER DEFAULT 0"),
|
||||
("chapters", "pages_done", "INTEGER DEFAULT 0"),
|
||||
("mangas", "title_ru", "TEXT"),
|
||||
("mangas", "title_full", "TEXT"),
|
||||
("mangas", "pub_status", "TEXT DEFAULT 'unknown'"),
|
||||
("mangas", "auto_update", "INTEGER DEFAULT 0"),
|
||||
("mangas", "last_checked_at", "TEXT"),
|
||||
("mangas", "started_at", "TEXT"),
|
||||
("mangas", "finished_at", "TEXT"),
|
||||
]
|
||||
for table, col, typedef in migrations:
|
||||
try:
|
||||
self.conn.execute(f"ALTER TABLE {table} ADD COLUMN {col} {typedef}")
|
||||
except Exception:
|
||||
pass
|
||||
self.conn.commit()
|
||||
|
||||
# ── Mangas ────────────────────────────────────
|
||||
|
||||
def add_manga(self, url: str, fmt: str = "cbz") -> bool:
    """Add a manga to the download queue.

    Args:
        url: Manga page URL; used as the unique key of the ``mangas`` table.
        fmt: Requested output format stored in the ``format`` column.

    Returns:
        True if a new row was inserted, False if *url* was already present.
    """
    already_known = self.conn.execute(
        "SELECT id FROM mangas WHERE url=?", (url,)
    ).fetchone()
    if already_known:
        return False
    # Single timestamp so a fresh row has added_at == updated_at.
    ts = _now()
    self.conn.execute("""
        INSERT INTO mangas (url, format, status, added_at, updated_at)
        VALUES (?, ?, 'queued', ?, ?)
    """, (url, fmt, ts, ts))
    self.conn.commit()
    return True
|
||||
|
||||
def update_manga_info(self, url: str, title: str, chapters_total: int,
                      title_ru: str = "", title_full: str = "",
                      pub_status: str = "unknown"):
    """Store scraped metadata (titles, publication status, chapter count) for *url*.

    Also refreshes ``updated_at``. The change is committed immediately.
    """
    params = (title, title_ru, title_full, pub_status, chapters_total, _now(), url)
    self.conn.execute("""
        UPDATE mangas SET title=?, title_ru=?, title_full=?, pub_status=?,
        chapters_total=?, updated_at=? WHERE url=?
    """, params)
    self.conn.commit()
|
||||
|
||||
def set_auto_update(self, url: str, enabled: bool):
    """Switch the ``auto_update`` flag of the manga at *url* on or off.

    The flag is stored as 1/0; ``updated_at`` is refreshed and the change
    is committed immediately.
    """
    flag = int(bool(enabled))
    self.conn.execute("""
        UPDATE mangas SET auto_update=?, updated_at=? WHERE url=?
    """, (flag, _now(), url))
    self.conn.commit()
|
||||
|
||||
def set_last_checked(self, url: str):
    """Record that the manga at *url* has just been checked for updates.

    Sets ``last_checked_at`` and ``updated_at`` to the same timestamp and
    commits immediately.
    """
    # One timestamp for both columns so they cannot drift apart.
    ts = _now()
    self.conn.execute("""
        UPDATE mangas SET last_checked_at=?, updated_at=? WHERE url=?
    """, (ts, ts, url))
    self.conn.commit()
|
||||
|
||||
def update_manga_status(self, url: str, status: str):
    """Set the ``status`` column of the manga at *url* and refresh ``updated_at``.

    The change is committed immediately.
    """
    params = (status, _now(), url)
    self.conn.execute("""
        UPDATE mangas SET status=?, updated_at=? WHERE url=?
    """, params)
    self.conn.commit()
|
||||
|
||||
def mark_started(self, url: str) -> str:
    """Record the download start time for *url* and clear ``finished_at``.

    Returns:
        The timestamp written to ``started_at`` (and ``updated_at``).
    """
    started = _now()
    self.conn.execute("""
        UPDATE mangas SET started_at=?, finished_at=NULL, updated_at=? WHERE url=?
    """, (started, started, url))
    self.conn.commit()
    return started
|
||||
|
||||
def mark_finished(self, url: str) -> str:
    """Record the download end time for *url*.

    Returns:
        The timestamp written to ``finished_at`` (and ``updated_at``).
    """
    finished = _now()
    self.conn.execute("""
        UPDATE mangas SET finished_at=?, updated_at=? WHERE url=?
    """, (finished, finished, url))
    self.conn.commit()
    return finished
|
||||
|
||||
def sync_chapters_done(self, url: str):
    """Recompute ``mangas.chapters_done`` from the ``chapters`` table.

    Counts the chapters of *url* whose status is ``'done'``, stores that
    number on the manga row, commits, and returns the count.
    """
    row = self.conn.execute(
        "SELECT COUNT(*) FROM chapters WHERE manga_url=? AND status='done'", (url,)
    ).fetchone()
    done_total = row[0]
    self.conn.execute(
        "UPDATE mangas SET chapters_done=?, updated_at=? WHERE url=?",
        (done_total, _now(), url)
    )
    self.conn.commit()
    return done_total
|
||||
|
||||
def increment_manga_chapters_done(self, url: str):
    """Do nothing; kept only for backward compatibility.

    The worker no longer calls this method.
    """
    return None
|
||||
|
||||
def get_manga(self, url: str) -> Optional[dict]:
    """Return the ``mangas`` row for *url* as a dict, or None if it is unknown."""
    record = self.conn.execute(
        "SELECT * FROM mangas WHERE url=?", (url,)
    ).fetchone()
    if not record:
        return None
    return dict(record)
|
||||
|
||||
def get_all_mangas(self) -> list[dict]:
    """Return every ``mangas`` row as a dict, most recently added first."""
    rows = self.conn.execute(
        "SELECT * FROM mangas ORDER BY added_at DESC"
    ).fetchall()
    return list(map(dict, rows))
|
||||
|
||||
def get_manga_format(self, url: str) -> str:
    """Return the stored output format for *url*, or ``"cbz"`` if the manga is unknown."""
    record = self.conn.execute(
        "SELECT format FROM mangas WHERE url=?", (url,)
    ).fetchone()
    if not record:
        return "cbz"
    return record["format"]
|
||||
|
||||
# ── Chapters ──────────────────────────────────
|
||||
|
||||
def upsert_chapter(self, manga_url: str, chapter_url: str,
|
||||
title: str = "", number: float = 0, volume: int = 0):
|
||||
self.conn.execute("""
|
||||
@@ -46,6 +190,14 @@ class StateDB:
|
||||
""", (manga_url, chapter_url, title, number, volume, _now()))
|
||||
self.conn.commit()
|
||||
|
||||
def reset_chapter(self, chapter_url: str):
    """Return a chapter to the ``'pending'`` state.

    Zeroes the page counters, clears all three output paths (cbz/pdf/epub),
    refreshes ``updated_at`` and commits.
    """
    params = (_now(), chapter_url)
    self.conn.execute("""
        UPDATE chapters SET status='pending', pages_total=0, pages_done=0,
        output_cbz=NULL, output_pdf=NULL, output_epub=NULL, updated_at=?
        WHERE chapter_url=?
    """, params)
    self.conn.commit()
|
||||
|
||||
def mark_done(self, chapter_url: str, fmt: str, output_path: str):
|
||||
col = f"output_{fmt}"
|
||||
self.conn.execute(f"""
|
||||
@@ -60,6 +212,12 @@ class StateDB:
|
||||
""", (_now(), chapter_url))
|
||||
self.conn.commit()
|
||||
|
||||
def update_chapter_pages(self, chapter_url: str, pages_total: int, pages_done: int):
    """Store the page progress counters of a chapter and refresh ``updated_at``.

    The change is committed immediately.
    """
    params = (pages_total, pages_done, _now(), chapter_url)
    self.conn.execute("""
        UPDATE chapters SET pages_total=?, pages_done=?, updated_at=? WHERE chapter_url=?
    """, params)
    self.conn.commit()
|
||||
|
||||
def get_pending(self, manga_url: str) -> list[dict]:
|
||||
cur = self.conn.execute("""
|
||||
SELECT chapter_url, title, number, volume
|
||||
@@ -67,21 +225,64 @@ class StateDB:
|
||||
WHERE manga_url=? AND status != 'done'
|
||||
ORDER BY volume, number
|
||||
""", (manga_url,))
|
||||
cols = [d[0] for d in cur.description]
|
||||
return [dict(zip(cols, row)) for row in cur.fetchall()]
|
||||
return [dict(r) for r in cur.fetchall()]
|
||||
|
||||
def get_all_chapters(self, manga_url: str) -> list[dict]:
    """Return all chapters of *manga_url* as dicts, ordered by volume then number.

    Rows come back as ``sqlite3.Row`` (the connection's row factory), so
    each one converts directly with ``dict(row)``.
    """
    cur = self.conn.execute("""
        SELECT * FROM chapters WHERE manga_url=? ORDER BY volume, number
    """, (manga_url,))
    return [dict(r) for r in cur.fetchall()]
|
||||
|
||||
def chapter_status(self, chapter_url: str) -> Optional[str]:
    """Return the stored status of a chapter, or None if the chapter is unknown."""
    cur = self.conn.execute(
        "SELECT status FROM chapters WHERE chapter_url=?", (chapter_url,))
    row = cur.fetchone()
    # Read the column by name; rows are sqlite3.Row objects.
    return row["status"] if row else None
|
||||
|
||||
def get_all(self, manga_url: str) -> list[dict]:
    """Alias that forwards to ``get_all_chapters``."""
    chapters = self.get_all_chapters(manga_url)
    return chapters
|
||||
|
||||
# ── History ───────────────────────────────────
|
||||
|
||||
def add_history(self, manga_url: str, event_type: str,
                chapter_url: str = "", chapter_title: str = "",
                chapter_number: float = 0, volume: int = 0,
                details: str = ""):
    """Append one event to the ``history`` table and commit.

    event_type: downloaded | auto_downloaded | new_chapter_found |
    check_started | check_done
    """
    record = (
        manga_url, event_type, chapter_url, chapter_title, chapter_number,
        volume, details, _now(),
    )
    self.conn.execute("""
        INSERT INTO history
        (manga_url, event_type, chapter_url, chapter_title, chapter_number,
        volume, details, created_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    """, record)
    self.conn.commit()
|
||||
|
||||
def get_history(self, limit: int = 200, manga_url: str = "") -> list[dict]:
    """Return history events, newest first, joined with the manga titles.

    Args:
        limit: Value passed to the SQL ``LIMIT`` clause.
        manga_url: When non-empty, only events of this manga are returned.

    Returns:
        One dict per event with the history columns plus ``manga_title``
        and ``title_ru`` (None when no matching manga row exists).
    """
    if not manga_url:
        rows = self.conn.execute("""
            SELECT h.*, m.title as manga_title, m.title_ru
            FROM history h LEFT JOIN mangas m ON h.manga_url = m.url
            ORDER BY h.created_at DESC LIMIT ?
        """, (limit,)).fetchall()
        return [dict(row) for row in rows]

    rows = self.conn.execute("""
        SELECT h.*, m.title as manga_title, m.title_ru
        FROM history h LEFT JOIN mangas m ON h.manga_url = m.url
        WHERE h.manga_url=? ORDER BY h.created_at DESC LIMIT ?
    """, (manga_url, limit)).fetchall()
    return [dict(row) for row in rows]
|
||||
|
||||
def get_autos(self) -> list[dict]:
    """Return mangas with auto-update enabled that are not currently downloading."""
    rows = self.conn.execute("""
        SELECT * FROM mangas
        WHERE auto_update=1 AND status NOT IN ('downloading')
    """).fetchall()
    return list(map(dict, rows))
|
||||
|
||||
def close(self):
    """Close the underlying SQLite connection."""
    connection = self.conn
    connection.close()
|
||||
|
||||
380
src/worker.py
Normal file
380
src/worker.py
Normal file
@@ -0,0 +1,380 @@
|
||||
"""
|
||||
Воркер скачивания манги с поддержкой событий прогресса.
|
||||
"""
|
||||
import asyncio
|
||||
import os
|
||||
import re
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Callable, Optional
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from .browser import BrowserManager
|
||||
from .scraper import get_manga_info, get_chapter_images_and_download, Chapter
|
||||
from .exporter import export, MangaMeta
|
||||
from .state import StateDB
|
||||
|
||||
OUTPUT_DIR = Path("/app/output")
|
||||
|
||||
# Читаем из переменных окружения; можно переопределить в docker-compose
|
||||
CHAPTER_CONCURRENCY = int(os.getenv("CHAPTER_CONCURRENCY", "3"))
|
||||
|
||||
|
||||
def _safe_name(s: str) -> str:
|
||||
return re.sub(r'[^\w\s\-]', '', s).strip().replace(" ", "_")[:80]
|
||||
|
||||
|
||||
def _safe_chapter_name(ch: Chapter) -> str:
|
||||
vol = f"v{ch.volume:02d}_" if ch.volume else ""
|
||||
return f"{vol}ch{ch.number:06.1f}"
|
||||
|
||||
|
||||
async def download_manga(
    url: str,
    fmt: str = "cbz",
    output_dir: Path = OUTPUT_DIR,
    resume: bool = True,
    is_update: bool = False,
    on_event: Optional[Callable] = None,
    chapter_concurrency: int = CHAPTER_CONCURRENCY,
):
    """Download a manga, processing up to *chapter_concurrency* chapters at once.

    Args:
        url: Manga page URL (also the key of the manga in the state DB).
        fmt: Output format; ``"all"`` expands to cbz, pdf and epub.
        output_dir: Root directory; a per-manga folder is created inside it.
        resume: When true, chapters already marked ``done`` are skipped.
        is_update: When true, history records use ``auto_downloaded``
            instead of ``downloaded``.
        on_event: Optional async callback receiving progress event dicts.
            Errors raised by the callback are logged and ignored.
        chapter_concurrency: Maximum number of chapters handled in parallel;
            values below 1 are treated as 1.

    Raises:
        asyncio.CancelledError: Propagated unchanged when the task is cancelled.
    """

    async def emit(event: dict):
        if on_event:
            try:
                await on_event(event)
            except Exception as e:
                logger.debug("on_event error: {}", e)

    db = StateDB()
    db_lock = asyncio.Lock()  # serializes writes to SQLite from parallel chapter tasks

    async def db_call(fn, *args, **kwargs):
        """Run a DB method while holding the shared asyncio lock."""
        async with db_lock:
            return fn(*args, **kwargs)

    # Semaphore(0) would block forever and a negative value raises ValueError.
    concurrency = max(1, chapter_concurrency)

    try:
        await db_call(db.update_manga_status, url, "downloading")
        started_ts = await db_call(db.mark_started, url)
        await emit({"type": "manga_start", "url": url, "started_at": started_ts})

        async with BrowserManager(headless=True) as bm:
            ctx, info_page = await bm.new_page()

            manga = await get_manga_info(info_page, url)
            await info_page.close()

            if not manga:
                await db_call(db.update_manga_status, url, "failed")
                # Record the end time here too, as the generic failure path does.
                finished_ts = await db_call(db.mark_finished, url)
                await emit({"type": "manga_failed", "url": url,
                            "error": "Не удалось получить информацию о манге",
                            "finished_at": finished_ts})
                return

            await db_call(
                db.update_manga_info,
                url,
                title=manga.title_ru or manga.title,
                chapters_total=len(manga.chapters),
                title_ru=manga.title_ru,
                title_full=manga.title_full,
                pub_status=manga.pub_status,
            )
            await emit({
                "type": "manga_info",
                "url": url,
                "title": manga.title_ru or manga.title,
                "title_ru": manga.title_ru,
                "title_full": manga.title_full,
                "pub_status": manga.pub_status,
                "chapters_total": len(manga.chapters),
            })

            folder_name = _safe_name(manga.title_ru or manga.title)
            manga_dir = output_dir / folder_name
            manga_dir.mkdir(parents=True, exist_ok=True)

            for ch in manga.chapters:
                await db_call(db.upsert_chapter, url, ch.url, ch.title, ch.number, ch.volume)

            formats = ["cbz", "pdf", "epub"] if fmt == "all" else [fmt]

            # Split chapters into already-downloaded (skip) and to-download.
            to_skip = []
            to_download = []
            for ch in manga.chapters:
                if resume and (await db_call(db.chapter_status, ch.url)) == "done":
                    to_skip.append(ch)
                else:
                    to_download.append(ch)

            # Counter plus lock for updates coming from parallel tasks.
            counter_lock = asyncio.Lock()
            # Start from 0: the to_skip loop below brings it up to
            # len(to_skip); seeding it from the DB would double count.
            chapters_done = 0

            # Report the chapters that are skipped because they are already done.
            for ch in to_skip:
                chapters_done += 1
                await emit({
                    "type": "chapter_skipped",
                    "url": url,
                    "chapter_url": ch.url,
                    "chapter_number": ch.number,
                    "chapter_title": ch.title,
                    "volume": ch.volume,
                    "chapters_done": chapters_done,
                    "chapters_total": len(manga.chapters),
                })

            logger.info(
                "Параллельность: {} гл одновременно. Пропущено: {}, скачать: {}",
                concurrency, len(to_skip), len(to_download),
            )

            # The semaphore caps the number of chapters processed at once.
            sem = asyncio.Semaphore(concurrency)

            async def process_chapter(ch: Chapter) -> None:
                nonlocal chapters_done
                async with sem:
                    # Re-check: the status may have changed since the split above.
                    if (await db_call(db.chapter_status, ch.url)) == "done":
                        async with counter_lock:
                            chapters_done += 1
                            done_snap = chapters_done
                        await emit({
                            "type": "chapter_skipped",
                            "url": url,
                            "chapter_url": ch.url,
                            "chapter_number": ch.number,
                            "chapter_title": ch.title,
                            "volume": ch.volume,
                            "chapters_done": done_snap,
                            "chapters_total": len(manga.chapters),
                        })
                        return

                    await emit({
                        "type": "chapter_start",
                        "url": url,
                        "chapter_url": ch.url,
                        "chapter_title": ch.title,
                        "chapter_number": ch.number,
                        "volume": ch.volume,
                        "chapters_done": chapters_done,
                        "chapters_total": len(manga.chapters),
                    })

                    ch_page = await ctx.new_page()
                    try:
                        with tempfile.TemporaryDirectory() as tmpdir:
                            tmp_path = Path(tmpdir)
                            pages_done = 0

                            async def on_page(page_idx: int, pages_total: int):
                                nonlocal pages_done
                                pages_done += 1
                                await db_call(db.update_chapter_pages,
                                              ch.url, pages_total, pages_done)
                                await emit({
                                    "type": "page_done",
                                    "url": url,
                                    "chapter_url": ch.url,
                                    "page_idx": page_idx,
                                    "pages_done": pages_done,
                                    "pages_total": pages_total,
                                })

                            image_paths = await get_chapter_images_and_download(
                                ch_page, ch.url,
                                dest_dir=tmp_path,
                                manga_url=url,
                                on_page=on_page,
                            )

                            if not image_paths:
                                logger.error(
                                    "Т{} Гл.{} '{}' — get_chapter_images вернул пустой список. "
                                    "URL: {}",
                                    ch.volume, ch.number, ch.title, ch.url,
                                )
                                await db_call(db.mark_failed, ch.url)
                                await emit({"type": "chapter_failed", "url": url,
                                            "chapter_url": ch.url})
                                return

                            ch_name = _safe_chapter_name(ch)
                            ch_meta = MangaMeta(
                                series=manga.title_ru or manga.title,
                                series_full=manga.title_full or "",
                                chapter_title=ch.title,
                                number=ch.number,
                                volume=ch.volume,
                                chapters_total=len(manga.chapters),
                                pub_status=manga.pub_status,
                                source_url=url,
                                summary=manga.description,
                                genre=", ".join(manga.genres) if manga.genres else "",
                            )
                            exported = 0
                            for f in formats:
                                out_file = manga_dir / f"{ch_name}.{f}"
                                try:
                                    export(image_paths, out_file, f, meta=ch_meta)
                                    await db_call(db.mark_done, ch.url, f, str(out_file))
                                    exported += 1
                                except Exception as e:
                                    logger.exception(
                                        "Ошибка экспорта Т{} Гл.{} → {} | {}: {}",
                                        ch.volume, ch.number, f, out_file.name, e,
                                    )

                        if not exported:
                            # Every export failed, so nothing was written and the
                            # chapter was never marked done: report a failure
                            # instead of recording a download that did not happen.
                            await db_call(db.mark_failed, ch.url)
                            await emit({"type": "chapter_failed", "url": url,
                                        "chapter_url": ch.url,
                                        "error": "Не удалось экспортировать ни в один формат"})
                            return

                        event_type = "auto_downloaded" if is_update else "downloaded"
                        await db_call(
                            db.add_history,
                            manga_url=url,
                            event_type=event_type,
                            chapter_url=ch.url,
                            chapter_title=ch.title,
                            chapter_number=ch.number,
                            volume=ch.volume,
                        )

                        async with counter_lock:
                            chapters_done += 1
                            done_snap = chapters_done

                        await emit({
                            "type": "chapter_done",
                            "url": url,
                            "chapter_url": ch.url,
                            "chapter_title": ch.title,
                            "chapter_number": ch.number,
                            "volume": ch.volume,
                            "chapters_done": done_snap,
                            "chapters_total": len(manga.chapters),
                        })

                    except Exception as e:
                        logger.exception(
                            "Необработанное исключение в Т{} Гл.{} '{}' | {}: {}",
                            ch.volume, ch.number, ch.title, ch.url, e,
                        )
                        await db_call(db.mark_failed, ch.url)
                        await emit({"type": "chapter_failed", "url": url,
                                    "chapter_url": ch.url, "error": str(e)})
                    finally:
                        await ch_page.close()

            # Start every task at once; the semaphore throttles the parallelism.
            tasks = [process_chapter(ch) for ch in to_download]
            results = await asyncio.gather(*tasks, return_exceptions=True)

            # Log exceptions that escaped process_chapter.
            for ch, res in zip(to_download, results):
                if isinstance(res, Exception) and not isinstance(res, asyncio.CancelledError):
                    # There is no active exception here, so attach the captured
                    # one explicitly to keep its traceback in the log.
                    logger.opt(exception=res).error(
                        "gather: необработанное исключение Т{} Гл.{} '{}': {}",
                        ch.volume, ch.number, ch.title, res,
                    )

            real_done = await db_call(db.sync_chapters_done, url)
            await db_call(db.update_manga_status, url, "done")
            finished_ts = await db_call(db.mark_finished, url)
            await db_call(db.set_last_checked, url)
            await emit({
                "type": "manga_done",
                "url": url,
                "chapters_done": real_done,
                "chapters_total": len(manga.chapters),
                "finished_at": finished_ts,
            })
            await ctx.close()

    except asyncio.CancelledError:
        raise
    except Exception as e:
        logger.error("Manga worker error {}: {}", url, e)
        await db_call(db.update_manga_status, url, "failed")
        finished_ts = await db_call(db.mark_finished, url)
        await emit({"type": "manga_failed", "url": url, "error": str(e), "finished_at": finished_ts})
    finally:
        db.close()
|
||||
|
||||
|
||||
async def check_for_updates(
    url: str,
    on_event: Optional[Callable] = None,
) -> list[str]:
    """Check a manga for chapters that are not yet in the state DB.

    New chapters are inserted into the DB and recorded in the history;
    ``check_started``, ``new_chapter_found`` and ``check_done`` events are
    sent to *on_event*.

    Args:
        url: Manga page URL.
        on_event: Optional async callback receiving event dicts. Errors
            raised by the callback are logged and ignored.

    Returns:
        The URLs of the newly found chapters; an empty list when the manga
        info could not be fetched.
    """

    async def emit(event: dict):
        if on_event:
            try:
                await on_event(event)
            except Exception as e:
                # Best effort, but leave a trace like download_manga does.
                logger.debug("on_event error: {}", e)

    db = StateDB()
    try:
        db.set_last_checked(url)
        db.add_history(manga_url=url, event_type="check_started")
        await emit({"type": "check_started", "url": url})

        async with BrowserManager(headless=True) as bm:
            _, page = await bm.new_page()
            manga = await get_manga_info(page, url)
            await page.close()

        if not manga:
            logger.warning("check_for_updates: не удалось получить информацию о манге {}", url)
            return []

        # Refresh pub_status and the chapter count.
        db.update_manga_info(
            url,
            title=manga.title_ru or manga.title,
            chapters_total=len(manga.chapters),
            title_ru=manga.title_ru,
            title_full=manga.title_full,
            pub_status=manga.pub_status,
        )

        # Find the chapters that are not in the DB yet.
        known = {ch["chapter_url"] for ch in db.get_all_chapters(url)}
        new_chapters = [ch for ch in manga.chapters if ch.url not in known]

        for ch in new_chapters:
            db.upsert_chapter(url, ch.url, ch.title, ch.number, ch.volume)
            db.add_history(
                manga_url=url,
                event_type="new_chapter_found",
                chapter_url=ch.url,
                chapter_title=ch.title,
                chapter_number=ch.number,
                volume=ch.volume,
            )
            await emit({
                "type": "new_chapter_found",
                "url": url,
                "chapter_url": ch.url,
                "chapter_title": ch.title,
                "chapter_number": ch.number,
            })

        db.add_history(
            manga_url=url,
            event_type="check_done",
            details=f"Найдено новых: {len(new_chapters)}",
        )
        await emit({
            "type": "check_done",
            "url": url,
            "new_chapters": len(new_chapters),
        })

        return [ch.url for ch in new_chapters]
    finally:
        db.close()
|
||||
|
||||
Reference in New Issue
Block a user