diff --git a/frontend/index.html b/frontend/index.html
index 1a80227..4ba7fbb 100644
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -108,7 +108,7 @@
Добавить мангу
-
+
-
+
+
+
+
+
+
+ 🔗 Источник:
+
+
+
+
⚠ Домен не распознан. Выберите источник вручную:
+
@@ -132,6 +145,8 @@
class="px-4 py-3 text-sm font-semibold border-b-2 border-transparent text-gray-400 hover:text-white">🔔 Новости
+
@@ -177,6 +192,36 @@
Загрузка...
+
+
+
+
+
Источники
+
Источники определяются в коде приложения. Здесь можно управлять доменами для каждого источника.
+
+
+
+
+
+
+
+
+
+
↔ Сменить источник
+
+
+
+
+
+
@@ -281,6 +326,7 @@ const state = {
mangas: {}, // url → manga object
chapters: {}, // manga_url → [chapter, ...]
filter: 'all',
+ sources: [], // [{id, slug, display_name, domains}]
};
// ── Auth ─────────────────────────────────────
@@ -407,8 +453,12 @@ function handleEvent(msg) {
case 'manga_queued':
if(!state.mangas[msg.url]) {
+ const srcInfo = msg.source_id ? (state.sources.find(s => s.id === msg.source_id) || null) : null;
state.mangas[msg.url] = { url: msg.url, title: msg.url, status: 'queued', format: msg.format,
- chapters_total: 0, chapters_done: 0, size_human: '—' };
+ chapters_total: 0, chapters_done: 0, size_human: '—',
+ source: srcInfo ? {id: srcInfo.id, slug: srcInfo.slug, display_name: srcInfo.display_name} : null };
+ } else {
+ state.mangas[msg.url].status = 'queued';
}
renderList();
loadStats();
@@ -597,6 +647,25 @@ function handleEvent(msg) {
renderList();
loadStats();
break;
+
+ case 'source_unknown':
+ _showNotification('⚠ Источник не определён для ' + (state.mangas[msg.url]?.title || msg.url) + '. Выберите источник.', 'warn');
+ if(state.mangas[msg.url]) { state.mangas[msg.url].status = 'failed'; renderList(); }
+ break;
+
+ case 'source_domain_added':
+ case 'source_domain_removed':
+ loadSources();
+ break;
+
+ case 'source_switched':
+ if(state.mangas[msg.url]) {
+ // Обновляем source у манги из актуального списка источников
+ const newSrc = state.sources.find(s => s.id === msg.new_source_id);
+ if(newSrc) state.mangas[msg.url].source = {id: newSrc.id, slug: newSrc.slug, display_name: newSrc.display_name};
+ updateMangaRow(msg.url);
+ }
+ break;
}
}
@@ -604,7 +673,7 @@ function handleEvent(msg) {
let newsUnreadCount = 0;
function switchTab(tab) {
- ['mangas', 'news', 'history'].forEach(t => {
+ ['mangas', 'news', 'history', 'settings'].forEach(t => {
document.getElementById('tab-content-'+t).classList.toggle('hidden', t !== tab);
const btn = document.getElementById('tab-'+t);
btn.className = t === tab
@@ -614,6 +683,8 @@ function switchTab(tab) {
document.getElementById('manga-filters').classList.toggle('hidden', tab !== 'mangas');
if(tab === 'history') loadHistory();
if(tab === 'news') { newsUnreadCount = 0; updateNewsBadge(); loadNews(); }
+ // Settings tab lists sources/domains — reload them on every open so the view is never stale.
+ if(tab === 'settings') loadSources();
}
function updateNewsBadge() {
@@ -773,6 +844,66 @@ async function checkNowBtn(btn, url) {
}
}
+// ── Source detection ─────────────────────────
+let _resolveTimer = null;
+let _resolvedSourceId = null; // number = source matched by domain, undefined = unknown domain (manual choice required), null = nothing resolved (empty input or lookup failed)
+
+async function onUrlInputChange() {
+ clearTimeout(_resolveTimer);
+ _resolveTimer = setTimeout(_resolveSource, 400); // debounce: resolve 400 ms after the last change
+}
+
+// Resolves the source for the first URL in the textarea and syncs the hint + "add" button.
+// Result in _resolvedSourceId: numeric id (domain matched), undefined (unknown domain, manual
+// pick required) or null (empty input / lookup failed — the server resolves by domain itself).
+async function _resolveSource() {
+  const addBtn = document.getElementById('add-btn');
+  const hint = document.getElementById('source-hint');
+  const hintFound = document.getElementById('source-hint-found');
+  const hintUnknown = document.getElementById('source-hint-unknown');
+  const firstUrl = () => document.getElementById('url-input').value.split('\n').map(u => u.trim()).filter(Boolean)[0];
+  const giveUp = () => { hint.classList.add('hidden'); _resolvedSourceId = null; addBtn.disabled = false; };
+
+  const url = firstUrl();
+  if(!url) { giveUp(); return; }
+
+  try {
+    const r = await fetch('/api/resolve-source?url=' + encodeURIComponent(url));
+    if(!r.ok) throw new Error('HTTP ' + r.status); // an error body must not be read as "unknown domain"
+    const data = await r.json();
+    if(firstUrl() !== url) return; // input changed while the request was in flight — stale answer
+    hint.classList.remove('hidden');
+    if(data.source) {
+      hintFound.classList.remove('hidden');
+      hintUnknown.classList.add('hidden');
+      document.getElementById('source-hint-name').textContent = data.source.display_name;
+      _resolvedSourceId = data.source.id;
+      addBtn.disabled = false;
+      return;
+    }
+    hintFound.classList.add('hidden');
+    hintUnknown.classList.remove('hidden');
+    _resolvedSourceId = undefined; // unknown domain — a manual choice is required
+    // Fill the manual source picker
+    const sel = document.getElementById('source-manual-select');
+    sel.innerHTML = '';
+    (state.sources || []).forEach(s => {
+      const opt = document.createElement('option');
+      opt.value = s.id;
+      opt.textContent = s.display_name;
+      sel.appendChild(opt);
+    });
+    // The first option is pre-selected and "change" never fires for it, so sync the button now
+    // instead of leaving it disabled until the user picks a different entry.
+    const syncAddBtn = () => { addBtn.disabled = !sel.value; };
+    sel.onchange = syncAddBtn;
+    syncAddBtn();
+  } catch(e) {
+    console.warn('resolve-source failed:', e);
+    giveUp(); // best effort: hide the hint and let the server resolve the source on submit
+  }
+}
+
// ── API ──────────────────────────────────────
async function loadStats() {
try {
@@ -788,17 +919,35 @@ async function addToQueue() {
const urls = raw.split('\n').map(u=>u.trim()).filter(Boolean);
if(!urls.length) return;
+ // Determine source_id. Only a manually picked source is sent explicitly: the server treats
+ // an explicit source_id as "rebind this URL's domain", so forwarding the source auto-detected
+ // for the first URL would rebind the domains of every other URL in the batch to it.
+ // Auto-detected sources are left to the server, which resolves each URL by its own domain.
+ let sourceId = null;
+ if(_resolvedSourceId === undefined) {
+   const manualVal = document.getElementById('source-manual-select').value;
+   if(!manualVal) { alert('Выберите источник для добавления манги'); return; }
+   sourceId = parseInt(manualVal, 10);
+ }
+
try {
+ const body = {urls, format: fmt};
+ if(sourceId !== null) body.source_id = sourceId;
const r = await fetch('/api/queue', {
method:'POST',
headers:{'Content-Type':'application/json'},
- body: JSON.stringify({urls, format: fmt})
+ body: JSON.stringify(body)
});
const data = await r.json();
const msg = document.getElementById('add-msg');
msg.textContent = `✓ Добавлено: ${data.added.length}, уже есть: ${data.skipped.length}`;
msg.classList.remove('hidden');
- if(data.added.length) document.getElementById('url-input').value = '';
+ if(data.added.length) {
+ document.getElementById('url-input').value = '';
+ document.getElementById('source-hint').classList.add('hidden');
+ _resolvedSourceId = null;
+ document.getElementById('add-btn').disabled = false;
+ }
setTimeout(()=>msg.classList.add('hidden'), 4000);
} catch(e) {
alert('Ошибка: ' + e.message);
@@ -825,6 +974,193 @@ async function resumeManga(url) {
}
}
+// ── Sources ───────────────────────────────────
+// Fetches /api/sources into state.sources; re-renders the list only while the settings tab is visible.
+async function loadSources() {
+  try {
+    const r = await fetch('/api/sources');
+    if(!r.ok) throw new Error('HTTP ' + r.status);
+    state.sources = await r.json();
+    if(!document.getElementById('tab-content-settings').classList.contains('hidden')) renderSources();
+  } catch(e) {
+    console.warn('loadSources failed:', e); // keep the previous list, but do not fail silently
+  }
+}
+
+function renderSources() {
+ const container = document.getElementById('sources-list');
+ if(!container) return;
+ if(!state.sources.length) {
+ container.innerHTML = 'Нет доступных источников
';
+ return;
+ }
+ container.innerHTML = state.sources.map(s => `
+
+
+
+ ${escHtml(s.display_name)}
+ slug: ${escHtml(s.slug)}
+
+
+
+ ${s.domains.map(d => `
+
+ ${escHtml(d)}
+
+
+ `).join('')}
+
+
+
+
+
+ `).join('');
+}
+
+function showAddDomain(sourceId) {
+ const area = document.getElementById('add-domain-area-' + sourceId);
+ if(!area) return;
+ area.innerHTML = `
+
+
+
+
+
+ `;
+ setTimeout(() => document.getElementById('new-domain-input-' + sourceId)?.focus(), 50);
+}
+
+async function addDomain(sourceId) {
+ const input = document.getElementById('new-domain-input-' + sourceId);
+ if(!input) return;
+ const domain = input.value.trim().toLowerCase();
+ if(!domain) return;
+ try {
+ const r = await fetch(`/api/sources/${sourceId}/domains`, {
+ method: 'POST',
+ headers: {'Content-Type': 'application/json'},
+ body: JSON.stringify({domain}),
+ });
+ if(!r.ok) {
+ const err = await r.json();
+ _showNotification('Ошибка: ' + (err.detail || 'неизвестная ошибка'), 'error');
+ return;
+ }
+ await loadSources();
+ } catch(e) {
+ _showNotification('Ошибка: ' + e.message, 'error');
+ }
+}
+
+async function removeDomain(sourceId, domain) {
+ if(!confirm(`Удалить домен «${domain}»?`)) return;
+ try {
+ const r = await fetch(`/api/sources/${sourceId}/domains/${encodeURIComponent(domain)}`, {method: 'DELETE'});
+ if(!r.ok) {
+ const err = await r.json();
+ _showNotification('Ошибка: ' + (err.detail || 'неизвестная ошибка'), 'error');
+ return;
+ }
+ await loadSources();
+ } catch(e) {
+ _showNotification('Ошибка: ' + e.message, 'error');
+ }
+}
+
+// ── Switch Source Modal ───────────────────────
+let _switchSourceUrl = null;
+
+function openSwitchSourceModal(url) {
+ _switchSourceUrl = url;
+ const manga = state.mangas[url];
+ const modal = document.getElementById('switch-source-modal');
+ const sel = document.getElementById('switch-source-select');
+ const warning = document.getElementById('switch-source-warning');
+
+ document.getElementById('switch-source-current').textContent =
+ 'Текущий источник: ' + (manga?.source?.display_name || 'не определён');
+
+ sel.innerHTML = '';
+ state.sources.forEach(s => {
+ const opt = document.createElement('option');
+ opt.value = s.id;
+ opt.textContent = s.display_name;
+ if(manga?.source?.id === s.id) opt.selected = true;
+ sel.appendChild(opt);
+ });
+
+ try {
+ const domain = new URL(url).hostname.replace(/^www\./, '');
+ warning.textContent = `⚠ Домен «${domain}» будет перепривязан к выбранному источнику. Это затронет все манги с этого домена.`;
+ warning.classList.remove('hidden');
+ } catch(e) { warning.classList.add('hidden'); }
+
+ modal.classList.remove('hidden');
+ modal.classList.add('flex');
+}
+
+function closeSwitchSourceModal() { // forgets the pending manga URL and hides the modal
+  _switchSourceUrl = null;
+  const { classList } = document.getElementById('switch-source-modal');
+  classList.remove('flex');
+  classList.add('hidden');
+}
+
+async function confirmSwitchSource() {
+ const url = _switchSourceUrl;
+ const sourceId = parseInt(document.getElementById('switch-source-select').value);
+ if(!url || !sourceId) return;
+ try {
+ const r = await fetch('/api/mangas/switch-source', {
+ method: 'POST',
+ headers: {'Content-Type': 'application/json'},
+ body: JSON.stringify({url, source_id: sourceId}),
+ });
+ if(!r.ok) {
+ const err = await r.json();
+ _showNotification('Ошибка: ' + (err.detail || 'неизвестная ошибка'), 'error');
+ return;
+ }
+ const data = await r.json();
+ closeSwitchSourceModal();
+ _showNotification(
+ `✓ Источник изменён на «${data.source_name}»` +
+ (data.chapters_reset ? `. Сброшено глав: ${data.chapters_reset}` : ''), 'ok'
+ );
+ if(state.mangas[url]) {
+ const src = state.sources.find(s => s.id === sourceId);
+ if(src) state.mangas[url].source = {id: src.id, slug: src.slug, display_name: src.display_name};
+ updateMangaRow(url);
+ }
+ } catch(e) {
+ _showNotification('Ошибка: ' + e.message, 'error');
+ }
+}
+
+document.addEventListener('click', function(e) {
+ const modal = document.getElementById('switch-source-modal');
+ if(modal && !modal.classList.contains('hidden') && e.target === modal) closeSwitchSourceModal();
+});
+
+// ── Notification helper ───────────────────────
+function _showNotification(text, type='ok') {
+ const el = document.getElementById('add-msg');
+ if(!el) return;
+ el.textContent = text;
+ el.style.color = type === 'error' ? '#f87171' : type === 'warn' ? '#fbbf24' : '#4ade80';
+ el.classList.remove('hidden');
+ setTimeout(() => el.classList.add('hidden'), 5000);
+}
+
// ── Delete modal ─────────────────────────────
let _deleteUrl = null;
let _deleteFilesChecked = false;
@@ -1011,6 +1347,12 @@ function pubStatusPill(s) {
return `${map[s]}`;
}
+function _sourceBadge(source) {
+ if(!source) return 'Источник неизвестен';
+ if(source.slug === 'unknown') return '' + escHtml(source.display_name) + '';
+ return '' + escHtml(source.display_name) + '';
+}
+
// ── Время загрузки ────────────────────────────
// Храним интервал живого таймера: url → intervalId
const _timerIntervals = {};
@@ -1126,6 +1468,7 @@ function renderMangaRow(m) {
${statusPill(m.status)}
${pubStatusPill(m.pub_status || 'unknown')}
+ ${_sourceBadge(m.source)}
${escHtml(m.title || m.url)}
@@ -1170,6 +1513,11 @@ function _rowButtons(m) {
title="${m.errors_count} проблем при загрузке"
style="background:#450a0a;color:#fca5a5;padding:4px 8px;border-radius:6px;font-size:0.75rem;cursor:pointer">⚠️ ${m.errors_count}`
: ''}
+ ${!isActive
+ ? ``
+ : ''}
${isActive
? ``
: ''}
@@ -1282,6 +1630,7 @@ function _patchRow(el, m) {
set('status', statusPill(m.status));
set('pubstatus', pubStatusPill(m.pub_status || 'unknown'));
+ set('source', _sourceBadge(m.source));
set('title', escHtml(m.title || m.url));
set('chcount', `📖 ${chDone}/${chTotal} глав`);
set('size', `💾 ${m.size_human || '—'}`);
@@ -1661,6 +2010,7 @@ async function saveRenameFolder() {
async function initApp() {
_initDeleteModal();
await loadStats();
+ await loadSources();
connectWS();
// Загружаем список манги
try {
diff --git a/src/api.py b/src/api.py
index 9aa22f7..244e7b5 100644
--- a/src/api.py
+++ b/src/api.py
@@ -20,6 +20,7 @@ from loguru import logger
from .state import StateDB
from .worker import download_manga, check_for_updates
from .exporter import patch_meta, MangaMeta
+from .sources import registry, get_source_for_url, extract_domain
OUTPUT_DIR = Path("/app/output")
FRONTEND_DIR = Path("/app/frontend")
@@ -172,6 +173,16 @@ async def _queue_worker_loop():
@app.on_event("startup")
async def startup_event():
+ # Синхронизируем источники с кодом и мигрируем существующие манги
+ _db = StateDB()
+ try:
+ _db.sync_sources(registry)
+ migrated = _db.migrate_manga_sources()
+ if migrated:
+ logger.info("Авто-миграция: проставлен source_id для {} манг", migrated)
+ finally:
+ _db.close()
+
asyncio.create_task(queue_worker())
asyncio.create_task(update_scheduler())
# Восстанавливаем очередь из БД (незавершённые задачи)
@@ -365,6 +376,16 @@ def _enrich_manga(m: dict, db: StateDB) -> dict:
AND pages_total > 0 AND pages_done < pages_total""",
(m["url"],)
).fetchone()[0]
+
+ # Источник
+ source_info = None
+ if m.get("source_id"):
+ src = db.get_source_by_id(m["source_id"])
+ if src:
+ source_info = {"id": src["id"], "slug": src["slug"], "display_name": src["display_name"]}
+ else:
+ source_info = {"id": m["source_id"], "slug": "unknown", "display_name": "Источник недоступен"}
+
return {
**m,
"chapters_done": ch_done_count,
@@ -375,6 +396,7 @@ def _enrich_manga(m: dict, db: StateDB) -> dict:
"errors_count": ch_failed + ch_partial,
"started_at": m.get("started_at"),
"finished_at": m.get("finished_at"),
+ "source": source_info,
}
@@ -454,6 +476,7 @@ def _manga_detail(manga: dict, db: StateDB) -> dict:
class AddMangaRequest(BaseModel):
urls: List[str]
format: str = "cbz"
+ source_id: Optional[int] = None # явный выбор источника (для неизвестных доменов)
# ── Auth API ─────────────────────────────────
@@ -536,7 +559,24 @@ async def add_to_queue(body: AddMangaRequest):
url = url.strip()
if not url:
continue
- is_new = db.add_manga(url, body.format)
+
+ # Определяем source_id: явный из запроса или авто по домену
+ source_id = body.source_id
+ if source_id is None:
+ domain = extract_domain(url)
+ source_row = db.get_source_by_domain(domain)
+ if source_row:
+ source_id = source_row["id"]
+
+ # Если источник указан явно — привязываем домен к нему
+ if body.source_id is not None:
+ domain = extract_domain(url)
+ existing = db.get_source_by_domain(domain)
+ if existing and existing["id"] != body.source_id:
+ db.remove_domain(existing["id"], domain)
+ db.add_domain(body.source_id, domain)
+
+ is_new = db.add_manga(url, body.format, source_id=source_id)
if is_new:
await download_queue.put({"url": url, "fmt": body.format})
added.append(url)
@@ -544,9 +584,9 @@ async def add_to_queue(body: AddMangaRequest):
"type": "manga_queued",
"url": url,
"format": body.format,
+ "source_id": source_id,
})
await _broadcast_queue_positions()
- # Запускаем фоновую задачу предпросмотра (без Chromium — быстро)
asyncio.create_task(_fetch_preview(url))
else:
skipped.append(url)
@@ -559,15 +599,27 @@ async def _fetch_preview(url: str):
"""Быстро получает название и количество глав сразу после добавления."""
try:
from .browser import BrowserManager
- from .scraper import get_manga_info
- async with BrowserManager(headless=True) as bm:
- _, page = await bm.new_page()
- manga = await get_manga_info(page, url)
- if not manga:
- return
db = StateDB()
try:
- db.update_manga_info(
+ source = get_source_for_url(url, db)
+ if source is None:
+ manga_row = db.get_manga(url)
+ if manga_row and manga_row.get("source_id"):
+ source = registry.get_by_db_id(manga_row["source_id"], db)
+ finally:
+ db.close()
+
+ if source is None:
+ return
+
+ async with BrowserManager(headless=True) as bm:
+ _, page = await bm.new_page()
+ manga = await source.get_manga_info(page, url)
+ if not manga:
+ return
+ db2 = StateDB()
+ try:
+ db2.update_manga_info(
url,
title=manga.title_ru or manga.title,
chapters_total=len(manga.chapters),
@@ -576,7 +628,7 @@ async def _fetch_preview(url: str):
pub_status=manga.pub_status,
)
finally:
- db.close()
+ db2.close()
await ws_manager.broadcast({
"type": "manga_preview",
"url": url,
@@ -996,6 +1048,151 @@ async def delete_manga(url: str, delete_files: bool = False):
db.close()
+
+# ── Sources API ───────────────────────────────
+
+class DomainAdd(BaseModel):  # request body of POST /api/sources/{source_id}/domains
+ domain: str  # lower-cased and stripped by the endpoint before use
+
+
+class SwitchSourceRequest(BaseModel):  # request body of POST /api/mangas/switch-source
+ url: str  # manga URL; the endpoint answers 404 if no such manga is stored
+ source_id: int  # id of the target source; the endpoint answers 404 if it does not exist
+
+
+@app.get("/api/sources")
+async def list_sources():
+ """List all sources together with their domains."""
+ db = StateDB()
+ try:
+ return db.get_all_sources()
+ finally:
+ db.close()
+
+
+@app.get("/api/resolve-source")
+async def resolve_source(url: str):
+ """Resolve a URL's source by its domain. Returns {"source": {id, slug, display_name} or null, "domain": ...}."""
+ db = StateDB()
+ try:
+ domain = extract_domain(url)
+ row = db.get_source_by_domain(domain)
+ if not row:
+ return {"source": None, "domain": domain}
+ return {
+ "source": {
+ "id": row["id"],
+ "slug": row["slug"],
+ "display_name": row["display_name"],
+ },
+ "domain": domain,
+ }
+ finally:
+ db.close()
+
+
+@app.post("/api/sources/{source_id}/domains")
+async def add_domain(source_id: int, body: DomainAdd):
+ """Add a domain to a source (400: empty domain, 404: unknown source, 409: domain already bound)."""
+ db = StateDB()
+ try:
+ source = db.get_source_by_id(source_id)
+ if not source:
+ raise HTTPException(status_code=404, detail="Источник не найден")
+ domain = body.domain.lower().strip()
+ if not domain:
+ raise HTTPException(status_code=400, detail="Домен не может быть пустым")
+ # Reject the domain if it is already bound to a different source
+ existing = db.get_source_by_domain(domain)
+ if existing and existing["id"] != source_id:
+ raise HTTPException(
+ status_code=409,
+ detail=f"Домен уже привязан к источнику «{existing['display_name']}»"
+ )
+ ok = db.add_domain(source_id, domain)  # presumably falsy when the row already exists — confirm in StateDB
+ if not ok:
+ raise HTTPException(status_code=409, detail="Домен уже существует")
+ await ws_manager.broadcast({
+ "type": "source_domain_added",
+ "source_id": source_id,
+ "domain": domain,
+ })
+ return {"ok": True, "domain": domain}
+ finally:
+ db.close()
+
+
+@app.delete("/api/sources/{source_id}/domains/{domain:path}")
+async def remove_domain(source_id: int, domain: str):
+ """Remove a domain from a source (404 if the source or the domain is not found)."""
+ db = StateDB()
+ try:
+ source = db.get_source_by_id(source_id)
+ if not source:
+ raise HTTPException(status_code=404, detail="Источник не найден")
+ # NOTE(review): unlike add_domain, the domain is not lower-cased/stripped here — confirm stored values always match
+ ok = db.remove_domain(source_id, domain)
+ if not ok:
+ raise HTTPException(status_code=404, detail="Домен не найден")
+ await ws_manager.broadcast({
+ "type": "source_domain_removed",
+ "source_id": source_id,
+ "domain": domain,
+ })
+ return {"ok": True}
+ finally:
+ db.close()
+
+
+@app.post("/api/mangas/switch-source")
+async def switch_manga_source(body: SwitchSourceRequest):
+ """Switch a manga's source and rebind the domain of its URL to that source."""
+ db = StateDB()
+ try:
+ manga = db.get_manga(body.url)
+ if not manga:
+ raise HTTPException(status_code=404, detail="Манга не найдена")
+ if manga["status"] == "downloading" and body.url in active_tasks:
+ raise HTTPException(status_code=400, detail="Нельзя сменить источник во время загрузки")
+
+ new_source = db.get_source_by_id(body.source_id)
+ if not new_source:
+ raise HTTPException(status_code=404, detail="Источник не найден")
+
+ old_source_id = manga.get("source_id")
+ domain = extract_domain(body.url)
+
+ # Rebind the domain to the new source (skipped when no domain could be extracted)
+ if domain:
+ existing_domain = db.get_source_by_domain(domain)
+ if existing_domain and existing_domain["id"] != body.source_id:
+ db.remove_domain(existing_domain["id"], domain)
+ db.add_domain(body.source_id, domain)
+
+ # Point the manga at the new source
+ db.set_manga_source(body.url, body.source_id)
+
+ # Reset failed/partial chapters → pending
+ reset_count = db.reset_failed_chapters(body.url)
+
+ await ws_manager.broadcast({
+ "type": "source_switched",
+ "url": body.url,
+ "old_source_id": old_source_id,
+ "new_source_id": body.source_id,
+ "new_source_name": new_source["display_name"],
+ "domain_rebound": bool(domain),
+ "chapters_reset": reset_count,
+ })
+ return {
+ "ok": True,
+ "source_id": body.source_id,
+ "source_name": new_source["display_name"],
+ "chapters_reset": reset_count,
+ }
+ finally:
+ db.close()
+
+
@app.get("/api/stats")
async def global_stats():
db = StateDB()
diff --git a/src/scraper.py b/src/scraper.py
index 27f9063..46aa6da 100644
--- a/src/scraper.py
+++ b/src/scraper.py
@@ -1,665 +1,19 @@
"""
-Парсер readmanga.ru: список глав и URL/байты изображений внутри главы.
+Обратно-совместимый shim: делегирует вызовы ReadmangaSource.
+Не используйте напрямую в новом коде — используйте src.sources.registry.
"""
-import asyncio
-import re
-import time
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Optional
+from .sources.base import Chapter, MangaInfo # noqa: F401 — реэкспорт для импортёров
+from .sources.readmanga import ReadmangaSource
-from loguru import logger
-from playwright.async_api import Page
-
-from .browser import BrowserManager
+_instance = ReadmangaSource()
-# ──────────────────────────────────────────────
-# Модели данных
-# ──────────────────────────────────────────────
-
-@dataclass
-class Chapter:
- title: str
- url: str
- number: float = 0.0
- volume: int = 0
+async def get_manga_info(page, url):
+ return await _instance.get_manga_info(page, url)
-@dataclass
-class MangaInfo:
- title: str
- url: str
- chapters: list[Chapter] = field(default_factory=list)
- pub_status: str = "unknown" # completed / ongoing / unknown
- title_ru: str = "" # Только русский тайтл (для папки)
- title_full: str = "" # Полный тайтл как на странице
- description: str = "" # Описание/синопсис
- genres: list[str] = field(default_factory=list) # Жанры
-
-
-# ──────────────────────────────────────────────
-# Страница манги — список глав
-# ──────────────────────────────────────────────
-
-async def get_manga_info(page: Page, url: str) -> Optional[MangaInfo]:
- """Открывает страницу манги и возвращает список всех глав."""
- logger.info("Загружаем страницу манги: {}", url)
- ok = await _navigate(page, url)
- if not ok:
- return None
-
- title_full = await page.title()
- title_full = re.sub(r"\s*[-–|].*$", "", title_full).strip()
-
- # Пробуем взять русский тайтл напрямую из DOM
- title_ru = await _extract_ru_title_from_dom(page)
- if not title_ru:
- title_ru = _parse_ru_title(title_full)
-
- logger.info("Манга: {} | ru: {}", title_full, title_ru)
-
- pub_status = await _extract_pub_status(page)
- logger.info("Статус выпуска: {}", pub_status)
-
- description = await _extract_description(page)
- genres = await _extract_genres(page)
-
- await _expand_chapters(page)
- chapters = await _extract_chapters(page)
- if not chapters:
- chapters = await _extract_chapters_alt(page)
-
- logger.info("Найдено глав: {}", len(chapters))
- return MangaInfo(
- title=title_ru or title_full,
- url=url,
- chapters=chapters,
- pub_status=pub_status,
- title_ru=title_ru,
- title_full=title_full,
- description=description,
- genres=genres,
+async def get_chapter_images_and_download(page, chapter_url, dest_dir,
+ manga_url=None, on_page=None):
+ return await _instance.get_chapter_images_and_download(
+ page, chapter_url, dest_dir, manga_url=manga_url, on_page=on_page
)
-
-
-async def _extract_ru_title_from_dom(page: Page) -> str:
- """Ищет русский тайтл в структуре страницы readmanga."""
- try:
- result = await page.evaluate("""
- () => {
- // readmanga: основной тайтл в span.name внутри .names
- const selectors = [
- '.names .name',
- 'h1.manga-title',
- 'h1 .name',
- '.name-block .name',
- ];
- for (const sel of selectors) {
- const el = document.querySelector(sel);
- if (el && el.textContent.trim()) return el.textContent.trim();
- }
- return '';
- }
- """)
- return (result or "").strip()
- except Exception:
- return ""
-
-
-def _parse_ru_title(full_title: str) -> str:
- """Извлекает русский тайтл из полной строки тайтла.
-
- Примеры:
- 'Манга Режим — АД. Хардкорный геймер ... (Hellmode)' → 'Режим — АД. Хардкорный геймер ...'
- 'Манга Магическая битва (Sorcery Fight) Гэгэ онлайн' → 'Магическая битва'
- 'Авантюрист Monster Eater Adventurer' → 'Авантюрист'
- """
- t = full_title.strip()
- # Убираем префикс "Манга "
- t = re.sub(r'^Манга\s+', '', t).strip()
- # Берём только до первой скобки (начало английского тайтла)
- t = re.split(r'\s*[\(\[]', t)[0].strip()
- # Убираем суффикс " онлайн"
- t = re.sub(r'\s+онлайн\s*$', '', t, flags=re.IGNORECASE).strip()
-
- # Обрезаем хвост из латинских слов.
- # Правило: стоп только на токене содержащем латиницу (a-zA-Z).
- # Пунктуация между кириллическими словами (—, –, ., :, !) — сохраняем.
- words = t.split()
- result = []
- for w in words:
- if re.search(r'[а-яёА-ЯЁ]', w):
- result.append(w)
- elif re.search(r'[a-zA-Z]', w):
- # Первое латинское слово после кириллических — обрезаем здесь
- if result:
- break
- else:
- # Чисто пунктуационный токен (—, –, ., :, …)
- # Добавляем только если уже есть кириллические слова (связка внутри)
- if result:
- result.append(w)
-
- # Убираем висячую пунктуацию в конце (если последнее слово — не кириллица)
- while result and not re.search(r'[а-яёА-ЯЁ]', result[-1]):
- result.pop()
-
- if result:
- t = ' '.join(result)
- return t
-
-
-async def _extract_pub_status(page: Page) -> str:
- """Извлекает статус выпуска: completed / ongoing / unknown."""
- try:
- result = await page.evaluate("""
- () => {
- // readmanga хранит статус в .elem_status .value или похожих блоках
- const statusSelectors = [
- '.elem_status .value',
- '.manga-info .status',
- '[class*="status"] .value',
- '.property .status',
- ];
- for (const sel of statusSelectors) {
- const el = document.querySelector(sel);
- if (el) {
- const t = el.textContent.toLowerCase();
- if (t.includes('завершён') || t.includes('завершен') || t.includes('complete')) return 'completed';
- if (t.includes('продолжает') || t.includes('ongoing')) return 'ongoing';
- }
- }
- // Fallback: сканируем весь текст страницы
- const bodyText = document.body ? document.body.innerText.toLowerCase() : '';
- if (bodyText.includes('выпуск завершён') || bodyText.includes('выпуск завершен')) return 'completed';
- if (bodyText.includes('продолжается')) return 'ongoing';
- return 'unknown';
- }
- """)
- return result or "unknown"
- except Exception:
- return "unknown"
-
-
-async def _extract_description(page: Page) -> str:
- """Извлекает описание/синопсис манги."""
- try:
- result = await page.evaluate("""
- () => {
- const selectors = [
- '.manga-description',
- '.elem_descr .value',
- '#tab-description .description-text',
- '.description',
- '[itemprop="description"]',
- ];
- for (const sel of selectors) {
- const el = document.querySelector(sel);
- if (el && el.textContent.trim()) return el.textContent.trim();
- }
- return '';
- }
- """)
- return (result or "").strip()[:2000] # обрезаем до 2000 символов
- except Exception:
- return ""
-
-
-async def _extract_genres(page: Page) -> list[str]:
- """Извлекает список жанров манги."""
- try:
- result = await page.evaluate("""
- () => {
- const selectors = [
- '.elem_genre .value a',
- '.genres a',
- '[itemprop="genre"]',
- '.genre-list a',
- ];
- for (const sel of selectors) {
- const els = document.querySelectorAll(sel);
- if (els.length) return Array.from(els).map(e => e.textContent.trim()).filter(Boolean);
- }
- return [];
- }
- """)
- return result or []
- except Exception:
- return []
-
-
-async def _navigate(page: Page, url: str, retries: int = 3,
- referer: str | None = None) -> bool:
- from urllib.parse import urlparse
- if referer is None:
- p = urlparse(url)
- referer = f"{p.scheme}://{p.netloc}/"
- for attempt in range(1, retries + 1):
- try:
- resp = await page.goto(url, wait_until="domcontentloaded",
- timeout=60_000, referer=referer)
- if resp and resp.status >= 400:
- logger.warning("Попытка {}/{}: HTTP {}", attempt, retries, resp.status)
- await asyncio.sleep(3 * attempt)
- continue
- try:
- await page.wait_for_load_state("networkidle", timeout=10_000)
- except Exception:
- pass
- return True
- except Exception as e:
- logger.warning("Попытка {}/{}: {}", attempt, retries, e)
- await asyncio.sleep(3 * attempt)
- return False
-
-
-async def _expand_chapters(page: Page):
- for sel in ["a.chapter-link.all", "button:has-text('Все главы')",
- "a:has-text('Все главы')"]:
- try:
- el = page.locator(sel).first
- if await el.is_visible(timeout=2000):
- await el.click()
- await page.wait_for_load_state("networkidle", timeout=10_000)
- return
- except Exception:
- pass
-
-
-async def _extract_chapters(page: Page) -> list[Chapter]:
- """Основной парсер: #chapters-list → tr.item-row → td[data-num] a.chapter-link"""
- rows = await page.query_selector_all("#chapters-list tr.item-row")
- chapters = []
- for row in rows:
- link = await row.query_selector("td[class*='item-title'] a")
- if not link:
- continue
- href = await link.get_attribute("href") or ""
- text = (await link.inner_text()).strip()
- if not href:
- continue
- td = await row.query_selector("td[data-num]")
- vol = int((await td.get_attribute("data-vol") or "0")) if td else 0
- num_raw = int((await td.get_attribute("data-num") or "0")) if td else 0
- number = num_raw / 10.0
- full_url = href if href.startswith("http") else _base_url(page.url) + href
- chapters.append(Chapter(title=text, url=full_url, number=number, volume=vol))
- return chapters
-
-
-async def _extract_chapters_alt(page: Page) -> list[Chapter]:
- result = await page.evaluate("""
- () => {
- const links = Array.from(document.querySelectorAll('a[href*="/vol"]'));
- return links.map(a => ({ href: a.href, text: a.textContent.trim() }))
- .filter(x => x.href && x.text);
- }
- """)
- return [Chapter(title=x["text"], url=x["href"],
- number=_parse_num(x["text"]), volume=_parse_vol(x["text"]))
- for x in result]
-
-
-def _base_url(url: str) -> str:
- m = re.match(r"(https?://[^/]+)", url)
- return m.group(1) if m else "https://readmanga.ru"
-
-
-def _parse_num(text: str) -> float:
- m = re.search(r"[\d]+(?:[.,]\d+)?", text.replace(",", "."))
- return float(m.group()) if m else 0.0
-
-
-def _parse_vol(text: str) -> int:
- m = re.search(r"Том\s+(\d+)", text, re.IGNORECASE)
- return int(m.group(1)) if m else 0
-
-
-# ──────────────────────────────────────────────
-# Страница главы — получение URL изображений
-# ──────────────────────────────────────────────
-
-async def _extract_images_from_js(page: Page) -> list[str]:
- """
- Извлекает URL из rm_h.readerInit(chapterInfo, [[base, '', path, w, h], ...]).
- Считает скобки для точного захвата массива.
- """
- try:
- result = await page.evaluate("""
- () => {
- for (const s of document.querySelectorAll('script')) {
- const text = s.textContent || '';
- const mi = text.indexOf('readerInit');
- if (mi === -1) continue;
- const ai = text.indexOf('[', mi);
- if (ai === -1) continue;
- let depth = 0, end = -1;
- for (let i = ai; i < text.length; i++) {
- if (text[i] === '[') depth++;
- else if (text[i] === ']') { depth--; if (!depth) { end = i+1; break; } }
- }
- if (end === -1) continue;
- try {
- const arr = eval(text.slice(ai, end));
- if (Array.isArray(arr) && arr.length)
- return arr.map(item => Array.isArray(item) && item.length >= 3
- ? item[0] + item[2] : null).filter(Boolean);
- } catch(e) {}
- }
- return [];
- }
- """)
- if result:
- logger.debug("JS readerInit нашёл {} изображений", len(result))
- return result or []
- except Exception as e:
- logger.debug("JS-метод не сработал: {}", e)
- return []
-
-
-async def _extract_images_from_dom(page: Page) -> list[str]:
- try:
- result = await page.evaluate("""
- () => {
- for (const sel of ['img.manga-page', '.page-image img', '#mangaReader img', 'img[data-src]']) {
- const found = Array.from(document.querySelectorAll(sel));
- if (found.length) return found.map(i => i.src || i.dataset.src).filter(Boolean);
- }
- return [];
- }
- """)
- return result or []
- except Exception:
- return []
-
-
-def _get_ext(url: str) -> str:
- m = re.search(r"\.(jpg|jpeg|png|webp)(\?|$)", url, re.IGNORECASE)
- if m:
- ext = m.group(1).lower()
- return ".jpg" if ext == "jpeg" else f".{ext}"
- return ".jpg"
-
-
-# ──────────────────────────────────────────────
-# Скачивание главы
-# ──────────────────────────────────────────────
-
-async def get_chapter_images_and_download(
- page: Page,
- chapter_url: str,
- dest_dir: Path,
- manga_url: str | None = None,
- on_page: object = None,
-) -> list[Path]:
- """
- 1. Открывает страницу главы (устанавливает DDoS-Guard cookies для CDN).
- 2. Извлекает список URL из readerInit.
- 3. Перехватывает img-запросы через page.route() + route.fetch()
- (браузерный стек — правильные Sec-Fetch-* заголовки, cookies).
- 4. Пролистывает читалку клавишей ArrowRight чтобы загрузить все страницы.
- 5. Retry для страниц с timeout через JS fetch.
- """
- t_start = time.monotonic()
- ch_id = chapter_url.split("/")[-1] # короткий идентификатор для логов
- logger.info("[{}] Загружаем главу: {}", ch_id, chapter_url)
-
- from urllib.parse import urlparse
- parsed = urlparse(chapter_url)
- parts = parsed.path.strip("/").split("/")
- manga_slug = parts[0] if parts else ""
- referer = manga_url or f"{parsed.scheme}://{parsed.netloc}/{manga_slug}"
-
- load_url = chapter_url + ("?mtr=1" if "?" not in chapter_url else "&mtr=1")
- dest_dir.mkdir(parents=True, exist_ok=True)
-
- def _base(u: str) -> str:
- return u.split("?")[0]
-
- # Баннеры/рекламные изображения — игнорируем без логирования
- BANNER_RE = re.compile(r"466_p\.|570_p\.|banner|advert", re.I)
-
- def _is_manga_image(url: str) -> bool:
- base = _base(url)
- if not re.search(r"\.(jpg|jpeg|png|webp)(\?|$)", base, re.I):
- return False
- if "resrmr." in url or "/static/" in url:
- return False
- return bool(re.search(r"one-way\.work|staticfa\.|rm\.one-way|cdnmanga|reimg", url, re.I))
-
- captured: dict[str, bytes] = {} # base_url → bytes
- route_errors: dict[str, str] = {} # base_url → текст ошибки
- route_statuses: dict[str, int] = {} # base_url → HTTP status (не 200/206)
- lock = asyncio.Lock()
-
- async def route_handler(route, request):
- url = request.url
- base = _base(url)
- if not _is_manga_image(url):
- await route.continue_()
- return
- if BANNER_RE.search(base):
- await route.continue_()
- return
- async with lock:
- already = base in captured
- if already:
- await route.continue_()
- return
- fname = base.split("/")[-1]
- try:
- response = await route.fetch()
- status = response.status
- body = await response.body()
- if body and len(body) > 500 and status in (200, 206):
- async with lock:
- if base not in captured:
- captured[base] = body
- logger.debug("[{}] ✓ {}: {} байт", ch_id, fname, len(body))
- if on_page:
- try:
- asyncio.ensure_future(on_page(0, 0))
- except Exception:
- pass
- else:
- async with lock:
- route_statuses[base] = status
- if status not in (200, 206):
- logger.warning("[{}] CDN HTTP {} для '{}' | {}",
- ch_id, status, fname, base[-70:])
- else:
- logger.warning("[{}] Слишком мал ответ ({} байт) для '{}'",
- ch_id, len(body), fname)
- await route.fulfill(response=response)
- except Exception as e:
- err = str(e)
- async with lock:
- route_errors[base] = err
- is_timeout = "timeout" in err.lower()
- level = logger.warning if is_timeout else logger.warning
- level("[{}] route.fetch {} '{}': {}",
- ch_id, "timeout" if is_timeout else "ошибка", fname, err[:150])
- try:
- await route.continue_()
- except Exception:
- pass
-
- await page.route("**/*", route_handler)
-
- # 1. Открываем главу
- ok = await _navigate(page, load_url, referer=referer)
- if not ok:
- await page.unroute("**/*", route_handler)
- logger.error("[{}] Не удалось открыть главу после всех retry: {}", ch_id, chapter_url)
- return []
-
- # 2. Ждём readerInit
- try:
- await page.wait_for_function(
- "() => Array.from(document.querySelectorAll('script'))"
- ".some(s => s.textContent.includes('readerInit'))",
- timeout=15_000,
- )
- except Exception as e:
- logger.warning("[{}] readerInit не появился за 15с ({}). "
- "Продолжаем через DOM-fallback.", ch_id, str(e)[:80])
-
- # 3. Извлекаем список URL
- image_urls = await _extract_images_from_js(page)
- if not image_urls:
- logger.debug("[{}] JS readerInit не дал URL, пробуем DOM-парсинг", ch_id)
- image_urls = await _extract_images_from_dom(page)
- if not image_urls:
- await page.unroute("**/*", route_handler)
- try:
- page_info = await page.evaluate("() => document.title + ' | ' + location.href")
- except Exception:
- page_info = "?"
- logger.error("[{}] Список изображений пуст. Текущая страница: {}", ch_id, page_info)
- return []
-
- logger.info("[{}] Найдено изображений: {}", ch_id, len(image_urls))
- url_to_idx = {_base(u): i for i, u in enumerate(image_urls)}
- filename_to_idx = {_base(u).split("/")[-1]: i for i, u in enumerate(image_urls)}
- total = len(image_urls)
-
- def _count_matched() -> int:
- count = 0
- for base_url in captured:
- if base_url in url_to_idx or base_url.split("/")[-1] in filename_to_idx:
- count += 1
- return count
-
- # 4. Пролистываем читалку
- await asyncio.sleep(1)
- stall_count = 0
- prev_done = -1
- for i in range(total + 20):
- done = _count_matched()
- if done >= total:
- break
- try:
- await page.keyboard.press("ArrowRight")
- await asyncio.sleep(0.5)
- except Exception as e:
- logger.warning("[{}] Ошибка листания на шаге {}: {}", ch_id, i + 1, e)
- break
- if i % 20 == 19:
- done = _count_matched()
- logger.debug("[{}] Пролистано {}, загружено: {}/{}", ch_id, i + 1, done, total)
- if done == prev_done:
- stall_count += 1
- if stall_count >= 3:
- logger.warning("[{}] Прогресс завис ({}/{}) после {} листаний — прерываем",
- ch_id, done, total, i + 1)
- break
- else:
- stall_count = 0
- prev_done = done
-
- # Финальное ожидание
- await asyncio.sleep(3)
-
- # 5. Retry для страниц с timeout через браузерный JS fetch
- async with lock:
- timeout_bases = [u for u, e in route_errors.items()
- if "timeout" in e.lower() and u not in captured]
- if timeout_bases:
- logger.info("[{}] Retry {} страниц с timeout через JS fetch...",
- ch_id, len(timeout_bases))
- for retry_base in timeout_bases:
- if retry_base in captured:
- continue
- fname = retry_base.split("/")[-1]
- try:
- data_b64 = await page.evaluate("""async (url) => {
- try {
- const r = await fetch(url, {credentials: 'include'});
- if (!r.ok) return null;
- const buf = await r.arrayBuffer();
- const bytes = new Uint8Array(buf);
- let bin = '';
- for (let b of bytes) bin += String.fromCharCode(b);
- return btoa(bin);
- } catch(e) { return null; }
- }""", retry_base)
- if data_b64:
- import base64
- body = base64.b64decode(data_b64)
- if len(body) > 500:
- async with lock:
- captured[retry_base] = body
- logger.info("[{}] Retry OK: {} ({} байт)", ch_id, fname, len(body))
- else:
- logger.warning("[{}] Retry вернул {} байт для '{}' — игнорируем",
- ch_id, len(body), fname)
- else:
- logger.warning("[{}] Retry вернул null для '{}' | {}",
- ch_id, fname, retry_base[-70:])
- except Exception as e2:
- logger.warning("[{}] Retry JS ошибка для '{}': {}", ch_id, fname, e2)
-
- await page.unroute("**/*", route_handler)
-
- done = _count_matched()
- elapsed = time.monotonic() - t_start
- logger.info("[{}] Перехвачено: {}/{} за {:.1f}с", ch_id, done, total, elapsed)
-
- # 6. Сохраняем в правильном порядке
- filename_to_idx = {_base(u).split("/")[-1]: i for i, u in enumerate(image_urls)}
-
- paths: dict[int, Path] = {}
- unmatched_other: list[str] = []
- for base_url, body in captured.items():
- idx = url_to_idx.get(base_url)
- if idx is None:
- fname = base_url.split("/")[-1]
- idx = filename_to_idx.get(fname)
- if idx is None:
- if not BANNER_RE.search(base_url):
- unmatched_other.append(base_url.split("/")[-1])
- continue
- ext = _get_ext(base_url)
- p = dest_dir / f"{idx:04d}{ext}"
- p.write_bytes(body)
- paths[idx] = p
-
- if unmatched_other:
- logger.debug("[{}] Перехвачено, но не совпало с readerInit ({}): {}",
- ch_id, len(unmatched_other), unmatched_other)
-
- # 7. Итоговый отчёт по пропущенным страницам
- missing_idxs = [i for i in range(total) if i not in paths]
- if missing_idxs:
- missing_files = [_base(image_urls[i]).split("/")[-1] for i in missing_idxs]
- missing_full = [_base(image_urls[i]) for i in missing_idxs]
-
- timeout_miss = [missing_files[j] for j, i in enumerate(missing_idxs)
- if missing_full[j] in route_errors
- and "timeout" in route_errors[missing_full[j]].lower()]
- http_miss = [f"{missing_files[j]}(HTTP {route_statuses.get(missing_full[j], '?')})"
- for j, i in enumerate(missing_idxs)
- if missing_full[j] in route_statuses]
- unrcv = [missing_files[j] for j, i in enumerate(missing_idxs)
- if missing_full[j] not in route_errors
- and missing_full[j] not in route_statuses]
-
- reasons = []
- if timeout_miss:
- reasons.append(f"timeout×{len(timeout_miss)}: {timeout_miss}")
- if http_miss:
- reasons.append(f"HTTP-err×{len(http_miss)}: {http_miss}")
- if unrcv:
- reasons.append(f"не_перехвачено×{len(unrcv)}: {unrcv}")
-
- logger.warning(
- "[{}] Пропущено {}/{} стр. | №: {} | причины: {}",
- ch_id, len(missing_idxs), total,
- [i + 1 for i in missing_idxs],
- " | ".join(reasons) if reasons else "неизвестно",
- )
- logger.debug("[{}] Полные URL пропущенных: {}", ch_id, missing_full)
-
- return [paths[i] for i in sorted(paths.keys())]
-
diff --git a/src/sources/__init__.py b/src/sources/__init__.py
new file mode 100644
index 0000000..95666f0
--- /dev/null
+++ b/src/sources/__init__.py
@@ -0,0 +1,74 @@
+"""
+Реестр источников манги.
+
+Для добавления нового источника:
+1. Создать файл src/sources/mysource.py с классом, реализующим MangaSourceProtocol
+2. Импортировать его здесь и добавить в список SOURCES
+"""
+from urllib.parse import urlparse
+from typing import Optional
+
+from .base import MangaSourceProtocol
+from .readmanga import ReadmangaSource
+
+# ── Source registration ────────────────────────
+# Add new source adapters to this list:
+SOURCES: list = [
+    ReadmangaSource(),
+]
+
+# Fast lookup of an adapter instance by its slug
+_BY_SLUG: dict[str, object] = {s.slug: s for s in SOURCES}
+
+
+class SourceRegistry:
+    """Registry of manga sources; the sources themselves are defined only in code."""
+
+    def get_by_slug(self, slug: str) -> Optional[object]:
+        """Return the adapter registered under ``slug``, or None."""
+        return _BY_SLUG.get(slug)
+
+    def get_by_db_id(self, source_id: int, db) -> Optional[object]:
+        """Resolve an adapter via the DB: source_id -> slug -> instance."""
+        record = db.get_source_by_id(source_id)
+        return _BY_SLUG.get(record["slug"]) if record else None
+
+    def all_sources(self) -> list:
+        """Return a fresh list holding every registered adapter."""
+        return [*SOURCES]
+
+    def all_slugs(self) -> list[str]:
+        return [adapter.slug for adapter in SOURCES]
+
+
+registry = SourceRegistry()  # module-level registry instance
+
+
+def get_source_for_url(url: str, db) -> Optional[object]:
+    """
+    Resolve the source adapter responsible for ``url`` by its domain.
+    The domain (lower-cased, leading ``www.`` dropped) is looked up through
+    ``db.get_source_by_domain``; ``None`` means no adapter could be resolved.
+    """
+    try:
+        host = urlparse(url).netloc.lower()
+        host = host[4:] if host.startswith("www.") else host
+        record = db.get_source_by_domain(host)
+        if record:
+            return _BY_SLUG.get(record["slug"])
+    except Exception:
+        # Best effort: parsing or lookup failures are reported as "no source".
+        pass
+    return None
+
+
+def extract_domain(url: str) -> str:
+    """Return the lower-cased network location of ``url`` without a leading ``www.``.
+
+    An empty string is returned when the URL cannot be processed.
+    """
+    try:
+        return urlparse(url).netloc.lower().removeprefix("www.")
+    except Exception:
+        return ""
+
diff --git a/src/sources/base.py b/src/sources/base.py
new file mode 100644
index 0000000..9438340
--- /dev/null
+++ b/src/sources/base.py
@@ -0,0 +1,58 @@
+"""
+Базовые модели данных и Protocol-интерфейс для источников манги.
+"""
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional, Protocol, runtime_checkable
+
+from playwright.async_api import Page
+
+
+# ──────────────────────────────────────────────
+# Модели данных (общие для всех источников)
+# ──────────────────────────────────────────────
+
+@dataclass
+class Chapter:  # one chapter entry of a manga
+    title: str  # chapter title text
+    url: str  # chapter page URL
+    number: float = 0.0  # chapter number; defaults to 0.0
+    volume: int = 0  # volume number; defaults to 0
+
+
+@dataclass
+class MangaInfo:  # metadata of one manga plus its chapter list
+    title: str  # display title
+    url: str  # manga page URL
+    chapters: list[Chapter] = field(default_factory=list)  # fresh list per instance
+    pub_status: str = "unknown"   # completed / ongoing / unknown
+    title_ru: str = ""  # Russian title, empty when not available
+    title_full: str = ""  # full title, empty when not available
+    description: str = ""  # description text, empty when not available
+    genres: list[str] = field(default_factory=list)  # genre names
+
+
+# ──────────────────────────────────────────────
+# Интерфейс источника
+# ──────────────────────────────────────────────
+
+@runtime_checkable
+class MangaSourceProtocol(Protocol):
+    slug: str          # unique source code used in code ("readmanga")
+    display_name: str  # name shown in the UI ("ReadManga")
+
+    async def get_manga_info(self, page: Page, url: str) -> Optional[MangaInfo]:
+        """Return information about the manga together with its chapter list."""
+        ...
+
+    async def get_chapter_images_and_download(
+        self,
+        page: Page,
+        chapter_url: str,
+        dest_dir: Path,
+        manga_url: Optional[str] = None,
+        on_page: object = None,
+    ) -> list[Path]:
+        """Download the chapter pages into dest_dir and return the list of paths."""
+        ...
+
diff --git a/src/sources/readmanga.py b/src/sources/readmanga.py
new file mode 100644
index 0000000..71241c7
--- /dev/null
+++ b/src/sources/readmanga.py
@@ -0,0 +1,589 @@
+"""
+Адаптер ReadManga: поддерживает readmanga.ru и все его клоны.
+"""
+import asyncio
+import base64
+import re
+import time
+from pathlib import Path
+from typing import Optional
+
+from loguru import logger
+from playwright.async_api import Page
+
+from .base import Chapter, MangaInfo
+
+
+class ReadmangaSource:  # ReadManga source adapter
+    slug = "readmanga"
+    display_name = "ReadManga"
+
+    # CDN host fragments from which chapter images are accepted
+    cdn_patterns = ["one-way.work", "staticfa.", "rm.one-way", "cdnmanga", "reimg"]
+
+    # ──────────────────────────────────────────────
+    # Manga page — chapter list
+    # ──────────────────────────────────────────────
+
+    async def get_manga_info(self, page: Page, url: str) -> Optional[MangaInfo]:
+        """Open the manga page and return its metadata with all chapters (None if it cannot be opened)."""
+        logger.info("Загружаем страницу манги: {}", url)
+        ok = await _navigate(page, url)
+        if not ok:
+            return None
+
+        title_full = await page.title()
+        title_full = re.sub(r"\s*[-–|].*$", "", title_full).strip()  # cut everything from the first "-", "–" or "|"
+
+        title_ru = await _extract_ru_title_from_dom(page)
+        if not title_ru:
+            title_ru = _parse_ru_title(title_full)  # fall back to parsing the page title
+
+        logger.info("Манга: {} | ru: {}", title_full, title_ru)
+
+        pub_status = await _extract_pub_status(page)
+        logger.info("Статус выпуска: {}", pub_status)
+
+        description = await _extract_description(page)
+        genres = await _extract_genres(page)
+
+        await _expand_chapters(page)
+        chapters = await _extract_chapters(page)
+        if not chapters:
+            chapters = await _extract_chapters_alt(page)  # table parser found nothing: use link scan
+
+        logger.info("Найдено глав: {}", len(chapters))
+        return MangaInfo(
+            title=title_ru or title_full,
+            url=url,
+            chapters=chapters,
+            pub_status=pub_status,
+            title_ru=title_ru,
+            title_full=title_full,
+            description=description,
+            genres=genres,
+        )
+
+    # ──────────────────────────────────────────────
+    # Chapter download
+    # ──────────────────────────────────────────────
+
+    async def get_chapter_images_and_download(
+        self,
+        page: Page,
+        chapter_url: str,
+        dest_dir: Path,
+        manga_url: Optional[str] = None,
+        on_page: object = None,
+    ) -> list[Path]:
+        """
+        Download every page image of a chapter into dest_dir; returns saved paths in page order.
+        Flow: register a page.route() interceptor, open the chapter, read the image list
+        (readerInit script, DOM fallback), press ArrowRight until all pages are captured,
+        retry timed-out images via in-page fetch(), then write files named by page index.
+        on_page, if given, is scheduled as on_page(0, 0) after each captured image.
+        """
+        cdn_patterns = self.cdn_patterns
+        t_start = time.monotonic()
+        ch_id = chapter_url.split("/")[-1]  # short identifier used in log lines
+        logger.info("[{}] Загружаем главу: {}", ch_id, chapter_url)
+
+        from urllib.parse import urlparse
+        parsed = urlparse(chapter_url)
+        parts = parsed.path.strip("/").split("/")
+        manga_slug = parts[0] if parts else ""
+        referer = manga_url or f"{parsed.scheme}://{parsed.netloc}/{manga_slug}"
+
+        load_url = chapter_url + ("?mtr=1" if "?" not in chapter_url else "&mtr=1")
+        dest_dir.mkdir(parents=True, exist_ok=True)
+
+        def _base(u: str) -> str:
+            return u.split("?")[0]
+
+        BANNER_RE = re.compile(r"466_p\.|570_p\.|banner|advert", re.I)
+        cdn_re = re.compile("|".join(re.escape(p) for p in cdn_patterns), re.I)  # built once per chapter
+
+        def _is_manga_image(url: str) -> bool:
+            base = _base(url)
+            if not re.search(r"\.(jpg|jpeg|png|webp)(\?|$)", base, re.I):
+                return False
+            if "resrmr." in url or "/static/" in url:
+                return False
+            return bool(cdn_re.search(url))
+
+        captured: dict[str, bytes] = {}  # base_url -> image bytes
+        route_errors: dict[str, str] = {}  # base_url -> error text
+        route_statuses: dict[str, int] = {}  # base_url -> HTTP status of a rejected response
+        lock = asyncio.Lock()
+
+        async def route_handler(route, request):
+            url = request.url
+            base = _base(url)
+            if not _is_manga_image(url):
+                await route.continue_()
+                return
+            if BANNER_RE.search(base):
+                await route.continue_()
+                return
+            async with lock:
+                already = base in captured
+            if already:
+                await route.continue_()
+                return
+            fname = base.split("/")[-1]
+            try:
+                response = await route.fetch()
+                status = response.status
+                body = await response.body()
+                if body and len(body) > 500 and status in (200, 206):
+                    async with lock:
+                        if base not in captured:
+                            captured[base] = body
+                            logger.debug("[{}] ✓ {}: {} байт", ch_id, fname, len(body))
+                            if on_page:
+                                try:
+                                    asyncio.ensure_future(on_page(0, 0))
+                                except Exception:
+                                    pass
+                else:
+                    async with lock:
+                        route_statuses[base] = status
+                    if status not in (200, 206):
+                        logger.warning("[{}] CDN HTTP {} для '{}' | {}",
+                                       ch_id, status, fname, base[-70:])
+                    else:
+                        logger.warning("[{}] Слишком мал ответ ({} байт) для '{}'",
+                                       ch_id, len(body), fname)
+                await route.fulfill(response=response)
+            except Exception as e:
+                err = str(e)
+                async with lock:
+                    route_errors[base] = err
+                is_timeout = "timeout" in err.lower()
+                logger.warning("[{}] route.fetch {} '{}': {}",
+                               ch_id, "timeout" if is_timeout else "ошибка", fname, err[:150])
+                try:
+                    await route.continue_()
+                except Exception:
+                    pass
+
+        await page.route("**/*", route_handler)
+
+        ok = await _navigate(page, load_url, referer=referer)
+        if not ok:
+            await page.unroute("**/*", route_handler)
+            logger.error("[{}] Не удалось открыть главу: {}", ch_id, chapter_url)
+            return []
+
+        try:
+            await page.wait_for_function(
+                "() => Array.from(document.querySelectorAll('script'))"
+                ".some(s => s.textContent.includes('readerInit'))",
+                timeout=15_000,
+            )
+        except Exception as e:
+            logger.warning("[{}] readerInit не появился за 15с ({}). DOM-fallback.", ch_id, str(e)[:80])
+
+        image_urls = await _extract_images_from_js(page)
+        if not image_urls:
+            logger.debug("[{}] JS readerInit не дал URL, пробуем DOM-парсинг", ch_id)
+            image_urls = await _extract_images_from_dom(page)
+        if not image_urls:
+            await page.unroute("**/*", route_handler)
+            try:
+                page_info = await page.evaluate("() => document.title + ' | ' + location.href")
+            except Exception:
+                page_info = "?"
+            logger.error("[{}] Список изображений пуст. Страница: {}", ch_id, page_info)
+            return []
+
+        logger.info("[{}] Найдено изображений: {}", ch_id, len(image_urls))
+        url_to_idx = {_base(u): i for i, u in enumerate(image_urls)}
+        filename_to_idx = {_base(u).split("/")[-1]: i for i, u in enumerate(image_urls)}
+        total = len(image_urls)
+
+        def _count_matched() -> int:
+            # Count distinct page indices so that the same page captured from two
+            # URLs (matched via the filename fallback) is not counted twice.
+            hits = {url_to_idx.get(b, filename_to_idx.get(b.split("/")[-1])) for b in captured}
+            hits.discard(None)
+            return len(hits)
+
+        await asyncio.sleep(1)
+        stall_count = 0
+        prev_done = -1
+        for i in range(total + 20):  # at most total + 20 key presses
+            done = _count_matched()
+            if done >= total:
+                break
+            try:
+                await page.keyboard.press("ArrowRight")
+                await asyncio.sleep(0.5)
+            except Exception as e:
+                logger.warning("[{}] Ошибка листания на шаге {}: {}", ch_id, i + 1, e)
+                break
+            if i % 20 == 19:  # progress/stall check every 20 presses
+                done = _count_matched()
+                logger.debug("[{}] Пролистано {}, загружено: {}/{}", ch_id, i + 1, done, total)
+                if done == prev_done:
+                    stall_count += 1
+                    if stall_count >= 3:
+                        logger.warning("[{}] Прогресс завис ({}/{}) — прерываем", ch_id, done, total)
+                        break
+                else:
+                    stall_count = 0
+                prev_done = done
+
+        await asyncio.sleep(3)  # final wait before the retry pass
+
+        # Retry timed-out images through an in-page JS fetch
+        async with lock:
+            timeout_bases = [u for u, e in route_errors.items()
+                             if "timeout" in e.lower() and u not in captured]
+        if timeout_bases:
+            logger.info("[{}] Retry {} страниц с timeout...", ch_id, len(timeout_bases))
+            for retry_base in timeout_bases:
+                if retry_base in captured:
+                    continue
+                fname = retry_base.split("/")[-1]
+                try:
+                    data_b64 = await page.evaluate("""async (url) => {
+                        try {
+                            const r = await fetch(url, {credentials: 'include'});
+                            if (!r.ok) return null;
+                            const buf = await r.arrayBuffer();
+                            const bytes = new Uint8Array(buf);
+                            let bin = '';
+                            for (let b of bytes) bin += String.fromCharCode(b);
+                            return btoa(bin);
+                        } catch(e) { return null; }
+                    }""", retry_base)
+                    if data_b64:
+                        body = base64.b64decode(data_b64)
+                        if len(body) > 500:
+                            async with lock:
+                                captured[retry_base] = body
+                            logger.info("[{}] Retry OK: {} ({} байт)", ch_id, fname, len(body))
+                        else:
+                            logger.warning("[{}] Retry вернул {} байт — игнорируем", ch_id, len(body))
+                    else:
+                        logger.warning("[{}] Retry null для '{}'", ch_id, fname)
+                except Exception as e2:
+                    logger.warning("[{}] Retry JS ошибка '{}': {}", ch_id, fname, e2)
+
+        await page.unroute("**/*", route_handler)
+
+        done = _count_matched()
+        elapsed = time.monotonic() - t_start
+        logger.info("[{}] Перехвачено: {}/{} за {:.1f}с", ch_id, done, total, elapsed)
+
+        # url_to_idx / filename_to_idx built above are reused: image_urls has not changed since.
+
+        paths: dict[int, Path] = {}
+        unmatched_other: list[str] = []
+        for base_url, body in captured.items():
+            idx = url_to_idx.get(base_url)
+            if idx is None:
+                fname = base_url.split("/")[-1]
+                idx = filename_to_idx.get(fname)
+            if idx is None:
+                if not BANNER_RE.search(base_url):
+                    unmatched_other.append(base_url.split("/")[-1])
+                continue
+            ext = _get_ext(base_url)
+            p = dest_dir / f"{idx:04d}{ext}"  # zero-padded page index keeps reading order
+            p.write_bytes(body)
+            paths[idx] = p
+
+        if unmatched_other:
+            logger.debug("[{}] Не совпало с readerInit ({}): {}", ch_id, len(unmatched_other), unmatched_other)
+
+        missing_idxs = [i for i in range(total) if i not in paths]
+        if missing_idxs:
+            missing_files = [_base(image_urls[i]).split("/")[-1] for i in missing_idxs]
+            missing_full = [_base(image_urls[i]) for i in missing_idxs]
+
+            timeout_miss = [missing_files[j] for j, i in enumerate(missing_idxs)
+                            if missing_full[j] in route_errors
+                            and "timeout" in route_errors[missing_full[j]].lower()]
+            http_miss = [f"{missing_files[j]}(HTTP {route_statuses.get(missing_full[j], '?')})"
+                         for j, i in enumerate(missing_idxs)
+                         if missing_full[j] in route_statuses]
+            unrcv = [missing_files[j] for j, i in enumerate(missing_idxs)
+                     if missing_full[j] not in route_errors
+                     and missing_full[j] not in route_statuses]
+
+            reasons = []
+            if timeout_miss:
+                reasons.append(f"timeout×{len(timeout_miss)}: {timeout_miss}")
+            if http_miss:
+                reasons.append(f"HTTP-err×{len(http_miss)}: {http_miss}")
+            if unrcv:
+                reasons.append(f"не_перехвачено×{len(unrcv)}: {unrcv}")
+
+            logger.warning(
+                "[{}] Пропущено {}/{} стр. | №: {} | причины: {}",
+                ch_id, len(missing_idxs), total,
+                [i + 1 for i in missing_idxs],
+                " | ".join(reasons) if reasons else "неизвестно",
+            )
+
+        return [paths[i] for i in sorted(paths.keys())]
+
+
+# ──────────────────────────────────────────────
+# Вспомогательные функции (приватные)
+# ──────────────────────────────────────────────
+
+async def _navigate(page: Page, url: str, retries: int = 3,
+                    referer: str | None = None) -> bool:
+    """Open ``url`` with up to ``retries`` attempts; True once goto() succeeds without an HTTP status >= 400."""
+    from urllib.parse import urlparse
+    if referer is None:
+        p = urlparse(url)
+        referer = f"{p.scheme}://{p.netloc}/"
+    for attempt in range(1, retries + 1):
+        try:
+            resp = await page.goto(url, wait_until="domcontentloaded",
+                                   timeout=60_000, referer=referer)
+            if not resp or resp.status < 400:
+                try:
+                    await page.wait_for_load_state("networkidle", timeout=10_000)
+                except Exception:
+                    pass  # the network-idle wait is best effort
+                return True
+            logger.warning("Попытка {}/{}: HTTP {}", attempt, retries, resp.status)
+        except Exception as e:
+            logger.warning("Попытка {}/{}: {}", attempt, retries, e)
+        # Linear back-off shared by the HTTP-error and exception paths.
+        await asyncio.sleep(3 * attempt)
+    return False
+
+
+async def _extract_ru_title_from_dom(page: Page) -> str:  # "" when nothing matches or evaluate fails
+    try:
+        result = await page.evaluate("""
+            () => {
+                const selectors = [
+                    '.names .name', 'h1.manga-title', 'h1 .name', '.name-block .name',
+                ];
+                for (const sel of selectors) {
+                    const el = document.querySelector(sel);
+                    if (el && el.textContent.trim()) return el.textContent.trim();
+                }
+                return '';
+            }
+        """)  # text of the first selector whose element has non-empty content
+        return (result or "").strip()
+    except Exception:
+        return ""
+
+
+def _parse_ru_title(full_title: str) -> str:
+    """Best-effort extraction of the Russian name from a full page title.
+
+    Drops a leading "Манга", everything from the first "(" or "[", and a trailing
+    "онлайн"; then keeps the leading run of words that starts at the first Cyrillic
+    word. Falls back to the cleaned title when no Cyrillic word is present.
+    """
+    cyrillic = re.compile(r'[а-яёА-ЯЁ]')
+    cleaned = re.sub(r'^Манга\s+', '', full_title.strip()).strip()
+    cleaned = re.split(r'\s*[\(\[]', cleaned)[0].strip()
+    cleaned = re.sub(r'\s+онлайн\s*$', '', cleaned, flags=re.IGNORECASE).strip()
+    kept = []
+    for word in cleaned.split():
+        if cyrillic.search(word) or (kept and not re.search(r'[a-zA-Z]', word)):
+            kept.append(word)
+        elif kept:
+            break  # first Latin word after the Russian part ends the title
+    # Trailing tokens without Cyrillic letters (numbers, punctuation) are trimmed.
+    while kept and not cyrillic.search(kept[-1]):
+        kept.pop()
+    return ' '.join(kept) if kept else cleaned
+
+
+async def _extract_pub_status(page: Page) -> str:  # 'completed' / 'ongoing' / 'unknown'
+    try:
+        result = await page.evaluate("""
+            () => {
+                const statusSelectors = [
+                    '.elem_status .value', '.manga-info .status',
+                    '[class*="status"] .value', '.property .status',
+                ];
+                for (const sel of statusSelectors) {
+                    const el = document.querySelector(sel);
+                    if (el) {
+                        const t = el.textContent.toLowerCase();
+                        if (t.includes('завершён') || t.includes('завершен') || t.includes('complete')) return 'completed';
+                        if (t.includes('продолжает') || t.includes('ongoing')) return 'ongoing';
+                    }
+                }
+                const bodyText = document.body ? document.body.innerText.toLowerCase() : '';
+                if (bodyText.includes('выпуск завершён') || bodyText.includes('выпуск завершен')) return 'completed';
+                if (bodyText.includes('продолжается')) return 'ongoing';
+                return 'unknown';
+            }
+        """)  # status elements are checked first, then the whole body text
+        return result or "unknown"
+    except Exception:
+        return "unknown"  # evaluation failure is reported as an unknown status
+
+
+async def _extract_description(page: Page) -> str:  # "" when nothing matches or evaluate fails
+    try:
+        result = await page.evaluate("""
+            () => {
+                const selectors = [
+                    '.manga-description', '.elem_descr .value',
+                    '#tab-description .description-text', '.description',
+                    '[itemprop="description"]',
+                ];
+                for (const sel of selectors) {
+                    const el = document.querySelector(sel);
+                    if (el && el.textContent.trim()) return el.textContent.trim();
+                }
+                return '';
+            }
+        """)  # text of the first selector whose element has non-empty content
+        return (result or "").strip()[:2000]  # capped at 2000 characters
+    except Exception:
+        return ""
+
+
+async def _extract_genres(page: Page) -> list[str]:  # [] when nothing matches or evaluate fails
+    try:
+        result = await page.evaluate("""
+            () => {
+                const selectors = [
+                    '.elem_genre .value a', '.genres a',
+                    '[itemprop="genre"]', '.genre-list a',
+                ];
+                for (const sel of selectors) {
+                    const els = document.querySelectorAll(sel);
+                    if (els.length) return Array.from(els).map(e => e.textContent.trim()).filter(Boolean);
+                }
+                return [];
+            }
+        """)  # trimmed, non-empty texts from the first selector that matches any element
+        return result or []
+    except Exception:
+        return []
+
+
+async def _expand_chapters(page: Page):  # clicks the first visible "all chapters" control, if any
+    for sel in ["a.chapter-link.all", "button:has-text('Все главы')", "a:has-text('Все главы')"]:
+        try:
+            el = page.locator(sel).first
+            if await el.is_visible(timeout=2000):
+                await el.click()
+                await page.wait_for_load_state("networkidle", timeout=10_000)
+                return  # stop after the first control that was clicked
+        except Exception:
+            pass  # best effort: on any error move on to the next selector
+
+
+async def _extract_chapters(page: Page) -> list[Chapter]:
+    """Build Chapter objects from the rows of the ``#chapters-list`` table."""
+    found = []
+    for row in await page.query_selector_all("#chapters-list tr.item-row"):
+        link = await row.query_selector("td[class*='item-title'] a")
+        if not link:
+            continue
+        href = await link.get_attribute("href") or ""
+        text = (await link.inner_text()).strip()
+        if not href:
+            continue
+        cell = await row.query_selector("td[data-num]")
+        vol = int(await cell.get_attribute("data-vol") or "0") if cell else 0
+        # data-num is divided by 10 to obtain the chapter number
+        number = (int(await cell.get_attribute("data-num") or "0") if cell else 0) / 10.0
+        full_url = href if href.startswith("http") else _base_url(page.url) + href
+        found.append(Chapter(title=text, url=full_url, number=number, volume=vol))
+    return found
+
+
+async def _extract_chapters_alt(page: Page) -> list[Chapter]:  # link-scan variant of chapter parsing
+    result = await page.evaluate("""
+        () => {
+            const links = Array.from(document.querySelectorAll('a[href*="/vol"]'));
+            return links.map(a => ({ href: a.href, text: a.textContent.trim() }))
+                        .filter(x => x.href && x.text);
+        }
+    """)  # every <a> whose href contains "/vol", with non-empty href and text
+    return [Chapter(title=x["text"], url=x["href"],
+                    number=_parse_num(x["text"]), volume=_parse_vol(x["text"]))  # both parsed from link text
+            for x in result]
+
+
+async def _extract_images_from_js(page: Page) -> list[str]:  # image URLs from the readerInit script; [] on failure
+    try:
+        result = await page.evaluate("""
+            () => {
+                for (const s of document.querySelectorAll('script')) {
+                    const text = s.textContent || '';
+                    const mi = text.indexOf('readerInit');
+                    if (mi === -1) continue;
+                    const ai = text.indexOf('[', mi);
+                    if (ai === -1) continue;
+                    let depth = 0, end = -1;
+                    for (let i = ai; i < text.length; i++) {
+                        if (text[i] === '[') depth++;
+                        else if (text[i] === ']') { depth--; if (!depth) { end = i+1; break; } }
+                    }
+                    if (end === -1) continue;
+                    try {
+                        const arr = eval(text.slice(ai, end));
+                        if (Array.isArray(arr) && arr.length)
+                            return arr.map(item => Array.isArray(item) && item.length >= 3
+                                ? item[0] + item[2] : null).filter(Boolean);
+                    } catch(e) {}
+                }
+                return [];
+            }
+        """)  # bracket-matches the first array after "readerInit"; URL = item[0] + item[2]
+        # NOTE(review): the in-page script eval()s text taken from the page's own <script> tags.
+        if result:
+            logger.debug("JS readerInit нашёл {} изображений", len(result))
+        return result or []
+    except Exception as e:
+        logger.debug("JS-метод не сработал: {}", e)
+        return []
+
+
+async def _extract_images_from_dom(page: Page) -> list[str]:  # image URLs read from <img> elements; [] on failure
+    try:
+        result = await page.evaluate("""
+            () => {
+                for (const sel of ['img.manga-page', '.page-image img', '#mangaReader img', 'img[data-src]']) {
+                    const found = Array.from(document.querySelectorAll(sel));
+                    if (found.length) return found.map(i => i.src || i.dataset.src).filter(Boolean);
+                }
+                return [];
+            }
+        """)  # first selector that matches any element wins; src is preferred over data-src
+        return result or []
+    except Exception:
+        return []
+
+
+def _get_ext(url: str) -> str:
+    """Return the image extension found in ``url`` ("jpeg" becomes ".jpg"); ".jpg" when none matches."""
+    found = re.search(r"\.(jpg|jpeg|png|webp)(\?|$)", url, re.IGNORECASE)
+    suffix = found.group(1).lower() if found else "jpg"
+    # "jpeg" is folded into "jpg" so both spellings share one extension.
+    return "." + ("jpg" if suffix == "jpeg" else suffix)
+
+
+def _base_url(url: str) -> str:  # scheme://host of url; "https://readmanga.ru" when url has none
+    origin = re.match(r"(https?://[^/]+)", url)
+    return "https://readmanga.ru" if origin is None else origin.group(1)
+
+
+def _parse_num(text: str) -> float:  # first number in text ("," accepted as decimal mark), else 0.0
+    hits = re.findall(r"[\d]+(?:[.,]\d+)?", text.replace(",", "."))
+    return float(hits[0]) if hits else 0.0
+
+
+def _parse_vol(text: str) -> int:  # number following "Том" (case-insensitive), 0 when absent
+    found = re.compile(r"Том\s+(\d+)", re.IGNORECASE).search(text)
+    return 0 if found is None else int(found.group(1))
+
diff --git a/src/state.py b/src/state.py
index 773100b..4a2bc68 100644
--- a/src/state.py
+++ b/src/state.py
@@ -1,14 +1,25 @@
"""
Хранение состояния скачивания в SQLite.
"""
+import json
import sqlite3
from datetime import datetime
from pathlib import Path
from typing import Optional
+from urllib.parse import urlparse
DB_PATH = Path("/app/state/progress.db")
+# Default ReadManga domains (seeded on first run)
+_DEFAULT_READMANGA_DOMAINS = [
+    "readmanga.ru",
+    "readmanga.live",
+    "readmanga.me",
+    "readmanga.io",
+    "3.readmanga.ru",
+]
+
class StateDB:
def __init__(self, db_path: Path = DB_PATH):
@@ -68,18 +79,35 @@ class StateDB:
created_at TEXT
)
""")
+ self.conn.execute("""
+ CREATE TABLE IF NOT EXISTS sources (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ slug TEXT UNIQUE NOT NULL,
+ display_name TEXT NOT NULL,
+ settings TEXT DEFAULT '{}',
+ created_at TEXT
+ )
+ """)
+ self.conn.execute("""
+ CREATE TABLE IF NOT EXISTS source_domains (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ source_id INTEGER NOT NULL REFERENCES sources(id),
+ domain TEXT UNIQUE NOT NULL
+ )
+ """)
# Migrate old DB: add missing columns
migrations = [
- ("chapters", "pages_total", "INTEGER DEFAULT 0"),
- ("chapters", "pages_done", "INTEGER DEFAULT 0"),
- ("mangas", "title_ru", "TEXT"),
- ("mangas", "title_full", "TEXT"),
- ("mangas", "pub_status", "TEXT DEFAULT 'unknown'"),
- ("mangas", "auto_update", "INTEGER DEFAULT 0"),
- ("mangas", "last_checked_at", "TEXT"),
- ("mangas", "started_at", "TEXT"),
- ("mangas", "finished_at", "TEXT"),
- ("mangas", "folder_name", "TEXT"),
+ ("chapters", "pages_total", "INTEGER DEFAULT 0"),
+ ("chapters", "pages_done", "INTEGER DEFAULT 0"),
+ ("mangas", "title_ru", "TEXT"),
+ ("mangas", "title_full", "TEXT"),
+ ("mangas", "pub_status", "TEXT DEFAULT 'unknown'"),
+ ("mangas", "auto_update", "INTEGER DEFAULT 0"),
+ ("mangas", "last_checked_at","TEXT"),
+ ("mangas", "started_at", "TEXT"),
+ ("mangas", "finished_at", "TEXT"),
+ ("mangas", "folder_name", "TEXT"),
+ ("mangas", "source_id", "INTEGER REFERENCES sources(id)"),
]
for table, col, typedef in migrations:
try:
@@ -88,17 +116,184 @@ class StateDB:
pass
self.conn.commit()
+ def sync_sources(self, registry) -> None:
+     """
+     Synchronize the ``sources`` table with the in-code source registry.
+
+     Meant to be called on application start-up. For every source returned
+     by ``registry.all_sources()`` a row is inserted when its slug is
+     missing, or its ``display_name`` is updated when it differs. If the
+     ``readmanga`` source has no domains yet, the default ReadManga domains
+     are inserted. Finally a warning is logged for every slug stored in the
+     database but absent from ``registry.all_slugs()``.
+
+     Args:
+         registry: Object exposing ``all_sources()`` (items with ``slug``
+             and ``display_name`` attributes) and ``all_slugs()``.
+     """
+     from loguru import logger
+     for source in registry.all_sources():
+         existing = self.conn.execute(
+             "SELECT id, display_name FROM sources WHERE slug=?", (source.slug,)
+         ).fetchone()
+         if not existing:
+             self.conn.execute(
+                 "INSERT INTO sources (slug, display_name, settings, created_at) VALUES (?,?,?,?)",
+                 (source.slug, source.display_name, "{}", _now())
+             )
+             logger.info("Источник добавлен в БД: {} ({})", source.display_name, source.slug)
+         elif existing["display_name"] != source.display_name:
+             self.conn.execute(
+                 "UPDATE sources SET display_name=? WHERE slug=?",
+                 (source.display_name, source.slug)
+             )
+     self.conn.commit()
+
+     # Seed the ReadManga domains only while the source has none at all.
+     rm = self.conn.execute("SELECT id FROM sources WHERE slug='readmanga'").fetchone()
+     if rm:
+         count = self.conn.execute(
+             "SELECT COUNT(*) FROM source_domains WHERE source_id=?", (rm["id"],)
+         ).fetchone()[0]
+         if count == 0:
+             seeded = 0
+             for domain in _DEFAULT_READMANGA_DOMAINS:
+                 try:
+                     self.conn.execute(
+                         "INSERT INTO source_domains (source_id, domain) VALUES (?,?)",
+                         (rm["id"], domain)
+                     )
+                     seeded += 1
+                 except Exception as e:
+                     # Seeding stays best-effort (one bad domain must not
+                     # stop the rest), but the skipped domain is reported.
+                     logger.warning("Не удалось добавить домен ReadManga {}: {}", domain, e)
+             self.conn.commit()
+             # Report the number actually inserted, not the list length.
+             logger.info("Сидинг доменов ReadManga: {} доменов", seeded)
+
+     # Warn about sources that exist in the DB but have no code in the registry.
+     known_slugs = set(registry.all_slugs())
+     db_slugs = [r["slug"] for r in self.conn.execute("SELECT slug FROM sources").fetchall()]
+     for slug in db_slugs:
+         if slug not in known_slugs:
+             logger.warning("Источник '{}' есть в БД, но отсутствует в реестре — манги недоступны", slug)
+
+ def migrate_manga_sources(self) -> int:
+     """
+     Backfill ``source_id`` for mangas where it is still NULL.
+
+     The source is resolved from the domain of each manga URL through
+     ``get_source_by_domain``; mangas whose domain is not registered are
+     left untouched. A commit is issued only when something was updated.
+
+     Returns:
+         Number of mangas that received a ``source_id``.
+     """
+     pending_urls = [
+         record["url"]
+         for record in self.conn.execute(
+             "SELECT url FROM mangas WHERE source_id IS NULL"
+         ).fetchall()
+     ]
+     updated = 0
+     for manga_url in pending_urls:
+         matched = self.get_source_by_domain(_extract_domain(manga_url))
+         if not matched:
+             continue
+         self.conn.execute(
+             "UPDATE mangas SET source_id=? WHERE url=?",
+             (matched["id"], manga_url)
+         )
+         updated += 1
+     if updated:
+         self.conn.commit()
+     return updated
+
+ # ── Sources ───────────────────────────────────
+
+ def get_source_by_id(self, source_id: int) -> Optional[dict]:
+     """Fetch the ``sources`` row with the given id as a dict, or ``None`` when absent."""
+     cursor = self.conn.execute("SELECT * FROM sources WHERE id=?", (source_id,))
+     record = cursor.fetchone()
+     if not record:
+         return None
+     return dict(record)
+
+ def get_source_by_slug(self, slug: str) -> Optional[dict]:
+     """Fetch the ``sources`` row with the given slug as a dict, or ``None`` when absent."""
+     cursor = self.conn.execute("SELECT * FROM sources WHERE slug=?", (slug,))
+     record = cursor.fetchone()
+     if not record:
+         return None
+     return dict(record)
+
+ def get_source_by_domain(self, domain: str) -> Optional[dict]:
+     """Return the source that owns ``domain``, or ``None`` when it is not registered.
+
+     The domain is lower-cased and matched against ``source_domains``,
+     which is joined to ``sources``.
+     """
+     query = """
+         SELECT s.* FROM sources s
+         JOIN source_domains sd ON sd.source_id = s.id
+         WHERE sd.domain=?
+     """
+     record = self.conn.execute(query, (domain.lower(),)).fetchone()
+     return None if not record else dict(record)
+
+ def get_all_sources(self) -> list[dict]:
+     """Return every source, ordered by id, with its domains attached.
+
+     Each entry is the ``sources`` row as a dict with two derived fields:
+     ``domains`` (the source's domains sorted alphabetically) and
+     ``settings`` (the stored JSON decoded, or ``{}`` when it is empty or
+     cannot be decoded).
+     """
+     collected = []
+     for source_row in self.conn.execute("SELECT * FROM sources ORDER BY id").fetchall():
+         entry = dict(source_row)
+         domain_rows = self.conn.execute(
+             "SELECT domain FROM source_domains WHERE source_id=? ORDER BY domain",
+             (source_row["id"],)
+         ).fetchall()
+         entry["domains"] = [r["domain"] for r in domain_rows]
+         raw_settings = entry.get("settings") or "{}"
+         try:
+             entry["settings"] = json.loads(raw_settings)
+         except Exception:
+             entry["settings"] = {}
+         collected.append(entry)
+     return collected
+
+ def add_domain(self, source_id: int, domain: str) -> bool:
+     """Attach a domain to a source.
+
+     The domain is lower-cased and stripped of surrounding whitespace
+     before it is stored.
+
+     Args:
+         source_id: Id of the owning row in ``sources``.
+         domain: Domain name to register.
+
+     Returns:
+         ``True`` when the row was inserted and committed; ``False`` when
+         the normalized domain is empty or the INSERT fails (for example
+         because the domain is already registered).
+     """
+     domain = domain.lower().strip()
+     if not domain:
+         # Never store an empty domain: lookups for URLs that have no host
+         # part use "" as the domain and would then resolve to this source.
+         return False
+     try:
+         self.conn.execute(
+             "INSERT INTO source_domains (source_id, domain) VALUES (?,?)",
+             (source_id, domain)
+         )
+         self.conn.commit()
+         return True
+     except Exception:
+         return False
+
+ def remove_domain(self, source_id: int, domain: str) -> bool:
+     """Detach a domain from a source.
+
+     The domain is lower-cased and stripped of surrounding whitespace, the
+     same normalization ``add_domain`` applies before storing, so a value
+     accepted on insertion can also be removed.
+
+     Args:
+         source_id: Id of the owning row in ``sources``.
+         domain: Domain name to remove.
+
+     Returns:
+         ``True`` when a row was deleted, ``False`` otherwise.
+     """
+     cur = self.conn.execute(
+         "DELETE FROM source_domains WHERE source_id=? AND domain=?",
+         (source_id, domain.lower().strip())
+     )
+     self.conn.commit()
+     return cur.rowcount > 0
+
+ def set_manga_source(self, manga_url: str, source_id: int) -> None:
+     """Assign ``source_id`` to the manga with URL ``manga_url``, refresh ``updated_at`` and commit."""
+     params = (source_id, _now(), manga_url)
+     self.conn.execute("UPDATE mangas SET source_id=?, updated_at=? WHERE url=?", params)
+     self.conn.commit()
+
+ def reset_failed_chapters(self, manga_url: str) -> int:
+     """Put failed and partially downloaded chapters of a manga back to ``pending``.
+
+     Two groups are reset, with their page counters zeroed: chapters whose
+     status is ``failed``, and chapters marked ``done`` whose ``pages_done``
+     is below a positive ``pages_total``.
+
+     Returns:
+         Total number of chapter rows changed by both updates.
+     """
+     params = (_now(), manga_url)
+     failed_sql = (
+         "UPDATE chapters SET status='pending', pages_done=0, pages_total=0, updated_at=? "
+         "WHERE manga_url=? AND status='failed'"
+     )
+     partial_sql = """UPDATE chapters SET status='pending', pages_done=0, pages_total=0, updated_at=?
+         WHERE manga_url=? AND status='done'
+         AND pages_total > 0 AND pages_done < pages_total"""
+     failed_reset = self.conn.execute(failed_sql, params).rowcount
+     partial_reset = self.conn.execute(partial_sql, params).rowcount
+     self.conn.commit()
+     return failed_reset + partial_reset
+
+ def count_mangas_by_source_domain(self, domain: str) -> int:
+     """Count the mangas assigned to the source that owns ``domain``.
+
+     The count covers every manga whose ``source_id`` points at that
+     source, whatever host its URL uses. Returns ``0`` when the domain is
+     not registered for any source.
+
+     NOTE(review): the original description read "mangas with the given
+     domain" (used for UI warnings); confirm whether a per-domain count
+     was intended instead of a per-source one.
+     """
+     owner = self.get_source_by_domain(domain)
+     if not owner:
+         return 0
+     count_row = self.conn.execute(
+         "SELECT COUNT(*) FROM mangas WHERE source_id=?", (owner["id"],)
+     ).fetchone()
+     return count_row[0]
+
# ── Mangas ────────────────────────────────────
- def add_manga(self, url: str, fmt: str = "cbz") -> bool:
+ def add_manga(self, url: str, fmt: str = "cbz", source_id: Optional[int] = None) -> bool:
"""Добавляет мангу в очередь. Возвращает True если новая."""
cur = self.conn.execute("SELECT id FROM mangas WHERE url=?", (url,))
if cur.fetchone():
return False
self.conn.execute("""
- INSERT INTO mangas (url, format, status, added_at, updated_at)
- VALUES (?, ?, 'queued', ?, ?)
- """, (url, fmt, _now(), _now()))
+ INSERT INTO mangas (url, format, status, source_id, added_at, updated_at)
+ VALUES (?, ?, 'queued', ?, ?, ?)
+ """, (url, fmt, source_id, _now(), _now()))
self.conn.commit()
return True
@@ -318,3 +513,15 @@ class StateDB:
def _now() -> str:
return datetime.utcnow().isoformat()
+
+def _extract_domain(url: str) -> str:
+    """Return the lower-cased host name of ``url`` without a leading ``www.``.
+
+    Credentials (``user:pass@``) and the port are not part of the result.
+
+    Args:
+        url: Absolute URL to inspect.
+
+    Returns:
+        The host name, or ``""`` when the URL has no host part or cannot
+        be parsed.
+    """
+    try:
+        # ``hostname`` (unlike ``netloc``) excludes "user:pass@" and ":port"
+        # and is already lower-cased; it is None when the URL has no host.
+        domain = urlparse(url).hostname or ""
+        if domain.startswith("www."):
+            domain = domain[4:]
+        return domain
+    except Exception:
+        return ""
+
+
diff --git a/src/worker.py b/src/worker.py
index 202781d..03daf7a 100644
--- a/src/worker.py
+++ b/src/worker.py
@@ -11,7 +11,9 @@ from typing import Callable, Optional
from loguru import logger
from .browser import BrowserManager
-from .scraper import get_manga_info, get_chapter_images_and_download, Chapter
+from .sources import registry, get_source_for_url, extract_domain
+from .sources.base import Chapter, MangaInfo
+from .scraper import get_manga_info, get_chapter_images_and_download # shim для обратной совместимости
from .exporter import export, MangaMeta
from .state import StateDB
@@ -61,10 +63,23 @@ async def download_manga(
started_ts = await db_call(db.mark_started, url)
await emit({"type": "manga_start", "url": url, "started_at": started_ts})
+ # Резолвим источник
+ source = get_source_for_url(url, db)
+ if source is None:
+ # Последний шанс: по source_id в БД
+ manga_row = await db_call(db.get_manga, url)
+ if manga_row and manga_row.get("source_id"):
+ source = registry.get_by_db_id(manga_row["source_id"], db)
+ if source is None:
+ await db_call(db.update_manga_status, url, "failed")
+ await emit({"type": "source_unknown", "url": url,
+ "error": "Источник не определён. Выберите источник в настройках манги."})
+ return
+
async with BrowserManager(headless=True) as bm:
ctx, info_page = await bm.new_page()
- manga = await get_manga_info(info_page, url)
+ manga = await source.get_manga_info(info_page, url)
await info_page.close()
if not manga:
@@ -193,7 +208,7 @@ async def download_manga(
"pages_total": pages_total,
})
- image_paths = await get_chapter_images_and_download(
+ image_paths = await source.get_chapter_images_and_download(
ch_page, ch.url,
dest_dir=tmp_path,
manga_url=url,
@@ -329,9 +344,19 @@ async def check_for_updates(
db.add_history(manga_url=url, event_type="check_started")
await emit({"type": "check_started", "url": url})
+ # Резолвим источник
+ source = get_source_for_url(url, db)
+ if source is None:
+ manga_row = db.get_manga(url)
+ if manga_row and manga_row.get("source_id"):
+ source = registry.get_by_db_id(manga_row["source_id"], db)
+ if source is None:
+ await emit({"type": "source_unknown", "url": url})
+ return []
+
async with BrowserManager(headless=True) as bm:
_, page = await bm.new_page()
- manga = await get_manga_info(page, url)
+ manga = await source.get_manga_info(page, url)
await page.close()
if not manga:
return []