This commit is contained in:
2026-04-30 18:54:24 +03:00
parent 88bf301b60
commit 87b692ba49
8 changed files with 1545 additions and 691 deletions

View File

@@ -108,7 +108,7 @@
<div class="card rounded-xl p-5 mb-6"> <div class="card rounded-xl p-5 mb-6">
<h2 class="text-sm font-semibold text-gray-400 uppercase tracking-wider mb-3">Добавить мангу</h2> <h2 class="text-sm font-semibold text-gray-400 uppercase tracking-wider mb-3">Добавить мангу</h2>
<div class="flex flex-col md:flex-row gap-3"> <div class="flex flex-col md:flex-row gap-3">
<textarea id="url-input" rows="2" placeholder="Один или несколько URL (каждый с новой строки)&#10;https://3.readmanga.ru/manga_slug" class="flex-1 px-3 py-2 text-sm resize-none"></textarea> <textarea id="url-input" rows="2" placeholder="Один или несколько URL (каждый с новой строки)&#10;https://3.readmanga.ru/manga_slug" class="flex-1 px-3 py-2 text-sm resize-none" oninput="onUrlInputChange()"></textarea>
<div class="flex flex-col gap-2"> <div class="flex flex-col gap-2">
<select id="fmt-select" class="px-3 py-2 text-sm"> <select id="fmt-select" class="px-3 py-2 text-sm">
<option value="cbz">CBZ</option> <option value="cbz">CBZ</option>
@@ -116,7 +116,20 @@
<option value="epub">EPUB</option> <option value="epub">EPUB</option>
<option value="all">Все форматы</option> <option value="all">Все форматы</option>
</select> </select>
<button onclick="addToQueue()" class="btn-primary text-sm"> В очередь</button> <button onclick="addToQueue()" id="add-btn" class="btn-primary text-sm"> В очередь</button>
</div>
</div>
<!-- Source detection hint -->
<div id="source-hint" class="mt-2 hidden">
<div id="source-hint-found" class="hidden text-xs text-green-400 flex items-center gap-2">
<span>🔗 Источник:</span>
<span id="source-hint-name" class="font-semibold"></span>
</div>
<div id="source-hint-unknown" class="hidden flex flex-col gap-2">
<div class="text-xs text-yellow-400">⚠ Домен не распознан. Выберите источник вручную:</div>
<select id="source-manual-select" class="px-3 py-2 text-sm w-full md:w-72">
<option value="">— выберите источник —</option>
</select>
</div> </div>
</div> </div>
<div id="add-msg" class="mt-2 text-sm text-green-400 hidden"></div> <div id="add-msg" class="mt-2 text-sm text-green-400 hidden"></div>
@@ -132,6 +145,8 @@
class="px-4 py-3 text-sm font-semibold border-b-2 border-transparent text-gray-400 hover:text-white">🔔 Новости</button> class="px-4 py-3 text-sm font-semibold border-b-2 border-transparent text-gray-400 hover:text-white">🔔 Новости</button>
<button onclick="switchTab('history')" id="tab-history" <button onclick="switchTab('history')" id="tab-history"
class="px-4 py-3 text-sm font-semibold border-b-2 border-transparent text-gray-400 hover:text-white">🕒 История</button> class="px-4 py-3 text-sm font-semibold border-b-2 border-transparent text-gray-400 hover:text-white">🕒 История</button>
<button onclick="switchTab('settings')" id="tab-settings"
class="px-4 py-3 text-sm font-semibold border-b-2 border-transparent text-gray-400 hover:text-white">⚙️ Настройки</button>
</div> </div>
<div id="manga-filters" class="flex gap-2 py-2"> <div id="manga-filters" class="flex gap-2 py-2">
<button onclick="filterMangas('all')" id="filter-all" class="text-xs px-3 py-1 rounded-full bg-indigo-600 text-white">Все</button> <button onclick="filterMangas('all')" id="filter-all" class="text-xs px-3 py-1 rounded-full bg-indigo-600 text-white">Все</button>
@@ -177,6 +192,36 @@
<div class="px-5 py-8 text-center text-gray-500 text-sm">Загрузка...</div> <div class="px-5 py-8 text-center text-gray-500 text-sm">Загрузка...</div>
</div> </div>
</div> </div>
<!-- Settings -->
<div id="tab-content-settings" class="hidden">
<div class="px-5 py-4">
<h3 class="text-sm font-semibold text-gray-300 uppercase tracking-wider mb-1">Источники</h3>
<p class="text-xs text-gray-500 mb-4">Источники определяются в коде приложения. Здесь можно управлять доменами для каждого источника.</p>
<div id="sources-list" class="flex flex-col gap-3"></div>
</div>
</div>
</div>
</div>
<!-- Switch Source Modal -->
<div id="switch-source-modal" class="fixed inset-0 z-[60] hidden items-center justify-center" style="background:rgba(0,0,0,0.75)">
<div class="card rounded-2xl w-full max-w-md mx-4 p-6 flex flex-col gap-4">
<h3 class="font-semibold text-white text-base">↔ Сменить источник</h3>
<div class="text-sm text-gray-400" id="switch-source-current"></div>
<div class="flex flex-col gap-2">
<label class="text-xs text-gray-400">Новый источник</label>
<select id="switch-source-select" class="px-3 py-2 text-sm w-full"></select>
<div id="switch-source-warning" class="text-xs text-yellow-400 hidden"></div>
</div>
<div class="flex gap-3 justify-end mt-2">
<button onclick="closeSwitchSourceModal()"
class="px-4 py-2 rounded-lg text-sm text-gray-400 hover:text-white"
style="background:#1e293b">Отмена</button>
<button onclick="confirmSwitchSource()"
class="px-4 py-2 rounded-lg text-sm font-semibold text-white"
style="background:#312e81">Применить</button>
</div>
</div> </div>
</div> </div>
@@ -281,6 +326,7 @@ const state = {
mangas: {}, // url → manga object mangas: {}, // url → manga object
chapters: {}, // manga_url → [chapter, ...] chapters: {}, // manga_url → [chapter, ...]
filter: 'all', filter: 'all',
sources: [], // [{id, slug, display_name, domains}]
}; };
// ── Auth ───────────────────────────────────── // ── Auth ─────────────────────────────────────
@@ -407,8 +453,12 @@ function handleEvent(msg) {
case 'manga_queued': case 'manga_queued':
if(!state.mangas[msg.url]) { if(!state.mangas[msg.url]) {
const srcInfo = msg.source_id ? (state.sources.find(s => s.id === msg.source_id) || null) : null;
state.mangas[msg.url] = { url: msg.url, title: msg.url, status: 'queued', format: msg.format, state.mangas[msg.url] = { url: msg.url, title: msg.url, status: 'queued', format: msg.format,
chapters_total: 0, chapters_done: 0, size_human: '—' }; chapters_total: 0, chapters_done: 0, size_human: '—',
source: srcInfo ? {id: srcInfo.id, slug: srcInfo.slug, display_name: srcInfo.display_name} : null };
} else {
state.mangas[msg.url].status = 'queued';
} }
renderList(); renderList();
loadStats(); loadStats();
@@ -597,6 +647,25 @@ function handleEvent(msg) {
renderList(); renderList();
loadStats(); loadStats();
break; break;
case 'source_unknown':
_showNotification('⚠ Источник не определён для ' + (state.mangas[msg.url]?.title || msg.url) + '. Выберите источник.', 'warn');
if(state.mangas[msg.url]) { state.mangas[msg.url].status = 'failed'; renderList(); }
break;
case 'source_domain_added':
case 'source_domain_removed':
loadSources();
break;
case 'source_switched':
if(state.mangas[msg.url]) {
// Обновляем source у манги из актуального списка источников
const newSrc = state.sources.find(s => s.id === msg.new_source_id);
if(newSrc) state.mangas[msg.url].source = {id: newSrc.id, slug: newSrc.slug, display_name: newSrc.display_name};
updateMangaRow(msg.url);
}
break;
} }
} }
@@ -604,7 +673,7 @@ function handleEvent(msg) {
let newsUnreadCount = 0; let newsUnreadCount = 0;
function switchTab(tab) { function switchTab(tab) {
['mangas', 'news', 'history'].forEach(t => { ['mangas', 'news', 'history', 'settings'].forEach(t => {
document.getElementById('tab-content-'+t).classList.toggle('hidden', t !== tab); document.getElementById('tab-content-'+t).classList.toggle('hidden', t !== tab);
const btn = document.getElementById('tab-'+t); const btn = document.getElementById('tab-'+t);
btn.className = t === tab btn.className = t === tab
@@ -614,6 +683,8 @@ function switchTab(tab) {
document.getElementById('manga-filters').classList.toggle('hidden', tab !== 'mangas'); document.getElementById('manga-filters').classList.toggle('hidden', tab !== 'mangas');
if(tab === 'history') loadHistory(); if(tab === 'history') loadHistory();
if(tab === 'news') { newsUnreadCount = 0; updateNewsBadge(); loadNews(); } if(tab === 'news') { newsUnreadCount = 0; updateNewsBadge(); loadNews(); }
if(tab === 'settings') loadSources();
}
} }
function updateNewsBadge() { function updateNewsBadge() {
@@ -773,6 +844,66 @@ async function checkNowBtn(btn, url) {
} }
} }
// ── Source detection ─────────────────────────
// Handle of the pending debounce timer started by onUrlInputChange().
let _resolveTimer = null;
// Result of the last source detection for the first URL in #url-input:
//   number    - id of the source returned by /api/resolve-source;
//   null      - nothing resolved (empty input or failed lookup), so addToQueue() sends no source_id;
//   undefined - the domain was not recognised and the user has to pick a source manually.
let _resolvedSourceId = null;
/**
 * Input handler of the URL textarea.
 * Restarts the debounce timer so that source detection runs only once the
 * user has paused typing.
 */
async function onUrlInputChange() {
  const debounceMs = 400;
  // Drop the previously scheduled detection, if any, before scheduling a new one.
  clearTimeout(_resolveTimer);
  _resolveTimer = setTimeout(_resolveSource, debounceMs);
}
/**
 * Detects the source for the first non-empty URL typed into #url-input by
 * asking /api/resolve-source, then updates the hint area, the "add" button
 * and `_resolvedSourceId`:
 *   - source found   -> shows its name, stores its id, enables the button;
 *   - source unknown -> shows the manual <select> filled from state.sources,
 *                       stores `undefined`, and keeps the button disabled
 *                       until a source is picked;
 *   - empty input or failed lookup -> hides the hint, stores `null`, enables
 *                       the button (best effort: adding is never blocked by a
 *                       failed lookup).
 *
 * A response is ignored when the first URL in the input has changed while the
 * request was in flight, so a slow, outdated answer cannot overwrite the
 * state that belongs to a newer URL.
 */
async function _resolveSource() {
  const input = document.getElementById('url-input');
  const hint = document.getElementById('source-hint');
  const hintFound = document.getElementById('source-hint-found');
  const hintUnknown = document.getElementById('source-hint-unknown');
  const addBtn = document.getElementById('add-btn');

  // Only the first non-empty line takes part in the detection.
  const firstUrl = () => input.value.split('\n').map((line) => line.trim()).find(Boolean);
  const resetHint = () => {
    hint.classList.add('hidden');
    _resolvedSourceId = null;
    addBtn.disabled = false;
  };

  const url = firstUrl();
  if (!url) {
    resetHint();
    return;
  }

  try {
    const r = await fetch('/api/resolve-source?url=' + encodeURIComponent(url));
    // An HTTP error is a failed lookup, not an "unknown domain" answer.
    if (!r.ok) throw new Error(`HTTP ${r.status}`);
    const data = await r.json();

    // The user kept typing: this answer describes an outdated URL.
    if (firstUrl() !== url) return;

    hint.classList.remove('hidden');
    if (data.source) {
      hintFound.classList.remove('hidden');
      hintUnknown.classList.add('hidden');
      document.getElementById('source-hint-name').textContent = data.source.display_name;
      _resolvedSourceId = data.source.id;
      addBtn.disabled = false;
      return;
    }

    hintFound.classList.add('hidden');
    hintUnknown.classList.remove('hidden');
    _resolvedSourceId = undefined; // unknown domain: a manual choice is required

    // Rebuild the list of sources, keeping a choice the user has already made.
    const sources = state.sources || [];
    const sel = document.getElementById('source-manual-select');
    const previous = sel.value;
    sel.innerHTML = '<option value="">— выберите источник —</option>';
    for (const s of sources) {
      const opt = document.createElement('option');
      opt.value = s.id;
      opt.textContent = s.display_name;
      sel.appendChild(opt);
    }
    if (previous && sources.some((s) => String(s.id) === previous)) {
      sel.value = previous;
    }
    addBtn.disabled = !sel.value;
    sel.onchange = () => {
      addBtn.disabled = !sel.value;
    };
  } catch (e) {
    // A newer request owns the UI state now; leave it alone.
    if (firstUrl() !== url) return;
    console.warn('Source detection failed:', e);
    resetHint();
  }
}
// ── API ────────────────────────────────────── // ── API ──────────────────────────────────────
async function loadStats() { async function loadStats() {
try { try {
@@ -788,17 +919,35 @@ async function addToQueue() {
const urls = raw.split('\n').map(u=>u.trim()).filter(Boolean); const urls = raw.split('\n').map(u=>u.trim()).filter(Boolean);
if(!urls.length) return; if(!urls.length) return;
// Определяем source_id
let sourceId = null;
if(_resolvedSourceId === undefined) {
// Неизвестный домен — нужен ручной выбор
const manualVal = document.getElementById('source-manual-select').value;
if(!manualVal) { alert('Выберите источник для добавления манги'); return; }
sourceId = parseInt(manualVal);
} else if(_resolvedSourceId !== null) {
sourceId = _resolvedSourceId;
}
try { try {
const body = {urls, format: fmt};
if(sourceId !== null) body.source_id = sourceId;
const r = await fetch('/api/queue', { const r = await fetch('/api/queue', {
method:'POST', method:'POST',
headers:{'Content-Type':'application/json'}, headers:{'Content-Type':'application/json'},
body: JSON.stringify({urls, format: fmt}) body: JSON.stringify(body)
}); });
const data = await r.json(); const data = await r.json();
const msg = document.getElementById('add-msg'); const msg = document.getElementById('add-msg');
msg.textContent = `✓ Добавлено: ${data.added.length}, уже есть: ${data.skipped.length}`; msg.textContent = `✓ Добавлено: ${data.added.length}, уже есть: ${data.skipped.length}`;
msg.classList.remove('hidden'); msg.classList.remove('hidden');
if(data.added.length) document.getElementById('url-input').value = ''; if(data.added.length) {
document.getElementById('url-input').value = '';
document.getElementById('source-hint').classList.add('hidden');
_resolvedSourceId = null;
document.getElementById('add-btn').disabled = false;
}
setTimeout(()=>msg.classList.add('hidden'), 4000); setTimeout(()=>msg.classList.add('hidden'), 4000);
} catch(e) { } catch(e) {
alert('Ошибка: ' + e.message); alert('Ошибка: ' + e.message);
@@ -825,6 +974,193 @@ async function resumeManga(url) {
} }
} }
// ── Sources ───────────────────────────────────
/**
 * Fetches the list of sources from /api/sources into `state.sources` and
 * re-renders the settings tab when it is currently visible.
 *
 * Best effort: on a network error, a non-OK response or a payload that is not
 * an array, the previous `state.sources` is kept and the problem is logged to
 * the console instead of being swallowed silently.
 */
async function loadSources() {
  try {
    const r = await fetch('/api/sources');
    if (!r.ok) {
      console.warn('loadSources: unexpected HTTP status', r.status);
      return;
    }
    const payload = await r.json();
    // Other code iterates state.sources with find()/forEach(); keep it an array.
    if (!Array.isArray(payload)) {
      console.warn('loadSources: unexpected payload', payload);
      return;
    }
    state.sources = payload;

    const settingsPanel = document.getElementById('tab-content-settings');
    if (settingsPanel && !settingsPanel.classList.contains('hidden')) {
      renderSources();
    }
  } catch (e) {
    console.warn('loadSources failed:', e);
  }
}
/**
 * Renders the cards of the settings tab: one card per entry of
 * `state.sources`, each with its domains (removable) and a "+ домен" button.
 *
 * Domains are arbitrary user-entered text. Inside an inline `onclick`
 * attribute HTML entities are decoded before the script runs, so HTML
 * escaping alone does not protect a JavaScript string literal. The domain is
 * therefore passed percent-encoded (with `'` encoded too) and decoded by the
 * handler itself.
 */
function renderSources() {
  const container = document.getElementById('sources-list');
  if (!container) return;
  if (!state.sources.length) {
    container.innerHTML = '<div class="text-sm text-gray-500">Нет доступных источников</div>';
    return;
  }

  // Output consists only of characters that are inert in both an HTML
  // attribute and a single-quoted JS string.
  const encodeForHandler = (value) => encodeURIComponent(String(value)).replace(/'/g, '%27');

  const domainChip = (sourceId, domain) => `
    <span class="flex items-center gap-1 text-xs px-2 py-1 rounded" style="background:#1e293b;color:#94a3b8">
      ${escHtml(domain)}
      <button onclick="removeDomain(${sourceId}, decodeURIComponent('${encodeForHandler(domain)}'))"
        title="Удалить домен"
        style="color:#ef4444;background:none;border:none;cursor:pointer;padding:0 2px;font-size:0.8rem;line-height:1">✕</button>
    </span>`;

  const sourceCard = (s) => {
    // Ids end up inside inline handlers and element ids; force them to be numeric.
    const id = Number(s.id);
    const chips = (s.domains ?? []).map((d) => domainChip(id, d)).join('');
    return `
    <div class="rounded-lg p-4" style="background:#0f172a;border:1px solid #1e293b">
      <div class="flex items-center justify-between mb-3">
        <div>
          <span class="text-sm font-semibold text-white">${escHtml(s.display_name)}</span>
          <span class="ml-2 text-xs text-gray-500">slug: ${escHtml(s.slug)}</span>
        </div>
      </div>
      <div class="flex flex-wrap gap-2 items-center">
        ${chips}
        <span id="add-domain-area-${id}">
          <button onclick="showAddDomain(${id})"
            style="font-size:0.7rem;padding:3px 8px;border-radius:4px;background:#1e293b;color:#6ee7b7;border:1px dashed #334155;cursor:pointer">
            + домен
          </button>
        </span>
      </div>
    </div>`;
  };

  container.innerHTML = state.sources.map(sourceCard).join('');
}
/**
 * Replaces the "+ домен" button of the given source with an inline form
 * (text input, confirm and cancel buttons) and focuses the input shortly
 * afterwards. Does nothing when the source card is not on the page.
 */
function showAddDomain(sourceId) {
  const slot = document.getElementById(`add-domain-area-${sourceId}`);
  if (!slot) return;

  const inputId = 'new-domain-input-' + sourceId;
  // Enter submits the form, Escape (or the cancel button) re-renders the cards.
  slot.innerHTML = `
<span class="flex items-center gap-1">
<input id="${inputId}" type="text" placeholder="example.com"
class="text-xs px-2 py-1 rounded" style="background:#1e293b;color:#e2e8f0;border:1px solid #334155;width:140px"
onkeydown="if(event.key==='Enter') addDomain(${sourceId}); if(event.key==='Escape') renderSources();">
<button onclick="addDomain(${sourceId})"
style="font-size:0.75rem;padding:3px 8px;border-radius:4px;background:#166534;color:#86efac;cursor:pointer">✓</button>
<button onclick="renderSources()"
style="font-size:0.75rem;padding:3px 8px;border-radius:4px;background:#1e293b;color:#94a3b8;cursor:pointer">✕</button>
</span>
`;

  const focusInput = () => {
    document.getElementById(inputId)?.focus();
  };
  setTimeout(focusInput, 50);
}
/**
 * Submits the inline "add domain" form of a source: POSTs the trimmed,
 * lower-cased value of the input to /api/sources/{id}/domains and reloads
 * the sources on success.
 *
 * On failure a notification is shown. The server's `detail` text is used
 * when the error body is JSON with a string `detail`; any other error body
 * (HTML from a proxy, a structured validation error, an empty body) falls
 * back to a generic message instead of leaking a parse error or
 * "[object Object]".
 *
 * @param {number} sourceId - id of the source the domain is added to
 */
async function addDomain(sourceId) {
  const input = document.getElementById('new-domain-input-' + sourceId);
  if (!input) return;
  const domain = input.value.trim().toLowerCase();
  if (!domain) return;

  try {
    const r = await fetch(`/api/sources/${sourceId}/domains`, {
      method: 'POST',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({domain}),
    });
    if (!r.ok) {
      // The error body is not guaranteed to be JSON.
      const payload = await r.json().catch(() => null);
      const detail = typeof payload?.detail === 'string' && payload.detail
        ? payload.detail
        : 'неизвестная ошибка';
      _showNotification('Ошибка: ' + detail, 'error');
      return;
    }
    await loadSources();
  } catch (e) {
    _showNotification('Ошибка: ' + e.message, 'error');
  }
}
/**
 * Asks for confirmation and then deletes a domain from a source via
 * DELETE /api/sources/{id}/domains/{domain}; reloads the sources on success.
 *
 * On failure a notification is shown. The server's `detail` text is used
 * when the error body is JSON with a string `detail`; any other error body
 * falls back to a generic message instead of leaking a parse error.
 *
 * @param {number} sourceId - id of the source that owns the domain
 * @param {string} domain - domain to remove
 */
async function removeDomain(sourceId, domain) {
  if (!confirm(`Удалить домен «${domain}»?`)) return;

  try {
    const endpoint = `/api/sources/${sourceId}/domains/${encodeURIComponent(domain)}`;
    const r = await fetch(endpoint, {method: 'DELETE'});
    if (!r.ok) {
      // The error body is not guaranteed to be JSON.
      const payload = await r.json().catch(() => null);
      const detail = typeof payload?.detail === 'string' && payload.detail
        ? payload.detail
        : 'неизвестная ошибка';
      _showNotification('Ошибка: ' + detail, 'error');
      return;
    }
    await loadSources();
  } catch (e) {
    _showNotification('Ошибка: ' + e.message, 'error');
  }
}
// ── Switch Source Modal ───────────────────────
// URL of the manga whose source is being changed; set by openSwitchSourceModal()
// and reset to null by closeSwitchSourceModal().
let _switchSourceUrl = null;
/**
 * Opens the "switch source" dialog for the manga with the given URL.
 * Shows the manga's current source, fills the <select> with every entry of
 * `state.sources` (pre-selecting the current one) and, when the URL can be
 * parsed, warns that its domain will be rebound to the chosen source.
 *
 * @param {string} url - URL of the manga, also its key in `state.mangas`
 */
function openSwitchSourceModal(url) {
  _switchSourceUrl = url;
  const currentSource = state.mangas[url]?.source;

  const currentLabel = document.getElementById('switch-source-current');
  currentLabel.textContent = 'Текущий источник: ' + (currentSource?.display_name || 'не определён');

  const picker = document.getElementById('switch-source-select');
  picker.innerHTML = '<option value="">— выберите источник —</option>';
  for (const candidate of state.sources) {
    const option = document.createElement('option');
    option.value = candidate.id;
    option.textContent = candidate.display_name;
    if (currentSource?.id === candidate.id) {
      option.selected = true;
    }
    picker.appendChild(option);
  }

  // The warning needs the host name; an unparsable URL simply hides it.
  const note = document.getElementById('switch-source-warning');
  let host = null;
  try {
    host = new URL(url).hostname.replace(/^www\./, '');
  } catch {
    host = null;
  }
  if (host === null) {
    note.classList.add('hidden');
  } else {
    note.textContent = `⚠ Домен «${host}» будет перепривязан к выбранному источнику. Это затронет все манги с этого домена.`;
    note.classList.remove('hidden');
  }

  const dialog = document.getElementById('switch-source-modal');
  dialog.classList.remove('hidden');
  dialog.classList.add('flex');
}
/**
 * Hides the "switch source" dialog and forgets which manga it was opened for.
 */
function closeSwitchSourceModal() {
  const { classList } = document.getElementById('switch-source-modal');
  _switchSourceUrl = null;
  classList.add('hidden');
  classList.remove('flex');
}
/**
 * Applies the choice made in the "switch source" dialog: POSTs
 * {url, source_id} to /api/mangas/switch-source, closes the dialog, shows a
 * confirmation and updates the manga's `source` in `state.mangas`.
 *
 * Does nothing when no manga is pending or no source is selected.
 * On failure a notification is shown. The server's `detail` text is used
 * when the error body is JSON with a string `detail`; any other error body
 * falls back to a generic message instead of leaking a parse error.
 */
async function confirmSwitchSource() {
  const url = _switchSourceUrl;
  // Explicit radix: option values are decimal source ids; '' yields NaN.
  const sourceId = Number.parseInt(document.getElementById('switch-source-select').value, 10);
  if (!url || !sourceId) return;

  try {
    const r = await fetch('/api/mangas/switch-source', {
      method: 'POST',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({url, source_id: sourceId}),
    });
    if (!r.ok) {
      // The error body is not guaranteed to be JSON.
      const payload = await r.json().catch(() => null);
      const detail = typeof payload?.detail === 'string' && payload.detail
        ? payload.detail
        : 'неизвестная ошибка';
      _showNotification('Ошибка: ' + detail, 'error');
      return;
    }

    const data = await r.json();
    closeSwitchSourceModal();
    _showNotification(
      `✓ Источник изменён на «${data.source_name}»` +
        (data.chapters_reset ? `. Сброшено глав: ${data.chapters_reset}` : ''),
      'ok',
    );

    const manga = state.mangas[url];
    if (manga) {
      const src = state.sources.find((s) => s.id === sourceId);
      if (src) {
        manga.source = {id: src.id, slug: src.slug, display_name: src.display_name};
      }
      updateMangaRow(url);
    }
  } catch (e) {
    _showNotification('Ошибка: ' + e.message, 'error');
  }
}
// Clicking the dimmed backdrop (the modal element itself, not its content)
// closes the "switch source" dialog while it is open.
document.addEventListener('click', (event) => {
  const overlay = document.getElementById('switch-source-modal');
  if (!overlay || overlay.classList.contains('hidden')) return;
  if (event.target === overlay) closeSwitchSourceModal();
});
// ── Notification helper ───────────────────────
/**
 * Shows a short message in the #add-msg element and hides it again after
 * five seconds.
 *
 * The pending hide timer of a previous notification is cancelled first;
 * otherwise that older timer would hide the newer message too early.
 *
 * @param {string} text - message to display
 * @param {'ok'|'warn'|'error'} [type='ok'] - selects the text colour
 */
function _showNotification(text, type = 'ok') {
  const el = document.getElementById('add-msg');
  if (!el) return;

  let color = '#4ade80';
  if (type === 'error') {
    color = '#f87171';
  } else if (type === 'warn') {
    color = '#fbbf24';
  }

  el.textContent = text;
  el.style.color = color;
  el.classList.remove('hidden');

  // The timer handle lives on the function itself, so no extra global is needed.
  clearTimeout(_showNotification.hideTimer);
  _showNotification.hideTimer = setTimeout(() => {
    _showNotification.hideTimer = null;
    el.classList.add('hidden');
  }, 5000);
}
// ── Delete modal ───────────────────────────── // ── Delete modal ─────────────────────────────
let _deleteUrl = null; let _deleteUrl = null;
let _deleteFilesChecked = false; let _deleteFilesChecked = false;
@@ -1011,6 +1347,12 @@ function pubStatusPill(s) {
return `<span class="pill pill-pub-${s}">${map[s]}</span>`; return `<span class="pill pill-pub-${s}">${map[s]}</span>`;
} }
/**
 * Builds the HTML of the small source badge shown in a manga row:
 * grey when the manga has no source, red for the placeholder source whose
 * slug is 'unknown', green otherwise.
 *
 * @param {{slug: string, display_name: string}|null|undefined} source
 * @returns {string} HTML of the badge
 */
function _sourceBadge(source) {
  const pill = (background, color, label) =>
    '<span style="font-size:0.65rem;padding:2px 6px;border-radius:4px;background:' +
    background + ';color:' + color + '">' + label + '</span>';

  if (!source) {
    return pill('#1e293b', '#64748b', 'Источник неизвестен');
  }
  const label = escHtml(source.display_name);
  return source.slug === 'unknown'
    ? pill('#450a0a', '#fca5a5', label)
    : pill('#0f2a1e', '#6ee7b7', label);
}
// ── Время загрузки ──────────────────────────── // ── Время загрузки ────────────────────────────
// Храним интервал живого таймера: url → intervalId // Храним интервал живого таймера: url → intervalId
const _timerIntervals = {}; const _timerIntervals = {};
@@ -1126,6 +1468,7 @@ function renderMangaRow(m) {
<div class="flex items-center gap-2 flex-wrap"> <div class="flex items-center gap-2 flex-wrap">
<span data-r="status">${statusPill(m.status)}</span> <span data-r="status">${statusPill(m.status)}</span>
<span data-r="pubstatus">${pubStatusPill(m.pub_status || 'unknown')}</span> <span data-r="pubstatus">${pubStatusPill(m.pub_status || 'unknown')}</span>
<span data-r="source">${_sourceBadge(m.source)}</span>
<span class="text-sm font-medium text-white truncate" data-r="title">${escHtml(m.title || m.url)}</span> <span class="text-sm font-medium text-white truncate" data-r="title">${escHtml(m.title || m.url)}</span>
</div> </div>
<div class="text-xs text-gray-500 mt-0.5 flex gap-3 flex-wrap"> <div class="text-xs text-gray-500 mt-0.5 flex gap-3 flex-wrap">
@@ -1170,6 +1513,11 @@ function _rowButtons(m) {
title="${m.errors_count} проблем при загрузке" title="${m.errors_count} проблем при загрузке"
style="background:#450a0a;color:#fca5a5;padding:4px 8px;border-radius:6px;font-size:0.75rem;cursor:pointer">⚠️ ${m.errors_count}</button>` style="background:#450a0a;color:#fca5a5;padding:4px 8px;border-radius:6px;font-size:0.75rem;cursor:pointer">⚠️ ${m.errors_count}</button>`
: ''} : ''}
${!isActive
? `<button onclick="event.stopPropagation(); openSwitchSourceModal('${u}')"
title="Сменить источник"
style="background:#1e3a2e;color:#6ee7b7;padding:4px 8px;border-radius:6px;font-size:0.75rem;cursor:pointer">↔ Источник</button>`
: ''}
${isActive ${isActive
? `<button onclick="stopManga('${u}')" class="btn-danger" title="Остановить" style="background:#7c2d12;color:#fdba74">⏸</button>` ? `<button onclick="stopManga('${u}')" class="btn-danger" title="Остановить" style="background:#7c2d12;color:#fdba74">⏸</button>`
: ''} : ''}
@@ -1282,6 +1630,7 @@ function _patchRow(el, m) {
set('status', statusPill(m.status)); set('status', statusPill(m.status));
set('pubstatus', pubStatusPill(m.pub_status || 'unknown')); set('pubstatus', pubStatusPill(m.pub_status || 'unknown'));
set('source', _sourceBadge(m.source));
set('title', escHtml(m.title || m.url)); set('title', escHtml(m.title || m.url));
set('chcount', `📖 ${chDone}/${chTotal} глав`); set('chcount', `📖 ${chDone}/${chTotal} глав`);
set('size', `💾 ${m.size_human || '—'}`); set('size', `💾 ${m.size_human || '—'}`);
@@ -1661,6 +2010,7 @@ async function saveRenameFolder() {
async function initApp() { async function initApp() {
_initDeleteModal(); _initDeleteModal();
await loadStats(); await loadStats();
await loadSources();
connectWS(); connectWS();
// Загружаем список манги // Загружаем список манги
try { try {

View File

@@ -20,6 +20,7 @@ from loguru import logger
from .state import StateDB from .state import StateDB
from .worker import download_manga, check_for_updates from .worker import download_manga, check_for_updates
from .exporter import patch_meta, MangaMeta from .exporter import patch_meta, MangaMeta
from .sources import registry, get_source_for_url, extract_domain
OUTPUT_DIR = Path("/app/output") OUTPUT_DIR = Path("/app/output")
FRONTEND_DIR = Path("/app/frontend") FRONTEND_DIR = Path("/app/frontend")
@@ -172,6 +173,16 @@ async def _queue_worker_loop():
@app.on_event("startup") @app.on_event("startup")
async def startup_event(): async def startup_event():
# Синхронизируем источники с кодом и мигрируем существующие манги
_db = StateDB()
try:
_db.sync_sources(registry)
migrated = _db.migrate_manga_sources()
if migrated:
logger.info("Авто-миграция: проставлен source_id для {} манг", migrated)
finally:
_db.close()
asyncio.create_task(queue_worker()) asyncio.create_task(queue_worker())
asyncio.create_task(update_scheduler()) asyncio.create_task(update_scheduler())
# Восстанавливаем очередь из БД (незавершённые задачи) # Восстанавливаем очередь из БД (незавершённые задачи)
@@ -365,6 +376,16 @@ def _enrich_manga(m: dict, db: StateDB) -> dict:
AND pages_total > 0 AND pages_done < pages_total""", AND pages_total > 0 AND pages_done < pages_total""",
(m["url"],) (m["url"],)
).fetchone()[0] ).fetchone()[0]
# Источник
source_info = None
if m.get("source_id"):
src = db.get_source_by_id(m["source_id"])
if src:
source_info = {"id": src["id"], "slug": src["slug"], "display_name": src["display_name"]}
else:
source_info = {"id": m["source_id"], "slug": "unknown", "display_name": "Источник недоступен"}
return { return {
**m, **m,
"chapters_done": ch_done_count, "chapters_done": ch_done_count,
@@ -375,6 +396,7 @@ def _enrich_manga(m: dict, db: StateDB) -> dict:
"errors_count": ch_failed + ch_partial, "errors_count": ch_failed + ch_partial,
"started_at": m.get("started_at"), "started_at": m.get("started_at"),
"finished_at": m.get("finished_at"), "finished_at": m.get("finished_at"),
"source": source_info,
} }
@@ -454,6 +476,7 @@ def _manga_detail(manga: dict, db: StateDB) -> dict:
class AddMangaRequest(BaseModel): class AddMangaRequest(BaseModel):
urls: List[str] urls: List[str]
format: str = "cbz" format: str = "cbz"
source_id: Optional[int] = None # явный выбор источника (для неизвестных доменов)
# ── Auth API ───────────────────────────────── # ── Auth API ─────────────────────────────────
@@ -536,7 +559,24 @@ async def add_to_queue(body: AddMangaRequest):
url = url.strip() url = url.strip()
if not url: if not url:
continue continue
is_new = db.add_manga(url, body.format)
# Определяем source_id: явный из запроса или авто по домену
source_id = body.source_id
if source_id is None:
domain = extract_domain(url)
source_row = db.get_source_by_domain(domain)
if source_row:
source_id = source_row["id"]
# Если источник указан явно — привязываем домен к нему
if body.source_id is not None:
domain = extract_domain(url)
existing = db.get_source_by_domain(domain)
if existing and existing["id"] != body.source_id:
db.remove_domain(existing["id"], domain)
db.add_domain(body.source_id, domain)
is_new = db.add_manga(url, body.format, source_id=source_id)
if is_new: if is_new:
await download_queue.put({"url": url, "fmt": body.format}) await download_queue.put({"url": url, "fmt": body.format})
added.append(url) added.append(url)
@@ -544,9 +584,9 @@ async def add_to_queue(body: AddMangaRequest):
"type": "manga_queued", "type": "manga_queued",
"url": url, "url": url,
"format": body.format, "format": body.format,
"source_id": source_id,
}) })
await _broadcast_queue_positions() await _broadcast_queue_positions()
# Запускаем фоновую задачу предпросмотра (без Chromium — быстро)
asyncio.create_task(_fetch_preview(url)) asyncio.create_task(_fetch_preview(url))
else: else:
skipped.append(url) skipped.append(url)
@@ -559,15 +599,27 @@ async def _fetch_preview(url: str):
"""Быстро получает название и количество глав сразу после добавления.""" """Быстро получает название и количество глав сразу после добавления."""
try: try:
from .browser import BrowserManager from .browser import BrowserManager
from .scraper import get_manga_info
async with BrowserManager(headless=True) as bm:
_, page = await bm.new_page()
manga = await get_manga_info(page, url)
if not manga:
return
db = StateDB() db = StateDB()
try: try:
db.update_manga_info( source = get_source_for_url(url, db)
if source is None:
manga_row = db.get_manga(url)
if manga_row and manga_row.get("source_id"):
source = registry.get_by_db_id(manga_row["source_id"], db)
finally:
db.close()
if source is None:
return
async with BrowserManager(headless=True) as bm:
_, page = await bm.new_page()
manga = await source.get_manga_info(page, url)
if not manga:
return
db2 = StateDB()
try:
db2.update_manga_info(
url, url,
title=manga.title_ru or manga.title, title=manga.title_ru or manga.title,
chapters_total=len(manga.chapters), chapters_total=len(manga.chapters),
@@ -576,7 +628,7 @@ async def _fetch_preview(url: str):
pub_status=manga.pub_status, pub_status=manga.pub_status,
) )
finally: finally:
db.close() db2.close()
await ws_manager.broadcast({ await ws_manager.broadcast({
"type": "manga_preview", "type": "manga_preview",
"url": url, "url": url,
@@ -996,6 +1048,151 @@ async def delete_manga(url: str, delete_files: bool = False):
db.close() db.close()
# ── Sources API ───────────────────────────────
class DomainAdd(BaseModel):
    # Request body of POST /api/sources/{source_id}/domains.
    domain: str  # domain to attach; the endpoint lower-cases and strips it
class SwitchSourceRequest(BaseModel):
    # Request body of POST /api/mangas/switch-source.
    url: str  # URL of the manga whose source is changed
    source_id: int  # id of the source to switch to
@app.get("/api/sources")
async def list_sources():
    """Return all sources together with their domains."""
    db = StateDB()
    try:
        sources = db.get_all_sources()
        return sources
    finally:
        # The connection is released whether or not the query succeeded.
        db.close()
@app.get("/api/resolve-source")
async def resolve_source(url: str):
    """Resolve the source of a URL by its domain.

    Returns ``{"source": {id, slug, display_name}, "domain": ...}``; ``source``
    is ``None`` when no source is registered for the domain.
    """
    db = StateDB()
    try:
        domain = extract_domain(url)
        match = db.get_source_by_domain(domain)
        source = None
        if match:
            # Expose only the public fields of the source row.
            source = {
                "id": match["id"],
                "slug": match["slug"],
                "display_name": match["display_name"],
            }
        return {"source": source, "domain": domain}
    finally:
        db.close()
@app.post("/api/sources/{source_id}/domains")
async def add_domain(source_id: int, body: DomainAdd):
    """Attach a domain to a source.

    Raises 404 when the source does not exist, 400 for an empty domain and
    409 when the domain is already bound (to this or to another source).
    Broadcasts ``source_domain_added`` on success.
    """
    db = StateDB()
    try:
        if not db.get_source_by_id(source_id):
            raise HTTPException(status_code=404, detail="Источник не найден")

        domain = body.domain.lower().strip()
        if not domain:
            raise HTTPException(status_code=400, detail="Домен не может быть пустым")

        # A domain may belong to one source only.
        owner = db.get_source_by_domain(domain)
        if owner and owner["id"] != source_id:
            raise HTTPException(
                status_code=409,
                detail=f"Домен уже привязан к источнику «{owner['display_name']}»"
            )

        if not db.add_domain(source_id, domain):
            raise HTTPException(status_code=409, detail="Домен уже существует")

        event = {
            "type": "source_domain_added",
            "source_id": source_id,
            "domain": domain,
        }
        await ws_manager.broadcast(event)
        return {"ok": True, "domain": domain}
    finally:
        db.close()
@app.delete("/api/sources/{source_id}/domains/{domain:path}")
async def remove_domain(source_id: int, domain: str):
    """Detach a domain from a source.

    Raises 404 when either the source or the domain is not found.
    Broadcasts ``source_domain_removed`` on success.
    """
    db = StateDB()
    try:
        if not db.get_source_by_id(source_id):
            raise HTTPException(status_code=404, detail="Источник не найден")

        removed = db.remove_domain(source_id, domain)
        if not removed:
            raise HTTPException(status_code=404, detail="Домен не найден")

        event = {
            "type": "source_domain_removed",
            "source_id": source_id,
            "domain": domain,
        }
        await ws_manager.broadcast(event)
        return {"ok": True}
    finally:
        db.close()
@app.post("/api/mangas/switch-source")
async def switch_manga_source(body: SwitchSourceRequest):
"""Switch a manga to another source and rebind the manga's domain to it.

Raises 404 when the manga or the target source does not exist, and 400
while the manga is being downloaded. On success a ``source_switched``
event is broadcast and the number of reset chapters is returned.
"""
db = StateDB()
try:
manga = db.get_manga(body.url)
if not manga:
raise HTTPException(status_code=404, detail="Манга не найдена")
# Refuse only when the manga is both marked as downloading and has a running task.
if manga["status"] == "downloading" and body.url in active_tasks:
raise HTTPException(status_code=400, detail="Нельзя сменить источник во время загрузки")
new_source = db.get_source_by_id(body.source_id)
if not new_source:
raise HTTPException(status_code=404, detail="Источник не найден")
old_source_id = manga.get("source_id")
domain = extract_domain(body.url)
# Rebind the domain: detach it from the source that currently owns it, then attach it to the new one.
# NOTE(review): indentation is not visible in this view; presumably add_domain() runs
# whenever a domain was extracted, not only after a removal - confirm against the file.
if domain:
existing_domain = db.get_source_by_domain(domain)
if existing_domain and existing_domain["id"] != body.source_id:
db.remove_domain(existing_domain["id"], domain)
db.add_domain(body.source_id, domain)
# Change the source of the manga itself.
db.set_manga_source(body.url, body.source_id)
# Reset failed/partial chapters back to pending (done by db.reset_failed_chapters).
reset_count = db.reset_failed_chapters(body.url)
await ws_manager.broadcast({
"type": "source_switched",
"url": body.url,
"old_source_id": old_source_id,
"new_source_id": body.source_id,
"new_source_name": new_source["display_name"],
# True whenever a domain could be extracted from the URL.
"domain_rebound": bool(domain),
"chapters_reset": reset_count,
})
return {
"ok": True,
"source_id": body.source_id,
"source_name": new_source["display_name"],
"chapters_reset": reset_count,
}
finally:
db.close()
@app.get("/api/stats") @app.get("/api/stats")
async def global_stats(): async def global_stats():
db = StateDB() db = StateDB()

View File

@@ -1,665 +1,19 @@
""" """
Парсер readmanga.ru: список глав и URL/байты изображений внутри главы. Обратно-совместимый shim: делегирует вызовы ReadmangaSource.
Не используйте напрямую в новом коде — используйте src.sources.registry.
""" """
import asyncio from .sources.base import Chapter, MangaInfo # noqa: F401 — реэкспорт для импортёров
import re from .sources.readmanga import ReadmangaSource
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional
from loguru import logger _instance = ReadmangaSource()
from playwright.async_api import Page
from .browser import BrowserManager
# ────────────────────────────────────────────── async def get_manga_info(page, url):
# Модели данных return await _instance.get_manga_info(page, url)
# ──────────────────────────────────────────────
@dataclass
class Chapter:
title: str
url: str
number: float = 0.0
volume: int = 0
@dataclass async def get_chapter_images_and_download(page, chapter_url, dest_dir,
class MangaInfo: manga_url=None, on_page=None):
title: str return await _instance.get_chapter_images_and_download(
url: str page, chapter_url, dest_dir, manga_url=manga_url, on_page=on_page
chapters: list[Chapter] = field(default_factory=list)
pub_status: str = "unknown" # completed / ongoing / unknown
title_ru: str = "" # Только русский тайтл (для папки)
title_full: str = "" # Полный тайтл как на странице
description: str = "" # Описание/синопсис
genres: list[str] = field(default_factory=list) # Жанры
# ──────────────────────────────────────────────
# Страница манги — список глав
# ──────────────────────────────────────────────
async def get_manga_info(page: Page, url: str) -> Optional[MangaInfo]:
"""Открывает страницу манги и возвращает список всех глав."""
logger.info("Загружаем страницу манги: {}", url)
ok = await _navigate(page, url)
if not ok:
return None
title_full = await page.title()
title_full = re.sub(r"\s*[-|].*$", "", title_full).strip()
# Пробуем взять русский тайтл напрямую из DOM
title_ru = await _extract_ru_title_from_dom(page)
if not title_ru:
title_ru = _parse_ru_title(title_full)
logger.info("Манга: {} | ru: {}", title_full, title_ru)
pub_status = await _extract_pub_status(page)
logger.info("Статус выпуска: {}", pub_status)
description = await _extract_description(page)
genres = await _extract_genres(page)
await _expand_chapters(page)
chapters = await _extract_chapters(page)
if not chapters:
chapters = await _extract_chapters_alt(page)
logger.info("Найдено глав: {}", len(chapters))
return MangaInfo(
title=title_ru or title_full,
url=url,
chapters=chapters,
pub_status=pub_status,
title_ru=title_ru,
title_full=title_full,
description=description,
genres=genres,
) )
async def _extract_ru_title_from_dom(page: Page) -> str:
    """Read the Russian title directly from the manga page DOM.

    Evaluates a script in the page that walks a fixed list of selectors and
    returns the trimmed text of the first element with non-empty text.

    Returns:
        The stripped title, or "" when no selector matched or the
        evaluation raised.
    """
    title_script = """
() => {
// readmanga: основной тайтл в span.name внутри .names
const selectors = [
'.names .name',
'h1.manga-title',
'h1 .name',
'.name-block .name',
];
for (const sel of selectors) {
const el = document.querySelector(sel);
if (el && el.textContent.trim()) return el.textContent.trim();
}
return '';
}
"""
    try:
        found = await page.evaluate(title_script)
        return (found or "").strip()
    except Exception:
        # Best effort: the caller falls back to parsing the page title.
        return ""
def _parse_ru_title(full_title: str) -> str:
"""Извлекает русский тайтл из полной строки тайтла.
Примеры:
'Манга Режим — АД. Хардкорный геймер ... (Hellmode)''Режим — АД. Хардкорный геймер ...'
'Манга Магическая битва (Sorcery Fight) Гэгэ онлайн''Магическая битва'
'Авантюрист Monster Eater Adventurer''Авантюрист'
"""
t = full_title.strip()
# Убираем префикс "Манга "
t = re.sub(r'^Манга\s+', '', t).strip()
# Берём только до первой скобки (начало английского тайтла)
t = re.split(r'\s*[\(\[]', t)[0].strip()
# Убираем суффикс " онлайн"
t = re.sub(r'\s+онлайн\s*$', '', t, flags=re.IGNORECASE).strip()
# Обрезаем хвост из латинских слов.
# Правило: стоп только на токене содержащем латиницу (a-zA-Z).
# Пунктуация между кириллическими словами (—, , ., :, !) — сохраняем.
words = t.split()
result = []
for w in words:
if re.search(r'[а-яёА-ЯЁ]', w):
result.append(w)
elif re.search(r'[a-zA-Z]', w):
# Первое латинское слово после кириллических — обрезаем здесь
if result:
break
else:
# Чисто пунктуационный токен (—, , ., :, …)
# Добавляем только если уже есть кириллические слова (связка внутри)
if result:
result.append(w)
# Убираем висячую пунктуацию в конце (если последнее слово — не кириллица)
while result and not re.search(r'[а-яёА-ЯЁ]', result[-1]):
result.pop()
if result:
t = ' '.join(result)
return t
async def _extract_pub_status(page: Page) -> str:
    """Determine the publication status of the manga on the current page.

    The in-page script first inspects a list of status selectors and, if
    none yields a verdict, scans the whole body text.

    Returns:
        "completed", "ongoing" or "unknown" ("unknown" also when the
        evaluation raised or returned a falsy value).
    """
    status_script = """
() => {
// readmanga хранит статус в .elem_status .value или похожих блоках
const statusSelectors = [
'.elem_status .value',
'.manga-info .status',
'[class*="status"] .value',
'.property .status',
];
for (const sel of statusSelectors) {
const el = document.querySelector(sel);
if (el) {
const t = el.textContent.toLowerCase();
if (t.includes('завершён') || t.includes('завершен') || t.includes('complete')) return 'completed';
if (t.includes('продолжает') || t.includes('ongoing')) return 'ongoing';
}
}
// Fallback: сканируем весь текст страницы
const bodyText = document.body ? document.body.innerText.toLowerCase() : '';
if (bodyText.includes('выпуск завершён') || bodyText.includes('выпуск завершен')) return 'completed';
if (bodyText.includes('продолжается')) return 'ongoing';
return 'unknown';
}
"""
    try:
        status = await page.evaluate(status_script)
    except Exception:
        return "unknown"
    return status if status else "unknown"
async def _extract_description(page: Page) -> str:
    """Extract the manga description/synopsis from the current page.

    Returns:
        The stripped text of the first matching description element,
        truncated to 2000 characters; "" when nothing matched or the
        evaluation raised.
    """
    description_script = """
() => {
const selectors = [
'.manga-description',
'.elem_descr .value',
'#tab-description .description-text',
'.description',
'[itemprop="description"]',
];
for (const sel of selectors) {
const el = document.querySelector(sel);
if (el && el.textContent.trim()) return el.textContent.trim();
}
return '';
}
"""
    max_length = 2000
    try:
        raw = await page.evaluate(description_script)
        text = (raw or "").strip()
        return text[:max_length]
    except Exception:
        return ""
async def _extract_genres(page: Page) -> list[str]:
    """Extract the list of genre names from the current manga page.

    The in-page script returns the trimmed, non-empty texts of the first
    selector group that matches at least one element.

    Returns:
        The genre names, or an empty list when nothing matched or the
        evaluation raised.
    """
    genres_script = """
() => {
const selectors = [
'.elem_genre .value a',
'.genres a',
'[itemprop="genre"]',
'.genre-list a',
];
for (const sel of selectors) {
const els = document.querySelectorAll(sel);
if (els.length) return Array.from(els).map(e => e.textContent.trim()).filter(Boolean);
}
return [];
}
"""
    try:
        genres = await page.evaluate(genres_script)
    except Exception:
        return []
    return genres if genres else []
async def _navigate(page: Page, url: str, retries: int = 3,
                    referer: str | None = None) -> bool:
    """Open ``url`` in ``page``, retrying on errors and HTTP statuses >= 400.

    Args:
        page: Page used for navigation.
        url: Address to open.
        retries: Maximum number of attempts.
        referer: Referer passed to ``page.goto``; defaults to the root of
            ``url`` (``scheme://netloc/``).

    Returns:
        True once a navigation succeeded, False when every attempt failed.
    """
    from urllib.parse import urlparse

    if referer is None:
        parts = urlparse(url)
        referer = f"{parts.scheme}://{parts.netloc}/"

    for attempt in range(1, retries + 1):
        try:
            resp = await page.goto(url, wait_until="domcontentloaded",
                                   timeout=60_000, referer=referer)
        except Exception as e:
            logger.warning("Попытка {}/{}: {}", attempt, retries, e)
        else:
            if not (resp and resp.status >= 400):
                try:
                    await page.wait_for_load_state("networkidle", timeout=10_000)
                except Exception:
                    # Waiting for network idle is best effort; the DOM is
                    # already loaded, so a timeout here is not a failure.
                    pass
                return True
            logger.warning("Попытка {}/{}: HTTP {}", attempt, retries, resp.status)

        # Linear back-off between attempts. Sleeping after the final attempt
        # would only delay the failure result, so it is skipped.
        if attempt < retries:
            await asyncio.sleep(3 * attempt)
    return False
async def _expand_chapters(page: Page):
    """Click the first visible "all chapters" control, if the page has one.

    Each candidate selector is tried in order. After a successful click the
    function waits for network idle and stops. Any error on one candidate is
    ignored and the next candidate is tried.
    """
    candidates = (
        "a.chapter-link.all",
        "button:has-text('Все главы')",
        "a:has-text('Все главы')",
    )
    for selector in candidates:
        try:
            control = page.locator(selector).first
            if not await control.is_visible(timeout=2000):
                continue
            await control.click()
            await page.wait_for_load_state("networkidle", timeout=10_000)
            return
        except Exception:
            continue
async def _extract_chapters(page: Page) -> list[Chapter]:
    """Parse the main chapter table: ``#chapters-list tr.item-row``.

    For every row the chapter link is taken from the title cell and the
    volume / chapter number from the ``data-vol`` / ``data-num`` attributes
    of the ``td[data-num]`` cell. ``data-num`` is divided by 10 to obtain
    the chapter number.

    Rows without a link or without an href are skipped. A non-numeric
    attribute value is treated as 0 instead of aborting the whole listing.

    Returns:
        Chapters in the order the rows appear on the page.
    """
    from urllib.parse import urljoin

    def _attr_int(raw) -> int:
        # A malformed attribute must not discard every other chapter.
        try:
            return int(raw or "0")
        except ValueError:
            return 0

    rows = await page.query_selector_all("#chapters-list tr.item-row")
    chapters = []
    for row in rows:
        link = await row.query_selector("td[class*='item-title'] a")
        if not link:
            continue
        href = await link.get_attribute("href") or ""
        text = (await link.inner_text()).strip()
        if not href:
            continue

        td = await row.query_selector("td[data-num]")
        vol = _attr_int(await td.get_attribute("data-vol")) if td else 0
        num_raw = _attr_int(await td.get_attribute("data-num")) if td else 0
        number = num_raw / 10.0

        if href.startswith("http"):
            full_url = href
        else:
            # urljoin copes with hrefs lacking a leading slash and with
            # protocol-relative hrefs, which plain concatenation mangled.
            full_url = urljoin(_base_url(page.url) + "/", href)
        chapters.append(Chapter(title=text, url=full_url, number=number, volume=vol))
    return chapters
async def _extract_chapters_alt(page: Page) -> list[Chapter]:
    """Fallback chapter parser based on links whose href contains "/vol".

    Chapter number and volume are parsed from each link's text.
    """
    links_script = """
() => {
const links = Array.from(document.querySelectorAll('a[href*="/vol"]'));
return links.map(a => ({ href: a.href, text: a.textContent.trim() }))
.filter(x => x.href && x.text);
}
"""
    entries = await page.evaluate(links_script)
    chapters = []
    for entry in entries:
        label = entry["text"]
        chapters.append(
            Chapter(
                title=label,
                url=entry["href"],
                number=_parse_num(label),
                volume=_parse_vol(label),
            )
        )
    return chapters
def _base_url(url: str) -> str:
m = re.match(r"(https?://[^/]+)", url)
return m.group(1) if m else "https://readmanga.ru"
def _parse_num(text: str) -> float:
m = re.search(r"[\d]+(?:[.,]\d+)?", text.replace(",", "."))
return float(m.group()) if m else 0.0
def _parse_vol(text: str) -> int:
m = re.search(r"Том\s+(\d+)", text, re.IGNORECASE)
return int(m.group(1)) if m else 0
# ──────────────────────────────────────────────
# Страница главы — получение URL изображений
# ──────────────────────────────────────────────
async def _extract_images_from_js(page: Page) -> list[str]:
    """Extract image URLs from the ``readerInit(...)`` call in a page script.

    The in-page script finds the first ``[`` after ``readerInit``, counts
    brackets to capture the whole array literal, evaluates it and joins
    element 0 and element 2 of every inner array into a URL.

    Returns:
        The list of URLs, or an empty list when nothing was found or the
        evaluation raised.
    """
    reader_script = """
() => {
for (const s of document.querySelectorAll('script')) {
const text = s.textContent || '';
const mi = text.indexOf('readerInit');
if (mi === -1) continue;
const ai = text.indexOf('[', mi);
if (ai === -1) continue;
let depth = 0, end = -1;
for (let i = ai; i < text.length; i++) {
if (text[i] === '[') depth++;
else if (text[i] === ']') { depth--; if (!depth) { end = i+1; break; } }
}
if (end === -1) continue;
try {
const arr = eval(text.slice(ai, end));
if (Array.isArray(arr) && arr.length)
return arr.map(item => Array.isArray(item) && item.length >= 3
? item[0] + item[2] : null).filter(Boolean);
} catch(e) {}
}
return [];
}
"""
    try:
        urls = await page.evaluate(reader_script)
        if urls:
            logger.debug("JS readerInit нашёл {} изображений", len(urls))
            return urls
        return []
    except Exception as e:
        logger.debug("JS-метод не сработал: {}", e)
        return []
async def _extract_images_from_dom(page: Page) -> list[str]:
    """Collect image URLs from ``<img>`` elements of the reader.

    The first selector that matches any element wins. For each image
    ``src`` is used, falling back to ``data-src``.

    Returns:
        The list of URLs, or an empty list when nothing matched or the
        evaluation raised.
    """
    dom_script = """
() => {
for (const sel of ['img.manga-page', '.page-image img', '#mangaReader img', 'img[data-src]']) {
const found = Array.from(document.querySelectorAll(sel));
if (found.length) return found.map(i => i.src || i.dataset.src).filter(Boolean);
}
return [];
}
"""
    try:
        urls = await page.evaluate(dom_script)
    except Exception:
        return []
    return urls if urls else []
def _get_ext(url: str) -> str:
m = re.search(r"\.(jpg|jpeg|png|webp)(\?|$)", url, re.IGNORECASE)
if m:
ext = m.group(1).lower()
return ".jpg" if ext == "jpeg" else f".{ext}"
return ".jpg"
# ──────────────────────────────────────────────
# Скачивание главы
# ──────────────────────────────────────────────
async def get_chapter_images_and_download(
    page: Page,
    chapter_url: str,
    dest_dir: Path,
    manga_url: str | None = None,
    on_page: object = None,
) -> list[Path]:
    """Download every page image of one chapter into ``dest_dir``.

    Steps:
      1. Register a route handler that fetches matching CDN image requests
         itself and keeps the response bytes.
      2. Open the chapter page (``mtr=1`` is appended to the query string).
      3. Read the ordered image URL list from ``readerInit`` (DOM fallback).
      4. Press ArrowRight repeatedly so the reader requests every page.
      5. Retry images whose route fetch timed out via an in-page ``fetch``.
      6. Write the captured bodies as ``NNNN.ext`` in reader order and log a
         report about missing pages.

    The route handler is always unregistered, also when a step raises or the
    task is cancelled.

    Args:
        page: Page used for navigation and interception.
        chapter_url: URL of the chapter.
        dest_dir: Directory for the image files (created if missing).
        manga_url: Used as the referer; defaults to
            ``scheme://netloc/<first path segment>`` of ``chapter_url``.
        on_page: Optional callable invoked as ``on_page(0, 0)`` and scheduled
            with ``asyncio.ensure_future`` after each newly captured image.

    Returns:
        Paths of the saved files ordered by page index. Empty when the
        chapter could not be opened or no image list was found.
    """
    import base64
    from urllib.parse import urlparse

    t_start = time.monotonic()
    ch_id = chapter_url.split("/")[-1]  # short identifier used as a log prefix
    logger.info("[{}] Загружаем главу: {}", ch_id, chapter_url)

    parsed = urlparse(chapter_url)
    parts = parsed.path.strip("/").split("/")
    manga_slug = parts[0] if parts else ""
    referer = manga_url or f"{parsed.scheme}://{parsed.netloc}/{manga_slug}"

    load_url = chapter_url + ("?mtr=1" if "?" not in chapter_url else "&mtr=1")
    dest_dir.mkdir(parents=True, exist_ok=True)

    def _base(u: str) -> str:
        """URL without its query string; used as the capture key."""
        return u.split("?")[0]

    # Banner/ad images are passed through without logging.
    BANNER_RE = re.compile(r"466_p\.|570_p\.|banner|advert", re.I)
    # Compiled once here instead of on every intercepted request.
    IMAGE_EXT_RE = re.compile(r"\.(jpg|jpeg|png|webp)(\?|$)", re.I)
    CDN_RE = re.compile(r"one-way\.work|staticfa\.|rm\.one-way|cdnmanga|reimg", re.I)

    def _is_manga_image(url: str) -> bool:
        if not IMAGE_EXT_RE.search(_base(url)):
            return False
        if "resrmr." in url or "/static/" in url:
            return False
        return bool(CDN_RE.search(url))

    captured: dict[str, bytes] = {}      # base_url -> image bytes
    route_errors: dict[str, str] = {}    # base_url -> error text
    route_statuses: dict[str, int] = {}  # base_url -> status of a rejected response
    lock = asyncio.Lock()

    async def route_handler(route, request):
        url = request.url
        base = _base(url)

        if not _is_manga_image(url) or BANNER_RE.search(base):
            await route.continue_()
            return

        async with lock:
            already = base in captured
        if already:
            await route.continue_()
            return

        fname = base.split("/")[-1]
        try:
            response = await route.fetch()
            status = response.status
            body = await response.body()

            if body and len(body) > 500 and status in (200, 206):
                async with lock:
                    if base not in captured:
                        captured[base] = body
                        logger.debug("[{}] ✓ {}: {} байт", ch_id, fname, len(body))
                        if on_page:
                            try:
                                asyncio.ensure_future(on_page(0, 0))
                            except Exception:
                                # A progress callback must never break the capture.
                                pass
            else:
                async with lock:
                    route_statuses[base] = status
                if status not in (200, 206):
                    logger.warning("[{}] CDN HTTP {} для '{}' | {}",
                                   ch_id, status, fname, base[-70:])
                else:
                    logger.warning("[{}] Слишком мал ответ ({} байт) для '{}'",
                                   ch_id, len(body), fname)
            await route.fulfill(response=response)
        except Exception as e:
            err = str(e)
            async with lock:
                route_errors[base] = err
            is_timeout = "timeout" in err.lower()
            logger.warning("[{}] route.fetch {} '{}': {}",
                           ch_id, "timeout" if is_timeout else "ошибка", fname, err[:150])
            try:
                await route.continue_()
            except Exception:
                # The request may already be handled or the page closed.
                pass

    await page.route("**/*", route_handler)
    try:
        # 1. Open the chapter.
        ok = await _navigate(page, load_url, referer=referer)
        if not ok:
            logger.error("[{}] Не удалось открыть главу после всех retry: {}", ch_id, chapter_url)
            return []

        # 2. Wait for readerInit.
        try:
            await page.wait_for_function(
                "() => Array.from(document.querySelectorAll('script'))"
                ".some(s => s.textContent.includes('readerInit'))",
                timeout=15_000,
            )
        except Exception as e:
            logger.warning("[{}] readerInit не появился за 15с ({}). "
                           "Продолжаем через DOM-fallback.", ch_id, str(e)[:80])

        # 3. Ordered list of image URLs.
        image_urls = await _extract_images_from_js(page)
        if not image_urls:
            logger.debug("[{}] JS readerInit не дал URL, пробуем DOM-парсинг", ch_id)
            image_urls = await _extract_images_from_dom(page)

        if not image_urls:
            try:
                page_info = await page.evaluate("() => document.title + ' | ' + location.href")
            except Exception:
                page_info = "?"
            logger.error("[{}] Список изображений пуст. Текущая страница: {}", ch_id, page_info)
            return []

        logger.info("[{}] Найдено изображений: {}", ch_id, len(image_urls))

        url_to_idx = {_base(u): i for i, u in enumerate(image_urls)}
        filename_to_idx = {_base(u).split("/")[-1]: i for i, u in enumerate(image_urls)}
        total = len(image_urls)

        def _count_matched() -> int:
            """Number of captured entries that map to a reader page."""
            return sum(
                1 for base_url in captured
                if base_url in url_to_idx or base_url.split("/")[-1] in filename_to_idx
            )

        # 4. Flip through the reader.
        await asyncio.sleep(1)
        stall_count = 0
        prev_done = -1

        for i in range(total + 20):
            done = _count_matched()
            if done >= total:
                break
            try:
                await page.keyboard.press("ArrowRight")
                await asyncio.sleep(0.5)
            except Exception as e:
                logger.warning("[{}] Ошибка листания на шаге {}: {}", ch_id, i + 1, e)
                break

            # Progress is checked every 20 key presses; three checkpoints
            # in a row without progress abort the loop.
            if i % 20 == 19:
                done = _count_matched()
                logger.debug("[{}] Пролистано {}, загружено: {}/{}", ch_id, i + 1, done, total)
                if done == prev_done:
                    stall_count += 1
                    if stall_count >= 3:
                        logger.warning("[{}] Прогресс завис ({}/{}) после {} листаний — прерываем",
                                       ch_id, done, total, i + 1)
                        break
                else:
                    stall_count = 0
                prev_done = done

        # Give in-flight requests time to finish.
        await asyncio.sleep(3)

        # 5. Retry timed-out pages through an in-page fetch.
        async with lock:
            timeout_bases = [u for u, e in route_errors.items()
                             if "timeout" in e.lower() and u not in captured]

        if timeout_bases:
            logger.info("[{}] Retry {} страниц с timeout через JS fetch...",
                        ch_id, len(timeout_bases))
            for retry_base in timeout_bases:
                if retry_base in captured:
                    continue
                fname = retry_base.split("/")[-1]
                try:
                    data_b64 = await page.evaluate("""async (url) => {
try {
const r = await fetch(url, {credentials: 'include'});
if (!r.ok) return null;
const buf = await r.arrayBuffer();
const bytes = new Uint8Array(buf);
let bin = '';
for (let b of bytes) bin += String.fromCharCode(b);
return btoa(bin);
} catch(e) { return null; }
}""", retry_base)
                    if data_b64:
                        body = base64.b64decode(data_b64)
                        if len(body) > 500:
                            async with lock:
                                captured[retry_base] = body
                            logger.info("[{}] Retry OK: {} ({} байт)", ch_id, fname, len(body))
                        else:
                            logger.warning("[{}] Retry вернул {} байт для '{}' — игнорируем",
                                           ch_id, len(body), fname)
                    else:
                        logger.warning("[{}] Retry вернул null для '{}' | {}",
                                       ch_id, fname, retry_base[-70:])
                except Exception as e2:
                    logger.warning("[{}] Retry JS ошибка для '{}': {}", ch_id, fname, e2)
    finally:
        # Never leave the interception handler attached to a reused page.
        await page.unroute("**/*", route_handler)

    done = _count_matched()
    elapsed = time.monotonic() - t_start
    logger.info("[{}] Перехвачено: {}/{} за {:.1f}с", ch_id, done, total, elapsed)

    # 6. Save in reader order.
    paths: dict[int, Path] = {}
    unmatched_other: list[str] = []

    for base_url, body in captured.items():
        idx = url_to_idx.get(base_url)
        if idx is None:
            idx = filename_to_idx.get(base_url.split("/")[-1])
        if idx is None:
            if not BANNER_RE.search(base_url):
                unmatched_other.append(base_url.split("/")[-1])
            continue
        p = dest_dir / f"{idx:04d}{_get_ext(base_url)}"
        p.write_bytes(body)
        paths[idx] = p

    if unmatched_other:
        logger.debug("[{}] Перехвачено, но не совпало с readerInit ({}): {}",
                     ch_id, len(unmatched_other), unmatched_other)

    # 7. Report on missing pages.
    missing_idxs = [i for i in range(total) if i not in paths]
    if missing_idxs:
        missing_full = [_base(image_urls[i]) for i in missing_idxs]
        missing_files = [u.split("/")[-1] for u in missing_full]
        missing = list(zip(missing_files, missing_full))

        timeout_miss = [f for f, u in missing
                        if "timeout" in route_errors.get(u, "").lower()]
        http_miss = [f"{f}(HTTP {route_statuses[u]})" for f, u in missing
                     if u in route_statuses]
        unrcv = [f for f, u in missing
                 if u not in route_errors and u not in route_statuses]

        reasons = []
        if timeout_miss:
            reasons.append(f"timeout×{len(timeout_miss)}: {timeout_miss}")
        if http_miss:
            reasons.append(f"HTTP-err×{len(http_miss)}: {http_miss}")
        if unrcv:
            reasons.append(f"не_перехвачено×{len(unrcv)}: {unrcv}")

        logger.warning(
            "[{}] Пропущено {}/{} стр. | №: {} | причины: {}",
            ch_id, len(missing_idxs), total,
            [i + 1 for i in missing_idxs],
            " | ".join(reasons) if reasons else "неизвестно",
        )
        logger.debug("[{}] Полные URL пропущенных: {}", ch_id, missing_full)

    return [paths[i] for i in sorted(paths.keys())]

74
src/sources/__init__.py Normal file
View File

@@ -0,0 +1,74 @@
"""
Реестр источников манги.
Для добавления нового источника:
1. Создать файл src/sources/mysource.py с классом, реализующим MangaSourceProtocol
2. Импортировать его здесь и добавить в список SOURCES
"""
from urllib.parse import urlparse
from typing import Optional
from .base import MangaSourceProtocol
from .readmanga import ReadmangaSource
# ── Source registration ─────────────────────
# Add new sources here:
SOURCES: list = [
ReadmangaSource(),
]
# Fast lookup by slug
_BY_SLUG: dict[str, object] = {s.slug: s for s in SOURCES}
class SourceRegistry:
    """Registry of source adapters. Sources are defined in code only."""

    def get_by_slug(self, slug: str) -> Optional[object]:
        """Return the adapter registered under ``slug``, or None."""
        return _BY_SLUG.get(slug)

    def get_by_db_id(self, source_id: int, db) -> Optional[object]:
        """Resolve an adapter through the DB: source_id -> slug -> instance.

        Returns None when the DB has no row for ``source_id`` or no adapter
        is registered under the row's slug.
        """
        row = db.get_source_by_id(source_id)
        return _BY_SLUG.get(row["slug"]) if row else None

    def all_sources(self) -> list:
        """Return a new list with every registered adapter."""
        return [*SOURCES]

    def all_slugs(self) -> list[str]:
        """Return the slugs of all registered adapters, in registration order."""
        return [source.slug for source in SOURCES]
# Module-level SourceRegistry instance.
registry = SourceRegistry()
def get_source_for_url(url: str, db) -> Optional[object]:
    """Determine the source adapter for ``url`` by its domain.

    The domain is normalized with ``extract_domain`` (the same helper used
    elsewhere in this module, so both stay in sync) and looked up through
    ``db.get_source_by_domain``.

    Returns:
        The adapter registered under the row's slug, or None when the domain
        is not registered, the slug has no adapter, or the lookup raised.
    """
    try:
        row = db.get_source_by_domain(extract_domain(url))
        if not row:
            return None
        return _BY_SLUG.get(row["slug"])
    except Exception:
        # Best effort: any lookup failure is reported as "no source".
        return None
def extract_domain(url: str) -> str:
    """Return the lower-cased network location of ``url`` without "www.".

    Returns "" when the URL has no network location or cannot be parsed.
    """
    try:
        host = urlparse(url).netloc.lower()
    except Exception:
        return ""
    return host.removeprefix("www.")

58
src/sources/base.py Normal file
View File

@@ -0,0 +1,58 @@
"""
Базовые модели данных и Protocol-интерфейс для источников манги.
"""
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional, Protocol, runtime_checkable
from playwright.async_api import Page
# ──────────────────────────────────────────────
# Модели данных (общие для всех источников)
# ──────────────────────────────────────────────
@dataclass
class Chapter:
    """One chapter of a manga as listed on a source site."""

    title: str            # link text of the chapter
    url: str              # URL of the chapter page
    number: float = 0.0   # chapter number
    volume: int = 0       # volume number
@dataclass
class MangaInfo:
    """Metadata of a manga together with its chapter list."""

    title: str
    url: str
    chapters: list[Chapter] = field(default_factory=list)
    pub_status: str = "unknown"  # completed / ongoing / unknown
    title_ru: str = ""
    title_full: str = ""
    description: str = ""
    genres: list[str] = field(default_factory=list)
# ──────────────────────────────────────────────
# Интерфейс источника
# ──────────────────────────────────────────────
@runtime_checkable
class MangaSourceProtocol(Protocol):
    """Structural interface that manga source adapters implement."""

    slug: str           # unique source code used in code ("readmanga")
    display_name: str   # name shown in the UI ("ReadManga")

    async def get_manga_info(self, page: Page, url: str) -> Optional[MangaInfo]:
        """Return information about the manga together with its chapter list."""
        ...

    async def get_chapter_images_and_download(
        self,
        page: Page,
        chapter_url: str,
        dest_dir: Path,
        manga_url: Optional[str] = None,
        on_page: object = None,
    ) -> list[Path]:
        """Download the chapter pages into ``dest_dir`` and return their paths."""
        ...

589
src/sources/readmanga.py Normal file
View File

@@ -0,0 +1,589 @@
"""
Адаптер ReadManga: поддерживает readmanga.ru и все его клоны.
"""
import asyncio
import base64
import re
import time
from pathlib import Path
from typing import Optional
from loguru import logger
from playwright.async_api import Page
from .base import Chapter, MangaInfo
class ReadmangaSource:
    """Source adapter for ReadManga-style sites."""

    slug = "readmanga"
    display_name = "ReadManga"

    # CDN host fragments from which chapter images are accepted.
    cdn_patterns = ["one-way.work", "staticfa.", "rm.one-way", "cdnmanga", "reimg"]

    # ──────────────────────────────────────────────
    # Manga page: chapter list
    # ──────────────────────────────────────────────

    async def get_manga_info(self, page: Page, url: str) -> Optional[MangaInfo]:
        """Open the manga page and return its metadata and chapter list.

        Returns:
            A ``MangaInfo``, or None when the page could not be opened.
        """
        logger.info("Загружаем страницу манги: {}", url)
        ok = await _navigate(page, url)
        if not ok:
            return None

        title_full = await page.title()
        title_full = re.sub(r"\s*[-|].*$", "", title_full).strip()

        # Prefer the title from the DOM; fall back to parsing the page title.
        title_ru = await _extract_ru_title_from_dom(page)
        if not title_ru:
            title_ru = _parse_ru_title(title_full)
        logger.info("Манга: {} | ru: {}", title_full, title_ru)

        pub_status = await _extract_pub_status(page)
        logger.info("Статус выпуска: {}", pub_status)
        description = await _extract_description(page)
        genres = await _extract_genres(page)

        await _expand_chapters(page)
        chapters = await _extract_chapters(page)
        if not chapters:
            chapters = await _extract_chapters_alt(page)
        logger.info("Найдено глав: {}", len(chapters))

        return MangaInfo(
            title=title_ru or title_full,
            url=url,
            chapters=chapters,
            pub_status=pub_status,
            title_ru=title_ru,
            title_full=title_full,
            description=description,
            genres=genres,
        )

    # ──────────────────────────────────────────────
    # Chapter download
    # ──────────────────────────────────────────────

    async def get_chapter_images_and_download(
        self,
        page: Page,
        chapter_url: str,
        dest_dir: Path,
        manga_url: Optional[str] = None,
        on_page: object = None,
    ) -> list[Path]:
        """Download every page image of one chapter into ``dest_dir``.

        Steps:
          1. Register a route handler that fetches image requests matching
             ``cdn_patterns`` itself and keeps the response bytes.
          2. Open the chapter page (``mtr=1`` is appended to the query).
          3. Read the ordered image URL list from ``readerInit`` (DOM fallback).
          4. Press ArrowRight repeatedly so the reader requests every page.
          5. Retry images whose route fetch timed out via an in-page ``fetch``.
          6. Write the captured bodies as ``NNNN.ext`` in reader order.

        The route handler is always unregistered, also when a step raises or
        the task is cancelled.

        Args:
            page: Page used for navigation and interception.
            chapter_url: URL of the chapter.
            dest_dir: Directory for the image files (created if missing).
            manga_url: Used as the referer; defaults to
                ``scheme://netloc/<first path segment>`` of ``chapter_url``.
            on_page: Optional callable invoked as ``on_page(0, 0)`` and
                scheduled with ``asyncio.ensure_future`` after each newly
                captured image.

        Returns:
            Paths of the saved files ordered by page index. Empty when the
            chapter could not be opened or no image list was found.
        """
        from urllib.parse import urlparse

        t_start = time.monotonic()
        ch_id = chapter_url.split("/")[-1]  # short identifier used as a log prefix
        logger.info("[{}] Загружаем главу: {}", ch_id, chapter_url)

        parsed = urlparse(chapter_url)
        parts = parsed.path.strip("/").split("/")
        manga_slug = parts[0] if parts else ""
        referer = manga_url or f"{parsed.scheme}://{parsed.netloc}/{manga_slug}"

        load_url = chapter_url + ("?mtr=1" if "?" not in chapter_url else "&mtr=1")
        dest_dir.mkdir(parents=True, exist_ok=True)

        def _base(u: str) -> str:
            """URL without its query string; used as the capture key."""
            return u.split("?")[0]

        BANNER_RE = re.compile(r"466_p\.|570_p\.|banner|advert", re.I)
        IMAGE_EXT_RE = re.compile(r"\.(jpg|jpeg|png|webp)(\?|$)", re.I)
        # Built once per chapter instead of once per intercepted request.
        cdn_re = re.compile("|".join(re.escape(p) for p in self.cdn_patterns), re.I)

        def _is_manga_image(url: str) -> bool:
            if not IMAGE_EXT_RE.search(_base(url)):
                return False
            if "resrmr." in url or "/static/" in url:
                return False
            return bool(cdn_re.search(url))

        captured: dict[str, bytes] = {}      # base_url -> image bytes
        route_errors: dict[str, str] = {}    # base_url -> error text
        route_statuses: dict[str, int] = {}  # base_url -> status of a rejected response
        lock = asyncio.Lock()

        async def route_handler(route, request):
            url = request.url
            base = _base(url)

            if not _is_manga_image(url) or BANNER_RE.search(base):
                await route.continue_()
                return

            async with lock:
                already = base in captured
            if already:
                await route.continue_()
                return

            fname = base.split("/")[-1]
            try:
                response = await route.fetch()
                status = response.status
                body = await response.body()

                if body and len(body) > 500 and status in (200, 206):
                    async with lock:
                        if base not in captured:
                            captured[base] = body
                            logger.debug("[{}] ✓ {}: {} байт", ch_id, fname, len(body))
                            if on_page:
                                try:
                                    asyncio.ensure_future(on_page(0, 0))
                                except Exception:
                                    # A progress callback must never break the capture.
                                    pass
                else:
                    async with lock:
                        route_statuses[base] = status
                    if status not in (200, 206):
                        logger.warning("[{}] CDN HTTP {} для '{}' | {}",
                                       ch_id, status, fname, base[-70:])
                    else:
                        logger.warning("[{}] Слишком мал ответ ({} байт) для '{}'",
                                       ch_id, len(body), fname)
                await route.fulfill(response=response)
            except Exception as e:
                err = str(e)
                async with lock:
                    route_errors[base] = err
                is_timeout = "timeout" in err.lower()
                logger.warning("[{}] route.fetch {} '{}': {}",
                               ch_id, "timeout" if is_timeout else "ошибка", fname, err[:150])
                try:
                    await route.continue_()
                except Exception:
                    # The request may already be handled or the page closed.
                    pass

        await page.route("**/*", route_handler)
        try:
            ok = await _navigate(page, load_url, referer=referer)
            if not ok:
                logger.error("[{}] Не удалось открыть главу: {}", ch_id, chapter_url)
                return []

            try:
                await page.wait_for_function(
                    "() => Array.from(document.querySelectorAll('script'))"
                    ".some(s => s.textContent.includes('readerInit'))",
                    timeout=15_000,
                )
            except Exception as e:
                logger.warning("[{}] readerInit не появился за 15с ({}). DOM-fallback.", ch_id, str(e)[:80])

            image_urls = await _extract_images_from_js(page)
            if not image_urls:
                logger.debug("[{}] JS readerInit не дал URL, пробуем DOM-парсинг", ch_id)
                image_urls = await _extract_images_from_dom(page)

            if not image_urls:
                try:
                    page_info = await page.evaluate("() => document.title + ' | ' + location.href")
                except Exception:
                    page_info = "?"
                logger.error("[{}] Список изображений пуст. Страница: {}", ch_id, page_info)
                return []

            logger.info("[{}] Найдено изображений: {}", ch_id, len(image_urls))

            url_to_idx = {_base(u): i for i, u in enumerate(image_urls)}
            filename_to_idx = {_base(u).split("/")[-1]: i for i, u in enumerate(image_urls)}
            total = len(image_urls)

            def _count_matched() -> int:
                """Number of captured entries that map to a reader page."""
                return sum(
                    1 for base_url in captured
                    if base_url in url_to_idx or base_url.split("/")[-1] in filename_to_idx
                )

            await asyncio.sleep(1)
            stall_count = 0
            prev_done = -1

            for i in range(total + 20):
                done = _count_matched()
                if done >= total:
                    break
                try:
                    await page.keyboard.press("ArrowRight")
                    await asyncio.sleep(0.5)
                except Exception as e:
                    logger.warning("[{}] Ошибка листания на шаге {}: {}", ch_id, i + 1, e)
                    break

                # Progress is checked every 20 key presses; three checkpoints
                # in a row without progress abort the loop.
                if i % 20 == 19:
                    done = _count_matched()
                    logger.debug("[{}] Пролистано {}, загружено: {}/{}", ch_id, i + 1, done, total)
                    if done == prev_done:
                        stall_count += 1
                        if stall_count >= 3:
                            logger.warning("[{}] Прогресс завис ({}/{}) — прерываем", ch_id, done, total)
                            break
                    else:
                        stall_count = 0
                    prev_done = done

            # Give in-flight requests time to finish.
            await asyncio.sleep(3)

            # Retry timed-out pages through an in-page fetch.
            async with lock:
                timeout_bases = [u for u, e in route_errors.items()
                                 if "timeout" in e.lower() and u not in captured]

            if timeout_bases:
                logger.info("[{}] Retry {} страниц с timeout...", ch_id, len(timeout_bases))
                for retry_base in timeout_bases:
                    if retry_base in captured:
                        continue
                    fname = retry_base.split("/")[-1]
                    try:
                        data_b64 = await page.evaluate("""async (url) => {
try {
const r = await fetch(url, {credentials: 'include'});
if (!r.ok) return null;
const buf = await r.arrayBuffer();
const bytes = new Uint8Array(buf);
let bin = '';
for (let b of bytes) bin += String.fromCharCode(b);
return btoa(bin);
} catch(e) { return null; }
}""", retry_base)
                        if data_b64:
                            body = base64.b64decode(data_b64)
                            if len(body) > 500:
                                async with lock:
                                    captured[retry_base] = body
                                logger.info("[{}] Retry OK: {} ({} байт)", ch_id, fname, len(body))
                            else:
                                logger.warning("[{}] Retry вернул {} байт — игнорируем", ch_id, len(body))
                        else:
                            logger.warning("[{}] Retry null для '{}'", ch_id, fname)
                    except Exception as e2:
                        logger.warning("[{}] Retry JS ошибка '{}': {}", ch_id, fname, e2)
        finally:
            # Never leave the interception handler attached to a reused page.
            await page.unroute("**/*", route_handler)

        done = _count_matched()
        elapsed = time.monotonic() - t_start
        logger.info("[{}] Перехвачено: {}/{} за {:.1f}с", ch_id, done, total, elapsed)

        # Save in reader order.
        paths: dict[int, Path] = {}
        unmatched_other: list[str] = []

        for base_url, body in captured.items():
            idx = url_to_idx.get(base_url)
            if idx is None:
                idx = filename_to_idx.get(base_url.split("/")[-1])
            if idx is None:
                if not BANNER_RE.search(base_url):
                    unmatched_other.append(base_url.split("/")[-1])
                continue
            p = dest_dir / f"{idx:04d}{_get_ext(base_url)}"
            p.write_bytes(body)
            paths[idx] = p

        if unmatched_other:
            logger.debug("[{}] Не совпало с readerInit ({}): {}", ch_id, len(unmatched_other), unmatched_other)

        # Report on missing pages.
        missing_idxs = [i for i in range(total) if i not in paths]
        if missing_idxs:
            missing_full = [_base(image_urls[i]) for i in missing_idxs]
            missing_files = [u.split("/")[-1] for u in missing_full]
            missing = list(zip(missing_files, missing_full))

            timeout_miss = [f for f, u in missing
                            if "timeout" in route_errors.get(u, "").lower()]
            http_miss = [f"{f}(HTTP {route_statuses[u]})" for f, u in missing
                         if u in route_statuses]
            unrcv = [f for f, u in missing
                     if u not in route_errors and u not in route_statuses]

            reasons = []
            if timeout_miss:
                reasons.append(f"timeout×{len(timeout_miss)}: {timeout_miss}")
            if http_miss:
                reasons.append(f"HTTP-err×{len(http_miss)}: {http_miss}")
            if unrcv:
                reasons.append(f"не_перехвачено×{len(unrcv)}: {unrcv}")

            logger.warning(
                "[{}] Пропущено {}/{} стр. | №: {} | причины: {}",
                ch_id, len(missing_idxs), total,
                [i + 1 for i in missing_idxs],
                " | ".join(reasons) if reasons else "неизвестно",
            )

        return [paths[i] for i in sorted(paths.keys())]
# ──────────────────────────────────────────────
# Вспомогательные функции (приватные)
# ──────────────────────────────────────────────
async def _navigate(page: Page, url: str, retries: int = 3,
                    referer: str | None = None) -> bool:
    """Open ``url`` in ``page``, retrying on errors and HTTP statuses >= 400.

    Args:
        page: Page used for navigation.
        url: Address to open.
        retries: Maximum number of attempts.
        referer: Referer passed to ``page.goto``; defaults to the root of
            ``url`` (``scheme://netloc/``).

    Returns:
        True once a navigation succeeded, False when every attempt failed.
    """
    from urllib.parse import urlparse

    if referer is None:
        parts = urlparse(url)
        referer = f"{parts.scheme}://{parts.netloc}/"

    for attempt in range(1, retries + 1):
        try:
            resp = await page.goto(url, wait_until="domcontentloaded",
                                   timeout=60_000, referer=referer)
        except Exception as e:
            logger.warning("Попытка {}/{}: {}", attempt, retries, e)
        else:
            if not (resp and resp.status >= 400):
                try:
                    await page.wait_for_load_state("networkidle", timeout=10_000)
                except Exception:
                    # Waiting for network idle is best effort; the DOM is
                    # already loaded, so a timeout here is not a failure.
                    pass
                return True
            logger.warning("Попытка {}/{}: HTTP {}", attempt, retries, resp.status)

        # Linear back-off between attempts. Sleeping after the final attempt
        # would only delay the failure result, so it is skipped.
        if attempt < retries:
            await asyncio.sleep(3 * attempt)
    return False
async def _extract_ru_title_from_dom(page: Page) -> str:
    """Return the text of the first non-empty title element found on the page.

    The in-page script tries a fixed list of CSS selectors in order. Any
    failure during evaluation yields an empty string.
    """
    script = """
() => {
const selectors = [
'.names .name', 'h1.manga-title', 'h1 .name', '.name-block .name',
];
for (const sel of selectors) {
const el = document.querySelector(sel);
if (el && el.textContent.trim()) return el.textContent.trim();
}
return '';
}
"""
    try:
        title = await page.evaluate(script)
        return (title or "").strip()
    except Exception:
        return ""
def _parse_ru_title(full_title: str) -> str:
t = full_title.strip()
t = re.sub(r'^Манга\s+', '', t).strip()
t = re.split(r'\s*[\(\[]', t)[0].strip()
t = re.sub(r'\s+онлайн\s*$', '', t, flags=re.IGNORECASE).strip()
words = t.split()
result = []
for w in words:
if re.search(r'[а-яёА-ЯЁ]', w):
result.append(w)
elif re.search(r'[a-zA-Z]', w):
if result:
break
else:
if result:
result.append(w)
while result and not re.search(r'[а-яёА-ЯЁ]', result[-1]):
result.pop()
if result:
t = ' '.join(result)
return t
async def _extract_pub_status(page: Page) -> str:
    """Return the publication status found on the page.

    The in-page script returns 'completed', 'ongoing' or 'unknown', checking
    dedicated status elements first and the whole body text second. An empty
    result or any evaluation failure yields "unknown".
    """
    script = """
() => {
const statusSelectors = [
'.elem_status .value', '.manga-info .status',
'[class*="status"] .value', '.property .status',
];
for (const sel of statusSelectors) {
const el = document.querySelector(sel);
if (el) {
const t = el.textContent.toLowerCase();
if (t.includes('завершён') || t.includes('завершен') || t.includes('complete')) return 'completed';
if (t.includes('продолжает') || t.includes('ongoing')) return 'ongoing';
}
}
const bodyText = document.body ? document.body.innerText.toLowerCase() : '';
if (bodyText.includes('выпуск завершён') || bodyText.includes('выпуск завершен')) return 'completed';
if (bodyText.includes('продолжается')) return 'ongoing';
return 'unknown';
}
"""
    try:
        status = await page.evaluate(script)
        return status or "unknown"
    except Exception:
        return "unknown"
async def _extract_description(page: Page) -> str:
    """Return the manga description, stripped and capped at 2000 characters.

    The in-page script returns the text of the first non-empty element
    matching one of its selectors. Any evaluation failure yields "".
    """
    script = """
() => {
const selectors = [
'.manga-description', '.elem_descr .value',
'#tab-description .description-text', '.description',
'[itemprop="description"]',
];
for (const sel of selectors) {
const el = document.querySelector(sel);
if (el && el.textContent.trim()) return el.textContent.trim();
}
return '';
}
"""
    try:
        text = await page.evaluate(script)
        cleaned = (text or "").strip()
        return cleaned[:2000]
    except Exception:
        return ""
async def _extract_genres(page: Page) -> list[str]:
    """Return genre names from the first selector that matches any elements.

    Empty names are filtered out by the in-page script. Any evaluation
    failure yields an empty list.
    """
    script = """
() => {
const selectors = [
'.elem_genre .value a', '.genres a',
'[itemprop="genre"]', '.genre-list a',
];
for (const sel of selectors) {
const els = document.querySelectorAll(sel);
if (els.length) return Array.from(els).map(e => e.textContent.trim()).filter(Boolean);
}
return [];
}
"""
    try:
        genres = await page.evaluate(script)
        return genres or []
    except Exception:
        return []
async def _expand_chapters(page: Page):
    """Click the first visible "all chapters" control, if the page has one.

    Candidate selectors are tried in order. After a successful click the
    function waits for the network to go idle and returns. Errors on one
    candidate are ignored and the next candidate is tried.
    """
    candidates = (
        "a.chapter-link.all",
        "button:has-text('Все главы')",
        "a:has-text('Все главы')",
    )
    for selector in candidates:
        try:
            control = page.locator(selector).first
            if not await control.is_visible(timeout=2000):
                continue
            await control.click()
            await page.wait_for_load_state("networkidle", timeout=10_000)
            return
        except Exception:
            continue
async def _extract_chapters(page: Page) -> list[Chapter]:
    """Build the chapter list from the rows of the ``#chapters-list`` table.

    Rows without a title link or without an ``href`` are skipped. Volume and
    number come from the ``data-vol`` / ``data-num`` attributes of the row's
    ``td[data-num]`` cell; ``data-num`` is divided by 10 to get the chapter
    number. Hrefs that do not start with "http" are prefixed with the
    page's origin.
    """
    collected = []
    for row in await page.query_selector_all("#chapters-list tr.item-row"):
        anchor = await row.query_selector("td[class*='item-title'] a")
        if not anchor:
            continue
        href = await anchor.get_attribute("href") or ""
        title = (await anchor.inner_text()).strip()
        if not href:
            continue

        cell = await row.query_selector("td[data-num]")
        if cell:
            vol = int(await cell.get_attribute("data-vol") or "0")
            num_raw = int(await cell.get_attribute("data-num") or "0")
        else:
            vol = 0
            num_raw = 0

        if href.startswith("http"):
            full_url = href
        else:
            full_url = _base_url(page.url) + href
        collected.append(
            Chapter(title=title, url=full_url, number=num_raw / 10.0, volume=vol)
        )
    return collected
async def _extract_chapters_alt(page: Page) -> list[Chapter]:
    """Alternative chapter extraction based on links containing "/vol".

    Every such link with a non-empty href and text becomes a Chapter whose
    number and volume are parsed from the link text.
    """
    script = """
() => {
const links = Array.from(document.querySelectorAll('a[href*="/vol"]'));
return links.map(a => ({ href: a.href, text: a.textContent.trim() }))
.filter(x => x.href && x.text);
}
"""
    links = await page.evaluate(script)
    chapters = []
    for link in links:
        label = link["text"]
        chapters.append(
            Chapter(title=label, url=link["href"],
                    number=_parse_num(label), volume=_parse_vol(label))
        )
    return chapters
async def _extract_images_from_js(page: Page) -> list[str]:
    """Read image URLs from the array that follows ``readerInit`` in a script.

    The in-page script locates the first "[" after "readerInit", finds the
    matching "]" by bracket depth, evaluates that slice and, for every item
    that is an array of at least three elements, joins element 0 and
    element 2 into a URL. Any failure is logged at debug level and yields
    an empty list.
    """
    script = """
() => {
for (const s of document.querySelectorAll('script')) {
const text = s.textContent || '';
const mi = text.indexOf('readerInit');
if (mi === -1) continue;
const ai = text.indexOf('[', mi);
if (ai === -1) continue;
let depth = 0, end = -1;
for (let i = ai; i < text.length; i++) {
if (text[i] === '[') depth++;
else if (text[i] === ']') { depth--; if (!depth) { end = i+1; break; } }
}
if (end === -1) continue;
try {
const arr = eval(text.slice(ai, end));
if (Array.isArray(arr) && arr.length)
return arr.map(item => Array.isArray(item) && item.length >= 3
? item[0] + item[2] : null).filter(Boolean);
} catch(e) {}
}
return [];
}
"""
    try:
        urls = await page.evaluate(script)
        if urls:
            logger.debug("JS readerInit нашёл {} изображений", len(urls))
        return urls or []
    except Exception as e:
        logger.debug("JS-метод не сработал: {}", e)
        return []
async def _extract_images_from_dom(page: Page) -> list[str]:
    """Collect image URLs from ``<img>`` elements already present in the DOM.

    The first selector that matches any element wins; each image contributes
    its ``src`` or, failing that, its ``data-src``. Any evaluation failure
    yields an empty list.
    """
    script = """
() => {
for (const sel of ['img.manga-page', '.page-image img', '#mangaReader img', 'img[data-src]']) {
const found = Array.from(document.querySelectorAll(sel));
if (found.length) return found.map(i => i.src || i.dataset.src).filter(Boolean);
}
return [];
}
"""
    try:
        urls = await page.evaluate(script)
        return urls or []
    except Exception:
        return []
def _get_ext(url: str) -> str:
m = re.search(r"\.(jpg|jpeg|png|webp)(\?|$)", url, re.IGNORECASE)
if m:
ext = m.group(1).lower()
return ".jpg" if ext == "jpeg" else f".{ext}"
return ".jpg"
def _base_url(url: str) -> str:
m = re.match(r"(https?://[^/]+)", url)
return m.group(1) if m else "https://readmanga.ru"
def _parse_num(text: str) -> float:
m = re.search(r"[\d]+(?:[.,]\d+)?", text.replace(",", "."))
return float(m.group()) if m else 0.0
def _parse_vol(text: str) -> int:
m = re.search(r"Том\s+(\d+)", text, re.IGNORECASE)
return int(m.group(1)) if m else 0

View File

@@ -1,14 +1,25 @@
""" """
Хранение состояния скачивания в SQLite. Хранение состояния скачивания в SQLite.
""" """
import json
import sqlite3 import sqlite3
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
from urllib.parse import urlparse
DB_PATH = Path("/app/state/progress.db") DB_PATH = Path("/app/state/progress.db")
# Домены ReadManga по умолчанию (сидинг при первом запуске)
_DEFAULT_READMANGA_DOMAINS = [
"readmanga.ru",
"readmanga.live",
"readmanga.me",
"readmanga.io",
"3.readmanga.ru",
]
class StateDB: class StateDB:
def __init__(self, db_path: Path = DB_PATH): def __init__(self, db_path: Path = DB_PATH):
@@ -68,6 +79,22 @@ class StateDB:
created_at TEXT created_at TEXT
) )
""") """)
self.conn.execute("""
CREATE TABLE IF NOT EXISTS sources (
id INTEGER PRIMARY KEY AUTOINCREMENT,
slug TEXT UNIQUE NOT NULL,
display_name TEXT NOT NULL,
settings TEXT DEFAULT '{}',
created_at TEXT
)
""")
self.conn.execute("""
CREATE TABLE IF NOT EXISTS source_domains (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_id INTEGER NOT NULL REFERENCES sources(id),
domain TEXT UNIQUE NOT NULL
)
""")
# Migrate old DB: add missing columns # Migrate old DB: add missing columns
migrations = [ migrations = [
("chapters", "pages_total", "INTEGER DEFAULT 0"), ("chapters", "pages_total", "INTEGER DEFAULT 0"),
@@ -76,10 +103,11 @@ class StateDB:
("mangas", "title_full", "TEXT"), ("mangas", "title_full", "TEXT"),
("mangas", "pub_status", "TEXT DEFAULT 'unknown'"), ("mangas", "pub_status", "TEXT DEFAULT 'unknown'"),
("mangas", "auto_update", "INTEGER DEFAULT 0"), ("mangas", "auto_update", "INTEGER DEFAULT 0"),
("mangas", "last_checked_at", "TEXT"), ("mangas", "last_checked_at","TEXT"),
("mangas", "started_at", "TEXT"), ("mangas", "started_at", "TEXT"),
("mangas", "finished_at", "TEXT"), ("mangas", "finished_at", "TEXT"),
("mangas", "folder_name", "TEXT"), ("mangas", "folder_name", "TEXT"),
("mangas", "source_id", "INTEGER REFERENCES sources(id)"),
] ]
for table, col, typedef in migrations: for table, col, typedef in migrations:
try: try:
@@ -88,17 +116,184 @@ class StateDB:
pass pass
self.conn.commit() self.conn.commit()
def sync_sources(self, registry) -> None:
    """Synchronise the ``sources`` table with the in-code registry.

    For every registry source a row is inserted if its slug is missing, or
    its display name is updated if it differs. When the 'readmanga' source
    has no domains yet, the default ReadManga domains are seeded. Finally a
    warning is logged for every slug present in the DB but not in the
    registry.
    """
    from loguru import logger

    for src in registry.all_sources():
        row = self.conn.execute(
            "SELECT id, display_name FROM sources WHERE slug=?", (src.slug,)
        ).fetchone()
        if not row:
            self.conn.execute(
                "INSERT INTO sources (slug, display_name, settings, created_at) VALUES (?,?,?,?)",
                (src.slug, src.display_name, "{}", _now())
            )
            logger.info("Источник добавлен в БД: {} ({})", src.display_name, src.slug)
        elif row["display_name"] != src.display_name:
            self.conn.execute(
                "UPDATE sources SET display_name=? WHERE slug=?",
                (src.display_name, src.slug)
            )
    self.conn.commit()

    # Seed the ReadManga domains on first run (only when none are stored).
    readmanga = self.conn.execute(
        "SELECT id FROM sources WHERE slug='readmanga'"
    ).fetchone()
    if readmanga:
        existing_domains = self.conn.execute(
            "SELECT COUNT(*) FROM source_domains WHERE source_id=?", (readmanga["id"],)
        ).fetchone()[0]
        if existing_domains == 0:
            for domain in _DEFAULT_READMANGA_DOMAINS:
                try:
                    self.conn.execute(
                        "INSERT INTO source_domains (source_id, domain) VALUES (?,?)",
                        (readmanga["id"], domain)
                    )
                except Exception:
                    # A failed insert of one domain must not abort the seeding.
                    pass
            self.conn.commit()
            logger.info("Сидинг доменов ReadManga: {} доменов", len(_DEFAULT_READMANGA_DOMAINS))

    # Report sources that exist in the DB but have no code behind them.
    registered = set(registry.all_slugs())
    for record in self.conn.execute("SELECT slug FROM sources").fetchall():
        slug = record["slug"]
        if slug not in registered:
            logger.warning("Источник '{}' есть в БД, но отсутствует в реестре — манги недоступны", slug)
def migrate_manga_sources(self) -> int:
    """Fill in ``source_id`` for mangas where it is still NULL.

    The source is looked up by the domain of the manga URL through
    ``source_domains``. Mangas whose domain is unknown are left untouched.

    Returns:
        The number of mangas that were updated.
    """
    pending = self.conn.execute(
        "SELECT url FROM mangas WHERE source_id IS NULL"
    ).fetchall()

    fixed = 0
    for record in pending:
        manga_url = record["url"]
        owner = self.get_source_by_domain(_extract_domain(manga_url))
        if not owner:
            continue
        self.conn.execute(
            "UPDATE mangas SET source_id=? WHERE url=?",
            (owner["id"], manga_url)
        )
        fixed += 1

    # Commit only when something actually changed.
    if fixed:
        self.conn.commit()
    return fixed
# ── Sources ───────────────────────────────────
def get_source_by_id(self, source_id: int) -> Optional[dict]:
    """Return the ``sources`` row with the given id as a dict, or None."""
    found = self.conn.execute(
        "SELECT * FROM sources WHERE id=?", (source_id,)
    ).fetchone()
    if not found:
        return None
    return dict(found)
def get_source_by_slug(self, slug: str) -> Optional[dict]:
    """Return the ``sources`` row with the given slug as a dict, or None."""
    found = self.conn.execute(
        "SELECT * FROM sources WHERE slug=?", (slug,)
    ).fetchone()
    if not found:
        return None
    return dict(found)
def get_source_by_domain(self, domain: str) -> Optional[dict]:
    """Return the source that owns ``domain`` (joined via ``source_domains``).

    The domain is lower-cased before the lookup. Returns None when no source
    is registered for it.
    """
    wanted = domain.lower()
    found = self.conn.execute("""
SELECT s.* FROM sources s
JOIN source_domains sd ON sd.source_id = s.id
WHERE sd.domain=?
""", (wanted,)).fetchone()
    if not found:
        return None
    return dict(found)
def get_all_sources(self) -> list[dict]:
    """Return every source (ordered by id) with its domains and settings.

    Each dict carries the row's columns plus ``domains`` (sorted list of
    domain names) and ``settings`` decoded from JSON; settings that are
    empty or fail to decode become ``{}``.
    """
    collected = []
    for src in self.conn.execute("SELECT * FROM sources ORDER BY id").fetchall():
        entry = dict(src)
        domain_rows = self.conn.execute(
            "SELECT domain FROM source_domains WHERE source_id=? ORDER BY domain",
            (src["id"],)
        ).fetchall()
        entry["domains"] = [row["domain"] for row in domain_rows]
        try:
            entry["settings"] = json.loads(entry.get("settings") or "{}")
        except Exception:
            entry["settings"] = {}
        collected.append(entry)
    return collected
def add_domain(self, source_id: int, domain: str) -> bool:
    """Attach ``domain`` to a source.

    The domain is lower-cased and stripped before it is stored.

    Returns:
        True when the row was inserted; False when the normalised domain is
        empty or the insert violates a constraint (e.g. the domain already
        exists).
    """
    domain = domain.lower().strip()
    # An empty domain must never be stored: "" is also what domain
    # extraction yields for unparseable URLs, so such a row would match
    # every one of them.
    if not domain:
        return False
    try:
        self.conn.execute(
            "INSERT INTO source_domains (source_id, domain) VALUES (?,?)",
            (source_id, domain)
        )
        self.conn.commit()
        return True
    except sqlite3.IntegrityError:
        # Only constraint violations mean "already exists"; other database
        # errors are real failures and are left to propagate.
        return False
def remove_domain(self, source_id: int, domain: str) -> bool:
    """Detach ``domain`` from a source.

    The domain is lower-cased and stripped, the same normalisation that
    ``add_domain`` applies when storing, so a name accepted on add can
    always be removed with the same input.

    Returns:
        True when a row was deleted, False when nothing matched.
    """
    normalized = domain.lower().strip()
    cur = self.conn.execute(
        "DELETE FROM source_domains WHERE source_id=? AND domain=?",
        (source_id, normalized)
    )
    self.conn.commit()
    return cur.rowcount > 0
def set_manga_source(self, manga_url: str, source_id: int) -> None:
    """Assign ``source_id`` to the manga with the given URL.

    The manga's ``updated_at`` is refreshed and the change is committed.
    """
    params = (source_id, _now(), manga_url)
    self.conn.execute(
        "UPDATE mangas SET source_id=?, updated_at=? WHERE url=?",
        params
    )
    self.conn.commit()
def reset_failed_chapters(self, manga_url: str) -> int:
    """Put failed and incomplete chapters of a manga back to 'pending'.

    Two groups are reset (status, ``pages_done`` and ``pages_total``):
    chapters with status 'failed', and chapters with status 'done' whose
    ``pages_done`` is below a positive ``pages_total``.

    Returns:
        The total number of chapters that were reset.
    """
    params = (_now(), manga_url)

    failed_cursor = self.conn.execute(
        "UPDATE chapters SET status='pending', pages_done=0, pages_total=0, updated_at=? "
        "WHERE manga_url=? AND status='failed'",
        params
    )
    failed_count = failed_cursor.rowcount

    partial_cursor = self.conn.execute(
        """UPDATE chapters SET status='pending', pages_done=0, pages_total=0, updated_at=?
WHERE manga_url=? AND status='done'
AND pages_total > 0 AND pages_done < pages_total""",
        params
    )
    partial_count = partial_cursor.rowcount

    self.conn.commit()
    return failed_count + partial_count
def count_mangas_by_source_domain(self, domain: str) -> int:
    """Count the mangas of the source that owns ``domain``.

    The count covers every manga assigned to that source, not only those
    whose URL uses this particular domain. Returns 0 when the domain
    belongs to no source.
    """
    owner = self.get_source_by_domain(domain)
    if owner:
        counted = self.conn.execute(
            "SELECT COUNT(*) FROM mangas WHERE source_id=?", (owner["id"],)
        ).fetchone()
        return counted[0]
    return 0
# ── Mangas ──────────────────────────────────── # ── Mangas ────────────────────────────────────
def add_manga(self, url: str, fmt: str = "cbz") -> bool: def add_manga(self, url: str, fmt: str = "cbz", source_id: Optional[int] = None) -> bool:
"""Добавляет мангу в очередь. Возвращает True если новая.""" """Добавляет мангу в очередь. Возвращает True если новая."""
cur = self.conn.execute("SELECT id FROM mangas WHERE url=?", (url,)) cur = self.conn.execute("SELECT id FROM mangas WHERE url=?", (url,))
if cur.fetchone(): if cur.fetchone():
return False return False
self.conn.execute(""" self.conn.execute("""
INSERT INTO mangas (url, format, status, added_at, updated_at) INSERT INTO mangas (url, format, status, source_id, added_at, updated_at)
VALUES (?, ?, 'queued', ?, ?) VALUES (?, ?, 'queued', ?, ?, ?)
""", (url, fmt, _now(), _now())) """, (url, fmt, source_id, _now(), _now()))
self.conn.commit() self.conn.commit()
return True return True
@@ -318,3 +513,15 @@ class StateDB:
def _now() -> str: def _now() -> str:
return datetime.utcnow().isoformat() return datetime.utcnow().isoformat()
def _extract_domain(url: str) -> str:
"""Извлекает домен без www."""
try:
domain = urlparse(url).netloc.lower()
if domain.startswith("www."):
domain = domain[4:]
return domain
except Exception:
return ""

View File

@@ -11,7 +11,9 @@ from typing import Callable, Optional
from loguru import logger from loguru import logger
from .browser import BrowserManager from .browser import BrowserManager
from .scraper import get_manga_info, get_chapter_images_and_download, Chapter from .sources import registry, get_source_for_url, extract_domain
from .sources.base import Chapter, MangaInfo
from .scraper import get_manga_info, get_chapter_images_and_download # shim для обратной совместимости
from .exporter import export, MangaMeta from .exporter import export, MangaMeta
from .state import StateDB from .state import StateDB
@@ -61,10 +63,23 @@ async def download_manga(
started_ts = await db_call(db.mark_started, url) started_ts = await db_call(db.mark_started, url)
await emit({"type": "manga_start", "url": url, "started_at": started_ts}) await emit({"type": "manga_start", "url": url, "started_at": started_ts})
# Резолвим источник
source = get_source_for_url(url, db)
if source is None:
# Последний шанс: по source_id в БД
manga_row = await db_call(db.get_manga, url)
if manga_row and manga_row.get("source_id"):
source = registry.get_by_db_id(manga_row["source_id"], db)
if source is None:
await db_call(db.update_manga_status, url, "failed")
await emit({"type": "source_unknown", "url": url,
"error": "Источник не определён. Выберите источник в настройках манги."})
return
async with BrowserManager(headless=True) as bm: async with BrowserManager(headless=True) as bm:
ctx, info_page = await bm.new_page() ctx, info_page = await bm.new_page()
manga = await get_manga_info(info_page, url) manga = await source.get_manga_info(info_page, url)
await info_page.close() await info_page.close()
if not manga: if not manga:
@@ -193,7 +208,7 @@ async def download_manga(
"pages_total": pages_total, "pages_total": pages_total,
}) })
image_paths = await get_chapter_images_and_download( image_paths = await source.get_chapter_images_and_download(
ch_page, ch.url, ch_page, ch.url,
dest_dir=tmp_path, dest_dir=tmp_path,
manga_url=url, manga_url=url,
@@ -329,9 +344,19 @@ async def check_for_updates(
db.add_history(manga_url=url, event_type="check_started") db.add_history(manga_url=url, event_type="check_started")
await emit({"type": "check_started", "url": url}) await emit({"type": "check_started", "url": url})
# Резолвим источник
source = get_source_for_url(url, db)
if source is None:
manga_row = db.get_manga(url)
if manga_row and manga_row.get("source_id"):
source = registry.get_by_db_id(manga_row["source_id"], db)
if source is None:
await emit({"type": "source_unknown", "url": url})
return []
async with BrowserManager(headless=True) as bm: async with BrowserManager(headless=True) as bm:
_, page = await bm.new_page() _, page = await bm.new_page()
manga = await get_manga_info(page, url) manga = await source.get_manga_info(page, url)
await page.close() await page.close()
if not manga: if not manga:
return [] return []