upd
This commit is contained in:
@@ -108,7 +108,7 @@
|
|||||||
<div class="card rounded-xl p-5 mb-6">
|
<div class="card rounded-xl p-5 mb-6">
|
||||||
<h2 class="text-sm font-semibold text-gray-400 uppercase tracking-wider mb-3">Добавить мангу</h2>
|
<h2 class="text-sm font-semibold text-gray-400 uppercase tracking-wider mb-3">Добавить мангу</h2>
|
||||||
<div class="flex flex-col md:flex-row gap-3">
|
<div class="flex flex-col md:flex-row gap-3">
|
||||||
<textarea id="url-input" rows="2" placeholder="Один или несколько URL (каждый с новой строки) https://3.readmanga.ru/manga_slug" class="flex-1 px-3 py-2 text-sm resize-none"></textarea>
|
<textarea id="url-input" rows="2" placeholder="Один или несколько URL (каждый с новой строки) https://3.readmanga.ru/manga_slug" class="flex-1 px-3 py-2 text-sm resize-none" oninput="onUrlInputChange()"></textarea>
|
||||||
<div class="flex flex-col gap-2">
|
<div class="flex flex-col gap-2">
|
||||||
<select id="fmt-select" class="px-3 py-2 text-sm">
|
<select id="fmt-select" class="px-3 py-2 text-sm">
|
||||||
<option value="cbz">CBZ</option>
|
<option value="cbz">CBZ</option>
|
||||||
@@ -116,7 +116,20 @@
|
|||||||
<option value="epub">EPUB</option>
|
<option value="epub">EPUB</option>
|
||||||
<option value="all">Все форматы</option>
|
<option value="all">Все форматы</option>
|
||||||
</select>
|
</select>
|
||||||
<button onclick="addToQueue()" class="btn-primary text-sm">➕ В очередь</button>
|
<button onclick="addToQueue()" id="add-btn" class="btn-primary text-sm">➕ В очередь</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<!-- Source detection hint -->
|
||||||
|
<div id="source-hint" class="mt-2 hidden">
|
||||||
|
<div id="source-hint-found" class="hidden text-xs text-green-400 flex items-center gap-2">
|
||||||
|
<span>🔗 Источник:</span>
|
||||||
|
<span id="source-hint-name" class="font-semibold"></span>
|
||||||
|
</div>
|
||||||
|
<div id="source-hint-unknown" class="hidden flex flex-col gap-2">
|
||||||
|
<div class="text-xs text-yellow-400">⚠ Домен не распознан. Выберите источник вручную:</div>
|
||||||
|
<select id="source-manual-select" class="px-3 py-2 text-sm w-full md:w-72">
|
||||||
|
<option value="">— выберите источник —</option>
|
||||||
|
</select>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div id="add-msg" class="mt-2 text-sm text-green-400 hidden"></div>
|
<div id="add-msg" class="mt-2 text-sm text-green-400 hidden"></div>
|
||||||
@@ -132,6 +145,8 @@
|
|||||||
class="px-4 py-3 text-sm font-semibold border-b-2 border-transparent text-gray-400 hover:text-white">🔔 Новости</button>
|
class="px-4 py-3 text-sm font-semibold border-b-2 border-transparent text-gray-400 hover:text-white">🔔 Новости</button>
|
||||||
<button onclick="switchTab('history')" id="tab-history"
|
<button onclick="switchTab('history')" id="tab-history"
|
||||||
class="px-4 py-3 text-sm font-semibold border-b-2 border-transparent text-gray-400 hover:text-white">🕒 История</button>
|
class="px-4 py-3 text-sm font-semibold border-b-2 border-transparent text-gray-400 hover:text-white">🕒 История</button>
|
||||||
|
<button onclick="switchTab('settings')" id="tab-settings"
|
||||||
|
class="px-4 py-3 text-sm font-semibold border-b-2 border-transparent text-gray-400 hover:text-white">⚙️ Настройки</button>
|
||||||
</div>
|
</div>
|
||||||
<div id="manga-filters" class="flex gap-2 py-2">
|
<div id="manga-filters" class="flex gap-2 py-2">
|
||||||
<button onclick="filterMangas('all')" id="filter-all" class="text-xs px-3 py-1 rounded-full bg-indigo-600 text-white">Все</button>
|
<button onclick="filterMangas('all')" id="filter-all" class="text-xs px-3 py-1 rounded-full bg-indigo-600 text-white">Все</button>
|
||||||
@@ -177,6 +192,36 @@
|
|||||||
<div class="px-5 py-8 text-center text-gray-500 text-sm">Загрузка...</div>
|
<div class="px-5 py-8 text-center text-gray-500 text-sm">Загрузка...</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Settings -->
|
||||||
|
<div id="tab-content-settings" class="hidden">
|
||||||
|
<div class="px-5 py-4">
|
||||||
|
<h3 class="text-sm font-semibold text-gray-300 uppercase tracking-wider mb-1">Источники</h3>
|
||||||
|
<p class="text-xs text-gray-500 mb-4">Источники определяются в коде приложения. Здесь можно управлять доменами для каждого источника.</p>
|
||||||
|
<div id="sources-list" class="flex flex-col gap-3"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Switch Source Modal -->
|
||||||
|
<div id="switch-source-modal" class="fixed inset-0 z-[60] hidden items-center justify-center" style="background:rgba(0,0,0,0.75)">
|
||||||
|
<div class="card rounded-2xl w-full max-w-md mx-4 p-6 flex flex-col gap-4">
|
||||||
|
<h3 class="font-semibold text-white text-base">↔ Сменить источник</h3>
|
||||||
|
<div class="text-sm text-gray-400" id="switch-source-current"></div>
|
||||||
|
<div class="flex flex-col gap-2">
|
||||||
|
<label class="text-xs text-gray-400">Новый источник</label>
|
||||||
|
<select id="switch-source-select" class="px-3 py-2 text-sm w-full"></select>
|
||||||
|
<div id="switch-source-warning" class="text-xs text-yellow-400 hidden"></div>
|
||||||
|
</div>
|
||||||
|
<div class="flex gap-3 justify-end mt-2">
|
||||||
|
<button onclick="closeSwitchSourceModal()"
|
||||||
|
class="px-4 py-2 rounded-lg text-sm text-gray-400 hover:text-white"
|
||||||
|
style="background:#1e293b">Отмена</button>
|
||||||
|
<button onclick="confirmSwitchSource()"
|
||||||
|
class="px-4 py-2 rounded-lg text-sm font-semibold text-white"
|
||||||
|
style="background:#312e81">Применить</button>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -281,6 +326,7 @@ const state = {
|
|||||||
mangas: {}, // url → manga object
|
mangas: {}, // url → manga object
|
||||||
chapters: {}, // manga_url → [chapter, ...]
|
chapters: {}, // manga_url → [chapter, ...]
|
||||||
filter: 'all',
|
filter: 'all',
|
||||||
|
sources: [], // [{id, slug, display_name, domains}]
|
||||||
};
|
};
|
||||||
|
|
||||||
// ── Auth ─────────────────────────────────────
|
// ── Auth ─────────────────────────────────────
|
||||||
@@ -407,8 +453,12 @@ function handleEvent(msg) {
|
|||||||
|
|
||||||
case 'manga_queued':
|
case 'manga_queued':
|
||||||
if(!state.mangas[msg.url]) {
|
if(!state.mangas[msg.url]) {
|
||||||
|
const srcInfo = msg.source_id ? (state.sources.find(s => s.id === msg.source_id) || null) : null;
|
||||||
state.mangas[msg.url] = { url: msg.url, title: msg.url, status: 'queued', format: msg.format,
|
state.mangas[msg.url] = { url: msg.url, title: msg.url, status: 'queued', format: msg.format,
|
||||||
chapters_total: 0, chapters_done: 0, size_human: '—' };
|
chapters_total: 0, chapters_done: 0, size_human: '—',
|
||||||
|
source: srcInfo ? {id: srcInfo.id, slug: srcInfo.slug, display_name: srcInfo.display_name} : null };
|
||||||
|
} else {
|
||||||
|
state.mangas[msg.url].status = 'queued';
|
||||||
}
|
}
|
||||||
renderList();
|
renderList();
|
||||||
loadStats();
|
loadStats();
|
||||||
@@ -597,6 +647,25 @@ function handleEvent(msg) {
|
|||||||
renderList();
|
renderList();
|
||||||
loadStats();
|
loadStats();
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case 'source_unknown':
|
||||||
|
_showNotification('⚠ Источник не определён для ' + (state.mangas[msg.url]?.title || msg.url) + '. Выберите источник.', 'warn');
|
||||||
|
if(state.mangas[msg.url]) { state.mangas[msg.url].status = 'failed'; renderList(); }
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'source_domain_added':
|
||||||
|
case 'source_domain_removed':
|
||||||
|
loadSources();
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'source_switched':
|
||||||
|
if(state.mangas[msg.url]) {
|
||||||
|
// Обновляем source у манги из актуального списка источников
|
||||||
|
const newSrc = state.sources.find(s => s.id === msg.new_source_id);
|
||||||
|
if(newSrc) state.mangas[msg.url].source = {id: newSrc.id, slug: newSrc.slug, display_name: newSrc.display_name};
|
||||||
|
updateMangaRow(msg.url);
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -604,7 +673,7 @@ function handleEvent(msg) {
|
|||||||
let newsUnreadCount = 0;
|
let newsUnreadCount = 0;
|
||||||
|
|
||||||
function switchTab(tab) {
|
function switchTab(tab) {
|
||||||
['mangas', 'news', 'history'].forEach(t => {
|
['mangas', 'news', 'history', 'settings'].forEach(t => {
|
||||||
document.getElementById('tab-content-'+t).classList.toggle('hidden', t !== tab);
|
document.getElementById('tab-content-'+t).classList.toggle('hidden', t !== tab);
|
||||||
const btn = document.getElementById('tab-'+t);
|
const btn = document.getElementById('tab-'+t);
|
||||||
btn.className = t === tab
|
btn.className = t === tab
|
||||||
@@ -614,6 +683,8 @@ function switchTab(tab) {
|
|||||||
document.getElementById('manga-filters').classList.toggle('hidden', tab !== 'mangas');
|
document.getElementById('manga-filters').classList.toggle('hidden', tab !== 'mangas');
|
||||||
if(tab === 'history') loadHistory();
|
if(tab === 'history') loadHistory();
|
||||||
if(tab === 'news') { newsUnreadCount = 0; updateNewsBadge(); loadNews(); }
|
if(tab === 'news') { newsUnreadCount = 0; updateNewsBadge(); loadNews(); }
|
||||||
|
if(tab === 'settings') loadSources();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function updateNewsBadge() {
|
function updateNewsBadge() {
|
||||||
@@ -773,6 +844,66 @@ async function checkNowBtn(btn, url) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Source detection ─────────────────────────
|
||||||
|
let _resolveTimer = null;
|
||||||
|
// Outcome of the last source lookup for the URL input, consumed by addToQueue():
//   number    - id of the source returned by /api/resolve-source for the URL;
//   undefined - domain not recognised, the user has to pick a source manually;
//   null      - nothing resolved (empty input, failed lookup, or reset after a
//               successful add); addToQueue() then sends no source_id at all.
let _resolvedSourceId = null;
|
||||||
|
|
||||||
|
/**
 * `oninput` handler of the URL textarea (debounce).
 *
 * Drops the lookup scheduled by the previous keystroke, if any, and schedules
 * `_resolveSource` to run 400 ms after the most recent one.
 */
async function onUrlInputChange() {
  const DEBOUNCE_MS = 400;
  if (_resolveTimer !== null) {
    clearTimeout(_resolveTimer);
  }
  _resolveTimer = setTimeout(() => _resolveSource(), DEBOUNCE_MS);
}
|
||||||
|
|
||||||
|
/**
 * Ask the backend which source handles the first URL in #url-input and
 * update the hint under the textarea accordingly.
 *
 * Outcomes:
 *  - source found: its display name is shown, `_resolvedSourceId` becomes its
 *    id and the add button is enabled;
 *  - source not found: the manual <select> is filled from `state.sources`,
 *    `_resolvedSourceId` becomes `undefined` and the add button stays disabled
 *    until an option is chosen;
 *  - empty input or failed request: the hint is hidden, `_resolvedSourceId`
 *    becomes `null` and the add button is enabled.
 *
 * A response is applied only if the first URL in the textarea is still the
 * one the request was made for. Otherwise a slow reply for an older keystroke
 * could overwrite the hint for the current input, or re-show it after the
 * field was cleared by a successful add.
 */
async function _resolveSource() {
  const input = document.getElementById('url-input');
  const hint = document.getElementById('source-hint');
  const hintFound = document.getElementById('source-hint-found');
  const hintUnknown = document.getElementById('source-hint-unknown');
  const addBtn = document.getElementById('add-btn');

  // First non-empty line of the textarea (undefined when there is none).
  const firstUrl = () => input.value.split('\n').map(u => u.trim()).find(Boolean);

  // "Nothing resolved" state: no hint, adding allowed, no explicit source id.
  const reset = () => {
    hint.classList.add('hidden');
    _resolvedSourceId = null;
    addBtn.disabled = false;
  };

  const url = firstUrl();
  if (!url) {
    reset();
    return;
  }

  try {
    const r = await fetch('/api/resolve-source?url=' + encodeURIComponent(url));
    // An error response (with or without a JSON body) is a failed lookup,
    // not an "unknown domain" answer.
    if (!r.ok) throw new Error('HTTP ' + r.status);
    const data = await r.json();

    // The input changed while the request was in flight: this answer is stale.
    if (firstUrl() !== url) return;

    hint.classList.remove('hidden');

    if (data.source) {
      hintFound.classList.remove('hidden');
      hintUnknown.classList.add('hidden');
      document.getElementById('source-hint-name').textContent = data.source.display_name;
      _resolvedSourceId = data.source.id;
      addBtn.disabled = false;
      return;
    }

    hintFound.classList.add('hidden');
    hintUnknown.classList.remove('hidden');
    _resolvedSourceId = undefined; // unknown domain: a manual choice is required
    addBtn.disabled = true;

    // Rebuild the manual source list from the sources currently known to the page.
    const sel = document.getElementById('source-manual-select');
    sel.innerHTML = '<option value="">— выберите источник —</option>';
    (state.sources || []).forEach(s => {
      const opt = document.createElement('option');
      opt.value = s.id;
      opt.textContent = s.display_name;
      sel.appendChild(opt);
    });
    sel.onchange = () => {
      addBtn.disabled = !sel.value;
    };
  } catch (e) {
    // Best effort: a failed lookup must not block adding. Skip the reset when
    // the input has already moved on, so newer state is left alone.
    if (firstUrl() === url) reset();
  }
}
|
||||||
|
|
||||||
// ── API ──────────────────────────────────────
|
// ── API ──────────────────────────────────────
|
||||||
async function loadStats() {
|
async function loadStats() {
|
||||||
try {
|
try {
|
||||||
@@ -788,17 +919,35 @@ async function addToQueue() {
|
|||||||
const urls = raw.split('\n').map(u=>u.trim()).filter(Boolean);
|
const urls = raw.split('\n').map(u=>u.trim()).filter(Boolean);
|
||||||
if(!urls.length) return;
|
if(!urls.length) return;
|
||||||
|
|
||||||
|
// Определяем source_id
|
||||||
|
let sourceId = null;
|
||||||
|
if(_resolvedSourceId === undefined) {
|
||||||
|
// Неизвестный домен — нужен ручной выбор
|
||||||
|
const manualVal = document.getElementById('source-manual-select').value;
|
||||||
|
if(!manualVal) { alert('Выберите источник для добавления манги'); return; }
|
||||||
|
sourceId = parseInt(manualVal);
|
||||||
|
} else if(_resolvedSourceId !== null) {
|
||||||
|
sourceId = _resolvedSourceId;
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
const body = {urls, format: fmt};
|
||||||
|
if(sourceId !== null) body.source_id = sourceId;
|
||||||
const r = await fetch('/api/queue', {
|
const r = await fetch('/api/queue', {
|
||||||
method:'POST',
|
method:'POST',
|
||||||
headers:{'Content-Type':'application/json'},
|
headers:{'Content-Type':'application/json'},
|
||||||
body: JSON.stringify({urls, format: fmt})
|
body: JSON.stringify(body)
|
||||||
});
|
});
|
||||||
const data = await r.json();
|
const data = await r.json();
|
||||||
const msg = document.getElementById('add-msg');
|
const msg = document.getElementById('add-msg');
|
||||||
msg.textContent = `✓ Добавлено: ${data.added.length}, уже есть: ${data.skipped.length}`;
|
msg.textContent = `✓ Добавлено: ${data.added.length}, уже есть: ${data.skipped.length}`;
|
||||||
msg.classList.remove('hidden');
|
msg.classList.remove('hidden');
|
||||||
if(data.added.length) document.getElementById('url-input').value = '';
|
if(data.added.length) {
|
||||||
|
document.getElementById('url-input').value = '';
|
||||||
|
document.getElementById('source-hint').classList.add('hidden');
|
||||||
|
_resolvedSourceId = null;
|
||||||
|
document.getElementById('add-btn').disabled = false;
|
||||||
|
}
|
||||||
setTimeout(()=>msg.classList.add('hidden'), 4000);
|
setTimeout(()=>msg.classList.add('hidden'), 4000);
|
||||||
} catch(e) {
|
} catch(e) {
|
||||||
alert('Ошибка: ' + e.message);
|
alert('Ошибка: ' + e.message);
|
||||||
@@ -825,6 +974,193 @@ async function resumeManga(url) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Sources ───────────────────────────────────
|
||||||
|
/**
 * Fetch the list of sources from GET /api/sources into `state.sources`.
 *
 * The settings list is re-rendered only while the settings tab is visible.
 * Failures are non-fatal (the previous `state.sources` is kept) but are
 * reported on the console instead of being swallowed silently.
 */
async function loadSources() {
  try {
    const r = await fetch('/api/sources');
    if (!r.ok) {
      console.warn('loadSources: HTTP ' + r.status);
      return;
    }
    const sources = await r.json();
    // Other code calls .find/.forEach/.map on state.sources, so only an
    // array may be stored there.
    if (!Array.isArray(sources)) {
      console.warn('loadSources: unexpected payload', sources);
      return;
    }
    state.sources = sources;
    if (!document.getElementById('tab-content-settings').classList.contains('hidden')) {
      renderSources();
    }
  } catch (e) {
    console.warn('loadSources failed:', e);
  }
}
|
||||||
|
|
||||||
|
/**
 * Render the settings list of sources with their domains into #sources-list.
 *
 * Each source card shows its display name, slug, one chip per domain with a
 * remove button, and an "add domain" area (`#add-domain-area-<id>`) that
 * `showAddDomain` later replaces with an input.
 *
 * The remove buttons carry only numeric indexes and get their handlers via
 * addEventListener. The domain string is never interpolated into an inline
 * `onclick` attribute: HTML-escaping does not protect a JS string literal
 * inside an attribute, so a quote in a domain would break the handler or
 * inject script.
 */
function renderSources() {
  const container = document.getElementById('sources-list');
  if (!container) return;

  // Snapshot used by the click handlers, so they resolve against the list
  // that was actually rendered.
  const sources = state.sources || [];
  if (!sources.length) {
    container.innerHTML = '<div class="text-sm text-gray-500">Нет доступных источников</div>';
    return;
  }

  container.innerHTML = sources.map((s, si) => `
    <div class="rounded-lg p-4" style="background:#0f172a;border:1px solid #1e293b">
      <div class="flex items-center justify-between mb-3">
        <div>
          <span class="text-sm font-semibold text-white">${escHtml(s.display_name)}</span>
          <span class="ml-2 text-xs text-gray-500">slug: ${escHtml(s.slug)}</span>
        </div>
      </div>
      <div class="flex flex-wrap gap-2 items-center">
        ${(s.domains || []).map((d, di) => `
          <span class="flex items-center gap-1 text-xs px-2 py-1 rounded" style="background:#1e293b;color:#94a3b8">
            ${escHtml(d)}
            <button data-remove-domain="${si}:${di}"
                    title="Удалить домен"
                    style="color:#ef4444;background:none;border:none;cursor:pointer;padding:0 2px;font-size:0.8rem;line-height:1">✕</button>
          </span>
        `).join('')}
        <span id="add-domain-area-${s.id}">
          <button onclick="showAddDomain(${s.id})"
                  style="font-size:0.7rem;padding:3px 8px;border-radius:4px;background:#1e293b;color:#6ee7b7;border:1px dashed #334155;cursor:pointer">
            + домен
          </button>
        </span>
      </div>
    </div>
  `).join('');

  // Wire the remove buttons; the listeners vanish with the next innerHTML rewrite.
  container.querySelectorAll('button[data-remove-domain]').forEach(btn => {
    const [si, di] = btn.dataset.removeDomain.split(':').map(Number);
    const src = sources[si];
    btn.addEventListener('click', () => removeDomain(src.id, src.domains[di]));
  });
}
|
||||||
|
|
||||||
|
/**
 * Swap the "+ domain" button of a source card for an inline editor.
 *
 * The editor is a text input plus confirm/cancel buttons. Enter or the
 * confirm button calls `addDomain(sourceId)`; Escape or the cancel button
 * re-renders the list, which restores the original button. Does nothing when
 * the card's `#add-domain-area-<sourceId>` element is not on the page.
 *
 * @param {number} sourceId id of the source the new domain is for.
 */
function showAddDomain(sourceId) {
  const inputId = 'new-domain-input-' + sourceId;
  const slot = document.getElementById('add-domain-area-' + sourceId);
  if (slot === null) {
    return;
  }

  const inputHtml = `<input id="new-domain-input-${sourceId}" type="text" placeholder="example.com"
        class="text-xs px-2 py-1 rounded" style="background:#1e293b;color:#e2e8f0;border:1px solid #334155;width:140px"
        onkeydown="if(event.key==='Enter') addDomain(${sourceId}); if(event.key==='Escape') renderSources();">`;
  const confirmHtml = `<button onclick="addDomain(${sourceId})"
        style="font-size:0.75rem;padding:3px 8px;border-radius:4px;background:#166534;color:#86efac;cursor:pointer">✓</button>`;
  const cancelHtml = `<button onclick="renderSources()"
        style="font-size:0.75rem;padding:3px 8px;border-radius:4px;background:#1e293b;color:#94a3b8;cursor:pointer">✕</button>`;

  slot.innerHTML = [
    '<span class="flex items-center gap-1">',
    inputHtml,
    confirmHtml,
    cancelHtml,
    '</span>',
  ].join('\n');

  // Focus shortly after the markup is in place.
  setTimeout(() => {
    const field = document.getElementById(inputId);
    if (field) field.focus();
  }, 50);
}
|
||||||
|
|
||||||
|
/**
 * Attach the domain typed into the inline editor to a source.
 *
 * Reads `#new-domain-input-<sourceId>`, trims and lower-cases the value and
 * POSTs it to /api/sources/<sourceId>/domains. On success the source list is
 * reloaded; on failure a notification with the server's `detail` (or a
 * generic fallback) is shown.
 *
 * @param {number} sourceId id of the source that receives the domain.
 */
async function addDomain(sourceId) {
  const input = document.getElementById('new-domain-input-' + sourceId);
  if (!input) return;
  const domain = input.value.trim().toLowerCase();
  if (!domain) return;

  try {
    const r = await fetch(`/api/sources/${sourceId}/domains`, {
      method: 'POST',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({domain}),
    });
    if (!r.ok) {
      // An error body is not guaranteed to be JSON (e.g. a proxy error page);
      // fall back to the generic message instead of showing a parser error.
      const err = await r.json().catch(() => ({}));
      _showNotification('Ошибка: ' + ((err && err.detail) || 'неизвестная ошибка'), 'error');
      return;
    }
    await loadSources();
  } catch (e) {
    _showNotification('Ошибка: ' + e.message, 'error');
  }
}
|
||||||
|
|
||||||
|
/**
 * Detach a domain from a source after the user confirms.
 *
 * Sends DELETE /api/sources/<sourceId>/domains/<domain> (domain URL-encoded).
 * On success the source list is reloaded; on failure a notification with the
 * server's `detail` (or a generic fallback) is shown.
 *
 * @param {number} sourceId id of the source that owns the domain.
 * @param {string} domain   domain to remove.
 */
async function removeDomain(sourceId, domain) {
  if (!confirm(`Удалить домен «${domain}»?`)) return;

  try {
    const r = await fetch(
      `/api/sources/${sourceId}/domains/${encodeURIComponent(domain)}`,
      {method: 'DELETE'}
    );
    if (!r.ok) {
      // An error body is not guaranteed to be JSON (e.g. a proxy error page);
      // fall back to the generic message instead of showing a parser error.
      const err = await r.json().catch(() => ({}));
      _showNotification('Ошибка: ' + ((err && err.detail) || 'неизвестная ошибка'), 'error');
      return;
    }
    await loadSources();
  } catch (e) {
    _showNotification('Ошибка: ' + e.message, 'error');
  }
}
|
||||||
|
|
||||||
|
// ── Switch Source Modal ───────────────────────
|
||||||
|
let _switchSourceUrl = null;
|
||||||
|
|
||||||
|
/**
 * Open the "switch source" dialog for one manga.
 *
 * Remembers the manga URL for `confirmSwitchSource`, shows the manga's
 * current source, fills the <select> with every entry of `state.sources`
 * (pre-selecting the current one) and shows a warning naming the URL's
 * domain. The warning is hidden when the URL cannot be parsed.
 *
 * @param {string} url URL of the manga; also its key in `state.mangas`.
 */
function openSwitchSourceModal(url) {
  _switchSourceUrl = url;

  const currentSource = state.mangas[url]?.source;
  const dialog = document.getElementById('switch-source-modal');
  const picker = document.getElementById('switch-source-select');
  const notice = document.getElementById('switch-source-warning');

  const currentLabel = currentSource?.display_name || 'не определён';
  document.getElementById('switch-source-current').textContent = 'Текущий источник: ' + currentLabel;

  picker.innerHTML = '<option value="">— выберите источник —</option>';
  for (const src of state.sources) {
    const option = document.createElement('option');
    option.value = src.id;
    option.textContent = src.display_name;
    if (currentSource?.id === src.id) {
      option.selected = true;
    }
    picker.appendChild(option);
  }

  // Host name without a leading "www."; null when the URL does not parse.
  let host = null;
  try {
    host = new URL(url).hostname.replace(/^www\./, '');
  } catch (e) {
    host = null;
  }
  if (host === null) {
    notice.classList.add('hidden');
  } else {
    notice.textContent = `⚠ Домен «${host}» будет перепривязан к выбранному источнику. Это затронет все манги с этого домена.`;
    notice.classList.remove('hidden');
  }

  // The modal is laid out with flex; 'hidden' and 'flex' are swapped together.
  dialog.classList.remove('hidden');
  dialog.classList.add('flex');
}
|
||||||
|
|
||||||
|
/**
 * Hide the "switch source" dialog and forget which manga it was opened for.
 */
function closeSwitchSourceModal() {
  const { classList } = document.getElementById('switch-source-modal');
  _switchSourceUrl = null;
  classList.toggle('hidden', true);
  classList.toggle('flex', false);
}
|
||||||
|
|
||||||
|
/**
 * Apply the source chosen in the "switch source" dialog.
 *
 * POSTs `{url, source_id}` to /api/mangas/switch-source for the manga the
 * dialog was opened for. On success the dialog is closed, a notification
 * with the new source name (and the number of reset chapters, if any) is
 * shown, and the manga's `source` in `state.mangas` plus its row are updated.
 * Does nothing when no manga is pending or no source is selected.
 */
async function confirmSwitchSource() {
  const url = _switchSourceUrl;
  // Explicit radix; the empty placeholder option parses to NaN and is rejected below.
  const sourceId = parseInt(document.getElementById('switch-source-select').value, 10);
  if (!url || !sourceId) return;

  try {
    const r = await fetch('/api/mangas/switch-source', {
      method: 'POST',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({url, source_id: sourceId}),
    });
    if (!r.ok) {
      // An error body is not guaranteed to be JSON (e.g. a proxy error page);
      // fall back to the generic message instead of showing a parser error.
      const err = await r.json().catch(() => ({}));
      _showNotification('Ошибка: ' + ((err && err.detail) || 'неизвестная ошибка'), 'error');
      return;
    }
    const data = await r.json();
    closeSwitchSourceModal();
    _showNotification(
      `✓ Источник изменён на «${data.source_name}»` +
        (data.chapters_reset ? `. Сброшено глав: ${data.chapters_reset}` : ''),
      'ok'
    );
    if (state.mangas[url]) {
      const src = state.sources.find(s => s.id === sourceId);
      if (src) {
        state.mangas[url].source = {id: src.id, slug: src.slug, display_name: src.display_name};
      }
      updateMangaRow(url);
    }
  } catch (e) {
    _showNotification('Ошибка: ' + e.message, 'error');
  }
}
|
||||||
|
|
||||||
|
// Close the "switch source" dialog when the click lands on the modal root
// element itself (the backdrop) while the dialog is open.
document.addEventListener('click', (event) => {
  const overlay = document.getElementById('switch-source-modal');
  if (!overlay) return;
  if (overlay.classList.contains('hidden')) return;
  if (event.target !== overlay) return;
  closeSwitchSourceModal();
});
|
||||||
|
|
||||||
|
// ── Notification helper ───────────────────────
|
||||||
|
/**
 * Show a transient message in the #add-msg line for 5 seconds.
 *
 * @param {string} text message to display.
 * @param {'ok'|'warn'|'error'} [type='ok'] picks the text colour
 *        (green / amber / red); any other value is treated as 'ok'.
 *
 * A pending hide-timer from an earlier call is cancelled, so a new message
 * always gets its full 5 seconds. When the message is hidden the inline
 * colour is cleared again, because #add-msg is also written by addToQueue(),
 * which relies on the element's class-based colour.
 */
function _showNotification(text, type='ok') {
  const el = document.getElementById('add-msg');
  if (!el) return;

  // The timer id is kept on the function object so no extra global is needed.
  clearTimeout(_showNotification._hideTimer);

  el.textContent = text;
  el.style.color = type === 'error' ? '#f87171' : type === 'warn' ? '#fbbf24' : '#4ade80';
  el.classList.remove('hidden');

  _showNotification._hideTimer = setTimeout(() => {
    el.classList.add('hidden');
    el.style.color = '';
    _showNotification._hideTimer = undefined;
  }, 5000);
}
|
||||||
|
|
||||||
// ── Delete modal ─────────────────────────────
|
// ── Delete modal ─────────────────────────────
|
||||||
let _deleteUrl = null;
|
let _deleteUrl = null;
|
||||||
let _deleteFilesChecked = false;
|
let _deleteFilesChecked = false;
|
||||||
@@ -1011,6 +1347,12 @@ function pubStatusPill(s) {
|
|||||||
return `<span class="pill pill-pub-${s}">${map[s]}</span>`;
|
return `<span class="pill pill-pub-${s}">${map[s]}</span>`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Build the small HTML badge that names a manga's source.
 *
 * @param {?{slug: string, display_name: string}} source source descriptor
 *        of the manga, or a falsy value when it has none.
 * @returns {string} HTML for a <span>: grey "unknown source" text without a
 *          source, a red badge when the slug is 'unknown', a green badge
 *          otherwise. The display name is passed through escHtml.
 */
function _sourceBadge(source) {
  const badge = (background, color, label) =>
    '<span style="font-size:0.65rem;padding:2px 6px;border-radius:4px;background:' +
    background + ';color:' + color + '">' + label + '</span>';

  if (!source) {
    return badge('#1e293b', '#64748b', 'Источник неизвестен');
  }

  const label = escHtml(source.display_name);
  return source.slug === 'unknown'
    ? badge('#450a0a', '#fca5a5', label)
    : badge('#0f2a1e', '#6ee7b7', label);
}
|
||||||
|
|
||||||
// ── Время загрузки ────────────────────────────
|
// ── Время загрузки ────────────────────────────
|
||||||
// Храним интервал живого таймера: url → intervalId
|
// Храним интервал живого таймера: url → intervalId
|
||||||
const _timerIntervals = {};
|
const _timerIntervals = {};
|
||||||
@@ -1126,6 +1468,7 @@ function renderMangaRow(m) {
|
|||||||
<div class="flex items-center gap-2 flex-wrap">
|
<div class="flex items-center gap-2 flex-wrap">
|
||||||
<span data-r="status">${statusPill(m.status)}</span>
|
<span data-r="status">${statusPill(m.status)}</span>
|
||||||
<span data-r="pubstatus">${pubStatusPill(m.pub_status || 'unknown')}</span>
|
<span data-r="pubstatus">${pubStatusPill(m.pub_status || 'unknown')}</span>
|
||||||
|
<span data-r="source">${_sourceBadge(m.source)}</span>
|
||||||
<span class="text-sm font-medium text-white truncate" data-r="title">${escHtml(m.title || m.url)}</span>
|
<span class="text-sm font-medium text-white truncate" data-r="title">${escHtml(m.title || m.url)}</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="text-xs text-gray-500 mt-0.5 flex gap-3 flex-wrap">
|
<div class="text-xs text-gray-500 mt-0.5 flex gap-3 flex-wrap">
|
||||||
@@ -1170,6 +1513,11 @@ function _rowButtons(m) {
|
|||||||
title="${m.errors_count} проблем при загрузке"
|
title="${m.errors_count} проблем при загрузке"
|
||||||
style="background:#450a0a;color:#fca5a5;padding:4px 8px;border-radius:6px;font-size:0.75rem;cursor:pointer">⚠️ ${m.errors_count}</button>`
|
style="background:#450a0a;color:#fca5a5;padding:4px 8px;border-radius:6px;font-size:0.75rem;cursor:pointer">⚠️ ${m.errors_count}</button>`
|
||||||
: ''}
|
: ''}
|
||||||
|
${!isActive
|
||||||
|
? `<button onclick="event.stopPropagation(); openSwitchSourceModal('${u}')"
|
||||||
|
title="Сменить источник"
|
||||||
|
style="background:#1e3a2e;color:#6ee7b7;padding:4px 8px;border-radius:6px;font-size:0.75rem;cursor:pointer">↔ Источник</button>`
|
||||||
|
: ''}
|
||||||
${isActive
|
${isActive
|
||||||
? `<button onclick="stopManga('${u}')" class="btn-danger" title="Остановить" style="background:#7c2d12;color:#fdba74">⏸</button>`
|
? `<button onclick="stopManga('${u}')" class="btn-danger" title="Остановить" style="background:#7c2d12;color:#fdba74">⏸</button>`
|
||||||
: ''}
|
: ''}
|
||||||
@@ -1282,6 +1630,7 @@ function _patchRow(el, m) {
|
|||||||
|
|
||||||
set('status', statusPill(m.status));
|
set('status', statusPill(m.status));
|
||||||
set('pubstatus', pubStatusPill(m.pub_status || 'unknown'));
|
set('pubstatus', pubStatusPill(m.pub_status || 'unknown'));
|
||||||
|
set('source', _sourceBadge(m.source));
|
||||||
set('title', escHtml(m.title || m.url));
|
set('title', escHtml(m.title || m.url));
|
||||||
set('chcount', `📖 ${chDone}/${chTotal} глав`);
|
set('chcount', `📖 ${chDone}/${chTotal} глав`);
|
||||||
set('size', `💾 ${m.size_human || '—'}`);
|
set('size', `💾 ${m.size_human || '—'}`);
|
||||||
@@ -1661,6 +2010,7 @@ async function saveRenameFolder() {
|
|||||||
async function initApp() {
|
async function initApp() {
|
||||||
_initDeleteModal();
|
_initDeleteModal();
|
||||||
await loadStats();
|
await loadStats();
|
||||||
|
await loadSources();
|
||||||
connectWS();
|
connectWS();
|
||||||
// Загружаем список манги
|
// Загружаем список манги
|
||||||
try {
|
try {
|
||||||
|
|||||||
217
src/api.py
217
src/api.py
@@ -20,6 +20,7 @@ from loguru import logger
|
|||||||
from .state import StateDB
|
from .state import StateDB
|
||||||
from .worker import download_manga, check_for_updates
|
from .worker import download_manga, check_for_updates
|
||||||
from .exporter import patch_meta, MangaMeta
|
from .exporter import patch_meta, MangaMeta
|
||||||
|
from .sources import registry, get_source_for_url, extract_domain
|
||||||
|
|
||||||
OUTPUT_DIR = Path("/app/output")
|
OUTPUT_DIR = Path("/app/output")
|
||||||
FRONTEND_DIR = Path("/app/frontend")
|
FRONTEND_DIR = Path("/app/frontend")
|
||||||
@@ -172,6 +173,16 @@ async def _queue_worker_loop():
|
|||||||
|
|
||||||
@app.on_event("startup")
|
@app.on_event("startup")
|
||||||
async def startup_event():
|
async def startup_event():
|
||||||
|
# Синхронизируем источники с кодом и мигрируем существующие манги
|
||||||
|
_db = StateDB()
|
||||||
|
try:
|
||||||
|
_db.sync_sources(registry)
|
||||||
|
migrated = _db.migrate_manga_sources()
|
||||||
|
if migrated:
|
||||||
|
logger.info("Авто-миграция: проставлен source_id для {} манг", migrated)
|
||||||
|
finally:
|
||||||
|
_db.close()
|
||||||
|
|
||||||
asyncio.create_task(queue_worker())
|
asyncio.create_task(queue_worker())
|
||||||
asyncio.create_task(update_scheduler())
|
asyncio.create_task(update_scheduler())
|
||||||
# Восстанавливаем очередь из БД (незавершённые задачи)
|
# Восстанавливаем очередь из БД (незавершённые задачи)
|
||||||
@@ -365,6 +376,16 @@ def _enrich_manga(m: dict, db: StateDB) -> dict:
|
|||||||
AND pages_total > 0 AND pages_done < pages_total""",
|
AND pages_total > 0 AND pages_done < pages_total""",
|
||||||
(m["url"],)
|
(m["url"],)
|
||||||
).fetchone()[0]
|
).fetchone()[0]
|
||||||
|
|
||||||
|
# Источник
|
||||||
|
source_info = None
|
||||||
|
if m.get("source_id"):
|
||||||
|
src = db.get_source_by_id(m["source_id"])
|
||||||
|
if src:
|
||||||
|
source_info = {"id": src["id"], "slug": src["slug"], "display_name": src["display_name"]}
|
||||||
|
else:
|
||||||
|
source_info = {"id": m["source_id"], "slug": "unknown", "display_name": "Источник недоступен"}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
**m,
|
**m,
|
||||||
"chapters_done": ch_done_count,
|
"chapters_done": ch_done_count,
|
||||||
@@ -375,6 +396,7 @@ def _enrich_manga(m: dict, db: StateDB) -> dict:
|
|||||||
"errors_count": ch_failed + ch_partial,
|
"errors_count": ch_failed + ch_partial,
|
||||||
"started_at": m.get("started_at"),
|
"started_at": m.get("started_at"),
|
||||||
"finished_at": m.get("finished_at"),
|
"finished_at": m.get("finished_at"),
|
||||||
|
"source": source_info,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -454,6 +476,7 @@ def _manga_detail(manga: dict, db: StateDB) -> dict:
|
|||||||
class AddMangaRequest(BaseModel):
|
class AddMangaRequest(BaseModel):
|
||||||
urls: List[str]
|
urls: List[str]
|
||||||
format: str = "cbz"
|
format: str = "cbz"
|
||||||
|
source_id: Optional[int] = None # явный выбор источника (для неизвестных доменов)
|
||||||
|
|
||||||
|
|
||||||
# ── Auth API ─────────────────────────────────
|
# ── Auth API ─────────────────────────────────
|
||||||
@@ -536,7 +559,24 @@ async def add_to_queue(body: AddMangaRequest):
|
|||||||
url = url.strip()
|
url = url.strip()
|
||||||
if not url:
|
if not url:
|
||||||
continue
|
continue
|
||||||
is_new = db.add_manga(url, body.format)
|
|
||||||
|
# Определяем source_id: явный из запроса или авто по домену
|
||||||
|
source_id = body.source_id
|
||||||
|
if source_id is None:
|
||||||
|
domain = extract_domain(url)
|
||||||
|
source_row = db.get_source_by_domain(domain)
|
||||||
|
if source_row:
|
||||||
|
source_id = source_row["id"]
|
||||||
|
|
||||||
|
# Если источник указан явно — привязываем домен к нему
|
||||||
|
if body.source_id is not None:
|
||||||
|
domain = extract_domain(url)
|
||||||
|
existing = db.get_source_by_domain(domain)
|
||||||
|
if existing and existing["id"] != body.source_id:
|
||||||
|
db.remove_domain(existing["id"], domain)
|
||||||
|
db.add_domain(body.source_id, domain)
|
||||||
|
|
||||||
|
is_new = db.add_manga(url, body.format, source_id=source_id)
|
||||||
if is_new:
|
if is_new:
|
||||||
await download_queue.put({"url": url, "fmt": body.format})
|
await download_queue.put({"url": url, "fmt": body.format})
|
||||||
added.append(url)
|
added.append(url)
|
||||||
@@ -544,9 +584,9 @@ async def add_to_queue(body: AddMangaRequest):
|
|||||||
"type": "manga_queued",
|
"type": "manga_queued",
|
||||||
"url": url,
|
"url": url,
|
||||||
"format": body.format,
|
"format": body.format,
|
||||||
|
"source_id": source_id,
|
||||||
})
|
})
|
||||||
await _broadcast_queue_positions()
|
await _broadcast_queue_positions()
|
||||||
# Запускаем фоновую задачу предпросмотра (без Chromium — быстро)
|
|
||||||
asyncio.create_task(_fetch_preview(url))
|
asyncio.create_task(_fetch_preview(url))
|
||||||
else:
|
else:
|
||||||
skipped.append(url)
|
skipped.append(url)
|
||||||
@@ -559,15 +599,27 @@ async def _fetch_preview(url: str):
|
|||||||
"""Быстро получает название и количество глав сразу после добавления."""
|
"""Быстро получает название и количество глав сразу после добавления."""
|
||||||
try:
|
try:
|
||||||
from .browser import BrowserManager
|
from .browser import BrowserManager
|
||||||
from .scraper import get_manga_info
|
|
||||||
async with BrowserManager(headless=True) as bm:
|
|
||||||
_, page = await bm.new_page()
|
|
||||||
manga = await get_manga_info(page, url)
|
|
||||||
if not manga:
|
|
||||||
return
|
|
||||||
db = StateDB()
|
db = StateDB()
|
||||||
try:
|
try:
|
||||||
db.update_manga_info(
|
source = get_source_for_url(url, db)
|
||||||
|
if source is None:
|
||||||
|
manga_row = db.get_manga(url)
|
||||||
|
if manga_row and manga_row.get("source_id"):
|
||||||
|
source = registry.get_by_db_id(manga_row["source_id"], db)
|
||||||
|
finally:
|
||||||
|
db.close()
|
||||||
|
|
||||||
|
if source is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
async with BrowserManager(headless=True) as bm:
|
||||||
|
_, page = await bm.new_page()
|
||||||
|
manga = await source.get_manga_info(page, url)
|
||||||
|
if not manga:
|
||||||
|
return
|
||||||
|
db2 = StateDB()
|
||||||
|
try:
|
||||||
|
db2.update_manga_info(
|
||||||
url,
|
url,
|
||||||
title=manga.title_ru or manga.title,
|
title=manga.title_ru or manga.title,
|
||||||
chapters_total=len(manga.chapters),
|
chapters_total=len(manga.chapters),
|
||||||
@@ -576,7 +628,7 @@ async def _fetch_preview(url: str):
|
|||||||
pub_status=manga.pub_status,
|
pub_status=manga.pub_status,
|
||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
db.close()
|
db2.close()
|
||||||
await ws_manager.broadcast({
|
await ws_manager.broadcast({
|
||||||
"type": "manga_preview",
|
"type": "manga_preview",
|
||||||
"url": url,
|
"url": url,
|
||||||
@@ -996,6 +1048,151 @@ async def delete_manga(url: str, delete_files: bool = False):
|
|||||||
db.close()
|
db.close()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# ── Sources API ───────────────────────────────
|
||||||
|
|
||||||
|
class DomainAdd(BaseModel):
|
||||||
|
domain: str
|
||||||
|
|
||||||
|
|
||||||
|
class SwitchSourceRequest(BaseModel):
|
||||||
|
url: str
|
||||||
|
source_id: int
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/sources")
|
||||||
|
async def list_sources():
|
||||||
|
"""Список всех источников с доменами."""
|
||||||
|
db = StateDB()
|
||||||
|
try:
|
||||||
|
return db.get_all_sources()
|
||||||
|
finally:
|
||||||
|
db.close()
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/resolve-source")
|
||||||
|
async def resolve_source(url: str):
|
||||||
|
"""Определить источник по URL. Возвращает {id, slug, display_name} или null."""
|
||||||
|
db = StateDB()
|
||||||
|
try:
|
||||||
|
domain = extract_domain(url)
|
||||||
|
row = db.get_source_by_domain(domain)
|
||||||
|
if not row:
|
||||||
|
return {"source": None, "domain": domain}
|
||||||
|
return {
|
||||||
|
"source": {
|
||||||
|
"id": row["id"],
|
||||||
|
"slug": row["slug"],
|
||||||
|
"display_name": row["display_name"],
|
||||||
|
},
|
||||||
|
"domain": domain,
|
||||||
|
}
|
||||||
|
finally:
|
||||||
|
db.close()
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/sources/{source_id}/domains")
|
||||||
|
async def add_domain(source_id: int, body: DomainAdd):
|
||||||
|
"""Добавить домен к источнику."""
|
||||||
|
db = StateDB()
|
||||||
|
try:
|
||||||
|
source = db.get_source_by_id(source_id)
|
||||||
|
if not source:
|
||||||
|
raise HTTPException(status_code=404, detail="Источник не найден")
|
||||||
|
domain = body.domain.lower().strip()
|
||||||
|
if not domain:
|
||||||
|
raise HTTPException(status_code=400, detail="Домен не может быть пустым")
|
||||||
|
# Проверяем не занят ли домен другим источником
|
||||||
|
existing = db.get_source_by_domain(domain)
|
||||||
|
if existing and existing["id"] != source_id:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=409,
|
||||||
|
detail=f"Домен уже привязан к источнику «{existing['display_name']}»"
|
||||||
|
)
|
||||||
|
ok = db.add_domain(source_id, domain)
|
||||||
|
if not ok:
|
||||||
|
raise HTTPException(status_code=409, detail="Домен уже существует")
|
||||||
|
await ws_manager.broadcast({
|
||||||
|
"type": "source_domain_added",
|
||||||
|
"source_id": source_id,
|
||||||
|
"domain": domain,
|
||||||
|
})
|
||||||
|
return {"ok": True, "domain": domain}
|
||||||
|
finally:
|
||||||
|
db.close()
|
||||||
|
|
||||||
|
|
||||||
|
@app.delete("/api/sources/{source_id}/domains/{domain:path}")
|
||||||
|
async def remove_domain(source_id: int, domain: str):
|
||||||
|
"""Удалить домен у источника."""
|
||||||
|
db = StateDB()
|
||||||
|
try:
|
||||||
|
source = db.get_source_by_id(source_id)
|
||||||
|
if not source:
|
||||||
|
raise HTTPException(status_code=404, detail="Источник не найден")
|
||||||
|
ok = db.remove_domain(source_id, domain)
|
||||||
|
if not ok:
|
||||||
|
raise HTTPException(status_code=404, detail="Домен не найден")
|
||||||
|
await ws_manager.broadcast({
|
||||||
|
"type": "source_domain_removed",
|
||||||
|
"source_id": source_id,
|
||||||
|
"domain": domain,
|
||||||
|
})
|
||||||
|
return {"ok": True}
|
||||||
|
finally:
|
||||||
|
db.close()
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/mangas/switch-source")
|
||||||
|
async def switch_manga_source(body: SwitchSourceRequest):
|
||||||
|
"""Сменить источник у манги + перепривязать домен."""
|
||||||
|
db = StateDB()
|
||||||
|
try:
|
||||||
|
manga = db.get_manga(body.url)
|
||||||
|
if not manga:
|
||||||
|
raise HTTPException(status_code=404, detail="Манга не найдена")
|
||||||
|
if manga["status"] == "downloading" and body.url in active_tasks:
|
||||||
|
raise HTTPException(status_code=400, detail="Нельзя сменить источник во время загрузки")
|
||||||
|
|
||||||
|
new_source = db.get_source_by_id(body.source_id)
|
||||||
|
if not new_source:
|
||||||
|
raise HTTPException(status_code=404, detail="Источник не найден")
|
||||||
|
|
||||||
|
old_source_id = manga.get("source_id")
|
||||||
|
domain = extract_domain(body.url)
|
||||||
|
|
||||||
|
# Перепривязываем домен
|
||||||
|
if domain:
|
||||||
|
existing_domain = db.get_source_by_domain(domain)
|
||||||
|
if existing_domain and existing_domain["id"] != body.source_id:
|
||||||
|
db.remove_domain(existing_domain["id"], domain)
|
||||||
|
db.add_domain(body.source_id, domain)
|
||||||
|
|
||||||
|
# Меняем источник у манги
|
||||||
|
db.set_manga_source(body.url, body.source_id)
|
||||||
|
|
||||||
|
# Сбрасываем failed/partial главы → pending
|
||||||
|
reset_count = db.reset_failed_chapters(body.url)
|
||||||
|
|
||||||
|
await ws_manager.broadcast({
|
||||||
|
"type": "source_switched",
|
||||||
|
"url": body.url,
|
||||||
|
"old_source_id": old_source_id,
|
||||||
|
"new_source_id": body.source_id,
|
||||||
|
"new_source_name": new_source["display_name"],
|
||||||
|
"domain_rebound": bool(domain),
|
||||||
|
"chapters_reset": reset_count,
|
||||||
|
})
|
||||||
|
return {
|
||||||
|
"ok": True,
|
||||||
|
"source_id": body.source_id,
|
||||||
|
"source_name": new_source["display_name"],
|
||||||
|
"chapters_reset": reset_count,
|
||||||
|
}
|
||||||
|
finally:
|
||||||
|
db.close()
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/stats")
|
@app.get("/api/stats")
|
||||||
async def global_stats():
|
async def global_stats():
|
||||||
db = StateDB()
|
db = StateDB()
|
||||||
|
|||||||
668
src/scraper.py
668
src/scraper.py
@@ -1,665 +1,19 @@
|
|||||||
"""
|
"""
|
||||||
Парсер readmanga.ru: список глав и URL/байты изображений внутри главы.
|
Обратно-совместимый shim: делегирует вызовы ReadmangaSource.
|
||||||
|
Не используйте напрямую в новом коде — используйте src.sources.registry.
|
||||||
"""
|
"""
|
||||||
import asyncio
|
from .sources.base import Chapter, MangaInfo # noqa: F401 — реэкспорт для импортёров
|
||||||
import re
|
from .sources.readmanga import ReadmangaSource
|
||||||
import time
|
|
||||||
from dataclasses import dataclass, field
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
from loguru import logger
|
_instance = ReadmangaSource()
|
||||||
from playwright.async_api import Page
|
|
||||||
|
|
||||||
from .browser import BrowserManager
|
|
||||||
|
|
||||||
|
|
||||||
# ──────────────────────────────────────────────
|
async def get_manga_info(page, url):
|
||||||
# Модели данных
|
return await _instance.get_manga_info(page, url)
|
||||||
# ──────────────────────────────────────────────
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Chapter:
|
|
||||||
title: str
|
|
||||||
url: str
|
|
||||||
number: float = 0.0
|
|
||||||
volume: int = 0
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
async def get_chapter_images_and_download(page, chapter_url, dest_dir,
|
||||||
class MangaInfo:
|
manga_url=None, on_page=None):
|
||||||
title: str
|
return await _instance.get_chapter_images_and_download(
|
||||||
url: str
|
page, chapter_url, dest_dir, manga_url=manga_url, on_page=on_page
|
||||||
chapters: list[Chapter] = field(default_factory=list)
|
|
||||||
pub_status: str = "unknown" # completed / ongoing / unknown
|
|
||||||
title_ru: str = "" # Только русский тайтл (для папки)
|
|
||||||
title_full: str = "" # Полный тайтл как на странице
|
|
||||||
description: str = "" # Описание/синопсис
|
|
||||||
genres: list[str] = field(default_factory=list) # Жанры
|
|
||||||
|
|
||||||
|
|
||||||
# ──────────────────────────────────────────────
|
|
||||||
# Страница манги — список глав
|
|
||||||
# ──────────────────────────────────────────────
|
|
||||||
|
|
||||||
async def get_manga_info(page: Page, url: str) -> Optional[MangaInfo]:
|
|
||||||
"""Открывает страницу манги и возвращает список всех глав."""
|
|
||||||
logger.info("Загружаем страницу манги: {}", url)
|
|
||||||
ok = await _navigate(page, url)
|
|
||||||
if not ok:
|
|
||||||
return None
|
|
||||||
|
|
||||||
title_full = await page.title()
|
|
||||||
title_full = re.sub(r"\s*[-–|].*$", "", title_full).strip()
|
|
||||||
|
|
||||||
# Пробуем взять русский тайтл напрямую из DOM
|
|
||||||
title_ru = await _extract_ru_title_from_dom(page)
|
|
||||||
if not title_ru:
|
|
||||||
title_ru = _parse_ru_title(title_full)
|
|
||||||
|
|
||||||
logger.info("Манга: {} | ru: {}", title_full, title_ru)
|
|
||||||
|
|
||||||
pub_status = await _extract_pub_status(page)
|
|
||||||
logger.info("Статус выпуска: {}", pub_status)
|
|
||||||
|
|
||||||
description = await _extract_description(page)
|
|
||||||
genres = await _extract_genres(page)
|
|
||||||
|
|
||||||
await _expand_chapters(page)
|
|
||||||
chapters = await _extract_chapters(page)
|
|
||||||
if not chapters:
|
|
||||||
chapters = await _extract_chapters_alt(page)
|
|
||||||
|
|
||||||
logger.info("Найдено глав: {}", len(chapters))
|
|
||||||
return MangaInfo(
|
|
||||||
title=title_ru or title_full,
|
|
||||||
url=url,
|
|
||||||
chapters=chapters,
|
|
||||||
pub_status=pub_status,
|
|
||||||
title_ru=title_ru,
|
|
||||||
title_full=title_full,
|
|
||||||
description=description,
|
|
||||||
genres=genres,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
async def _extract_ru_title_from_dom(page: Page) -> str:
|
|
||||||
"""Ищет русский тайтл в структуре страницы readmanga."""
|
|
||||||
try:
|
|
||||||
result = await page.evaluate("""
|
|
||||||
() => {
|
|
||||||
// readmanga: основной тайтл в span.name внутри .names
|
|
||||||
const selectors = [
|
|
||||||
'.names .name',
|
|
||||||
'h1.manga-title',
|
|
||||||
'h1 .name',
|
|
||||||
'.name-block .name',
|
|
||||||
];
|
|
||||||
for (const sel of selectors) {
|
|
||||||
const el = document.querySelector(sel);
|
|
||||||
if (el && el.textContent.trim()) return el.textContent.trim();
|
|
||||||
}
|
|
||||||
return '';
|
|
||||||
}
|
|
||||||
""")
|
|
||||||
return (result or "").strip()
|
|
||||||
except Exception:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_ru_title(full_title: str) -> str:
|
|
||||||
"""Извлекает русский тайтл из полной строки тайтла.
|
|
||||||
|
|
||||||
Примеры:
|
|
||||||
'Манга Режим — АД. Хардкорный геймер ... (Hellmode)' → 'Режим — АД. Хардкорный геймер ...'
|
|
||||||
'Манга Магическая битва (Sorcery Fight) Гэгэ онлайн' → 'Магическая битва'
|
|
||||||
'Авантюрист Monster Eater Adventurer' → 'Авантюрист'
|
|
||||||
"""
|
|
||||||
t = full_title.strip()
|
|
||||||
# Убираем префикс "Манга "
|
|
||||||
t = re.sub(r'^Манга\s+', '', t).strip()
|
|
||||||
# Берём только до первой скобки (начало английского тайтла)
|
|
||||||
t = re.split(r'\s*[\(\[]', t)[0].strip()
|
|
||||||
# Убираем суффикс " онлайн"
|
|
||||||
t = re.sub(r'\s+онлайн\s*$', '', t, flags=re.IGNORECASE).strip()
|
|
||||||
|
|
||||||
# Обрезаем хвост из латинских слов.
|
|
||||||
# Правило: стоп только на токене содержащем латиницу (a-zA-Z).
|
|
||||||
# Пунктуация между кириллическими словами (—, –, ., :, !) — сохраняем.
|
|
||||||
words = t.split()
|
|
||||||
result = []
|
|
||||||
for w in words:
|
|
||||||
if re.search(r'[а-яёА-ЯЁ]', w):
|
|
||||||
result.append(w)
|
|
||||||
elif re.search(r'[a-zA-Z]', w):
|
|
||||||
# Первое латинское слово после кириллических — обрезаем здесь
|
|
||||||
if result:
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
# Чисто пунктуационный токен (—, –, ., :, …)
|
|
||||||
# Добавляем только если уже есть кириллические слова (связка внутри)
|
|
||||||
if result:
|
|
||||||
result.append(w)
|
|
||||||
|
|
||||||
# Убираем висячую пунктуацию в конце (если последнее слово — не кириллица)
|
|
||||||
while result and not re.search(r'[а-яёА-ЯЁ]', result[-1]):
|
|
||||||
result.pop()
|
|
||||||
|
|
||||||
if result:
|
|
||||||
t = ' '.join(result)
|
|
||||||
return t
|
|
||||||
|
|
||||||
|
|
||||||
async def _extract_pub_status(page: Page) -> str:
|
|
||||||
"""Извлекает статус выпуска: completed / ongoing / unknown."""
|
|
||||||
try:
|
|
||||||
result = await page.evaluate("""
|
|
||||||
() => {
|
|
||||||
// readmanga хранит статус в .elem_status .value или похожих блоках
|
|
||||||
const statusSelectors = [
|
|
||||||
'.elem_status .value',
|
|
||||||
'.manga-info .status',
|
|
||||||
'[class*="status"] .value',
|
|
||||||
'.property .status',
|
|
||||||
];
|
|
||||||
for (const sel of statusSelectors) {
|
|
||||||
const el = document.querySelector(sel);
|
|
||||||
if (el) {
|
|
||||||
const t = el.textContent.toLowerCase();
|
|
||||||
if (t.includes('завершён') || t.includes('завершен') || t.includes('complete')) return 'completed';
|
|
||||||
if (t.includes('продолжает') || t.includes('ongoing')) return 'ongoing';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Fallback: сканируем весь текст страницы
|
|
||||||
const bodyText = document.body ? document.body.innerText.toLowerCase() : '';
|
|
||||||
if (bodyText.includes('выпуск завершён') || bodyText.includes('выпуск завершен')) return 'completed';
|
|
||||||
if (bodyText.includes('продолжается')) return 'ongoing';
|
|
||||||
return 'unknown';
|
|
||||||
}
|
|
||||||
""")
|
|
||||||
return result or "unknown"
|
|
||||||
except Exception:
|
|
||||||
return "unknown"
|
|
||||||
|
|
||||||
|
|
||||||
async def _extract_description(page: Page) -> str:
|
|
||||||
"""Извлекает описание/синопсис манги."""
|
|
||||||
try:
|
|
||||||
result = await page.evaluate("""
|
|
||||||
() => {
|
|
||||||
const selectors = [
|
|
||||||
'.manga-description',
|
|
||||||
'.elem_descr .value',
|
|
||||||
'#tab-description .description-text',
|
|
||||||
'.description',
|
|
||||||
'[itemprop="description"]',
|
|
||||||
];
|
|
||||||
for (const sel of selectors) {
|
|
||||||
const el = document.querySelector(sel);
|
|
||||||
if (el && el.textContent.trim()) return el.textContent.trim();
|
|
||||||
}
|
|
||||||
return '';
|
|
||||||
}
|
|
||||||
""")
|
|
||||||
return (result or "").strip()[:2000] # обрезаем до 2000 символов
|
|
||||||
except Exception:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
|
|
||||||
async def _extract_genres(page: Page) -> list[str]:
|
|
||||||
"""Извлекает список жанров манги."""
|
|
||||||
try:
|
|
||||||
result = await page.evaluate("""
|
|
||||||
() => {
|
|
||||||
const selectors = [
|
|
||||||
'.elem_genre .value a',
|
|
||||||
'.genres a',
|
|
||||||
'[itemprop="genre"]',
|
|
||||||
'.genre-list a',
|
|
||||||
];
|
|
||||||
for (const sel of selectors) {
|
|
||||||
const els = document.querySelectorAll(sel);
|
|
||||||
if (els.length) return Array.from(els).map(e => e.textContent.trim()).filter(Boolean);
|
|
||||||
}
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
""")
|
|
||||||
return result or []
|
|
||||||
except Exception:
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
async def _navigate(page: Page, url: str, retries: int = 3,
|
|
||||||
referer: str | None = None) -> bool:
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
if referer is None:
|
|
||||||
p = urlparse(url)
|
|
||||||
referer = f"{p.scheme}://{p.netloc}/"
|
|
||||||
for attempt in range(1, retries + 1):
|
|
||||||
try:
|
|
||||||
resp = await page.goto(url, wait_until="domcontentloaded",
|
|
||||||
timeout=60_000, referer=referer)
|
|
||||||
if resp and resp.status >= 400:
|
|
||||||
logger.warning("Попытка {}/{}: HTTP {}", attempt, retries, resp.status)
|
|
||||||
await asyncio.sleep(3 * attempt)
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
await page.wait_for_load_state("networkidle", timeout=10_000)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
return True
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("Попытка {}/{}: {}", attempt, retries, e)
|
|
||||||
await asyncio.sleep(3 * attempt)
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
async def _expand_chapters(page: Page):
|
|
||||||
for sel in ["a.chapter-link.all", "button:has-text('Все главы')",
|
|
||||||
"a:has-text('Все главы')"]:
|
|
||||||
try:
|
|
||||||
el = page.locator(sel).first
|
|
||||||
if await el.is_visible(timeout=2000):
|
|
||||||
await el.click()
|
|
||||||
await page.wait_for_load_state("networkidle", timeout=10_000)
|
|
||||||
return
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
async def _extract_chapters(page: Page) -> list[Chapter]:
|
|
||||||
"""Основной парсер: #chapters-list → tr.item-row → td[data-num] a.chapter-link"""
|
|
||||||
rows = await page.query_selector_all("#chapters-list tr.item-row")
|
|
||||||
chapters = []
|
|
||||||
for row in rows:
|
|
||||||
link = await row.query_selector("td[class*='item-title'] a")
|
|
||||||
if not link:
|
|
||||||
continue
|
|
||||||
href = await link.get_attribute("href") or ""
|
|
||||||
text = (await link.inner_text()).strip()
|
|
||||||
if not href:
|
|
||||||
continue
|
|
||||||
td = await row.query_selector("td[data-num]")
|
|
||||||
vol = int((await td.get_attribute("data-vol") or "0")) if td else 0
|
|
||||||
num_raw = int((await td.get_attribute("data-num") or "0")) if td else 0
|
|
||||||
number = num_raw / 10.0
|
|
||||||
full_url = href if href.startswith("http") else _base_url(page.url) + href
|
|
||||||
chapters.append(Chapter(title=text, url=full_url, number=number, volume=vol))
|
|
||||||
return chapters
|
|
||||||
|
|
||||||
|
|
||||||
async def _extract_chapters_alt(page: Page) -> list[Chapter]:
|
|
||||||
result = await page.evaluate("""
|
|
||||||
() => {
|
|
||||||
const links = Array.from(document.querySelectorAll('a[href*="/vol"]'));
|
|
||||||
return links.map(a => ({ href: a.href, text: a.textContent.trim() }))
|
|
||||||
.filter(x => x.href && x.text);
|
|
||||||
}
|
|
||||||
""")
|
|
||||||
return [Chapter(title=x["text"], url=x["href"],
|
|
||||||
number=_parse_num(x["text"]), volume=_parse_vol(x["text"]))
|
|
||||||
for x in result]
|
|
||||||
|
|
||||||
|
|
||||||
def _base_url(url: str) -> str:
|
|
||||||
m = re.match(r"(https?://[^/]+)", url)
|
|
||||||
return m.group(1) if m else "https://readmanga.ru"
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_num(text: str) -> float:
|
|
||||||
m = re.search(r"[\d]+(?:[.,]\d+)?", text.replace(",", "."))
|
|
||||||
return float(m.group()) if m else 0.0
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_vol(text: str) -> int:
|
|
||||||
m = re.search(r"Том\s+(\d+)", text, re.IGNORECASE)
|
|
||||||
return int(m.group(1)) if m else 0
|
|
||||||
|
|
||||||
|
|
||||||
# ──────────────────────────────────────────────
|
|
||||||
# Страница главы — получение URL изображений
|
|
||||||
# ──────────────────────────────────────────────
|
|
||||||
|
|
||||||
async def _extract_images_from_js(page: Page) -> list[str]:
|
|
||||||
"""
|
|
||||||
Извлекает URL из rm_h.readerInit(chapterInfo, [[base, '', path, w, h], ...]).
|
|
||||||
Считает скобки для точного захвата массива.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
result = await page.evaluate("""
|
|
||||||
() => {
|
|
||||||
for (const s of document.querySelectorAll('script')) {
|
|
||||||
const text = s.textContent || '';
|
|
||||||
const mi = text.indexOf('readerInit');
|
|
||||||
if (mi === -1) continue;
|
|
||||||
const ai = text.indexOf('[', mi);
|
|
||||||
if (ai === -1) continue;
|
|
||||||
let depth = 0, end = -1;
|
|
||||||
for (let i = ai; i < text.length; i++) {
|
|
||||||
if (text[i] === '[') depth++;
|
|
||||||
else if (text[i] === ']') { depth--; if (!depth) { end = i+1; break; } }
|
|
||||||
}
|
|
||||||
if (end === -1) continue;
|
|
||||||
try {
|
|
||||||
const arr = eval(text.slice(ai, end));
|
|
||||||
if (Array.isArray(arr) && arr.length)
|
|
||||||
return arr.map(item => Array.isArray(item) && item.length >= 3
|
|
||||||
? item[0] + item[2] : null).filter(Boolean);
|
|
||||||
} catch(e) {}
|
|
||||||
}
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
""")
|
|
||||||
if result:
|
|
||||||
logger.debug("JS readerInit нашёл {} изображений", len(result))
|
|
||||||
return result or []
|
|
||||||
except Exception as e:
|
|
||||||
logger.debug("JS-метод не сработал: {}", e)
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
async def _extract_images_from_dom(page: Page) -> list[str]:
|
|
||||||
try:
|
|
||||||
result = await page.evaluate("""
|
|
||||||
() => {
|
|
||||||
for (const sel of ['img.manga-page', '.page-image img', '#mangaReader img', 'img[data-src]']) {
|
|
||||||
const found = Array.from(document.querySelectorAll(sel));
|
|
||||||
if (found.length) return found.map(i => i.src || i.dataset.src).filter(Boolean);
|
|
||||||
}
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
""")
|
|
||||||
return result or []
|
|
||||||
except Exception:
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
def _get_ext(url: str) -> str:
|
|
||||||
m = re.search(r"\.(jpg|jpeg|png|webp)(\?|$)", url, re.IGNORECASE)
|
|
||||||
if m:
|
|
||||||
ext = m.group(1).lower()
|
|
||||||
return ".jpg" if ext == "jpeg" else f".{ext}"
|
|
||||||
return ".jpg"
|
|
||||||
|
|
||||||
|
|
||||||
# ──────────────────────────────────────────────
|
|
||||||
# Скачивание главы
|
|
||||||
# ──────────────────────────────────────────────
|
|
||||||
|
|
||||||
async def get_chapter_images_and_download(
|
|
||||||
page: Page,
|
|
||||||
chapter_url: str,
|
|
||||||
dest_dir: Path,
|
|
||||||
manga_url: str | None = None,
|
|
||||||
on_page: object = None,
|
|
||||||
) -> list[Path]:
|
|
||||||
"""
|
|
||||||
1. Открывает страницу главы (устанавливает DDoS-Guard cookies для CDN).
|
|
||||||
2. Извлекает список URL из readerInit.
|
|
||||||
3. Перехватывает img-запросы через page.route() + route.fetch()
|
|
||||||
(браузерный стек — правильные Sec-Fetch-* заголовки, cookies).
|
|
||||||
4. Пролистывает читалку клавишей ArrowRight чтобы загрузить все страницы.
|
|
||||||
5. Retry для страниц с timeout через JS fetch.
|
|
||||||
"""
|
|
||||||
t_start = time.monotonic()
|
|
||||||
ch_id = chapter_url.split("/")[-1] # короткий идентификатор для логов
|
|
||||||
logger.info("[{}] Загружаем главу: {}", ch_id, chapter_url)
|
|
||||||
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
parsed = urlparse(chapter_url)
|
|
||||||
parts = parsed.path.strip("/").split("/")
|
|
||||||
manga_slug = parts[0] if parts else ""
|
|
||||||
referer = manga_url or f"{parsed.scheme}://{parsed.netloc}/{manga_slug}"
|
|
||||||
|
|
||||||
load_url = chapter_url + ("?mtr=1" if "?" not in chapter_url else "&mtr=1")
|
|
||||||
dest_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
|
|
||||||
def _base(u: str) -> str:
|
|
||||||
return u.split("?")[0]
|
|
||||||
|
|
||||||
# Баннеры/рекламные изображения — игнорируем без логирования
|
|
||||||
BANNER_RE = re.compile(r"466_p\.|570_p\.|banner|advert", re.I)
|
|
||||||
|
|
||||||
def _is_manga_image(url: str) -> bool:
|
|
||||||
base = _base(url)
|
|
||||||
if not re.search(r"\.(jpg|jpeg|png|webp)(\?|$)", base, re.I):
|
|
||||||
return False
|
|
||||||
if "resrmr." in url or "/static/" in url:
|
|
||||||
return False
|
|
||||||
return bool(re.search(r"one-way\.work|staticfa\.|rm\.one-way|cdnmanga|reimg", url, re.I))
|
|
||||||
|
|
||||||
captured: dict[str, bytes] = {} # base_url → bytes
|
|
||||||
route_errors: dict[str, str] = {} # base_url → текст ошибки
|
|
||||||
route_statuses: dict[str, int] = {} # base_url → HTTP status (не 200/206)
|
|
||||||
lock = asyncio.Lock()
|
|
||||||
|
|
||||||
async def route_handler(route, request):
|
|
||||||
url = request.url
|
|
||||||
base = _base(url)
|
|
||||||
if not _is_manga_image(url):
|
|
||||||
await route.continue_()
|
|
||||||
return
|
|
||||||
if BANNER_RE.search(base):
|
|
||||||
await route.continue_()
|
|
||||||
return
|
|
||||||
async with lock:
|
|
||||||
already = base in captured
|
|
||||||
if already:
|
|
||||||
await route.continue_()
|
|
||||||
return
|
|
||||||
fname = base.split("/")[-1]
|
|
||||||
try:
|
|
||||||
response = await route.fetch()
|
|
||||||
status = response.status
|
|
||||||
body = await response.body()
|
|
||||||
if body and len(body) > 500 and status in (200, 206):
|
|
||||||
async with lock:
|
|
||||||
if base not in captured:
|
|
||||||
captured[base] = body
|
|
||||||
logger.debug("[{}] ✓ {}: {} байт", ch_id, fname, len(body))
|
|
||||||
if on_page:
|
|
||||||
try:
|
|
||||||
asyncio.ensure_future(on_page(0, 0))
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
async with lock:
|
|
||||||
route_statuses[base] = status
|
|
||||||
if status not in (200, 206):
|
|
||||||
logger.warning("[{}] CDN HTTP {} для '{}' | {}",
|
|
||||||
ch_id, status, fname, base[-70:])
|
|
||||||
else:
|
|
||||||
logger.warning("[{}] Слишком мал ответ ({} байт) для '{}'",
|
|
||||||
ch_id, len(body), fname)
|
|
||||||
await route.fulfill(response=response)
|
|
||||||
except Exception as e:
|
|
||||||
err = str(e)
|
|
||||||
async with lock:
|
|
||||||
route_errors[base] = err
|
|
||||||
is_timeout = "timeout" in err.lower()
|
|
||||||
level = logger.warning if is_timeout else logger.warning
|
|
||||||
level("[{}] route.fetch {} '{}': {}",
|
|
||||||
ch_id, "timeout" if is_timeout else "ошибка", fname, err[:150])
|
|
||||||
try:
|
|
||||||
await route.continue_()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
await page.route("**/*", route_handler)
|
|
||||||
|
|
||||||
# 1. Открываем главу
|
|
||||||
ok = await _navigate(page, load_url, referer=referer)
|
|
||||||
if not ok:
|
|
||||||
await page.unroute("**/*", route_handler)
|
|
||||||
logger.error("[{}] Не удалось открыть главу после всех retry: {}", ch_id, chapter_url)
|
|
||||||
return []
|
|
||||||
|
|
||||||
# 2. Ждём readerInit
|
|
||||||
try:
|
|
||||||
await page.wait_for_function(
|
|
||||||
"() => Array.from(document.querySelectorAll('script'))"
|
|
||||||
".some(s => s.textContent.includes('readerInit'))",
|
|
||||||
timeout=15_000,
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("[{}] readerInit не появился за 15с ({}). "
|
|
||||||
"Продолжаем через DOM-fallback.", ch_id, str(e)[:80])
|
|
||||||
|
|
||||||
# 3. Извлекаем список URL
|
|
||||||
image_urls = await _extract_images_from_js(page)
|
|
||||||
if not image_urls:
|
|
||||||
logger.debug("[{}] JS readerInit не дал URL, пробуем DOM-парсинг", ch_id)
|
|
||||||
image_urls = await _extract_images_from_dom(page)
|
|
||||||
if not image_urls:
|
|
||||||
await page.unroute("**/*", route_handler)
|
|
||||||
try:
|
|
||||||
page_info = await page.evaluate("() => document.title + ' | ' + location.href")
|
|
||||||
except Exception:
|
|
||||||
page_info = "?"
|
|
||||||
logger.error("[{}] Список изображений пуст. Текущая страница: {}", ch_id, page_info)
|
|
||||||
return []
|
|
||||||
|
|
||||||
logger.info("[{}] Найдено изображений: {}", ch_id, len(image_urls))
|
|
||||||
url_to_idx = {_base(u): i for i, u in enumerate(image_urls)}
|
|
||||||
filename_to_idx = {_base(u).split("/")[-1]: i for i, u in enumerate(image_urls)}
|
|
||||||
total = len(image_urls)
|
|
||||||
|
|
||||||
def _count_matched() -> int:
|
|
||||||
count = 0
|
|
||||||
for base_url in captured:
|
|
||||||
if base_url in url_to_idx or base_url.split("/")[-1] in filename_to_idx:
|
|
||||||
count += 1
|
|
||||||
return count
|
|
||||||
|
|
||||||
# 4. Пролистываем читалку
|
|
||||||
await asyncio.sleep(1)
|
|
||||||
stall_count = 0
|
|
||||||
prev_done = -1
|
|
||||||
for i in range(total + 20):
|
|
||||||
done = _count_matched()
|
|
||||||
if done >= total:
|
|
||||||
break
|
|
||||||
try:
|
|
||||||
await page.keyboard.press("ArrowRight")
|
|
||||||
await asyncio.sleep(0.5)
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("[{}] Ошибка листания на шаге {}: {}", ch_id, i + 1, e)
|
|
||||||
break
|
|
||||||
if i % 20 == 19:
|
|
||||||
done = _count_matched()
|
|
||||||
logger.debug("[{}] Пролистано {}, загружено: {}/{}", ch_id, i + 1, done, total)
|
|
||||||
if done == prev_done:
|
|
||||||
stall_count += 1
|
|
||||||
if stall_count >= 3:
|
|
||||||
logger.warning("[{}] Прогресс завис ({}/{}) после {} листаний — прерываем",
|
|
||||||
ch_id, done, total, i + 1)
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
stall_count = 0
|
|
||||||
prev_done = done
|
|
||||||
|
|
||||||
# Финальное ожидание
|
|
||||||
await asyncio.sleep(3)
|
|
||||||
|
|
||||||
# 5. Retry для страниц с timeout через браузерный JS fetch
|
|
||||||
async with lock:
|
|
||||||
timeout_bases = [u for u, e in route_errors.items()
|
|
||||||
if "timeout" in e.lower() and u not in captured]
|
|
||||||
if timeout_bases:
|
|
||||||
logger.info("[{}] Retry {} страниц с timeout через JS fetch...",
|
|
||||||
ch_id, len(timeout_bases))
|
|
||||||
for retry_base in timeout_bases:
|
|
||||||
if retry_base in captured:
|
|
||||||
continue
|
|
||||||
fname = retry_base.split("/")[-1]
|
|
||||||
try:
|
|
||||||
data_b64 = await page.evaluate("""async (url) => {
|
|
||||||
try {
|
|
||||||
const r = await fetch(url, {credentials: 'include'});
|
|
||||||
if (!r.ok) return null;
|
|
||||||
const buf = await r.arrayBuffer();
|
|
||||||
const bytes = new Uint8Array(buf);
|
|
||||||
let bin = '';
|
|
||||||
for (let b of bytes) bin += String.fromCharCode(b);
|
|
||||||
return btoa(bin);
|
|
||||||
} catch(e) { return null; }
|
|
||||||
}""", retry_base)
|
|
||||||
if data_b64:
|
|
||||||
import base64
|
|
||||||
body = base64.b64decode(data_b64)
|
|
||||||
if len(body) > 500:
|
|
||||||
async with lock:
|
|
||||||
captured[retry_base] = body
|
|
||||||
logger.info("[{}] Retry OK: {} ({} байт)", ch_id, fname, len(body))
|
|
||||||
else:
|
|
||||||
logger.warning("[{}] Retry вернул {} байт для '{}' — игнорируем",
|
|
||||||
ch_id, len(body), fname)
|
|
||||||
else:
|
|
||||||
logger.warning("[{}] Retry вернул null для '{}' | {}",
|
|
||||||
ch_id, fname, retry_base[-70:])
|
|
||||||
except Exception as e2:
|
|
||||||
logger.warning("[{}] Retry JS ошибка для '{}': {}", ch_id, fname, e2)
|
|
||||||
|
|
||||||
await page.unroute("**/*", route_handler)
|
|
||||||
|
|
||||||
done = _count_matched()
|
|
||||||
elapsed = time.monotonic() - t_start
|
|
||||||
logger.info("[{}] Перехвачено: {}/{} за {:.1f}с", ch_id, done, total, elapsed)
|
|
||||||
|
|
||||||
# 6. Сохраняем в правильном порядке
|
|
||||||
filename_to_idx = {_base(u).split("/")[-1]: i for i, u in enumerate(image_urls)}
|
|
||||||
|
|
||||||
paths: dict[int, Path] = {}
|
|
||||||
unmatched_other: list[str] = []
|
|
||||||
for base_url, body in captured.items():
|
|
||||||
idx = url_to_idx.get(base_url)
|
|
||||||
if idx is None:
|
|
||||||
fname = base_url.split("/")[-1]
|
|
||||||
idx = filename_to_idx.get(fname)
|
|
||||||
if idx is None:
|
|
||||||
if not BANNER_RE.search(base_url):
|
|
||||||
unmatched_other.append(base_url.split("/")[-1])
|
|
||||||
continue
|
|
||||||
ext = _get_ext(base_url)
|
|
||||||
p = dest_dir / f"{idx:04d}{ext}"
|
|
||||||
p.write_bytes(body)
|
|
||||||
paths[idx] = p
|
|
||||||
|
|
||||||
if unmatched_other:
|
|
||||||
logger.debug("[{}] Перехвачено, но не совпало с readerInit ({}): {}",
|
|
||||||
ch_id, len(unmatched_other), unmatched_other)
|
|
||||||
|
|
||||||
# 7. Итоговый отчёт по пропущенным страницам
|
|
||||||
missing_idxs = [i for i in range(total) if i not in paths]
|
|
||||||
if missing_idxs:
|
|
||||||
missing_files = [_base(image_urls[i]).split("/")[-1] for i in missing_idxs]
|
|
||||||
missing_full = [_base(image_urls[i]) for i in missing_idxs]
|
|
||||||
|
|
||||||
timeout_miss = [missing_files[j] for j, i in enumerate(missing_idxs)
|
|
||||||
if missing_full[j] in route_errors
|
|
||||||
and "timeout" in route_errors[missing_full[j]].lower()]
|
|
||||||
http_miss = [f"{missing_files[j]}(HTTP {route_statuses.get(missing_full[j], '?')})"
|
|
||||||
for j, i in enumerate(missing_idxs)
|
|
||||||
if missing_full[j] in route_statuses]
|
|
||||||
unrcv = [missing_files[j] for j, i in enumerate(missing_idxs)
|
|
||||||
if missing_full[j] not in route_errors
|
|
||||||
and missing_full[j] not in route_statuses]
|
|
||||||
|
|
||||||
reasons = []
|
|
||||||
if timeout_miss:
|
|
||||||
reasons.append(f"timeout×{len(timeout_miss)}: {timeout_miss}")
|
|
||||||
if http_miss:
|
|
||||||
reasons.append(f"HTTP-err×{len(http_miss)}: {http_miss}")
|
|
||||||
if unrcv:
|
|
||||||
reasons.append(f"не_перехвачено×{len(unrcv)}: {unrcv}")
|
|
||||||
|
|
||||||
logger.warning(
|
|
||||||
"[{}] Пропущено {}/{} стр. | №: {} | причины: {}",
|
|
||||||
ch_id, len(missing_idxs), total,
|
|
||||||
[i + 1 for i in missing_idxs],
|
|
||||||
" | ".join(reasons) if reasons else "неизвестно",
|
|
||||||
)
|
|
||||||
logger.debug("[{}] Полные URL пропущенных: {}", ch_id, missing_full)
|
|
||||||
|
|
||||||
return [paths[i] for i in sorted(paths.keys())]
|
|
||||||
|
|
||||||
|
|||||||
74
src/sources/__init__.py
Normal file
74
src/sources/__init__.py
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
"""
|
||||||
|
Реестр источников манги.
|
||||||
|
|
||||||
|
Для добавления нового источника:
|
||||||
|
1. Создать файл src/sources/mysource.py с классом, реализующим MangaSourceProtocol
|
||||||
|
2. Импортировать его здесь и добавить в список SOURCES
|
||||||
|
"""
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from .base import MangaSourceProtocol
|
||||||
|
from .readmanga import ReadmangaSource
|
||||||
|
|
||||||
|
# ── Регистрация источников ─────────────────────
|
||||||
|
# Добавьте новые источники сюда:
|
||||||
|
# One instance of every adapter known to the application.
SOURCES: list = [
    ReadmangaSource(),
]

# Fast lookup by slug: maps each adapter's ``slug`` attribute to the adapter instance.
_BY_SLUG: dict[str, object] = {s.slug: s for s in SOURCES}
|
||||||
|
|
||||||
|
|
||||||
|
class SourceRegistry:
    """Read-only lookup facade over the adapters registered in this module.

    The set of adapters is fixed in code (``SOURCES``); the database is only
    consulted to translate a numeric source id into a slug.
    """

    def get_by_slug(self, slug: str) -> Optional[object]:
        """Return the adapter registered under ``slug``, or None if there is none."""
        return _BY_SLUG.get(slug)

    def get_by_db_id(self, source_id: int, db) -> Optional[object]:
        """Resolve an adapter through the DB: source_id -> slug -> instance.

        ``db`` must provide ``get_source_by_id(source_id)`` returning a mapping
        with a ``"slug"`` key, or a falsy value when the id is unknown.
        """
        record = db.get_source_by_id(source_id)
        return _BY_SLUG.get(record["slug"]) if record else None

    def all_sources(self) -> list:
        """Return a new list holding every registered adapter."""
        return [*SOURCES]

    def all_slugs(self) -> list[str]:
        """Return the slug of every registered adapter, in registration order."""
        return [source.slug for source in SOURCES]
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level SourceRegistry instance created at import time.
registry = SourceRegistry()
|
||||||
|
|
||||||
|
|
||||||
|
def get_source_for_url(url: str, db) -> Optional[object]:
    """Pick the source adapter responsible for ``url`` by its domain.

    The domain is normalised with ``extract_domain`` (lower-cased, leading
    ``www.`` removed) and looked up through ``db.get_source_by_domain``; the
    returned row's ``"slug"`` selects the adapter instance.

    Args:
        url: Address pasted by the user.
        db: Object exposing ``get_source_by_domain(domain)`` that returns a
            mapping with a ``"slug"`` key, or a falsy value for unknown domains.

    Returns:
        The adapter instance, or None when the URL has no domain, the domain is
        not registered, the slug has no adapter in code, or the lookup fails.
    """
    # Reuse the shared helper so detection and any other caller of
    # extract_domain always agree on what "the domain" of a URL is.
    domain = extract_domain(url)
    if not domain:
        return None
    try:
        row = db.get_source_by_domain(domain)
        if not row:
            return None
        return _BY_SLUG.get(row["slug"])
    except Exception:
        # Deliberate best-effort: a failed lookup is reported as "unknown source".
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_domain(url: str) -> str:
    """Return the lower-cased network location of ``url`` without a leading ``www.``.

    The value is taken from ``urlparse(url).netloc``, so a URL without a scheme
    yields an empty string and an explicit port is kept. Any error raised while
    parsing also yields an empty string.
    """
    try:
        return urlparse(url).netloc.lower().removeprefix("www.")
    except Exception:
        return ""
|
||||||
|
|
||||||
58
src/sources/base.py
Normal file
58
src/sources/base.py
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
"""
|
||||||
|
Базовые модели данных и Protocol-интерфейс для источников манги.
|
||||||
|
"""
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional, Protocol, runtime_checkable
|
||||||
|
|
||||||
|
from playwright.async_api import Page
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────
|
||||||
|
# Модели данных (общие для всех источников)
|
||||||
|
# ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
@dataclass
class Chapter:
    """One chapter entry of a manga as produced by a source adapter."""

    title: str  # chapter title text
    url: str  # address of the chapter page
    number: float = 0.0  # chapter number; defaults to 0.0
    volume: int = 0  # volume number; defaults to 0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class MangaInfo:
    """Metadata of a manga title together with its list of chapters."""

    title: str  # title used as the primary name
    url: str  # address of the manga page
    chapters: list[Chapter] = field(default_factory=list)  # fresh list per instance
    pub_status: str = "unknown"  # completed / ongoing / unknown
    title_ru: str = ""  # Russian title; empty by default
    title_full: str = ""  # full title string; empty by default
    description: str = ""  # description text; empty by default
    genres: list[str] = field(default_factory=list)  # genre names; fresh list per instance
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────
|
||||||
|
# Интерфейс источника
|
||||||
|
# ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
@runtime_checkable
class MangaSourceProtocol(Protocol):
    """Structural interface of a manga source adapter.

    Being ``runtime_checkable``, it can be used with ``isinstance``; that check
    only verifies that the members exist, not their signatures.
    """

    slug: str  # unique source code used in code ("readmanga")
    display_name: str  # name shown in the UI ("ReadManga")

    async def get_manga_info(self, page: Page, url: str) -> Optional[MangaInfo]:
        """Return information about the manga and its list of chapters."""
        ...

    async def get_chapter_images_and_download(
        self,
        page: Page,
        chapter_url: str,
        dest_dir: Path,
        manga_url: Optional[str] = None,
        on_page: object = None,
    ) -> list[Path]:
        """Download the chapter's pages into dest_dir and return the list of paths."""
        ...
|
||||||
|
|
||||||
589
src/sources/readmanga.py
Normal file
589
src/sources/readmanga.py
Normal file
@@ -0,0 +1,589 @@
|
|||||||
|
"""
|
||||||
|
Адаптер ReadManga: поддерживает readmanga.ru и все его клоны.
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
import base64
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from loguru import logger
|
||||||
|
from playwright.async_api import Page
|
||||||
|
|
||||||
|
from .base import Chapter, MangaInfo
|
||||||
|
|
||||||
|
|
||||||
|
class ReadmangaSource:
    """Source adapter for ReadManga-style sites.

    Provides the two coroutines a source adapter exposes: reading a title page
    (``get_manga_info``) and downloading the page images of one chapter
    (``get_chapter_images_and_download``).
    """

    slug = "readmanga"
    display_name = "ReadManga"

    # Substrings of CDN hosts from which chapter images are accepted.
    cdn_patterns = ["one-way.work", "staticfa.", "rm.one-way", "cdnmanga", "reimg"]

    # ──────────────────────────────────────────────
    # Manga page: list of chapters
    # ──────────────────────────────────────────────

    async def get_manga_info(self, page: Page, url: str) -> Optional[MangaInfo]:
        """Open the manga page and collect its metadata and chapter list.

        Args:
            page: Playwright page used for navigation and DOM queries.
            url: Address of the manga page.

        Returns:
            A populated ``MangaInfo``, or None when the page could not be opened.
        """
        logger.info("Загружаем страницу манги: {}", url)
        if not await _navigate(page, url):
            return None

        # Drop everything from the first dash / pipe separator of the <title> on.
        title_full = await page.title()
        title_full = re.sub(r"\s*[-–|].*$", "", title_full).strip()

        # Prefer the title found in the DOM; fall back to parsing the page title.
        title_ru = await _extract_ru_title_from_dom(page)
        if not title_ru:
            title_ru = _parse_ru_title(title_full)

        logger.info("Манга: {} | ru: {}", title_full, title_ru)

        pub_status = await _extract_pub_status(page)
        logger.info("Статус выпуска: {}", pub_status)

        description = await _extract_description(page)
        genres = await _extract_genres(page)

        await _expand_chapters(page)
        chapters = await _extract_chapters(page)
        if not chapters:
            chapters = await _extract_chapters_alt(page)

        logger.info("Найдено глав: {}", len(chapters))
        return MangaInfo(
            title=title_ru or title_full,
            url=url,
            chapters=chapters,
            pub_status=pub_status,
            title_ru=title_ru,
            title_full=title_full,
            description=description,
            genres=genres,
        )

    # ──────────────────────────────────────────────
    # Chapter download
    # ──────────────────────────────────────────────

    async def get_chapter_images_and_download(
        self,
        page: Page,
        chapter_url: str,
        dest_dir: Path,
        manga_url: Optional[str] = None,
        on_page: object = None,
    ) -> list[Path]:
        """Download the page images of one chapter into ``dest_dir``.

        Steps:
          1. Install a ``page.route`` interceptor that stores the bodies of
             chapter images served from the accepted CDN hosts.
          2. Open the chapter page (with ``mtr=1`` appended to the query).
          3. Read the ordered image URL list from ``readerInit`` (DOM fallback).
          4. Flip through the reader with ArrowRight until every image was
             captured or progress stalls.
          5. Retry images whose interception timed out via an in-page ``fetch``.
          6. Write captured images as ``NNNN.ext`` in reader order and log a
             report about missing pages.

        The interceptor is always removed again, including when the coroutine
        is cancelled or an unexpected error escapes.

        Args:
            page: Playwright page to drive.
            chapter_url: Address of the chapter.
            dest_dir: Target directory; created if missing.
            manga_url: Referer to send; derived from ``chapter_url`` when omitted.
            on_page: Optional callable; ``on_page(0, 0)`` is scheduled after each
                captured image and never awaited here.

        Returns:
            Paths of the saved images ordered by page index; empty when the
            chapter could not be opened or no image list was found.
        """
        from urllib.parse import urlparse

        t_start = time.monotonic()
        ch_id = chapter_url.split("/")[-1]
        logger.info("[{}] Загружаем главу: {}", ch_id, chapter_url)

        parsed = urlparse(chapter_url)
        parts = parsed.path.strip("/").split("/")
        manga_slug = parts[0] if parts else ""
        referer = manga_url or f"{parsed.scheme}://{parsed.netloc}/{manga_slug}"

        load_url = chapter_url + ("?mtr=1" if "?" not in chapter_url else "&mtr=1")
        dest_dir.mkdir(parents=True, exist_ok=True)

        def _base(u: str) -> str:
            # URL without its query string; used as the key for captured bodies.
            return u.split("?")[0]

        BANNER_RE = re.compile(r"466_p\.|570_p\.|banner|advert", re.I)
        # Compiled once per chapter instead of on every intercepted request.
        IMAGE_EXT_RE = re.compile(r"\.(jpg|jpeg|png|webp)(\?|$)", re.I)
        CDN_RE = re.compile("|".join(re.escape(p) for p in self.cdn_patterns), re.I)

        def _is_manga_image(url: str) -> bool:
            if not IMAGE_EXT_RE.search(_base(url)):
                return False
            if "resrmr." in url or "/static/" in url:
                return False
            return bool(CDN_RE.search(url))

        captured: dict[str, bytes] = {}       # query-less URL -> image body
        route_errors: dict[str, str] = {}     # query-less URL -> route.fetch error text
        route_statuses: dict[str, int] = {}   # query-less URL -> status of a rejected response
        lock = asyncio.Lock()

        async def route_handler(route, request):
            url = request.url
            base = _base(url)
            if not _is_manga_image(url) or BANNER_RE.search(base):
                await route.continue_()
                return
            async with lock:
                already = base in captured
            if already:
                await route.continue_()
                return
            fname = base.split("/")[-1]
            try:
                response = await route.fetch()
                status = response.status
                body = await response.body()
                if body and len(body) > 500 and status in (200, 206):
                    async with lock:
                        if base not in captured:
                            captured[base] = body
                    logger.debug("[{}] ✓ {}: {} байт", ch_id, fname, len(body))
                    if on_page:
                        try:
                            # Fire-and-forget progress notification.
                            asyncio.ensure_future(on_page(0, 0))
                        except Exception:
                            pass
                else:
                    async with lock:
                        route_statuses[base] = status
                    if status not in (200, 206):
                        logger.warning("[{}] CDN HTTP {} для '{}' | {}",
                                       ch_id, status, fname, base[-70:])
                    else:
                        logger.warning("[{}] Слишком мал ответ ({} байт) для '{}'",
                                       ch_id, len(body), fname)
                # Hand the fetched response to the page whether or not it was kept.
                await route.fulfill(response=response)
            except Exception as e:
                err = str(e)
                async with lock:
                    route_errors[base] = err
                is_timeout = "timeout" in err.lower()
                logger.warning("[{}] route.fetch {} '{}': {}",
                               ch_id, "timeout" if is_timeout else "ошибка", fname, err[:150])
                try:
                    await route.continue_()
                except Exception:
                    pass

        retry_fetch_js = """async (url) => {
            try {
                const r = await fetch(url, {credentials: 'include'});
                if (!r.ok) return null;
                const buf = await r.arrayBuffer();
                const bytes = new Uint8Array(buf);
                let bin = '';
                for (let b of bytes) bin += String.fromCharCode(b);
                return btoa(bin);
            } catch(e) { return null; }
        }"""

        await page.route("**/*", route_handler)
        try:
            ok = await _navigate(page, load_url, referer=referer)
            if not ok:
                logger.error("[{}] Не удалось открыть главу: {}", ch_id, chapter_url)
                return []

            try:
                await page.wait_for_function(
                    "() => Array.from(document.querySelectorAll('script'))"
                    ".some(s => s.textContent.includes('readerInit'))",
                    timeout=15_000,
                )
            except Exception as e:
                logger.warning("[{}] readerInit не появился за 15с ({}). DOM-fallback.", ch_id, str(e)[:80])

            image_urls = await _extract_images_from_js(page)
            if not image_urls:
                logger.debug("[{}] JS readerInit не дал URL, пробуем DOM-парсинг", ch_id)
                image_urls = await _extract_images_from_dom(page)
            if not image_urls:
                try:
                    page_info = await page.evaluate("() => document.title + ' | ' + location.href")
                except Exception:
                    page_info = "?"
                logger.error("[{}] Список изображений пуст. Страница: {}", ch_id, page_info)
                return []

            logger.info("[{}] Найдено изображений: {}", ch_id, len(image_urls))
            url_to_idx = {_base(u): i for i, u in enumerate(image_urls)}
            # Fallback match by file name for images served from a different host/path.
            filename_to_idx = {_base(u).split("/")[-1]: i for i, u in enumerate(image_urls)}
            total = len(image_urls)

            def _count_matched() -> int:
                return sum(
                    1 for base_url in captured
                    if base_url in url_to_idx or base_url.split("/")[-1] in filename_to_idx
                )

            await asyncio.sleep(1)
            stall_count = 0
            prev_done = -1
            for i in range(total + 20):
                done = _count_matched()
                if done >= total:
                    break
                try:
                    await page.keyboard.press("ArrowRight")
                    await asyncio.sleep(0.5)
                except Exception as e:
                    logger.warning("[{}] Ошибка листания на шаге {}: {}", ch_id, i + 1, e)
                    break
                if i % 20 == 19:
                    # Every 20 flips: give up after three checks without progress.
                    done = _count_matched()
                    logger.debug("[{}] Пролистано {}, загружено: {}/{}", ch_id, i + 1, done, total)
                    if done == prev_done:
                        stall_count += 1
                        if stall_count >= 3:
                            logger.warning("[{}] Прогресс завис ({}/{}) — прерываем", ch_id, done, total)
                            break
                    else:
                        stall_count = 0
                    prev_done = done

            # Give in-flight image requests time to finish.
            await asyncio.sleep(3)

            # Retry timed-out images through an in-page JS fetch.
            async with lock:
                timeout_bases = [u for u, e in route_errors.items()
                                 if "timeout" in e.lower() and u not in captured]
            if timeout_bases:
                logger.info("[{}] Retry {} страниц с timeout...", ch_id, len(timeout_bases))
                for retry_base in timeout_bases:
                    if retry_base in captured:
                        continue
                    fname = retry_base.split("/")[-1]
                    try:
                        data_b64 = await page.evaluate(retry_fetch_js, retry_base)
                        if data_b64:
                            body = base64.b64decode(data_b64)
                            if len(body) > 500:
                                async with lock:
                                    captured[retry_base] = body
                                logger.info("[{}] Retry OK: {} ({} байт)", ch_id, fname, len(body))
                            else:
                                logger.warning("[{}] Retry вернул {} байт — игнорируем", ch_id, len(body))
                        else:
                            logger.warning("[{}] Retry null для '{}'", ch_id, fname)
                    except Exception as e2:
                        logger.warning("[{}] Retry JS ошибка '{}': {}", ch_id, fname, e2)
        finally:
            # Always remove the interceptor so it cannot leak into later use of
            # this page (early return, unexpected error, task cancellation).
            try:
                await page.unroute("**/*", route_handler)
            except Exception as e:
                logger.debug("[{}] Не удалось снять перехват запросов: {}", ch_id, e)

        done = _count_matched()
        elapsed = time.monotonic() - t_start
        logger.info("[{}] Перехвачено: {}/{} за {:.1f}с", ch_id, done, total, elapsed)

        # Save in reader order.
        paths: dict[int, Path] = {}
        unmatched_other: list[str] = []
        for base_url, body in captured.items():
            fname = base_url.split("/")[-1]
            idx = url_to_idx.get(base_url)
            if idx is None:
                idx = filename_to_idx.get(fname)
            if idx is None:
                if not BANNER_RE.search(base_url):
                    unmatched_other.append(fname)
                continue
            ext = _get_ext(base_url)
            p = dest_dir / f"{idx:04d}{ext}"
            p.write_bytes(body)
            paths[idx] = p

        if unmatched_other:
            logger.debug("[{}] Не совпало с readerInit ({}): {}", ch_id, len(unmatched_other), unmatched_other)

        # Report pages that were listed but never saved, grouped by cause.
        missing_idxs = [i for i in range(total) if i not in paths]
        if missing_idxs:
            missing = [
                (full.split("/")[-1], full)
                for full in (_base(image_urls[i]) for i in missing_idxs)
            ]

            timeout_miss = [name for name, full in missing
                            if "timeout" in route_errors.get(full, "").lower()]
            http_miss = [f"{name}(HTTP {route_statuses[full]})"
                         for name, full in missing if full in route_statuses]
            unrcv = [name for name, full in missing
                     if full not in route_errors and full not in route_statuses]

            reasons = []
            if timeout_miss:
                reasons.append(f"timeout×{len(timeout_miss)}: {timeout_miss}")
            if http_miss:
                reasons.append(f"HTTP-err×{len(http_miss)}: {http_miss}")
            if unrcv:
                reasons.append(f"не_перехвачено×{len(unrcv)}: {unrcv}")

            logger.warning(
                "[{}] Пропущено {}/{} стр. | №: {} | причины: {}",
                ch_id, len(missing_idxs), total,
                [i + 1 for i in missing_idxs],
                " | ".join(reasons) if reasons else "неизвестно",
            )

        return [paths[i] for i in sorted(paths.keys())]
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────
|
||||||
|
# Вспомогательные функции (приватные)
|
||||||
|
# ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
async def _navigate(page: Page, url: str, retries: int = 3,
                    referer: str | None = None) -> bool:
    """Open ``url`` in ``page``, retrying with a growing delay.

    An attempt fails when ``page.goto`` raises or returns a response with a
    status of 400 or above. After a successful load the function waits up to
    10 s for network idle; failing to reach idle does not count as an error.

    Args:
        page: Playwright page to navigate.
        url: Address to open.
        retries: Maximum number of attempts.
        referer: Referer to send; defaults to the root of the URL's own site.

    Returns:
        True once an attempt succeeds, False when every attempt failed.
    """
    from urllib.parse import urlparse

    if referer is None:
        p = urlparse(url)
        referer = f"{p.scheme}://{p.netloc}/"

    for attempt in range(1, retries + 1):
        try:
            resp = await page.goto(url, wait_until="domcontentloaded",
                                   timeout=60_000, referer=referer)
            if resp and resp.status >= 400:
                logger.warning("Попытка {}/{}: HTTP {}", attempt, retries, resp.status)
            else:
                try:
                    await page.wait_for_load_state("networkidle", timeout=10_000)
                except Exception:
                    # Best-effort: the DOM is already loaded at this point.
                    pass
                return True
        except Exception as e:
            logger.warning("Попытка {}/{}: {}", attempt, retries, e)
        # Back off only when another attempt follows; sleeping after the last
        # failure would just delay the False result.
        if attempt < retries:
            await asyncio.sleep(3 * attempt)
    return False
|
||||||
|
|
||||||
|
|
||||||
|
async def _extract_ru_title_from_dom(page: Page) -> str:
    """Return the trimmed text of the first non-empty title element in the DOM.

    A fixed list of CSS selectors is tried in order inside the page. An empty
    string is returned when none matches or when evaluation fails.
    """
    title_js = """
        () => {
            const selectors = [
                '.names .name', 'h1.manga-title', 'h1 .name', '.name-block .name',
            ];
            for (const sel of selectors) {
                const el = document.querySelector(sel);
                if (el && el.textContent.trim()) return el.textContent.trim();
            }
            return '';
        }
    """
    try:
        found = await page.evaluate(title_js)
        return found.strip() if found else ""
    except Exception:
        return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_ru_title(full_title: str) -> str:
|
||||||
|
t = full_title.strip()
|
||||||
|
t = re.sub(r'^Манга\s+', '', t).strip()
|
||||||
|
t = re.split(r'\s*[\(\[]', t)[0].strip()
|
||||||
|
t = re.sub(r'\s+онлайн\s*$', '', t, flags=re.IGNORECASE).strip()
|
||||||
|
words = t.split()
|
||||||
|
result = []
|
||||||
|
for w in words:
|
||||||
|
if re.search(r'[а-яёА-ЯЁ]', w):
|
||||||
|
result.append(w)
|
||||||
|
elif re.search(r'[a-zA-Z]', w):
|
||||||
|
if result:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
if result:
|
||||||
|
result.append(w)
|
||||||
|
while result and not re.search(r'[а-яёА-ЯЁ]', result[-1]):
|
||||||
|
result.pop()
|
||||||
|
if result:
|
||||||
|
t = ' '.join(result)
|
||||||
|
return t
|
||||||
|
|
||||||
|
|
||||||
|
async def _extract_pub_status(page: Page) -> str:
    """Determine the publication status shown on the manga page.

    The in-page script inspects a list of status elements first and then falls
    back to searching the whole body text. Returns "completed", "ongoing" or
    "unknown"; "unknown" is also returned when evaluation fails.
    """
    status_js = """
        () => {
            const statusSelectors = [
                '.elem_status .value', '.manga-info .status',
                '[class*="status"] .value', '.property .status',
            ];
            for (const sel of statusSelectors) {
                const el = document.querySelector(sel);
                if (el) {
                    const t = el.textContent.toLowerCase();
                    if (t.includes('завершён') || t.includes('завершен') || t.includes('complete')) return 'completed';
                    if (t.includes('продолжает') || t.includes('ongoing')) return 'ongoing';
                }
            }
            const bodyText = document.body ? document.body.innerText.toLowerCase() : '';
            if (bodyText.includes('выпуск завершён') || bodyText.includes('выпуск завершен')) return 'completed';
            if (bodyText.includes('продолжается')) return 'ongoing';
            return 'unknown';
        }
    """
    try:
        status = await page.evaluate(status_js)
    except Exception:
        return "unknown"
    return status if status else "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
async def _extract_description(page: Page) -> str:
    """Return the manga description, trimmed and capped at 2000 characters.

    The text comes from the first element, out of a fixed selector list, that
    has non-empty content. An empty string is returned when nothing matches or
    evaluation fails.
    """
    description_js = """
        () => {
            const selectors = [
                '.manga-description', '.elem_descr .value',
                '#tab-description .description-text', '.description',
                '[itemprop="description"]',
            ];
            for (const sel of selectors) {
                const el = document.querySelector(sel);
                if (el && el.textContent.trim()) return el.textContent.trim();
            }
            return '';
        }
    """
    try:
        text = await page.evaluate(description_js)
        if not text:
            return ""
        return text.strip()[:2000]
    except Exception:
        return ""
|
||||||
|
|
||||||
|
|
||||||
|
async def _extract_genres(page: Page) -> list[str]:
    """Return the genre names listed on the manga page.

    The first selector, out of a fixed list, that matches any element supplies
    the result; empty texts are filtered out. An empty list is returned when
    nothing matches or evaluation fails.
    """
    genres_js = """
        () => {
            const selectors = [
                '.elem_genre .value a', '.genres a',
                '[itemprop="genre"]', '.genre-list a',
            ];
            for (const sel of selectors) {
                const els = document.querySelectorAll(sel);
                if (els.length) return Array.from(els).map(e => e.textContent.trim()).filter(Boolean);
            }
            return [];
        }
    """
    try:
        names = await page.evaluate(genres_js)
    except Exception:
        return []
    return names if names else []
|
||||||
|
|
||||||
|
|
||||||
|
async def _expand_chapters(page: Page):
    """Click the first visible "all chapters" control, if the page has one.

    Candidate selectors are tried in order. After a successful click the
    function waits for network idle and stops. Any error for one candidate is
    ignored and the next candidate is tried.
    """
    candidates = (
        "a.chapter-link.all",
        "button:has-text('Все главы')",
        "a:has-text('Все главы')",
    )
    for selector in candidates:
        try:
            control = page.locator(selector).first
            if not await control.is_visible(timeout=2000):
                continue
            await control.click()
            await page.wait_for_load_state("networkidle", timeout=10_000)
        except Exception:
            continue
        return
|
||||||
|
|
||||||
|
|
||||||
|
async def _extract_chapters(page: Page) -> list[Chapter]:
    """Read the chapter table (``#chapters-list tr.item-row``) of a manga page.

    For each row the title link supplies the chapter title and URL; the
    ``td[data-num]`` cell supplies ``data-vol`` (volume) and ``data-num``
    (chapter number multiplied by ten). Rows without a link or without an
    ``href`` are skipped.

    Returns:
        Chapters in document order; empty when the table is absent.
    """
    from urllib.parse import urljoin

    def _int_attr(raw: Optional[str]) -> int:
        # A missing or malformed attribute must not abort the whole listing.
        try:
            return int(raw or "0")
        except ValueError:
            return 0

    chapters: list[Chapter] = []
    for row in await page.query_selector_all("#chapters-list tr.item-row"):
        link = await row.query_selector("td[class*='item-title'] a")
        if not link:
            continue
        href = await link.get_attribute("href") or ""
        if not href:
            continue
        text = (await link.inner_text()).strip()

        td = await row.query_selector("td[data-num]")
        vol = _int_attr(await td.get_attribute("data-vol")) if td else 0
        num_raw = _int_attr(await td.get_attribute("data-num")) if td else 0

        chapters.append(Chapter(
            title=text,
            # urljoin leaves absolute URLs untouched and resolves root-relative,
            # relative and protocol-relative hrefs against the current page.
            url=urljoin(page.url, href),
            number=num_raw / 10.0,
            volume=vol,
        ))
    return chapters
|
||||||
|
|
||||||
|
|
||||||
|
async def _extract_chapters_alt(page: Page) -> list[Chapter]:
    """Fallback chapter listing built from every link whose href contains "/vol".

    Links with an empty href or empty text are dropped inside the page. Chapter
    and volume numbers are parsed from the link text.
    """
    links_js = """
        () => {
            const links = Array.from(document.querySelectorAll('a[href*="/vol"]'));
            return links.map(a => ({ href: a.href, text: a.textContent.trim() }))
                        .filter(x => x.href && x.text);
        }
    """
    entries = await page.evaluate(links_js)
    chapters = []
    for entry in entries:
        label = entry["text"]
        chapters.append(
            Chapter(
                title=label,
                url=entry["href"],
                number=_parse_num(label),
                volume=_parse_vol(label),
            )
        )
    return chapters
|
||||||
|
|
||||||
|
|
||||||
|
async def _extract_images_from_js(page: Page) -> list[str]:
    """Extract the chapter's image URLs from the ``readerInit`` call in a <script>.

    The in-page script finds the first "[" after "readerInit", matches brackets
    to locate the end of that array literal, evaluates it, and joins elements
    0 and 2 of every inner array that has at least three elements into one URL.
    Scripts that fail any step are skipped.

    Returns:
        The URL list, or an empty list when nothing was found or evaluation
        failed (the failure is logged at debug level).
    """
    reader_init_js = """
        () => {
            for (const s of document.querySelectorAll('script')) {
                const text = s.textContent || '';
                const mi = text.indexOf('readerInit');
                if (mi === -1) continue;
                const ai = text.indexOf('[', mi);
                if (ai === -1) continue;
                let depth = 0, end = -1;
                for (let i = ai; i < text.length; i++) {
                    if (text[i] === '[') depth++;
                    else if (text[i] === ']') { depth--; if (!depth) { end = i+1; break; } }
                }
                if (end === -1) continue;
                try {
                    const arr = eval(text.slice(ai, end));
                    if (Array.isArray(arr) && arr.length)
                        return arr.map(item => Array.isArray(item) && item.length >= 3
                            ? item[0] + item[2] : null).filter(Boolean);
                } catch(e) {}
            }
            return [];
        }
    """
    try:
        urls = await page.evaluate(reader_init_js)
        if not urls:
            return []
        logger.debug("JS readerInit нашёл {} изображений", len(urls))
        return urls
    except Exception as e:
        logger.debug("JS-метод не сработал: {}", e)
        return []
|
||||||
|
|
||||||
|
|
||||||
|
async def _extract_images_from_dom(page: Page) -> list[str]:
    """Collect image URLs from <img> elements of the reader.

    The first selector, out of a fixed list, that matches any element supplies
    the result; each image contributes its ``src`` or, failing that, its
    ``data-src``. An empty list is returned when nothing matches or evaluation
    fails.
    """
    images_js = """
        () => {
            for (const sel of ['img.manga-page', '.page-image img', '#mangaReader img', 'img[data-src]']) {
                const found = Array.from(document.querySelectorAll(sel));
                if (found.length) return found.map(i => i.src || i.dataset.src).filter(Boolean);
            }
            return [];
        }
    """
    try:
        urls = await page.evaluate(images_js)
    except Exception:
        return []
    return urls if urls else []
|
||||||
|
|
||||||
|
|
||||||
|
def _get_ext(url: str) -> str:
|
||||||
|
m = re.search(r"\.(jpg|jpeg|png|webp)(\?|$)", url, re.IGNORECASE)
|
||||||
|
if m:
|
||||||
|
ext = m.group(1).lower()
|
||||||
|
return ".jpg" if ext == "jpeg" else f".{ext}"
|
||||||
|
return ".jpg"
|
||||||
|
|
||||||
|
|
||||||
|
def _base_url(url: str) -> str:
|
||||||
|
m = re.match(r"(https?://[^/]+)", url)
|
||||||
|
return m.group(1) if m else "https://readmanga.ru"
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_num(text: str) -> float:
|
||||||
|
m = re.search(r"[\d]+(?:[.,]\d+)?", text.replace(",", "."))
|
||||||
|
return float(m.group()) if m else 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_vol(text: str) -> int:
|
||||||
|
m = re.search(r"Том\s+(\d+)", text, re.IGNORECASE)
|
||||||
|
return int(m.group(1)) if m else 0
|
||||||
|
|
||||||
235
src/state.py
235
src/state.py
@@ -1,14 +1,25 @@
|
|||||||
"""
|
"""
|
||||||
Хранение состояния скачивания в SQLite.
|
Хранение состояния скачивания в SQLite.
|
||||||
"""
|
"""
|
||||||
|
import json
|
||||||
import sqlite3
|
import sqlite3
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
|
||||||
DB_PATH = Path("/app/state/progress.db")
|
DB_PATH = Path("/app/state/progress.db")
|
||||||
|
|
||||||
|
# Домены ReadManga по умолчанию (сидинг при первом запуске)
|
||||||
|
_DEFAULT_READMANGA_DOMAINS = [
|
||||||
|
"readmanga.ru",
|
||||||
|
"readmanga.live",
|
||||||
|
"readmanga.me",
|
||||||
|
"readmanga.io",
|
||||||
|
"3.readmanga.ru",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
class StateDB:
|
class StateDB:
|
||||||
def __init__(self, db_path: Path = DB_PATH):
|
def __init__(self, db_path: Path = DB_PATH):
|
||||||
@@ -68,18 +79,35 @@ class StateDB:
|
|||||||
created_at TEXT
|
created_at TEXT
|
||||||
)
|
)
|
||||||
""")
|
""")
|
||||||
|
self.conn.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS sources (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
slug TEXT UNIQUE NOT NULL,
|
||||||
|
display_name TEXT NOT NULL,
|
||||||
|
settings TEXT DEFAULT '{}',
|
||||||
|
created_at TEXT
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
self.conn.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS source_domains (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
source_id INTEGER NOT NULL REFERENCES sources(id),
|
||||||
|
domain TEXT UNIQUE NOT NULL
|
||||||
|
)
|
||||||
|
""")
|
||||||
# Migrate old DB: add missing columns
|
# Migrate old DB: add missing columns
|
||||||
migrations = [
|
migrations = [
|
||||||
("chapters", "pages_total", "INTEGER DEFAULT 0"),
|
("chapters", "pages_total", "INTEGER DEFAULT 0"),
|
||||||
("chapters", "pages_done", "INTEGER DEFAULT 0"),
|
("chapters", "pages_done", "INTEGER DEFAULT 0"),
|
||||||
("mangas", "title_ru", "TEXT"),
|
("mangas", "title_ru", "TEXT"),
|
||||||
("mangas", "title_full", "TEXT"),
|
("mangas", "title_full", "TEXT"),
|
||||||
("mangas", "pub_status", "TEXT DEFAULT 'unknown'"),
|
("mangas", "pub_status", "TEXT DEFAULT 'unknown'"),
|
||||||
("mangas", "auto_update", "INTEGER DEFAULT 0"),
|
("mangas", "auto_update", "INTEGER DEFAULT 0"),
|
||||||
("mangas", "last_checked_at", "TEXT"),
|
("mangas", "last_checked_at","TEXT"),
|
||||||
("mangas", "started_at", "TEXT"),
|
("mangas", "started_at", "TEXT"),
|
||||||
("mangas", "finished_at", "TEXT"),
|
("mangas", "finished_at", "TEXT"),
|
||||||
("mangas", "folder_name", "TEXT"),
|
("mangas", "folder_name", "TEXT"),
|
||||||
|
("mangas", "source_id", "INTEGER REFERENCES sources(id)"),
|
||||||
]
|
]
|
||||||
for table, col, typedef in migrations:
|
for table, col, typedef in migrations:
|
||||||
try:
|
try:
|
||||||
@@ -88,17 +116,184 @@ class StateDB:
|
|||||||
pass
|
pass
|
||||||
self.conn.commit()
|
self.conn.commit()
|
||||||
|
|
||||||
|
def sync_sources(self, registry) -> None:
    """Synchronize the ``sources`` table with the in-code source registry.

    For every source returned by ``registry.all_sources()`` a row is inserted
    if its slug is missing, or its ``display_name`` is updated if it changed.
    If the ``readmanga`` source exists and has no domains yet, the default
    ReadManga domains are seeded. Finally, a warning is logged for every slug
    present in the database but absent from ``registry.all_slugs()``.

    Args:
        registry: Object exposing ``all_sources()`` (items with ``slug`` and
            ``display_name`` attributes) and ``all_slugs()``.
    """
    from loguru import logger

    for source in registry.all_sources():
        existing = self.conn.execute(
            "SELECT id, display_name FROM sources WHERE slug=?", (source.slug,)
        ).fetchone()
        if not existing:
            self.conn.execute(
                "INSERT INTO sources (slug, display_name, settings, created_at) VALUES (?,?,?,?)",
                (source.slug, source.display_name, "{}", _now()),
            )
            logger.info("Источник добавлен в БД: {} ({})", source.display_name, source.slug)
        elif existing["display_name"] != source.display_name:
            self.conn.execute(
                "UPDATE sources SET display_name=? WHERE slug=?",
                (source.display_name, source.slug),
            )
    self.conn.commit()

    # Seed ReadManga domains only while the source has none.
    rm = self.conn.execute("SELECT id FROM sources WHERE slug='readmanga'").fetchone()
    if rm:
        count = self.conn.execute(
            "SELECT COUNT(*) FROM source_domains WHERE source_id=?", (rm["id"],)
        ).fetchone()[0]
        if count == 0:
            seeded = 0
            for domain in _DEFAULT_READMANGA_DOMAINS:
                try:
                    self.conn.execute(
                        "INSERT INTO source_domains (source_id, domain) VALUES (?,?)",
                        (rm["id"], domain),
                    )
                except sqlite3.Error as exc:
                    # Seeding stays best-effort (e.g. the domain is already
                    # bound to another source), but the failure is no longer
                    # silent.
                    logger.warning("Не удалось добавить домен '{}': {}", domain, exc)
                else:
                    seeded += 1
            self.conn.commit()
            # Report what was actually inserted, not the size of the default list.
            logger.info("Сидинг доменов ReadManga: {} доменов", seeded)

    # Report sources that exist in the DB but have no implementation in code.
    known_slugs = set(registry.all_slugs())
    for row in self.conn.execute("SELECT slug FROM sources").fetchall():
        slug = row["slug"]
        if slug not in known_slugs:
            logger.warning("Источник '{}' есть в БД, но отсутствует в реестре — манги недоступны", slug)
|
||||||
|
|
||||||
|
def migrate_manga_sources(self) -> int:
    """Fill in ``source_id`` for mangas that do not have one yet.

    The source is resolved from the domain of each manga URL via
    ``get_source_by_domain``. Mangas whose domain is unknown are left
    untouched. Commits only when at least one row was updated.

    Returns:
        The number of mangas that were assigned a source.
    """
    pending = self.conn.execute(
        "SELECT url FROM mangas WHERE source_id IS NULL"
    ).fetchall()
    updated = 0
    for record in pending:
        manga_url = record["url"]
        match = self.get_source_by_domain(_extract_domain(manga_url))
        if not match:
            continue
        self.conn.execute(
            "UPDATE mangas SET source_id=? WHERE url=?",
            (match["id"], manga_url),
        )
        updated += 1
    if updated:
        self.conn.commit()
    return updated
|
||||||
|
|
||||||
|
# ── Sources ───────────────────────────────────
|
||||||
|
|
||||||
|
def get_source_by_id(self, source_id: int) -> Optional[dict]:
    """Return the ``sources`` row with the given id as a dict, or ``None``."""
    cursor = self.conn.execute("SELECT * FROM sources WHERE id=?", (source_id,))
    record = cursor.fetchone()
    if not record:
        return None
    return dict(record)
|
||||||
|
|
||||||
|
def get_source_by_slug(self, slug: str) -> Optional[dict]:
    """Return the ``sources`` row with the given slug as a dict, or ``None``."""
    cursor = self.conn.execute("SELECT * FROM sources WHERE slug=?", (slug,))
    record = cursor.fetchone()
    if not record:
        return None
    return dict(record)
|
||||||
|
|
||||||
|
def get_source_by_domain(self, domain: str) -> Optional[dict]:
    """Return the source that owns *domain*, or ``None`` if it is unknown.

    The lookup joins ``sources`` with ``source_domains`` and compares against
    the lower-cased *domain*.
    """
    lookup_key = domain.lower()
    cursor = self.conn.execute("""
        SELECT s.* FROM sources s
        JOIN source_domains sd ON sd.source_id = s.id
        WHERE sd.domain=?
    """, (lookup_key,))
    record = cursor.fetchone()
    if not record:
        return None
    return dict(record)
|
||||||
|
|
||||||
|
def get_all_sources(self) -> list[dict]:
    """Return every source, ordered by id, with its domains and parsed settings.

    Each returned dict holds the columns of the ``sources`` row plus:

    * ``domains`` - alphabetically sorted list of the source's domains
      (empty when it has none);
    * ``settings`` - the JSON-decoded ``settings`` column, or ``{}`` when the
      column is empty or cannot be decoded.
    """
    # Load all domains in one query instead of one query per source.
    domains_by_source: dict = {}
    for row in self.conn.execute(
        "SELECT source_id, domain FROM source_domains ORDER BY source_id, domain"
    ).fetchall():
        domains_by_source.setdefault(row["source_id"], []).append(row["domain"])

    result = []
    for source_row in self.conn.execute("SELECT * FROM sources ORDER BY id").fetchall():
        entry = dict(source_row)
        entry["domains"] = domains_by_source.get(source_row["id"], [])
        try:
            entry["settings"] = json.loads(entry.get("settings") or "{}")
        except (TypeError, ValueError):
            # Malformed or non-text settings fall back to an empty mapping.
            entry["settings"] = {}
        result.append(entry)
    return result
|
||||||
|
|
||||||
|
def add_domain(self, source_id: int, domain: str) -> bool:
    """Attach *domain* to the source with id *source_id*.

    The domain is lower-cased and stripped of surrounding whitespace before
    it is stored.

    Returns:
        ``True`` when the domain was inserted and committed; ``False`` when
        the normalized domain is empty or the insert was rejected by the
        database (for example because the domain already exists).
    """
    domain = domain.lower().strip()
    if not domain:
        # A blank value would otherwise be stored as the unique domain "".
        return False
    try:
        self.conn.execute(
            "INSERT INTO source_domains (source_id, domain) VALUES (?,?)",
            (source_id, domain),
        )
        self.conn.commit()
    except sqlite3.Error:
        return False
    return True
|
||||||
|
|
||||||
|
def remove_domain(self, source_id: int, domain: str) -> bool:
    """Detach *domain* from the source with id *source_id*.

    The domain is lower-cased and stripped of surrounding whitespace, the
    same normalization ``add_domain`` applies when storing it.

    Returns:
        ``True`` if a row was deleted, ``False`` if no matching row existed.
    """
    normalized = domain.lower().strip()
    cur = self.conn.execute(
        "DELETE FROM source_domains WHERE source_id=? AND domain=?",
        (source_id, normalized),
    )
    self.conn.commit()
    return cur.rowcount > 0
|
||||||
|
|
||||||
|
def set_manga_source(self, manga_url: str, source_id: int) -> None:
    """Assign the source *source_id* to the manga identified by *manga_url*.

    Also refreshes the manga's ``updated_at`` timestamp and commits.
    """
    params = (source_id, _now(), manga_url)
    self.conn.execute(
        "UPDATE mangas SET source_id=?, updated_at=? WHERE url=?",
        params,
    )
    self.conn.commit()
|
||||||
|
|
||||||
|
def reset_failed_chapters(self, manga_url: str) -> int:
    """Return failed and partially downloaded chapters of a manga to ``pending``.

    Two groups are reset (page counters zeroed, ``updated_at`` refreshed):
    chapters with status ``failed``, and chapters marked ``done`` whose
    ``pages_done`` is below a positive ``pages_total``.

    Returns:
        The total number of chapters that were reset.
    """
    params = (_now(), manga_url)

    failed_cursor = self.conn.execute(
        "UPDATE chapters SET status='pending', pages_done=0, pages_total=0, updated_at=? "
        "WHERE manga_url=? AND status='failed'",
        params,
    )
    reset_failed = failed_cursor.rowcount

    partial_cursor = self.conn.execute(
        """UPDATE chapters SET status='pending', pages_done=0, pages_total=0, updated_at=?
        WHERE manga_url=? AND status='done'
        AND pages_total > 0 AND pages_done < pages_total""",
        params,
    )
    reset_partial = partial_cursor.rowcount

    self.conn.commit()
    return reset_failed + reset_partial
|
||||||
|
|
||||||
|
def count_mangas_by_source_domain(self, domain: str) -> int:
    """Count the mangas of the source that owns *domain*.

    Note that every manga of that source is counted, not only those whose
    URL uses *domain*. Returns ``0`` when the domain is not bound to any
    source.
    """
    owner = self.get_source_by_domain(domain)
    if owner:
        cursor = self.conn.execute(
            "SELECT COUNT(*) FROM mangas WHERE source_id=?", (owner["id"],)
        )
        return cursor.fetchone()[0]
    return 0
|
||||||
|
|
||||||
# ── Mangas ────────────────────────────────────
|
# ── Mangas ────────────────────────────────────
|
||||||
|
|
||||||
def add_manga(self, url: str, fmt: str = "cbz", source_id: Optional[int] = None) -> bool:
    """Queue a manga for download.

    Args:
        url: Manga URL; a manga whose URL is already stored is not re-added.
        fmt: Export format stored with the manga.
        source_id: Optional id of the source the manga belongs to.

    Returns:
        ``True`` if a new row was inserted, ``False`` if the URL already exists.
    """
    already_known = self.conn.execute(
        "SELECT id FROM mangas WHERE url=?", (url,)
    ).fetchone()
    if already_known:
        return False

    self.conn.execute("""
        INSERT INTO mangas (url, format, status, source_id, added_at, updated_at)
        VALUES (?, ?, 'queued', ?, ?, ?)
    """, (url, fmt, source_id, _now(), _now()))
    self.conn.commit()
    return True
|
||||||
|
|
||||||
@@ -318,3 +513,15 @@ class StateDB:
|
|||||||
def _now() -> str:
|
def _now() -> str:
|
||||||
return datetime.utcnow().isoformat()
|
return datetime.utcnow().isoformat()
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_domain(url: str) -> str:
|
||||||
|
"""Извлекает домен без www."""
|
||||||
|
try:
|
||||||
|
domain = urlparse(url).netloc.lower()
|
||||||
|
if domain.startswith("www."):
|
||||||
|
domain = domain[4:]
|
||||||
|
return domain
|
||||||
|
except Exception:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,9 @@ from typing import Callable, Optional
|
|||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from .browser import BrowserManager
|
from .browser import BrowserManager
|
||||||
from .scraper import get_manga_info, get_chapter_images_and_download, Chapter
|
from .sources import registry, get_source_for_url, extract_domain
|
||||||
|
from .sources.base import Chapter, MangaInfo
|
||||||
|
from .scraper import get_manga_info, get_chapter_images_and_download # shim для обратной совместимости
|
||||||
from .exporter import export, MangaMeta
|
from .exporter import export, MangaMeta
|
||||||
from .state import StateDB
|
from .state import StateDB
|
||||||
|
|
||||||
@@ -61,10 +63,23 @@ async def download_manga(
|
|||||||
started_ts = await db_call(db.mark_started, url)
|
started_ts = await db_call(db.mark_started, url)
|
||||||
await emit({"type": "manga_start", "url": url, "started_at": started_ts})
|
await emit({"type": "manga_start", "url": url, "started_at": started_ts})
|
||||||
|
|
||||||
|
# Резолвим источник
|
||||||
|
source = get_source_for_url(url, db)
|
||||||
|
if source is None:
|
||||||
|
# Последний шанс: по source_id в БД
|
||||||
|
manga_row = await db_call(db.get_manga, url)
|
||||||
|
if manga_row and manga_row.get("source_id"):
|
||||||
|
source = registry.get_by_db_id(manga_row["source_id"], db)
|
||||||
|
if source is None:
|
||||||
|
await db_call(db.update_manga_status, url, "failed")
|
||||||
|
await emit({"type": "source_unknown", "url": url,
|
||||||
|
"error": "Источник не определён. Выберите источник в настройках манги."})
|
||||||
|
return
|
||||||
|
|
||||||
async with BrowserManager(headless=True) as bm:
|
async with BrowserManager(headless=True) as bm:
|
||||||
ctx, info_page = await bm.new_page()
|
ctx, info_page = await bm.new_page()
|
||||||
|
|
||||||
manga = await get_manga_info(info_page, url)
|
manga = await source.get_manga_info(info_page, url)
|
||||||
await info_page.close()
|
await info_page.close()
|
||||||
|
|
||||||
if not manga:
|
if not manga:
|
||||||
@@ -193,7 +208,7 @@ async def download_manga(
|
|||||||
"pages_total": pages_total,
|
"pages_total": pages_total,
|
||||||
})
|
})
|
||||||
|
|
||||||
image_paths = await get_chapter_images_and_download(
|
image_paths = await source.get_chapter_images_and_download(
|
||||||
ch_page, ch.url,
|
ch_page, ch.url,
|
||||||
dest_dir=tmp_path,
|
dest_dir=tmp_path,
|
||||||
manga_url=url,
|
manga_url=url,
|
||||||
@@ -329,9 +344,19 @@ async def check_for_updates(
|
|||||||
db.add_history(manga_url=url, event_type="check_started")
|
db.add_history(manga_url=url, event_type="check_started")
|
||||||
await emit({"type": "check_started", "url": url})
|
await emit({"type": "check_started", "url": url})
|
||||||
|
|
||||||
|
# Резолвим источник
|
||||||
|
source = get_source_for_url(url, db)
|
||||||
|
if source is None:
|
||||||
|
manga_row = db.get_manga(url)
|
||||||
|
if manga_row and manga_row.get("source_id"):
|
||||||
|
source = registry.get_by_db_id(manga_row["source_id"], db)
|
||||||
|
if source is None:
|
||||||
|
await emit({"type": "source_unknown", "url": url})
|
||||||
|
return []
|
||||||
|
|
||||||
async with BrowserManager(headless=True) as bm:
|
async with BrowserManager(headless=True) as bm:
|
||||||
_, page = await bm.new_page()
|
_, page = await bm.new_page()
|
||||||
manga = await get_manga_info(page, url)
|
manga = await source.get_manga_info(page, url)
|
||||||
await page.close()
|
await page.close()
|
||||||
if not manga:
|
if not manga:
|
||||||
return []
|
return []
|
||||||
|
|||||||
Reference in New Issue
Block a user