validation

This commit is contained in:
2026-05-03 14:37:57 +03:00
parent 0f8707fe93
commit 672e199d3a
5 changed files with 398 additions and 1 deletions

View File

@@ -393,6 +393,7 @@ const state = {
currentUser: null, // {id, username, role} currentUser: null, // {id, username, role}
authWarnings: {}, // source_slug → {source_slug, source_name} authWarnings: {}, // source_slug → {source_slug, source_name}
metaUpdating: new Set(), // urls where meta refresh is in progress metaUpdating: new Set(), // urls where meta refresh is in progress
validating: {}, // url → {checked, total} for in-progress validations
}; };
// ── Auth ───────────────────────────────────── // ── Auth ─────────────────────────────────────
@@ -703,6 +704,31 @@ function handleEvent(msg) {
_updateMetaBtn(msg.url, msg.failed === -1 ? 'error' : 'done'); _updateMetaBtn(msg.url, msg.failed === -1 ? 'error' : 'done');
break; break;
case 'validate_started':
state.validating[msg.url] = {checked: 0, total: 0};
_updateValidateBtn(msg.url);
break;
case 'validate_progress':
if(state.validating[msg.url]) {
state.validating[msg.url].checked = msg.checked;
state.validating[msg.url].total = msg.total;
}
_updateValidateBtn(msg.url);
break;
case 'validate_done': {
delete state.validating[msg.url];
const result = msg.total_to_redownload > 0 || msg.new_chapters > 0 ? 'issues' : 'ok';
_updateValidateBtn(msg.url, result, msg);
break;
}
case 'validate_error':
delete state.validating[msg.url];
_updateValidateBtn(msg.url, 'error');
break;
case 'manga_meta_updated': case 'manga_meta_updated':
if(state.mangas[msg.url]) { if(state.mangas[msg.url]) {
state.mangas[msg.url].title = msg.title; state.mangas[msg.url].title = msg.title;
@@ -1691,6 +1717,63 @@ async function refreshMetaModal(url) {
// Спиннер появится через WS meta_refresh_started, исчезнет через meta_refreshed // Спиннер появится через WS meta_refresh_started, исчезнет через meta_refreshed
} }
/**
 * Repaint the "validate integrity" button of the manga modal.
 *
 * Does nothing unless the modal is visible and currently shows `url`.
 * While `state.validating[url]` exists the button shows a spinner with
 * progress; otherwise `result` selects the final look:
 *   'ok' | 'issues' | 'error' – temporary status, reverted to idle later;
 *   anything else             – idle look.
 *
 * @param {string} url     manga url the event refers to
 * @param {string} [result] 'ok', 'issues' or 'error'
 * @param {object} [data]   validate_done payload (total_to_redownload, new_chapters)
 */
function _updateValidateBtn(url, result, data) {
  const modal = document.getElementById('modal');
  if(!modal || modal.classList.contains('hidden') || modal.dataset.currentUrl !== url) return;
  const btn = document.getElementById('modal-validate-btn');
  if(!btn) return;

  const paint = (html, color, borderColor, disabled) => {
    btn.innerHTML = html;
    btn.disabled = disabled;
    btn.style.color = color;
    btn.style.borderColor = borderColor;
  };
  const paintIdle = () => paint('🔍 Проверить целостность', '#67e8f9', '#164e63', false);

  // Schedule the revert to the idle look. The callback re-checks the world
  // first, because a lot can change during the delay.
  const revertLater = (ms, paintedDisabled) => {
    btn._validateResetTimer = setTimeout(() => {
      // Modal was re-rendered: this node is detached, a fresh button exists.
      if(!btn.isConnected) return;
      // A validation is running for whatever the modal shows now.
      if(state.validating[modal.dataset.currentUrl] !== undefined) return;
      // Someone else repainted the button (e.g. a new run was just started).
      if(btn.disabled !== paintedDisabled) return;
      paintIdle();
    }, ms);
  };

  // Any newer repaint supersedes a pending revert from an older result.
  clearTimeout(btn._validateResetTimer);

  const v = state.validating[url];
  if(v !== undefined) {
    const prog = v.total > 0 ? ` ${v.checked}/${v.total}` : '...';
    paint(`<span class="meta-spinner"></span> Проверка${prog}`, '#94a3b8', '#334155', true);
  } else if(result === 'ok') {
    paint('✅ Всё в порядке', '#4ade80', '#166534', false);
    revertLater(3000, false);
  } else if(result === 'issues') {
    // Missing counters are treated as 0 so the label never shows NaN.
    const n = data ? ((data.total_to_redownload || 0) + (data.new_chapters || 0)) : '?';
    paint(`⚡ Найдено проблем: ${n} — поставлено в очередь`, '#fbbf24', '#78350f', true);
    revertLater(5000, true);
  } else if(result === 'error') {
    paint('❌ Ошибка валидации', '#f87171', '#7f1d1d', false);
    revertLater(3000, false);
  } else {
    paintIdle();
  }
}
/**
 * Ask the server to start an integrity validation for `url`.
 *
 * On success nothing else is painted here: the button is left in the
 * "starting" look until a later repaint replaces it. On failure (HTTP error
 * or network error) the button shows the error for 3 seconds and then
 * returns to the idle look.
 *
 * @param {string} url manga url
 */
async function validateManga(url) {
  const btn = document.getElementById('modal-validate-btn');
  if(btn) {
    btn.innerHTML = '<span class="meta-spinner"></span> Запуск...';
    btn.disabled = true;
  }

  const showFailure = (text) => {
    if(!btn) return;
    // textContent, not innerHTML: the text comes from the server response.
    btn.textContent = '❌ ' + text;
    btn.style.color = '#f87171';
    btn.style.borderColor = '#7f1d1d';
    setTimeout(() => {
      // Do not clobber a detached button or a validation that started meanwhile.
      if(!btn.isConnected || state.validating[url] !== undefined) return;
      btn.innerHTML = '🔍 Проверить целостность';
      btn.style.color = '#67e8f9';
      btn.style.borderColor = '#164e63';
      btn.disabled = false;
    }, 3000);
  };

  let r;
  try {
    r = await fetch('/api/mangas/validate?url='+encodeURIComponent(url), {method:'POST'});
  } catch(e) {
    // fetch rejects on network failure; without this the button would stay
    // disabled on "Запуск..." forever.
    showFailure('Ошибка');
    return;
  }
  if(!r.ok) {
    const err = await r.json().catch(() => ({}));
    // `detail` may be absent or a non-string (e.g. a list of validation errors).
    const detail = err && typeof err.detail === 'string' && err.detail ? err.detail : 'Ошибка';
    showFailure(detail);
  }
}
async function forceRedownload(url, closeModalAfter = false) { async function forceRedownload(url, closeModalAfter = false) {
if(!confirm('Скачать заново ВСЕ главы? Уже скачанные файлы будут перезаписаны.')) return; if(!confirm('Скачать заново ВСЕ главы? Уже скачанные файлы будут перезаписаны.')) return;
const r = await fetch('/api/mangas/force_redownload?url='+encodeURIComponent(url), {method:'POST'}); const r = await fetch('/api/mangas/force_redownload?url='+encodeURIComponent(url), {method:'POST'});
@@ -2233,6 +2316,15 @@ function renderModalBody(data) {
📁 Переименовать папку 📁 Переименовать папку
</button>` : ''} </button>` : ''}
${data.status === 'done' && canManage(data) ? ` ${data.status === 'done' && canManage(data) ? `
<button id="modal-validate-btn" onclick="validateManga('${escHtml(data.url)}')"
class="flex items-center gap-2 px-4 py-2 rounded-lg text-sm font-semibold transition-colors"
style="background:#0c1a2e;color:#67e8f9;border:1px solid #164e63"
${state.validating[data.url] !== undefined ? 'disabled' : ''}>
${state.validating[data.url] !== undefined
? `<span class="meta-spinner"></span> Проверка${state.validating[data.url].total > 0 ? ' '+state.validating[data.url].checked+'/'+state.validating[data.url].total : '...'}`
: '🔍 Проверить целостность'}
</button>` : ''}
${data.status === 'done' && canManage(data) ? `
<button id="modal-refresh-meta-btn" onclick="refreshMetaModal('${escHtml(data.url)}')" <button id="modal-refresh-meta-btn" onclick="refreshMetaModal('${escHtml(data.url)}')"
class="flex items-center gap-2 px-4 py-2 rounded-lg text-sm font-semibold transition-colors" class="flex items-center gap-2 px-4 py-2 rounded-lg text-sm font-semibold transition-colors"
style="background:#1e1b4b;color:#a78bfa;border:1px solid #312e81"> style="background:#1e1b4b;color:#a78bfa;border:1px solid #312e81">

View File

@@ -16,7 +16,7 @@ from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel from pydantic import BaseModel
from loguru import logger from loguru import logger
from .state import StateDB from .state import StateDB
from .worker import download_manga, check_for_updates from .worker import download_manga, check_for_updates, validate_manga
from .browser import BrowserManager from .browser import BrowserManager
from .exporter import patch_meta, MangaMeta from .exporter import patch_meta, MangaMeta
from .sources import registry, get_source_for_url, extract_domain from .sources import registry, get_source_for_url, extract_domain
@@ -849,6 +849,44 @@ async def force_redownload(url: str, _: dict = Depends(require_admin)):
return {"ok": True} return {"ok": True}
finally: finally:
db.close() db.close()
# Validations currently running, keyed by manga url. Storing the task also
# keeps a strong reference to it: the event loop only holds weak references,
# so an unreferenced task may be garbage-collected before it finishes.
_validate_tasks: dict = {}


@app.post("/api/mangas/validate")
async def validate_manga_endpoint(url: str, current_user: dict = Depends(get_current_user)):
    """Start a background integrity validation for the manga at ``url``.

    Raises:
        HTTPException 404: the manga is not in the database.
        HTTPException 400: the manga status is not ``"done"``.
        HTTPException 409: a validation for this url is already running.
        Whatever ``_check_manga_access`` raises for the current user.

    Returns ``{"ok": True}`` as soon as the background task is scheduled.
    """
    db = StateDB()
    try:
        manga = db.get_manga(url)
        if not manga:
            raise HTTPException(status_code=404, detail="Манга не найдена")
        if manga["status"] != "done":
            raise HTTPException(status_code=400, detail="Валидация доступна только для манг в статусе 'Готово'")
        _check_manga_access(manga, current_user)
    finally:
        db.close()

    # Two concurrent runs would both reset chapters and enqueue the manga.
    # There is no await between this check and create_task, so it is race-free.
    running = _validate_tasks.get(url)
    if running is not None and not running.done():
        raise HTTPException(status_code=409, detail="Валидация уже выполняется")

    task = asyncio.create_task(_do_validate(url))
    _validate_tasks[url] = task

    def _forget(finished, _url=url):
        # Only drop the entry if it still belongs to this task.
        if _validate_tasks.get(_url) is finished:
            _validate_tasks.pop(_url, None)

    task.add_done_callback(_forget)
    return {"ok": True}
async def _do_validate(url: str):
    """Run ``validate_manga`` for ``url`` and re-queue the manga if needed.

    When the validation reports chapters to re-download or new chapters,
    each reported chapter is reset, the manga status becomes ``"queued"``,
    the manga is put on ``download_queue`` and the queue change is broadcast.

    This coroutine runs as a background task, so nobody awaits its result.
    Unexpected errors are therefore logged here and reported to clients as a
    ``validate_error`` event instead of being silently dropped.
    """
    try:
        db = StateDB()
        try:
            manga = db.get_manga(url)
        finally:
            db.close()
        # Fall back to "cbz" when the row is missing or its format is empty.
        fmt = (manga["format"] if manga else None) or "cbz"

        result = await validate_manga(url, on_event=ws_manager.broadcast)
        if not result.get("ok"):
            return

        chapters_to_retry = result.get("chapters_to_redownload", [])
        new_chapters = result.get("new_chapters", 0)
        if not chapters_to_retry and not new_chapters:
            return

        db = StateDB()
        try:
            for chapter_url in chapters_to_retry:
                db.reset_chapter(chapter_url)
            db.update_manga_status(url, "queued")
        finally:
            db.close()

        await download_queue.put({"url": url, "fmt": fmt})
        await ws_manager.broadcast({"type": "manga_queued", "url": url, "format": fmt})
        await _broadcast_queue_positions()
    except Exception as e:
        logger.exception("_do_validate {}: {}", url, e)
        try:
            await ws_manager.broadcast({"type": "validate_error", "url": url, "error": str(e)})
        except Exception as broadcast_err:
            # Best effort only: the original failure is already logged above.
            logger.debug("_do_validate broadcast error: {}", broadcast_err)
@app.post("/api/mangas/stop") @app.post("/api/mangas/stop")
async def stop_manga(url: str, current_user: dict = Depends(get_current_user)): async def stop_manga(url: str, current_user: dict = Depends(get_current_user)):
db = StateDB() db = StateDB()

View File

@@ -382,6 +382,59 @@ class MangalibSource:
return [paths[i] for i in sorted(paths.keys())] return [paths[i] for i in sorted(paths.keys())]
async def get_chapter_page_count(
    self, page: Page, chapter_url: str, manga_url: Optional[str] = None
) -> int:
    """Open a chapter and return its page count as reported by the chapter API.

    The chapter is opened in ``page`` while a response listener watches for
    the ``api.cdnlibs.org`` ``/chapter?`` response and reads its
    ``data.pages`` list. If navigation fails, the mirror URL is tried once.

    Args:
        page: browser page used for navigation.
        chapter_url: chapter to open.
        manga_url: used as referer; defaults to the base URL of ``chapter_url``.

    Returns:
        Number of entries in ``data.pages``. Returns 0 when navigation failed
        or no matching response arrived within about 20 seconds (40 polls of
        0.5 s).

    Raises:
        AuthRequiredError: the API answered 401/403 and no page list was seen.
    """
    pages_info: list = []
    auth_err: list = []
    lock = asyncio.Lock()

    async def on_response(resp):
        try:
            if "api.cdnlibs.org" in resp.url and "/chapter?" in resp.url:
                if resp.status in (401, 403):
                    auth_err.append(True)
                    return
                body = await resp.body()
                data = _json.loads(body) or {}
                pages = (data.get("data") or {}).get("pages") or []
                async with lock:
                    # Keep only the first page list that arrives.
                    if not pages_info:
                        pages_info.extend(pages)
        except Exception:
            # Deliberate best effort: an unreadable or unparsable response is
            # ignored, and the polling loop below simply times out.
            pass

    if self.auth_token:
        await page.set_extra_http_headers({"Authorization": f"Bearer {self.auth_token}"})

    page.on("response", on_response)
    try:
        referer = manga_url or _base_url(chapter_url)
        ok = await _navigate(page, chapter_url, referer=referer)
        if not ok:
            mirror_url = _switch_to_mirror(chapter_url)
            if mirror_url != chapter_url:
                ok = await _navigate(
                    page, mirror_url,
                    referer=_switch_to_mirror(referer) if referer else referer,
                )
        if not ok:
            return 0

        for _ in range(40):
            async with lock:
                if pages_info or auth_err:
                    break
            await asyncio.sleep(0.5)
    finally:
        # Always detach the listener, including when navigation raises.
        page.remove_listener("response", on_response)

    if auth_err and not pages_info:
        raise AuthRequiredError(self.slug)
    return len(pages_info)
# ────────────────────────────────────────────── # ──────────────────────────────────────────────
# Вспомогательные функции (приватные) # Вспомогательные функции (приватные)

View File

@@ -376,6 +376,19 @@ class ReadmangaSource:
return [paths[i] for i in sorted(paths.keys())] return [paths[i] for i in sorted(paths.keys())]
async def get_chapter_page_count(
    self, page: Page, chapter_url: str, manga_url: Optional[str] = None
) -> int:
    """Open a chapter and return how many image URLs it exposes.

    The chapter is loaded with an extra ``mtr=1`` query parameter. Image URLs
    are extracted from the page's JS first, and from the DOM only when the JS
    extraction yields nothing.

    Args:
        page: browser page used for navigation.
        chapter_url: chapter to open.
        manga_url: not used by this implementation.

    Returns:
        Number of extracted image URLs, or 0 when navigation failed.
    """
    joiner = "&" if "?" in chapter_url else "?"
    load_url = chapter_url + joiner + "mtr=1"

    if not await _navigate(page, load_url):
        return 0

    image_urls = await _extract_images_from_js(page)
    if image_urls:
        return len(image_urls)
    return len(await _extract_images_from_dom(page))
# ────────────────────────────────────────────── # ──────────────────────────────────────────────
# Вспомогательные функции (приватные) # Вспомогательные функции (приватные)

View File

@@ -406,6 +406,207 @@ async def download_manga(
db.close() db.close()
def _chapter_has_local_issue(db_ch: dict, fmt_list: list) -> bool:
    """Return True when the DB row alone shows that a chapter is incomplete."""
    # NULL columns come back as None; treat them as 0 instead of comparing None.
    pages_total = db_ch.get("pages_total") or 0
    pages_done = db_ch.get("pages_done") or 0
    if pages_total > 0 and pages_done < pages_total:
        return True
    for f in fmt_list:
        fpath = db_ch.get(f"output_{f}")
        # A recorded output path whose file is gone means the export was lost.
        if fpath and not Path(fpath).exists():
            return True
    return False


async def validate_manga(
    url: str,
    output_dir: Path = OUTPUT_DIR,
    on_event=None,
) -> dict:
    """Check the integrity of a downloaded manga against the source site.

    Steps:
      * fetch the current chapter list from the site and upsert it into the DB;
      * fast check of every "done" chapter: page counters and output files;
      * deep check of the remaining "done" chapters, if the source provides
        ``get_chapter_page_count``: compare the site page count with
        ``pages_done``.

    Progress is reported through ``on_event`` as ``validate_started``,
    ``validate_progress`` and finally ``validate_done`` or ``validate_error``.

    Args:
        url: manga url.
        output_dir: not referenced in this function body.
        on_event: optional async callable receiving event dicts; its errors
            are logged and ignored.

    Returns:
        On success, a dict with ``ok=True``, statistics and
        ``chapters_to_redownload`` (list of chapter urls). On failure,
        ``{"ok": False, "chapters_to_redownload": []}``.

    Raises:
        asyncio.CancelledError: re-raised untouched.
    """

    async def emit(event: dict):
        if on_event:
            try:
                await on_event(event)
            except Exception as e:
                logger.debug("on_event error: {}", e)

    db = StateDB()
    db_lock = asyncio.Lock()

    async def db_call(fn, *args, **kwargs):
        # Serialise access to the single StateDB handle shared by all tasks.
        async with db_lock:
            return fn(*args, **kwargs)

    try:
        await emit({"type": "validate_started", "url": url})

        source = get_source_for_url(url, db)
        if source is None:
            manga_row = await db_call(db.get_manga, url)
            if manga_row and manga_row.get("source_id"):
                source = registry.get_by_db_id(manga_row["source_id"], db)
        if source is None:
            await emit({"type": "validate_error", "url": url,
                        "error": "Источник не определён. Выберите источник в настройках манги."})
            return {"ok": False, "chapters_to_redownload": []}

        if hasattr(source, "auth_token"):
            _src_row = await db_call(db.get_source_by_slug, source.slug)
            if _src_row:
                _settings_raw = _src_row.get("settings") or "{}"
                try:
                    _settings = _json.loads(_settings_raw) if isinstance(_settings_raw, str) else (_settings_raw or {})
                except Exception:
                    _settings = {}
                if not isinstance(_settings, dict):
                    # Valid JSON that is not an object carries no settings.
                    _settings = {}
                source.auth_token = _settings.get("auth_token") or None

        manga_row = await db_call(db.get_manga, url)
        # `or "cbz"` also covers a row whose format column is NULL/empty.
        fmt = (manga_row or {}).get("format") or "cbz"
        fmt_list = ["cbz", "pdf", "epub"] if fmt == "all" else [fmt]

        async with BrowserManager(headless=True) as bm:
            ctx, info_page = await bm.new_page()
            try:
                manga = await source.get_manga_info(info_page, url)
            except Exception as e:
                logger.error("validate: get_manga_info ошибка для {}: {}", url, e)
                await emit({"type": "validate_error", "url": url, "error": str(e)})
                return {"ok": False, "chapters_to_redownload": []}
            finally:
                await info_page.close()

            if not manga:
                await emit({"type": "validate_error", "url": url,
                            "error": "Не удалось получить информацию о манге с сайта"})
                return {"ok": False, "chapters_to_redownload": []}

            for ch in manga.chapters:
                await db_call(db.upsert_chapter, url, ch.url, ch.title, ch.number, ch.volume)

            all_ch_rows = await db_call(db.get_all_chapters, url)
            db_chapters = {c["chapter_url"]: c for c in all_ch_rows}

            new_chapters = [
                ch for ch in manga.chapters
                if db_chapters.get(ch.url, {}).get("status") == "pending"
            ]
            done_chapters = [
                ch for ch in manga.chapters
                if db_chapters.get(ch.url, {}).get("status") == "done"
            ]

            # Fast check: uses only the DB row and the local file system.
            to_redownload: set = set()
            fast_issues = 0
            for ch in done_chapters:
                if _chapter_has_local_issue(db_chapters[ch.url], fmt_list):
                    to_redownload.add(ch.url)
                    fast_issues += 1

            chapters_for_deep = [
                ch for ch in done_chapters if ch.url not in to_redownload
            ]

            site_mismatched = 0
            checked = 0
            auth_failed = False
            has_page_count = hasattr(source, "get_chapter_page_count")

            if has_page_count and chapters_for_deep:
                sem = asyncio.Semaphore(2)
                count_lock = asyncio.Lock()

                async def check_one(ch: Chapter) -> None:
                    nonlocal checked, site_mismatched, auth_failed
                    async with sem:
                        if auth_failed:
                            # The run will end with an auth error anyway;
                            # do not open the remaining chapters.
                            return
                        db_ch = db_chapters[ch.url]
                        ch_page = await ctx.new_page()
                        mismatch = False
                        site_count = 0
                        try:
                            site_count = await source.get_chapter_page_count(
                                ch_page, ch.url, url
                            )
                        except AuthRequiredError:
                            auth_failed = True
                            raise
                        except Exception as e:
                            logger.warning(
                                "validate page count Т{} Гл.{}: {}", ch.volume, ch.number, e
                            )
                        finally:
                            await ch_page.close()

                        pages_have = db_ch.get("pages_done") or 0
                        # site_count == 0 means "unknown", never a mismatch.
                        if site_count > 0 and site_count != pages_have:
                            mismatch = True
                            logger.info(
                                "validate: Т{} Гл.{} — сайт {} стр., у нас {} → повтор",
                                ch.volume, ch.number, site_count, pages_have,
                            )

                        # Emitting under the lock keeps progress events in
                        # increasing `checked` order.
                        async with count_lock:
                            checked += 1
                            if mismatch:
                                to_redownload.add(ch.url)
                                site_mismatched += 1
                            await emit({
                                "type": "validate_progress",
                                "url": url,
                                "checked": checked,
                                "total": len(chapters_for_deep),
                                "chapter_number": ch.number,
                                "volume": ch.volume,
                                "mismatch": mismatch,
                                "site_count": site_count,
                            })

                results = await asyncio.gather(
                    *[check_one(ch) for ch in chapters_for_deep],
                    return_exceptions=True,
                )

                auth_slug = None
                for res in results:
                    if isinstance(res, AuthRequiredError):
                        auth_slug = res.source_slug
                    elif isinstance(res, Exception) and not isinstance(res, asyncio.CancelledError):
                        # Not inside an `except` block, so the exception must
                        # be passed explicitly to get its traceback logged.
                        logger.opt(exception=res).error("validate gather exception: {}", res)

                if auth_slug:
                    await emit({"type": "validate_error", "url": url,
                                "error": f"auth_required:{auth_slug}"})
                    return {"ok": False, "chapters_to_redownload": []}

            to_redownload_list = list(to_redownload)
            result = {
                "ok": True,
                "url": url,
                "site_chapters": len(manga.chapters),
                "new_chapters": len(new_chapters),
                "fast_issues": fast_issues,
                "site_mismatched": site_mismatched,
                "total_to_redownload": len(to_redownload_list),
                "chapters_to_redownload": to_redownload_list,
            }
            # The event carries the statistics but not the full url list.
            await emit({
                "type": "validate_done",
                **{k: v for k, v in result.items() if k != "chapters_to_redownload"},
            })
            return result

    except asyncio.CancelledError:
        raise
    except Exception as e:
        logger.error("validate_manga {}: {}", url, e)
        await emit({"type": "validate_error", "url": url, "error": str(e)})
        return {"ok": False, "chapters_to_redownload": []}
    finally:
        db.close()
def _cover_ext_from_url(url: str) -> str: def _cover_ext_from_url(url: str) -> str:
import re as _re import re as _re
m = _re.search(r"\.(jpg|jpeg|png|webp)(\?|$)", url, _re.IGNORECASE) m = _re.search(r"\.(jpg|jpeg|png|webp)(\?|$)", url, _re.IGNORECASE)