413 lines
16 KiB
Python
413 lines
16 KiB
Python
"""
|
||
Экспорт в CBZ, PDF, EPUB с поддержкой метаданных для Komga.
|
||
"""
|
||
import zipfile
|
||
import xml.etree.ElementTree as ET
|
||
from dataclasses import dataclass
|
||
from pathlib import Path
|
||
from typing import Literal, Optional
|
||
|
||
from loguru import logger
|
||
|
||
# Output container formats accepted by export().
ExportFormat = Literal["cbz", "pdf", "epub"]
|
||
|
||
|
||
@dataclass
class MangaMeta:
    """Series and chapter metadata embedded into exported files.

    Attributes:
        series: Series name (title_ru).
        series_full: Full series name.
        chapter_title: Chapter title.
        number: Chapter number.
        volume: Volume number.
        chapters_total: Total number of chapters in the series (used for
            completed series).
        pub_status: One of "completed", "ongoing" or "unknown".
        source_url: URL of the source.
        language: Language code of the content.
        summary: Series description / synopsis.
        genre: Comma-separated genres (ComicInfo ``Genre``).
        series_group: Group / collection name (ComicInfo ``SeriesGroup``).
    """

    series: str = ""
    series_full: str = ""
    chapter_title: str = ""
    number: float = 0.0
    volume: int = 0
    chapters_total: int = 0
    pub_status: str = "unknown"
    source_url: str = ""
    language: str = "ru"
    summary: str = ""
    genre: str = ""
    series_group: str = ""
|
||
|
||
|
||
def export(
    image_paths: list[Path],
    output_path: Path,
    fmt: ExportFormat,
    title: str = "Manga",
    chapter: str = "",
    meta: Optional[MangaMeta] = None,
):
    """Export page images to a single CBZ, PDF or EPUB file.

    Args:
        image_paths: Page image files, in reading order.
        output_path: Destination file; missing parent directories are created.
        fmt: Target format: "cbz", "pdf" or "epub".
        title: Legacy series title, used only when ``meta`` is not given.
        chapter: Legacy chapter title, used only when ``meta`` is not given.
        meta: Metadata to embed; built from ``title``/``chapter`` when None.

    Raises:
        ValueError: If ``fmt`` is not a supported format. The check runs
            before any directory is created or anything is logged.
    """
    # Resolve the exporter first so an unsupported format fails fast, without
    # leaving an empty output directory or a misleading "exporting" log line.
    if fmt == "cbz":
        exporter = _export_cbz
    elif fmt == "pdf":
        exporter = _export_pdf
    elif fmt == "epub":
        exporter = _export_epub
    else:
        raise ValueError(f"Неизвестный формат: {fmt}")

    # Build the metadata from the legacy arguments when not passed explicitly.
    if meta is None:
        meta = MangaMeta(series=title, chapter_title=chapter)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    logger.info("Экспортирую {} страниц → {} ({})", len(image_paths), output_path.name, fmt)

    exporter(image_paths, output_path, meta)

    logger.info("Сохранено: {}", output_path)
|
||
|
||
|
||
# ── CBZ + ComicInfo.xml ───────────────────────
|
||
|
||
def _make_comic_info(meta: MangaMeta) -> str:
|
||
"""Генерирует ComicInfo.xml по спецификации Anansi v2.1 (Komga-совместимый)."""
|
||
root = ET.Element("ComicInfo")
|
||
root.set("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
|
||
root.set("xsi:noNamespaceSchemaLocation",
|
||
"https://raw.githubusercontent.com/anansi-project/comicinfo/main/schema/v2.1/ComicInfo.xsd")
|
||
|
||
def add(tag: str, value):
|
||
if value is None:
|
||
return
|
||
s = str(value).strip()
|
||
if s:
|
||
ET.SubElement(root, tag).text = s
|
||
|
||
add("Series", meta.series)
|
||
add("Title", meta.chapter_title)
|
||
add("Summary", meta.summary)
|
||
|
||
# Номер главы: целое если без дроби, иначе float
|
||
if meta.number:
|
||
num_str = str(int(meta.number)) if meta.number == int(meta.number) else str(meta.number)
|
||
add("Number", num_str)
|
||
|
||
if meta.volume:
|
||
add("Volume", meta.volume)
|
||
|
||
# Count — только для завершённых серий
|
||
if meta.pub_status == "completed" and meta.chapters_total:
|
||
add("Count", meta.chapters_total)
|
||
|
||
add("Genre", meta.genre)
|
||
add("LanguageISO", meta.language)
|
||
|
||
# Manga = YesAndRightToLeft — стандартная японская манга
|
||
ET.SubElement(root, "Manga").text = "YesAndRightToLeft"
|
||
|
||
if meta.source_url:
|
||
add("Web", meta.source_url)
|
||
|
||
# SeriesGroup — Komga создаёт коллекцию с этим именем
|
||
if meta.series_group:
|
||
add("SeriesGroup", meta.series_group)
|
||
|
||
ET.indent(root, space=" ")
|
||
return '<?xml version="1.0" encoding="utf-8"?>\n' + ET.tostring(root, encoding="unicode")
|
||
|
||
|
||
def _export_cbz(images: list[Path], out: Path, meta: MangaMeta):
    """Write a deflate-compressed CBZ archive to ``out``.

    ``ComicInfo.xml`` is written as the first entry at the archive root. Each
    page follows under a zero-padded four-digit index that keeps the source
    file's extension.
    """
    with zipfile.ZipFile(out, "w", compression=zipfile.ZIP_DEFLATED) as archive:
        # ComicInfo.xml goes first; Komga looks for it in the archive root.
        comic_info = _make_comic_info(meta)
        archive.writestr("ComicInfo.xml", comic_info)

        for index, page in enumerate(images):
            arcname = f"{index:04d}{page.suffix}"
            archive.write(page, arcname)
|
||
|
||
|
||
# ── PDF ───────────────────────────────────────
|
||
|
||
def _export_pdf(images: list[Path], out: Path, meta: MangaMeta):
    """Build a PDF from ``images`` at ``out`` and then embed metadata.

    img2pdf is tried first. Any failure, including img2pdf not being
    importable, is logged as a warning and the Pillow-based writer is used
    instead.
    """
    try:
        import img2pdf

        sources = [str(page) for page in images]
        out.write_bytes(img2pdf.convert(sources))
    except Exception as err:
        logger.warning("img2pdf не сработал ({}), использую Pillow", err)
        _export_pdf_pillow(images, out)

    # Metadata is written on top of the finished PDF via pypdf.
    _patch_pdf_meta(out, meta)
|
||
|
||
|
||
def _export_pdf_pillow(images: list[Path], out: Path):
    """Write ``images`` into a multi-page PDF at ``out`` using Pillow.

    Every page is converted to RGB before saving. Nothing is written when
    ``images`` is empty.

    Each source file is closed as soon as its RGB copy exists, and the RGB
    copies are released even when opening or converting a later page fails.
    """
    from PIL import Image

    pages = []
    try:
        for path in images:
            # Image.open keeps the file handle open; convert() returns an
            # independent in-memory image, so the source can be closed here.
            with Image.open(path) as source:
                pages.append(source.convert("RGB"))

        if pages:
            first, *rest = pages
            first.save(out, save_all=True, append_images=rest, format="PDF")
    finally:
        for page in pages:
            page.close()
|
||
|
||
|
||
def _patch_pdf_meta(pdf_path: Path, meta: MangaMeta):
    """Rewrite the PDF at ``pdf_path`` with /Info and XMP metadata via pypdf.

    Best-effort: a missing pypdf is logged at debug level and any other
    failure at warning level; nothing is raised to the caller.
    """
    try:
        from pypdf import PdfReader, PdfWriter
        import io

        source = PdfReader(str(pdf_path))
        target = PdfWriter()
        target.append(source)

        # Whole chapter numbers are shown without a trailing ".0".
        whole = int(meta.number)
        chapter_no = whole if meta.number == whole else meta.number
        if meta.volume:
            heading = f"{meta.series} — Том {meta.volume}, Глава {chapter_no}"
        else:
            heading = f"{meta.series} — Глава {chapter_no}"
        if meta.chapter_title:
            heading += f": {meta.chapter_title}"

        # Standard PDF /Info fields.
        info = {
            "/Title": heading,
            "/Subject": meta.series_full or meta.series,
            "/Creator": "Manga Downloader",
            "/Producer": "Manga Downloader",
        }
        target.add_metadata(info)

        # XMP metadata (Dublin Core + PDF).
        # NOTE(review): confirm the installed pypdf exposes
        # PdfWriter.add_metadata_xmp. If it does not, the AttributeError is
        # swallowed by the generic handler below and no metadata is written.
        packet = _build_xmp(meta, heading)
        target.add_metadata_xmp(packet.encode("utf-8"))

        # Serialise fully in memory before overwriting the original file.
        rendered = io.BytesIO()
        target.write(rendered)
        pdf_path.write_bytes(rendered.getvalue())

    except ImportError:
        logger.debug("pypdf не установлен — PDF-метаданные пропущены")
    except Exception as err:
        logger.warning("Ошибка записи PDF-метаданных: {}", err)
|
||
|
||
|
||
def _build_xmp(meta: MangaMeta, full_title: str) -> str:
|
||
ch_num = int(meta.number) if meta.number == int(meta.number) else meta.number
|
||
return f"""<?xpacket begin='\ufeff' id='W5M0MpCehiHzreSzNTczkc9d'?>
|
||
<x:xmpmeta xmlns:x='adobe:ns:meta/'>
|
||
<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
|
||
<rdf:Description rdf:about=''
|
||
xmlns:dc='http://purl.org/dc/elements/1.1/'
|
||
xmlns:pdf='http://ns.adobe.com/pdf/1.3/'
|
||
xmlns:xmp='http://ns.adobe.com/xap/1.0/'>
|
||
<dc:title><rdf:Alt><rdf:li xml:lang='x-default'>{_xe(full_title)}</rdf:li></rdf:Alt></dc:title>
|
||
<dc:description><rdf:Alt><rdf:li xml:lang='x-default'>{_xe(meta.series_full or meta.series)}</rdf:li></rdf:Alt></dc:description>
|
||
<dc:language><rdf:Bag><rdf:li>{meta.language}</rdf:li></rdf:Bag></dc:language>
|
||
<dc:source>{_xe(meta.source_url)}</dc:source>
|
||
<pdf:Producer>Manga Downloader</pdf:Producer>
|
||
</rdf:Description>
|
||
</rdf:RDF>
|
||
</x:xmpmeta>
|
||
<?xpacket end='w'?>"""
|
||
|
||
|
||
def _xe(s: str) -> str:
|
||
"""Экранирование для XML."""
|
||
return (s.replace("&", "&").replace("<", "<")
|
||
.replace(">", ">").replace('"', """))
|
||
|
||
|
||
# ── EPUB ──────────────────────────────────────
|
||
|
||
def _export_epub(images: list[Path], out: Path, meta: MangaMeta):
    """Write an EPUB at ``out`` with one XHTML page per image.

    Besides title, identifier and language, the book carries Dublin Core
    subject/description/source entries, Calibre series tags and the EPUB3
    ``belongs-to-collection`` group describing the series and chapter
    position. Requires ``ebooklib``.
    """
    from ebooklib import epub

    # Whole chapter numbers are shown without a trailing ".0".
    whole = int(meta.number)
    chapter_no = whole if meta.number == whole else meta.number
    if meta.volume:
        heading = f"{meta.series} — Том {meta.volume}, Глава {chapter_no}"
    else:
        heading = f"{meta.series} — Глава {chapter_no}"
    if meta.chapter_title:
        heading += f": {meta.chapter_title}"

    book = epub.EpubBook()
    identifier = f"manga-{meta.series}-v{meta.volume}-ch{meta.number}"
    book.set_identifier(identifier.replace(" ", "-"))
    book.set_title(heading)
    book.set_language(meta.language)

    # Dublin Core: the series name doubles as the subject.
    if meta.series:
        book.add_metadata("DC", "subject", meta.series)
    if meta.summary:
        book.add_metadata("DC", "description", meta.summary)
    elif meta.series_full:
        book.add_metadata("DC", "description", meta.series_full)
    if meta.source_url:
        book.add_metadata("DC", "source", meta.source_url)

    # Calibre-style series tags.
    book.add_metadata(
        None, "meta", "", {"name": "calibre:series", "content": meta.series}
    )
    position = str(float(meta.number))
    book.add_metadata(
        None, "meta", "", {"name": "calibre:series_index", "content": position}
    )

    # EPUB3 belongs-to-collection group.
    book.add_metadata(
        None, "meta", meta.series,
        {"property": "belongs-to-collection", "id": "series-id"},
    )
    book.add_metadata(
        None, "meta", "series",
        {"refines": "#series-id", "property": "collection-type"},
    )
    book.add_metadata(
        None, "meta", position,
        {"refines": "#series-id", "property": "group-position"},
    )

    # For a completed series, record the total chapter count.
    if meta.pub_status == "completed" and meta.chapters_total:
        book.add_metadata("DC", "relation", f"chapters_total:{meta.chapters_total}")

    pages = []
    links = []
    for index, source in enumerate(images):
        payload = source.read_bytes()
        stem = f"page_{index:04d}"
        image_name = f"images/{stem}{source.suffix}"
        page_name = f"{stem}.xhtml"
        label = f"Страница {index + 1}"

        picture = epub.EpubImage()
        picture.file_name = image_name
        picture.media_type = _mime(source.suffix)
        picture.content = payload
        book.add_item(picture)

        page = epub.EpubHtml(title=label, file_name=page_name, lang=meta.language)
        page.content = "".join([
            '<html><body style="margin:0;padding:0;">',
            f'<img src="{image_name}" style="max-width:100%;height:auto;display:block;margin:auto;"/>',
            '</body></html>',
        ])
        book.add_item(page)

        pages.append(page)
        links.append(epub.Link(page_name, label, f"page{index}"))

    book.toc = links
    book.spine = ["nav", *pages]
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    epub.write_epub(str(out), book)
|
||
|
||
|
||
def _mime(ext: str) -> str:
|
||
return {
|
||
".jpg": "image/jpeg",
|
||
".jpeg": "image/jpeg",
|
||
".png": "image/png",
|
||
".webp": "image/webp",
|
||
}.get(ext.lower(), "image/jpeg")
|
||
|
||
|
||
# ── Обновление метаданных в существующих файлах ──
|
||
|
||
def patch_meta(file_path: Path, meta: MangaMeta) -> bool:
    """Refresh the metadata of an existing export in place.

    The format is taken from the file extension (case-insensitive): ``.cbz``,
    ``.pdf`` or ``.epub``.

    Returns:
        True when the matching patcher returned without raising. False for an
        unknown extension (logged as a warning) or when the patcher raised
        (logged as an error).

    NOTE: the PDF patcher in this file logs and suppresses its own failures,
    so a True result for a ``.pdf`` does not prove metadata was written.
    """
    ext = file_path.suffix.lower()
    try:
        patchers = {
            ".cbz": _patch_cbz_meta,
            ".pdf": _patch_pdf_meta,
            ".epub": _patch_epub_meta,
        }
        patcher = patchers.get(ext)
        if patcher is None:
            logger.warning("patch_meta: неизвестный формат {}", ext)
            return False
        patcher(file_path, meta)
        return True
    except Exception as err:
        logger.error("patch_meta {}: {}", file_path.name, err)
        return False
|
||
|
||
|
||
def _patch_cbz_meta(cbz_path: Path, meta: MangaMeta):
    """Replace (or add) ``ComicInfo.xml`` inside an existing CBZ.

    The archive is rebuilt into a sibling ``*.tmp.cbz`` file, which then
    replaces the original. On any error the temporary file is removed and
    the exception is re-raised.
    """
    import shutil

    staging = cbz_path.with_suffix(".tmp.cbz")
    try:
        with zipfile.ZipFile(cbz_path, "r") as src:
            with zipfile.ZipFile(staging, "w", compression=zipfile.ZIP_DEFLATED) as dst:
                # The fresh ComicInfo.xml goes first.
                dst.writestr("ComicInfo.xml", _make_comic_info(meta))
                # Copy every other entry, dropping any previous ComicInfo.xml.
                for entry in src.infolist():
                    if entry.filename.lower() == "comicinfo.xml":
                        continue
                    dst.writestr(entry, src.read(entry.filename))
        shutil.move(str(staging), str(cbz_path))
    except Exception:
        if staging.exists():
            staging.unlink()
        raise
|
||
|
||
|
||
def _patch_epub_meta(epub_path: Path, meta: MangaMeta):
|
||
"""
|
||
Обновляет OPF-метаданные в существующем EPUB.
|
||
Перезаписывает content.opf с новыми dc:* и meta-тегами.
|
||
"""
|
||
import shutil
|
||
import re as _re
|
||
|
||
tmp = epub_path.with_suffix(".tmp.epub")
|
||
try:
|
||
with zipfile.ZipFile(epub_path, "r") as zin, \
|
||
zipfile.ZipFile(tmp, "w", compression=zipfile.ZIP_DEFLATED) as zout:
|
||
|
||
# Находим путь к OPF внутри EPUB
|
||
opf_path = None
|
||
if "META-INF/container.xml" in zin.namelist():
|
||
container_xml = zin.read("META-INF/container.xml").decode("utf-8")
|
||
m = _re.search(r'full-path=["\']([^"\']+\.opf)["\']', container_xml)
|
||
if m:
|
||
opf_path = m.group(1)
|
||
|
||
for item in zin.infolist():
|
||
data = zin.read(item.filename)
|
||
if opf_path and item.filename == opf_path:
|
||
data = _inject_opf_meta(data.decode("utf-8"), meta).encode("utf-8")
|
||
zout.writestr(item, data)
|
||
|
||
shutil.move(str(tmp), str(epub_path))
|
||
except Exception:
|
||
if tmp.exists():
|
||
tmp.unlink()
|
||
raise
|
||
|
||
|
||
def _inject_opf_meta(opf: str, meta: MangaMeta) -> str:
|
||
"""
|
||
Вставляет/заменяет calibre:series и belongs-to-collection в OPF-строку.
|
||
Удаляет старые вхождения и добавляет свежие перед </metadata>.
|
||
"""
|
||
import re as _re
|
||
|
||
# Удаляем старые calibre и belongs-to-collection мета-теги
|
||
opf = _re.sub(
|
||
r'<meta[^>]+(?:calibre:series|belongs-to-collection|collection-type|group-position)[^/]*/?>',
|
||
'', opf, flags=_re.IGNORECASE
|
||
)
|
||
# Удаляем старые refines на series-id
|
||
opf = _re.sub(r'<meta[^>]+refines=["\']#series-id["\'][^/]*/?>',
|
||
'', opf, flags=_re.IGNORECASE)
|
||
|
||
ch_num = int(meta.number) if meta.number == int(meta.number) else meta.number
|
||
new_meta = (
|
||
f'\n <meta name="calibre:series" content="{_xe(meta.series)}"/>'
|
||
f'\n <meta name="calibre:series_index" content="{float(meta.number)}"/>'
|
||
f'\n <meta property="belongs-to-collection" id="series-id">{_xe(meta.series)}</meta>'
|
||
f'\n <meta refines="#series-id" property="collection-type">series</meta>'
|
||
f'\n <meta refines="#series-id" property="group-position">{float(meta.number)}</meta>'
|
||
)
|
||
opf = opf.replace("</metadata>", new_meta + "\n </metadata>")
|
||
return opf
|
||
|
||
|