# manga/analyze_speed.py

import sqlite3
from datetime import datetime
from collections import defaultdict

# Open the progress database; sqlite3.Row allows reading columns by name
# (r["created_at"], r["title"], ...) in the rest of the script.
conn = sqlite3.connect('/app/state/progress.db')
conn.row_factory = sqlite3.Row

# All history events of type 'downloaded' / 'auto_downloaded', oldest first.
# String literals use single quotes: in SQL, double quotes denote identifiers,
# and SQLite only accepts them as strings through a legacy fallback that can
# be disabled at build time.
rows = conn.execute("""
    SELECT h.created_at, h.chapter_number, h.volume, m.title, h.manga_url
    FROM history h
    LEFT JOIN mangas m ON h.manga_url = m.url
    WHERE h.event_type IN ('downloaded', 'auto_downloaded')
    ORDER BY h.created_at
""").fetchall()

# Nothing to analyse: report it, release the connection and stop with
# exit status 0.
if not rows:
    print("История пуста")
    conn.close()
    # The builtin-looking exit() is injected by the `site` module and is not
    # guaranteed to exist; raising SystemExit always works.
    raise SystemExit(0)

# Parse every event timestamp once; `times[i]` corresponds to `rows[i]`.
times = [datetime.fromisoformat(r["created_at"]) for r in rows]
# Wall-clock span between the first and the last downloaded chapter, seconds.
total_dur = (times[-1] - times[0]).total_seconds()

print("=== ОБЩАЯ СТАТИСТИКА ===")
print(f"Глав скачано:     {len(rows)}")
print(f"Период:           {times[0].strftime('%d.%m %H:%M:%S')} — {times[-1].strftime('%d.%m %H:%M:%S')}")
print(f"Общее время:      {total_dur/3600:.2f} ч ({total_dur/60:.0f} мин)")
if total_dur > 0:
    print(f"Средняя скорость: {len(rows)/(total_dur/60):.2f} глав/мин  ({total_dur/len(rows):.1f} сек/глава)")
else:
    # A single event (or several sharing one timestamp) spans zero seconds;
    # a rate cannot be computed and dividing would raise ZeroDivisionError.
    print("Средняя скорость: н/д (недостаточно данных)")

# --- Per manga ---
print("\n=== ПО МАНГАМ ===")
# manga_url -> timestamps of its downloads, in the order the rows arrive.
by_manga = defaultdict(list)
# manga_url -> first non-empty title seen for it. Collected in the same pass
# so the report loop does not rescan all rows for every manga.
titles = {}
for row, stamp in zip(rows, times):
    manga_url = row["manga_url"]
    by_manga[manga_url].append(stamp)
    if row["title"] and manga_url not in titles:
        titles[manga_url] = row["title"]

# Mangas are listed in the order their first chapter was downloaded.
for url, ts in sorted(by_manga.items(), key=lambda item: item[1][0]):
    # Fall back to the tail of the URL when the LEFT JOIN found no title;
    # a NULL manga_url yields an empty label instead of a TypeError.
    label = titles.get(url) or (url or "")[-40:]
    dur = (ts[-1] - ts[0]).total_seconds()
    # A manga with a single chapter (or zero span) has no meaningful rate.
    rate = len(ts) / (dur / 60) if dur > 0 else 0
    print(f"  {label[:38]:38}  {len(ts):4d} гл  {dur/60:5.0f} мин  {rate:.2f} гл/мин")

# --- Chapters per hour ---
print("\n=== ГЛАВЫ ПО ЧАСАМ ===")
by_hour = defaultdict(int)
for t in times:
    # Key by the datetime truncated to the hour rather than by its
    # "%d.%m %H:00" label: sorting the labels as strings compares the day
    # before the month and misorders buckets across a month boundary
    # ("01.02 ..." would come before "31.01 ...").
    by_hour[t.replace(minute=0, second=0, microsecond=0)] += 1
for hour, cnt in sorted(by_hour.items()):
    # One block per chapter, capped at 60 so the bar stays on one line.
    bar = '█' * min(cnt, 60)
    print(f"  {hour.strftime('%d.%m %H:00')}  {cnt:4d} глав  {bar}")

# --- Pauses longer than 5 minutes ---
print("\n=== ПАУЗЫ > 5 МИН (между главами) ===")
# (start, end, seconds, title before the pause, title after the pause)
big_gaps = []
for i, (before, after) in enumerate(zip(times, times[1:])):
    sec = (after - before).total_seconds()
    if sec > 300:
        big_gaps.append((before, after, sec, rows[i]["title"], rows[i + 1]["title"]))

if big_gaps:
    for t1, t2, sec, m1, m2 in big_gaps:
        # Show "A → B" when the pause separates two different mangas,
        # otherwise just the (single) title. Titles may be NULL.
        label = f"{(m1 or '')[:20]} → {(m2 or '')[:20]}" if m1 != m2 else (m1 or "")[:40]
        print(f"  {t1.strftime('%H:%M:%S')} → {t2.strftime('%H:%M:%S')}  {sec/60:5.1f} мин  [{label}]")
else:
    print("  Пауз > 5 мин не обнаружено")

# --- Speed per 10-minute window ---
print("\n=== СКОРОСТЬ ПО 10-МИНУТНЫМ ОКНАМ ===")
window = 10 * 60
# Each entry: (window start, chapters in the window, seconds it covers).
windows = []
bucket_start, bucket_count = times[0], 0
for stamp in times:
    since_start = (stamp - bucket_start).total_seconds()
    if since_start >= window:
        # This event falls outside the current window: close the window
        # (its span runs up to this event) and start a new one here.
        windows.append((bucket_start, bucket_count, since_start))
        bucket_start, bucket_count = stamp, 1
        continue
    bucket_count += 1
if bucket_count:
    # The trailing window ends at the last event; a zero span is replaced
    # by 1 second.
    tail = (times[-1] - bucket_start).total_seconds()
    windows.append((bucket_start, bucket_count, tail or 1))

# Bars are scaled relative to the busiest window.
max_cnt = max((count for _, count, _ in windows), default=1)
for ws, cnt, elapsed in windows:
    if elapsed > 0:
        rate = cnt / (elapsed / 60)
    else:
        rate = 0
    filled = int(cnt / max_cnt * 40)
    bar = '▓' * filled + '░' * (40 - filled)
    print(f"  {ws.strftime('%H:%M')}  {cnt:3d} гл  {rate:4.1f}/мин  |{bar}|")

# --- Percentiles of the intervals between consecutive chapters ---
gaps_sec = sorted(
    (later - earlier).total_seconds()
    for earlier, later in zip(times, times[1:])
)
if gaps_sec:
    n = len(gaps_sec)
    # Order statistics are read straight from the sorted list by index.
    shortest = gaps_sec[0]
    median = gaps_sec[n // 2]
    p90 = gaps_sec[int(n * 0.9)]
    p99 = gaps_sec[int(n * 0.99)]
    longest = gaps_sec[-1]
    over_2min = len([g for g in gaps_sec if g > 120])
    over_5min = len([g for g in gaps_sec if g > 300])

    print("\n=== ИНТЕРВАЛЫ МЕЖДУ ГЛАВАМИ ===")
    print(f"  Минимум:  {shortest:.1f} сек")
    print(f"  Медиана:  {median:.1f} сек")
    print(f"  P90:      {p90:.1f} сек")
    print(f"  P99:      {p99:.1f} сек")
    print(f"  Максимум: {longest:.1f} сек  ({longest/60:.1f} мин)")
    print(f"  > 2 мин:  {over_2min} ({over_2min/n*100:.1f}%)")
    print(f"  > 5 мин:  {over_5min} ({over_5min/n*100:.1f}%)")

# Release the SQLite connection now that the report has been printed.
conn.close()