Bookie-Plus/covers.py at main · OmegaRa/Bookie-Plus · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
"""Book cover extraction and management."""
import io
import logging
import os
import posixpath
import re
import xml.etree.ElementTree as ET
import zipfile
from pathlib import Path
from urllib.parse import unquote

from PIL import Image

logger = logging.getLogger(__name__)

# Cover images live in "<DATA_DIR>/covers"; DATA_DIR falls back to "data"
# when the environment variable is not set.
COVERS_DIR = Path(os.getenv("DATA_DIR", "data"), "covers")

# Bounding boxes (width, height) handed to Image.thumbnail() when saving.
THUMB_SIZE = (300, 450)
COVER_SIZE = (667, 1000)


def ensure_dirs():
    """Create the covers directory (and any missing parents) if it is absent."""
    os.makedirs(COVERS_DIR, exist_ok=True)


def extract_cover_from_epub(filepath: str) -> bytes | None:
    """Return the raw bytes of an EPUB's cover image, or ``None``.

    The cover declared by the package (OPF) document is preferred. When the
    package declares none, the first archive member whose lower-cased name
    contains "cover" or "front" and ends with a known image extension is
    used instead. Any failure is logged as a warning and yields ``None``.
    """
    image_suffixes = (".jpg", ".jpeg", ".png", ".gif", ".webp")
    try:
        with zipfile.ZipFile(filepath, "r") as archive:
            # Preferred source: the cover referenced from the OPF manifest.
            package_path = _find_opf(archive)
            declared = (
                _find_cover_in_opf(archive, package_path) if package_path else None
            )
            if declared:
                return archive.read(declared)

            # Heuristic fallback: scan member names in archive order.
            for member in archive.namelist():
                folded = member.lower()
                if not ("cover" in folded or "front" in folded):
                    continue
                if folded.endswith(image_suffixes):
                    return archive.read(member)
    except Exception as exc:
        logger.warning("EPUB cover extraction failed for %s: %s", filepath, exc)
    return None


def _find_opf(zf: zipfile.ZipFile) -> str | None:
    """Find the OPF file path from container.xml."""
    try:
        container = zf.read("META-INF/container.xml").decode("utf-8", errors="replace")
        m = re.search(r'full-path="([^"]+\.opf)"', container)
        return m.group(1) if m else None
    except Exception:
        return None


def _find_cover_in_opf(zf: zipfile.ZipFile, opf_path: str) -> str | None:
    """Find cover image href from OPF manifest using XML parsing (no regex)."""
    try:
        root = ET.fromstring(zf.read(opf_path))
    except Exception:
        return None

    # Determine the OPF namespace prefix (e.g. "{http://www.idpf.org/2007/opf}")
    tag = root.tag
    ns_prefix = tag[: tag.index("}") + 1] if tag.startswith("{") else ""

    manifest = root.find(f"{ns_prefix}manifest")
    metadata = root.find(f"{ns_prefix}metadata")
    if manifest is None:
        return None

    # Build id -> href map from all manifest <item> elements
    id_to_href: dict[str, str] = {
        item.get("id", ""): item.get("href", "")
        for item in manifest
        if item.get("id") and item.get("href")
    }

    def _resolve(href: str) -> str | None:
        base = str(Path(opf_path).parent)
        full = str(Path(base) / href) if base != "." else href
        return full if full in zf.namelist() else None

    # EPUB3: <item properties="cover-image" .../>
    for item in manifest:
        if "cover-image" in item.get("properties", ""):
            resolved = _resolve(item.get("href", ""))
            if resolved:
                return resolved

    # EPUB2: <meta name="cover" content="item-id"/>
    if metadata is not None:
        for meta in metadata:
            local = meta.tag.split("}")[-1] if "}" in meta.tag else meta.tag
            if local == "meta" and meta.get("name", "").lower() == "cover":
                item_id = meta.get("content", "")
                href = id_to_href.get(item_id, "")
                if href:
                    resolved = _resolve(href)
                    if resolved:
                        return resolved

    # Fallback: any manifest item whose id contains "cover"
    for item_id, href in id_to_href.items():
        if "cover" in item_id.lower():
            resolved = _resolve(href)
            if resolved:
                return resolved

    return None


def extract_cover_from_pdf(filepath: str) -> bytes | None:
    """Return the first image embedded on a PDF's first page, or ``None``.

    The page itself is not rendered: only an image object already embedded
    on page one is returned. A PDF with no pages or no images on its first
    page yields ``None``; any error is logged as a warning and also yields
    ``None``.
    """
    try:
        import pypdf

        pages = pypdf.PdfReader(filepath).pages
        if not pages:
            return None
        first_image = next(iter(pages[0].images), None)
        if first_image is not None:
            return first_image.data
    except Exception as exc:
        logger.warning("PDF cover extraction failed for %s: %s", filepath, exc)
    return None


def extract_cover_from_audio(filepath: str) -> bytes | None:
    """Return embedded cover art from an audio file, or ``None``.

    Sources are tried in this order: the MP4 ``covr`` tag, the first tag
    whose key starts with ``APIC`` and carries a ``data`` attribute, then
    the first entry of the file's ``pictures`` list. Failures are logged as
    warnings and yield ``None``.
    """
    try:
        from mutagen import File

        media = File(filepath)
        if media is None:
            return None

        # MP4 containers (M4B, M4A) keep artwork under the "covr" key.
        if "covr" in media:
            artwork = media["covr"]
            if artwork:
                return bytes(artwork[0])

        # ID3 (MP3): artwork is stored in APIC frames.
        tags = getattr(media, "tags", None)
        if tags:
            for frame_id, frame in tags.items():
                if frame_id.startswith("APIC") and hasattr(frame, "data"):
                    return frame.data

        # FLAC / OGG / etc. expose a list of picture blocks.
        pictures = getattr(media, "pictures", None)
        if pictures:
            return pictures[0].data

    except Exception as exc:
        logger.warning("Audio cover extraction failed for %s: %s", filepath, exc)
    return None


def save_cover(book_id: int, image_data: bytes, fmt: str = "JPEG") -> str | None:
    """Store a cover and its thumbnail as JPEG files for ``book_id``.

    The image is converted to RGB and written twice into ``COVERS_DIR``:
    ``<book_id>.jpg`` bounded by ``COVER_SIZE`` (quality 90) and
    ``<book_id>_thumb.jpg`` bounded by ``THUMB_SIZE`` (quality 85). Both are
    scaled with ``Image.thumbnail``, which keeps the aspect ratio and never
    enlarges the image.

    ``fmt`` is accepted but not used by the body: output is always JPEG.

    Returns:
        The full-size file name (``"<book_id>.jpg"``), or ``None`` if the
        image could not be decoded or written (the error is logged).
    """
    ensure_dirs()
    try:
        source = Image.open(io.BytesIO(image_data)).convert("RGB")

        # (file name, bounding box, JPEG quality) - full size first, then thumb.
        variants = (
            (f"{book_id}.jpg", COVER_SIZE, 90),
            (f"{book_id}_thumb.jpg", THUMB_SIZE, 85),
        )
        for file_name, bounds, quality in variants:
            scaled = source.copy()
            scaled.thumbnail(bounds, Image.LANCZOS)
            scaled.save(str(COVERS_DIR / file_name), "JPEG", quality=quality)

        return f"{book_id}.jpg"
    except Exception as exc:
        logger.warning("Cover save failed for book %s: %s", book_id, exc)
        return None


def _zip_member_path(opf_path: str, href: str) -> str:
    """Resolve an OPF-relative href to a normalised ZIP member name."""
    # ZIP member names always use "/" regardless of the host OS, so join with
    # posixpath and collapse "./" and "../" segments.
    return posixpath.normpath(posixpath.join(posixpath.dirname(opf_path), href))


def _declared_cover_member(
    opf_path: str, opf_text: str, contents: dict[str, bytes]
) -> str | None:
    """Return the existing archive member that the OPF declares as its cover."""
    hrefs: list[str] = []

    # OPF2 <meta name="cover" content="..."/> (attribute order independent)
    meta_m = re.search(
        r'<meta\b[^>]*\bname=["\']cover["\'][^>]*\bcontent=["\']([^"\']+)["\']'
        r'|<meta\b[^>]*\bcontent=["\']([^"\']+)["\'][^>]*\bname=["\']cover["\']',
        opf_text, re.IGNORECASE,
    )
    if meta_m:
        quoted_id = r'["\']' + re.escape(meta_m.group(1) or meta_m.group(2)) + r'["\']'
        item_m = re.search(
            r'<item\b[^>]*\bid=' + quoted_id + r'[^>]*\bhref=["\']([^"\']+)["\']'
            r'|<item\b[^>]*\bhref=["\']([^"\']+)["\'][^>]*\bid=' + quoted_id,
            opf_text,
        )
        if item_m:
            hrefs.append(item_m.group(1) or item_m.group(2))

    # EPUB3 properties="cover-image" (attribute order independent)
    prop_m = re.search(
        r'<item\b[^>]*\bproperties=["\']cover-image["\'][^>]*\bhref=["\']([^"\']+)["\']'
        r'|<item\b[^>]*\bhref=["\']([^"\']+)["\'][^>]*\bproperties=["\']cover-image["\']',
        opf_text, re.IGNORECASE,
    )
    if prop_m:
        hrefs.append(prop_m.group(1) or prop_m.group(2))

    for href in hrefs:
        # Manifest hrefs may be percent-encoded; try decoded, then literal.
        for candidate in (unquote(href), href):
            member = _zip_member_path(opf_path, candidate)
            if member in contents:
                return member
    return None


def embed_cover_in_epub(epub_path: str, cover_data: bytes) -> bool:
    """Replace or add the cover image inside an EPUB file.

    ``cover_data`` is re-encoded as an RGB JPEG (quality 90). The target
    member is chosen in this order:

      1. the cover declared in the OPF (EPUB2 ``<meta name="cover">`` or
         EPUB3 ``properties="cover-image"``), if that member exists;
      2. the first archive member whose name contains "cover" or "front" and
         ends in ``.jpg``/``.jpeg``/``.png``;
      3. otherwise a new ``cover.jpg`` next to the OPF, registered in the
         OPF's ``<metadata>`` and ``<manifest>``;
      4. with no OPF at all, a new ``cover.jpg`` at the archive root.

    The archive is rebuilt in a temporary file beside ``epub_path`` and then
    moved into place with ``os.replace``, so a failure part-way through never
    leaves a truncated EPUB and never leaks the temporary file. Original
    members keep their order and per-entry ``ZipInfo`` (so a stored
    ``mimetype`` entry stays stored).

    Args:
        epub_path: Path of the EPUB to modify.
        cover_data: Encoded image bytes in any format Pillow can open.

    Returns:
        ``True`` on success; ``False`` if anything failed (the error is
        logged and the original file is left untouched).
    """
    import shutil
    import tempfile

    tmp_path = None
    try:
        img = Image.open(io.BytesIO(cover_data)).convert("RGB")
        buf = io.BytesIO()
        img.save(buf, "JPEG", quality=90)
        cover_jpeg = buf.getvalue()

        with zipfile.ZipFile(epub_path, "r") as zin:
            names = zin.namelist()
            infos = {n: zin.getinfo(n) for n in names}
            contents = {n: zin.read(n) for n in names}

        # Find OPF (either attribute quote style)
        container = contents.get("META-INF/container.xml", b"").decode(
            "utf-8", errors="replace"
        )
        m = re.search(r'full-path=["\']([^"\']+\.opf)["\']', container)
        opf_path = m.group(1) if m else None
        has_opf = opf_path is not None and opf_path in contents

        # Try to find existing cover item via OPF
        cover_item = None
        if has_opf:
            opf_text = contents[opf_path].decode("utf-8", errors="replace")
            cover_item = _declared_cover_member(opf_path, opf_text, contents)

        # Fallback: look for cover-named images in the zip
        if cover_item is None:
            for name in names:
                low = name.lower()
                if any(k in low for k in ("cover", "front")) and low.endswith(
                    (".jpg", ".jpeg", ".png")
                ):
                    cover_item = name
                    break

        if cover_item is not None:
            # Replace existing cover bytes
            contents[cover_item] = cover_jpeg
        elif has_opf:
            # Add new cover.jpg next to the OPF and register it there
            contents[_zip_member_path(opf_path, "cover.jpg")] = cover_jpeg

            opf_text = contents[opf_path].decode("utf-8", errors="replace")
            # Add <meta name="cover" content="bookie-cover"/> to <metadata> if not present
            if 'name="cover"' not in opf_text and "name='cover'" not in opf_text:
                opf_text = re.sub(
                    r'(</metadata>)',
                    '  <meta name="cover" content="bookie-cover"/>\n\\1',
                    opf_text,
                    count=1,
                )
            # Add <item> to <manifest> if id not present
            if 'id="bookie-cover"' not in opf_text:
                opf_text = re.sub(
                    r'(</manifest>)',
                    '  <item id="bookie-cover" href="cover.jpg" media-type="image/jpeg"/>\n\\1',
                    opf_text,
                    count=1,
                )
            contents[opf_path] = opf_text.encode("utf-8")
        else:
            # Last resort: just add cover.jpg at root
            contents["cover.jpg"] = cover_jpeg

        # Build the new archive beside the target so os.replace() stays on one
        # filesystem. realpath() makes a symlinked EPUB update its target
        # rather than being replaced by a regular file. The suffix is not
        # ".epub" so a library scan cannot mistake the temp file for a book.
        target = os.path.realpath(epub_path)
        tmp_fd, tmp_path = tempfile.mkstemp(
            prefix=".cover-", suffix=".tmp", dir=os.path.dirname(target)
        )
        os.close(tmp_fd)

        with zipfile.ZipFile(tmp_path, "w", zipfile.ZIP_DEFLATED) as zout:
            # Original members first, in their original order
            for name in names:
                zout.writestr(infos[name], contents[name])
            # Then any new files added (not in the original archive)
            for name, data in contents.items():
                if name not in infos:
                    zout.writestr(name, data)

        # mkstemp() creates the file 0600; keep the book's own permissions.
        shutil.copymode(target, tmp_path)
        os.replace(tmp_path, target)
        tmp_path = None  # moved into place; nothing left to clean up
        return True
    except Exception as exc:
        logger.warning("EPUB cover embed failed: %s", exc)
        return False
    finally:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup; the failure itself was already logged.
                pass


def delete_cover(book_id: int):
    """Remove the stored cover and thumbnail files for ``book_id``, if present."""
    targets = (
        COVERS_DIR / f"{book_id}.jpg",
        COVERS_DIR / f"{book_id}_thumb.jpg",
    )
    for target in targets:
        if not target.exists():
            continue
        target.unlink()


def get_cover_path(book_id: int, thumb: bool = False) -> Path | None:
    """Return the path of a book's stored cover, or ``None`` if it is absent.

    With ``thumb=True`` the ``<book_id>_thumb.jpg`` file is looked up instead
    of ``<book_id>.jpg``.
    """
    stem = f"{book_id}_thumb" if thumb else f"{book_id}"
    candidate = COVERS_DIR / f"{stem}.jpg"
    if candidate.exists():
        return candidate
    return None