# covers.py -- from a fork of sweatyeggs69/Bookie.
"""Book cover extraction and management."""
import io
import os
import logging
import zipfile
import re
import xml.etree.ElementTree as ET
from pathlib import Path
from PIL import Image
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Directory that holds processed cover files: "<DATA_DIR>/covers", where
# DATA_DIR is read from the environment and defaults to "data".
COVERS_DIR = Path(os.environ.get("DATA_DIR", "data")) / "covers"
# Bounding box handed to Image.thumbnail() when writing "<book_id>_thumb.jpg".
THUMB_SIZE = (300, 450)
# Bounding box handed to Image.thumbnail() when writing "<book_id>.jpg".
COVER_SIZE = (667, 1000)
def ensure_dirs():
    """Create the covers directory, including missing parents, if absent."""
    COVERS_DIR.mkdir(exist_ok=True, parents=True)
def extract_cover_from_epub(filepath: str) -> bytes | None:
    """Return the raw bytes of an EPUB's cover image, or ``None``.

    The cover declared by the OPF package document is preferred. When the
    package declares none, the first archive member whose lower-cased name
    contains "cover" or "front" and ends in a known image extension is used.
    Any failure is logged as a warning and reported as ``None``.
    """
    image_suffixes = (".jpg", ".jpeg", ".png", ".gif", ".webp")
    try:
        with zipfile.ZipFile(filepath, "r") as archive:
            # First choice: whatever the package document points at.
            package_doc = _find_opf(archive)
            declared = _find_cover_in_opf(archive, package_doc) if package_doc else None
            if declared:
                return archive.read(declared)

            # Second choice: guess from the member names.
            for member in archive.namelist():
                folded = member.lower()
                if not folded.endswith(image_suffixes):
                    continue
                if "cover" in folded or "front" in folded:
                    return archive.read(member)
    except Exception as exc:
        logger.warning("EPUB cover extraction failed for %s: %s", filepath, exc)
    return None
def _find_opf(zf: zipfile.ZipFile) -> str | None:
"""Find the OPF file path from container.xml."""
try:
container = zf.read("META-INF/container.xml").decode("utf-8", errors="replace")
m = re.search(r'full-path="([^"]+\.opf)"', container)
return m.group(1) if m else None
except Exception:
return None
def _find_cover_in_opf(zf: zipfile.ZipFile, opf_path: str) -> str | None:
"""Find cover image href from OPF manifest using XML parsing (no regex)."""
try:
root = ET.fromstring(zf.read(opf_path))
except Exception:
return None
# Determine the OPF namespace prefix (e.g. "{http://www.idpf.org/2007/opf}")
tag = root.tag
ns_prefix = tag[: tag.index("}") + 1] if tag.startswith("{") else ""
manifest = root.find(f"{ns_prefix}manifest")
metadata = root.find(f"{ns_prefix}metadata")
if manifest is None:
return None
# Build id -> href map from all manifest <item> elements
id_to_href: dict[str, str] = {
item.get("id", ""): item.get("href", "")
for item in manifest
if item.get("id") and item.get("href")
}
def _resolve(href: str) -> str | None:
base = str(Path(opf_path).parent)
full = str(Path(base) / href) if base != "." else href
return full if full in zf.namelist() else None
# EPUB3: <item properties="cover-image" .../>
for item in manifest:
if "cover-image" in item.get("properties", ""):
resolved = _resolve(item.get("href", ""))
if resolved:
return resolved
# EPUB2: <meta name="cover" content="item-id"/>
if metadata is not None:
for meta in metadata:
local = meta.tag.split("}")[-1] if "}" in meta.tag else meta.tag
if local == "meta" and meta.get("name", "").lower() == "cover":
item_id = meta.get("content", "")
href = id_to_href.get(item_id, "")
if href:
resolved = _resolve(href)
if resolved:
return resolved
# Fallback: any manifest item whose id contains "cover"
for item_id, href in id_to_href.items():
if "cover" in item_id.lower():
resolved = _resolve(href)
if resolved:
return resolved
return None
def extract_cover_from_pdf(filepath: str) -> bytes | None:
    """Return the data of the first image embedded on a PDF's first page.

    The page itself is not rendered; only an image object already embedded
    in page one is returned. ``None`` is returned when the document has no
    pages, the first page carries no images, or reading fails (failures are
    logged as warnings).
    """
    try:
        import pypdf

        pages = pypdf.PdfReader(filepath).pages
        if not pages:
            return None
        first_image = next(iter(pages[0].images), None)
        if first_image is not None:
            return first_image.data
    except Exception as exc:
        logger.warning("PDF cover extraction failed for %s: %s", filepath, exc)
    return None
def extract_cover_from_audio(filepath: str) -> bytes | None:
    """Return embedded artwork bytes from an audio file, or ``None``.

    Three tag layouts are probed in order: an MP4-style ``covr`` entry, any
    tag whose key starts with ``APIC`` and exposes ``data``, and finally a
    ``pictures`` list on the file object. Errors are logged as warnings and
    reported as ``None``.
    """
    try:
        from mutagen import File

        media = File(filepath)
        if media is None:
            return None

        # MP4 containers (M4B, M4A)
        if "covr" in media:
            artwork = media["covr"]
            if artwork:
                return bytes(artwork[0])

        # ID3 APIC frames (MP3)
        tags = getattr(media, "tags", None)
        if tags:
            for frame_key, frame in tags.items():
                if frame_key.startswith("APIC") and hasattr(frame, "data"):
                    return frame.data

        # FLAC / OGG / etc. picture blocks
        pictures = getattr(media, "pictures", None)
        if pictures:
            return pictures[0].data
    except Exception as exc:
        logger.warning("Audio cover extraction failed for %s: %s", filepath, exc)
    return None
def save_cover(book_id: int, image_data: bytes, fmt: str = "JPEG") -> str | None:
    """Store a cover as two JPEG files and return the full-size filename.

    The image is converted to RGB, then written twice under ``COVERS_DIR``:
    ``<book_id>.jpg`` bounded by ``COVER_SIZE`` (quality 90) and
    ``<book_id>_thumb.jpg`` bounded by ``THUMB_SIZE`` (quality 85).
    ``fmt`` is accepted but not used; output is always JPEG.

    Returns ``"<book_id>.jpg"`` on success, or ``None`` (after logging a
    warning) if decoding or saving fails.
    """
    ensure_dirs()
    try:
        source = Image.open(io.BytesIO(image_data)).convert("RGB")
        # (filename, bounding box, JPEG quality) -- full size first, then thumb.
        variants = (
            (f"{book_id}.jpg", COVER_SIZE, 90),
            (f"{book_id}_thumb.jpg", THUMB_SIZE, 85),
        )
        for filename, bounds, quality in variants:
            # thumbnail() resizes in place, so each variant works on a copy.
            scaled = source.copy()
            scaled.thumbnail(bounds, Image.LANCZOS)
            scaled.save(str(COVERS_DIR / filename), "JPEG", quality=quality)
        return variants[0][0]
    except Exception as exc:
        logger.warning("Cover save failed for book %s: %s", book_id, exc)
        return None
def embed_cover_in_epub(epub_path: str, cover_data: bytes) -> bool:
    """Replace or add the cover image inside an EPUB, rewriting it in place.

    ``cover_data`` is re-encoded as an RGB JPEG (quality 90). The existing
    cover is located, in order, via the OPF ``<meta name="cover">`` entry,
    the EPUB3 ``properties="cover-image"`` manifest item, and finally any
    archive member whose name contains "cover"/"front" and ends in
    .jpg/.jpeg/.png. If one is found its bytes are replaced; otherwise a new
    ``cover.jpg`` is added next to the OPF and the OPF's metadata/manifest
    are updated (or, with no OPF at all, ``cover.jpg`` is added at the
    archive root).

    The archive is rewritten in place. A temporary backup copy is taken just
    before writing; if writing fails the backup is copied back so the
    original file is not left truncated, and the backup is always removed.

    Args:
        epub_path: Path of the EPUB file to modify.
        cover_data: Encoded image bytes in any format Pillow can open.

    Returns:
        ``True`` on success, ``False`` if anything failed (a warning is
        logged).
    """
    import posixpath
    import shutil
    import tempfile
    from urllib.parse import unquote

    backup_path = None
    try:
        img = Image.open(io.BytesIO(cover_data)).convert("RGB")
        buf = io.BytesIO()
        img.save(buf, "JPEG", quality=90)
        cover_jpeg = buf.getvalue()

        # Load the whole archive into memory; it is rewritten from scratch below.
        with zipfile.ZipFile(epub_path, "r") as zin:
            names = zin.namelist()
            infos = {n: zin.getinfo(n) for n in names}
            contents = {n: zin.read(n) for n in names}
            # Find OPF (same lookup the extraction path uses).
            opf_path = _find_opf(zin)

        opf_text = None
        if opf_path and opf_path in contents:
            opf_text = contents[opf_path].decode("utf-8", errors="replace")
        # ZIP member names always use "/", so resolve with POSIX rules.
        opf_dir = posixpath.dirname(opf_path) if opf_path else ""

        def _zip_path(href: str) -> str:
            """Resolve a manifest href to an archive member name."""
            for variant in (unquote(href), href):
                candidate = posixpath.normpath(posixpath.join(opf_dir, variant))
                if candidate in contents:
                    return candidate
            # Not present in the archive: still return a (truthy) path so the
            # later fallbacks are skipped, exactly as before.
            return posixpath.normpath(posixpath.join(opf_dir, href))

        # Try to find existing cover item via OPF
        cover_item = None
        if opf_text is not None:
            # Try OPF2 <meta name="cover" content="..."/> (attribute order independent)
            meta_m = re.search(
                r'<meta\b[^>]*\bname=["\']cover["\'][^>]*\bcontent=["\']([^"\']+)["\']'
                r'|<meta\b[^>]*\bcontent=["\']([^"\']+)["\'][^>]*\bname=["\']cover["\']',
                opf_text, re.IGNORECASE,
            )
            if meta_m:
                item_id = meta_m.group(1) or meta_m.group(2)
                item_m = re.search(
                    r'<item\b[^>]*\bid=["\']' + re.escape(item_id) + r'["\'][^>]*\bhref=["\']([^"\']+)["\']'
                    r'|<item\b[^>]*\bhref=["\']([^"\']+)["\'][^>]*\bid=["\']' + re.escape(item_id) + r'["\']',
                    opf_text,
                )
                if item_m:
                    cover_item = _zip_path(item_m.group(1) or item_m.group(2))
            if not cover_item:
                # Try EPUB3 properties="cover-image" (attribute order independent)
                prop_m = re.search(
                    r'<item\b[^>]*\bproperties=["\']cover-image["\'][^>]*\bhref=["\']([^"\']+)["\']'
                    r'|<item\b[^>]*\bhref=["\']([^"\']+)["\'][^>]*\bproperties=["\']cover-image["\']',
                    opf_text, re.IGNORECASE,
                )
                if prop_m:
                    cover_item = _zip_path(prop_m.group(1) or prop_m.group(2))

        # Fallback: look for cover-named images in the zip
        if not cover_item:
            for name in names:
                low = name.lower()
                if any(k in low for k in ("cover", "front")) and low.endswith((".jpg", ".jpeg", ".png")):
                    cover_item = name
                    break

        if cover_item and cover_item in contents:
            # Replace existing cover bytes.
            # NOTE(review): JPEG data is written even when the existing member
            # is a .png, so its extension/media-type may no longer match.
            contents[cover_item] = cover_jpeg
        elif opf_text is not None:
            # Add new cover.jpg next to the OPF and register it in the OPF.
            new_cover_zip_path = posixpath.join(opf_dir, "cover.jpg")
            contents[new_cover_zip_path] = cover_jpeg
            # Add <meta name="cover" content="bookie-cover"/> to <metadata> if not present
            if 'name="cover"' not in opf_text and "name='cover'" not in opf_text:
                opf_text = re.sub(
                    r'(</metadata>)',
                    ' <meta name="cover" content="bookie-cover"/>\n\\1',
                    opf_text,
                    count=1,
                )
            # Add <item> to <manifest> if id not present
            if 'id="bookie-cover"' not in opf_text:
                opf_text = re.sub(
                    r'(</manifest>)',
                    ' <item id="bookie-cover" href="cover.jpg" media-type="image/jpeg"/>\n\\1',
                    opf_text,
                    count=1,
                )
            contents[opf_path] = opf_text.encode("utf-8")
        else:
            # Last resort: just add cover.jpg at root
            contents["cover.jpg"] = cover_jpeg

        # Keep a backup so a failed rewrite cannot destroy the book.
        tmp_fd, backup_path = tempfile.mkstemp(suffix=".epub")
        os.close(tmp_fd)
        shutil.copy2(epub_path, backup_path)

        original_names = set(names)
        try:
            with zipfile.ZipFile(epub_path, "w", zipfile.ZIP_DEFLATED) as zout:
                # Original members first, in their original order and with
                # their original ZipInfo (keeps e.g. "mimetype" first/stored).
                for name in names:
                    zout.writestr(infos[name], contents[name])
                # Then any members added above.
                for name, data in contents.items():
                    if name not in original_names:
                        zout.writestr(name, data)
        except Exception:
            # The target was opened with "w" and may be truncated: restore it.
            shutil.copy2(backup_path, epub_path)
            raise
        return True
    except Exception as exc:
        logger.warning("EPUB cover embed failed: %s", exc)
        return False
    finally:
        if backup_path is not None:
            try:
                os.unlink(backup_path)
            except OSError:
                logger.debug("Could not remove temporary backup %s", backup_path)
def delete_cover(book_id: int):
    """Remove the stored full-size and thumbnail covers for ``book_id``.

    Files that do not exist are ignored. Deleting with ``missing_ok=True``
    instead of checking ``exists()`` first avoids failing when another
    process or request removes the file between the check and the unlink.
    """
    for suffix in ("", "_thumb"):
        (COVERS_DIR / f"{book_id}{suffix}.jpg").unlink(missing_ok=True)
def get_cover_path(book_id: int, thumb: bool = False) -> Path | None:
    """Return the path of a stored cover file, or ``None`` if it is absent.

    With ``thumb`` truthy the ``<book_id>_thumb.jpg`` file is looked up,
    otherwise ``<book_id>.jpg``; both live under ``COVERS_DIR``.
    """
    filename = f"{book_id}_thumb.jpg" if thumb else f"{book_id}.jpg"
    candidate = COVERS_DIR / filename
    if candidate.exists():
        return candidate
    return None