Skip to content

Commit 0bdfe9f

Browse files
authored
Merge pull request #33 from klaari/master
Add ImageMetadataBuffer helper for partial‐file XMP buffering
2 parents e33f5fb + c6a3597 commit 0bdfe9f

File tree

3 files changed

+506
-0
lines changed

3 files changed

+506
-0
lines changed

src/Buffer/ImageMetadataBuffer.php

Lines changed: 336 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,336 @@
1+
<?php
2+
3+
namespace CSD\Image\Buffer;
4+
5+
/**
 * Reads only the leading part of an image (PNG, JPEG or WebP) that is needed
 * to reach its XMP metadata, so that callers do not have to download or load
 * the whole file.
 *
 * All reads go through readExact(), which keeps reading until the requested
 * number of bytes has arrived. A single fread() on a non-plain-file stream
 * (http://, user wrappers, ...) may legitimately return fewer bytes than
 * requested, which must not be mistaken for a truncated file.
 */
class ImageMetadataBuffer
{
    const JPEG_SOI = "\xFF\xD8"; // Start of Image (SOI) marker
    const JPEG_SOS = "\xFF\xDA"; // Start of Scan (SOS) marker
    const JPEG_EOI = "\xFF\xD9"; // End of Image (EOI) marker
    const JPEG_APP1_MARKER = "\xFF\xE1"; // APP1 segment marker (where EXIF/XMP live)
    const JPEG_APP1_XMP_HEADER = "http://ns.adobe.com/xap/1.0/\x00";

    const RIFF_SIGNATURE = 'RIFF'; // first four bytes of any RIFF container
    const WEBP_SIGNATURE = 'WEBP'; // four bytes after RIFF, in a WebP file
    const PNG_SIGNATURE = "\x89PNG\x0D\x0A\x1A\x0A"; // first eight bytes of any PNG file
    const PNG_ITXT_XMP_KEYWORD = "XML:com.adobe.xmp\x00";

    const READ_CHUNK_SIZE = 8192; // upper bound for a single fread() call

    /**
     * Given a URL or local path, buffer just enough bytes (PNG chunks,
     * JPEG segments, or WebP sub-chunks) to include any XMP metadata.
     *
     * The stream is opened once; the 12 bytes used for format sniffing are
     * handed on to the format-specific reader instead of re-opening the
     * source (re-opening a URL would trigger a second request).
     *
     * If fopen() fails, file_get_contents() is tried as a fallback and the
     * entire content is returned. Unrecognised formats are returned whole.
     *
     * @param string $url URL or local path of the image
     * @return string|null The buffered bytes, or null when nothing usable could be read
     */
    public static function bufferUpThroughXmp($url)
    {
        $stream = @fopen($url, 'rb');
        if (! $stream) {
            $all = @file_get_contents($url);
            if ($all === false || $all === '') {
                return null;
            }
            return $all;
        }

        // Peek the first 12 bytes to sniff the format.
        $pending = '';
        $peek = self::readExact($stream, 12, $pending);
        if (strlen($peek) < 2) {
            fclose($stream);
            return null;
        }

        if (substr($peek, 0, 8) === self::PNG_SIGNATURE) {
            $buffer = self::bufferPngUpToXmp($stream, $peek);
        } elseif (substr($peek, 0, 2) === self::JPEG_SOI) {
            $buffer = self::bufferJpegUpToXmp($stream, $peek);
        } elseif (
            strlen($peek) >= 12
            && substr($peek, 0, 4) === self::RIFF_SIGNATURE
            && substr($peek, 8, 4) === self::WEBP_SIGNATURE
        ) {
            $buffer = self::bufferWebpUpToXmp($stream, $peek);
        } else {
            // Unknown format: return the entire content (peeked bytes + rest).
            $rest = stream_get_contents($stream);
            $buffer = ($rest === false) ? null : $peek . $rest;
        }

        fclose($stream);

        return $buffer;
    }

    /**
     * Read exactly $length bytes, first from $pending (bytes that were
     * already consumed from the stream, e.g. while sniffing) and then from
     * the stream itself, looping over short reads.
     *
     * Reading happens in pieces of at most READ_CHUNK_SIZE bytes, so a bogus
     * length field in a corrupt file cannot force one huge allocation.
     *
     * @param resource $stream  Open stream in binary mode
     * @param int      $length  Number of bytes wanted
     * @param string   $pending Already-consumed bytes; the used part is removed
     * @return string Up to $length bytes; shorter only if the stream ended or failed
     */
    private static function readExact($stream, $length, &$pending)
    {
        if ($length <= 0) {
            return '';
        }

        // substr() may return false on older PHP versions, hence the casts.
        $data = (string) substr($pending, 0, $length);
        $pending = (string) substr($pending, strlen($data));

        $missing = $length - strlen($data);
        while ($missing > 0 && ! feof($stream)) {
            $piece = fread($stream, min($missing, self::READ_CHUNK_SIZE));
            if ($piece === false || $piece === '') {
                break;
            }
            $data .= $piece;
            $missing -= strlen($piece);
        }

        return $data;
    }

    /**
     * Buffer a PNG chunk-by-chunk from the signature through the IEND chunk.
     *
     * Every chunk up to and including IEND is kept, so an XMP iTXt chunk
     * (keyword PNG_ITXT_XMP_KEYWORD) is included wherever it sits; bytes
     * after IEND are not read. On a truncated or invalid chunk the bytes
     * collected so far are returned.
     *
     * @param resource $stream  Opened PNG stream in binary mode
     * @param string   $pending Bytes already read from the start of the stream
     * @return string|null Signature plus chunks through IEND, or null if the signature is missing
     */
    private static function bufferPngUpToXmp($stream, $pending = '')
    {
        $signature = self::readExact($stream, 8, $pending);
        if ($signature !== self::PNG_SIGNATURE) {
            return null;
        }

        $buffer = $signature;

        while (true) {
            // Chunk header: 4-byte big-endian length + 4-byte ASCII type.
            $header = self::readExact($stream, 8, $pending);
            if (strlen($header) < 8) {
                // EOF or truncated header; return what we have so far.
                break;
            }
            $buffer .= $header;

            $fields = unpack('Nlength/a4type', $header);
            if ($fields === false) {
                break;
            }
            $length = (int) $fields['length'];
            $type = $fields['type'];

            // IEND carries no payload: append its CRC and stop.
            if ($type === 'IEND') {
                $buffer .= self::readExact($stream, 4, $pending);
                break;
            }

            // PNG limits chunk lengths to 2^31-1; anything else is corrupt
            // (and shows up as a negative number on 32-bit builds).
            if ($length < 0 || $length > 0x7FFFFFFF) {
                break;
            }

            // Payload followed by the 4-byte CRC.
            $toRead = $length + 4;
            $chunk = self::readExact($stream, $toRead, $pending);
            $buffer .= $chunk;
            if (strlen($chunk) < $toRead) {
                // Truncated payload; keep the partial bytes and bail.
                break;
            }
        }

        return $buffer;
    }

    /**
     * Buffer a JPEG segment-by-segment until both the APP1 XMP segment and
     * the first SOF segment have been read, then append an EOI marker.
     *
     * Reading also stops at SOS or EOI (no more header segments follow) and
     * on truncated input. The stop marker itself is kept in the buffer and
     * an EOI marker is always appended at the end, so the result never ends
     * in the middle of a marker.
     *
     * @param resource $stream  Opened JPEG stream in binary mode
     * @param string   $pending Bytes already read from the start of the stream
     * @return string|null Buffered header segments, or null if the SOI marker is missing
     */
    private static function bufferJpegUpToXmp($stream, $pending = '')
    {
        $soi = self::readExact($stream, 2, $pending);
        if ($soi !== self::JPEG_SOI) {
            return null;
        }

        $buffer = $soi;
        $foundXmp = false;
        $sawSof = false;

        while (true) {
            $marker = self::readExact($stream, 2, $pending);
            if (strlen($marker) < 2) {
                // EOF or truncated marker.
                break;
            }
            $buffer .= $marker;

            // SOS / EOI: no further header segments exist.
            if ($marker === self::JPEG_SOS || $marker === self::JPEG_EOI) {
                break;
            }

            $lengthBytes = self::readExact($stream, 2, $pending);
            if (strlen($lengthBytes) < 2) {
                break;
            }
            $buffer .= $lengthBytes;

            $fields = unpack('nsegmentLength', $lengthBytes);
            if ($fields === false) {
                break;
            }

            // The segment length includes its own two length bytes.
            $payloadLength = (int) $fields['segmentLength'] - 2;
            $payload = '';
            if ($payloadLength > 0) {
                $payload = self::readExact($stream, $payloadLength, $pending);
                $buffer .= $payload;
                if (strlen($payload) < $payloadLength) {
                    // Truncated segment; keep the partial bytes and bail.
                    break;
                }
            }

            if ($marker === self::JPEG_APP1_MARKER) {
                $xmpHeader = self::JPEG_APP1_XMP_HEADER;
                if (strncmp($payload, $xmpHeader, strlen($xmpHeader)) === 0) {
                    $foundXmp = true;
                }
            }

            if (self::isStartOfFrame($marker)) {
                $sawSof = true;
            }

            if ($foundXmp && $sawSof) {
                break;
            }
        }

        // Terminate the buffer with EOI so a consumer finds a closing marker.
        $buffer .= self::JPEG_EOI;

        return $buffer;
    }

    /**
     * Tell whether a 2-byte JPEG marker is a Start-Of-Frame marker
     * (0xC0-0xCF except 0xC4, 0xC8 and 0xCC).
     *
     * @param string $marker Two-byte marker
     * @return bool
     */
    private static function isStartOfFrame($marker)
    {
        return in_array(ord($marker[1]), [
            0xC0, 0xC1, 0xC2, 0xC3,
            0xC5, 0xC6, 0xC7,
            0xC9, 0xCA, 0xCB,
            0xCD, 0xCE, 0xCF,
        ], true);
    }

    /**
     * Buffer a WebP RIFF container sub-chunk by sub-chunk until the "XMP "
     * chunk has been read completely. If there is none, buffer until EOF.
     *
     * An "EXIF" chunk does not end the scan: the WebP container layout places
     * EXIF before XMP, so stopping there would cut the XMP chunk off.
     *
     * @param resource $stream  Opened WebP stream in binary mode
     * @param string   $pending Bytes already read from the start of the stream
     * @return string|null Buffered bytes, or null if the RIFF/WEBP header is missing
     */
    private static function bufferWebpUpToXmp($stream, $pending = '')
    {
        $riffHeader = self::readExact($stream, 12, $pending);
        if (strlen($riffHeader) < 12) {
            return null;
        }
        if (
            substr($riffHeader, 0, 4) !== self::RIFF_SIGNATURE
            || substr($riffHeader, 8, 4) !== self::WEBP_SIGNATURE
        ) {
            return null;
        }

        $buffer = $riffHeader;

        while (true) {
            // Sub-chunk header: 4-byte FourCC + 4-byte little-endian size.
            $header = self::readExact($stream, 8, $pending);
            if (strlen($header) < 8) {
                break;
            }
            $buffer .= $header;

            $type = substr($header, 0, 4);
            $fields = unpack('VchunkSize', substr($header, 4, 4));
            if ($fields === false) {
                break;
            }
            $chunkSize = (int) $fields['chunkSize'];

            if ($chunkSize > 0) {
                $data = self::readExact($stream, $chunkSize, $pending);
                $buffer .= $data;
                if (strlen($data) < $chunkSize) {
                    // Truncated chunk; keep the partial bytes and bail.
                    break;
                }

                // Odd-sized chunks are followed by one padding byte.
                if ($chunkSize % 2 !== 0) {
                    $buffer .= self::readExact($stream, 1, $pending);
                }
            }

            if ($type === 'XMP ') {
                break;
            }
        }

        return $buffer;
    }
}

0 commit comments

Comments
 (0)