Skip to content

Commit 3219c9a

Browse files
authored
Add checks for surrogate pairing for UTF-8 generation (#1500)
1 parent 74cd91f commit 3219c9a

File tree

2 files changed

+32
-12
lines changed

2 files changed

+32
-12
lines changed

release-notes/VERSION-2.x

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,8 @@ a pure JSON library.
2626
#1470: Add method `copyCurrentStructureExact()` to `JsonGenerator`
2727
(contributed by Lars H)
2828
#1477: Add `JsonGenerator.has(StreamWriteCapability)` convenience method
29+
#1500: Add checks for surrogate pairing for UTF-8 generation
30+
(fix by @cowtowncoder, w/ Claude code)
2931

3032
2.20.1 (30-Oct-2025)
3133

src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java

Lines changed: 30 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1528,9 +1528,12 @@ private final void _writeStringSegment2(final char[] cbuf, int offset, final int
15281528
final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);
15291529
if (combineSurrogates && offset < end) {
15301530
char highSurrogate = (char) ch;
1531-
char lowSurrogate = cbuf[offset++];
1532-
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
1533-
continue;
1531+
char lowSurrogate = cbuf[offset];
1532+
if (_isEndOfSurrogatePair(lowSurrogate)) {
1533+
offset++;
1534+
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
1535+
continue;
1536+
}
15341537
}
15351538
}
15361539
outputPtr = _outputMultiByteChar(ch, outputPtr);
@@ -1576,9 +1579,12 @@ private final void _writeStringSegment2(final String text, int offset, final int
15761579
final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);
15771580
if (combineSurrogates && offset < end) {
15781581
char highSurrogate = (char) ch;
1579-
char lowSurrogate = text.charAt(offset++);
1580-
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
1581-
continue;
1582+
char lowSurrogate = text.charAt(offset);
1583+
if (_isEndOfSurrogatePair(lowSurrogate)) {
1584+
offset++;
1585+
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
1586+
continue;
1587+
}
15821588
}
15831589
}
15841590
outputPtr = _outputMultiByteChar(ch, outputPtr);
@@ -1752,9 +1758,12 @@ private final void _writeCustomStringSegment2(final char[] cbuf, int offset, fin
17521758
final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);
17531759
if (combineSurrogates && offset < end) {
17541760
char highSurrogate = (char) ch;
1755-
char lowSurrogate = cbuf[offset++];
1756-
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
1757-
continue;
1761+
char lowSurrogate = cbuf[offset];
1762+
if (_isEndOfSurrogatePair(lowSurrogate)) {
1763+
offset++;
1764+
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
1765+
continue;
1766+
}
17581767
}
17591768
}
17601769
outputPtr = _outputMultiByteChar(ch, outputPtr);
@@ -1819,9 +1828,12 @@ private final void _writeCustomStringSegment2(final String text, int offset, fin
18191828
final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);
18201829
if (combineSurrogates && offset < end) {
18211830
char highSurrogate = (char) ch;
1822-
char lowSurrogate = text.charAt(offset++);
1823-
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
1824-
continue;
1831+
char lowSurrogate = text.charAt(offset);
1832+
if (_isEndOfSurrogatePair(lowSurrogate)) {
1833+
offset++;
1834+
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
1835+
continue;
1836+
}
18251837
}
18261838
}
18271839
outputPtr = _outputMultiByteChar(ch, outputPtr);
@@ -2291,5 +2303,11 @@ private static boolean _isStartOfSurrogatePair(final int ch) {
22912303
// In 0xD800 - 0xDBFF range?
22922304
return (ch & 0xFC00) == 0xD800;
22932305
}
2306+
2307+
// @since 2.21
2308+
private static boolean _isEndOfSurrogatePair(final int ch) {
2309+
// In 0xDC00 - 0xDFFF range?
2310+
return (ch & 0xFC00) == 0xDC00;
2311+
}
22942312
}
22952313

0 commit comments

Comments
 (0)