Skip to content

Commit b65ecf8

Browse files
committed
turn off regex \X preprocessing
1 parent ff27e51 commit b65ecf8

File tree

1 file changed

+30
-30
lines changed

1 file changed

+30
-30
lines changed

src/main/java/org/perlonjava/regex/RegexPreprocessor.java

Lines changed: 30 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -296,31 +296,30 @@ private static int handleCharacterClass(String s, boolean flag_xx, StringBuilder
296296
// System.out.println("\n\n");
297297
// }
298298

299-
private static String generateGraphemeClusterRegex() {
300-
return "(?x: # Free-spacing mode\n" +
301-
" # Basic grapheme cluster\n" +
302-
" \\P{M}\\p{M}*\n" +
303-
" |\n" +
304-
305-
" \\uD83D\\uDC4B\\uD83C\\uDFFB" + // Special case
306-
" |\n" +
307-
" \\uD83C \\uDDFA \\uD83C \\uDDF8" + // Special case
308-
" |\n" +
309-
310-
" # Regional indicators for flags\n" +
311-
" (?:[\uD83C][\uDDE6-\uDDFF]){2}\n" +
312-
" |\n" +
313-
" # Emoji with modifiers and ZWJ sequences\n" +
314-
" (?:[\uD83C-\uDBFF\uDC00-\uDFFF]|[\u2600-\u27BF])\n" +
315-
" (?:[\uD83C][\uDFFB-\uDFFF])?\n" +
316-
" (?:\u200D\n" +
317-
" (?:[\uD83C-\uDBFF\uDC00-\uDFFF]|[\u2600-\u27BF])\n" +
318-
" (?:[\uD83C][\uDFFB-\uDFFF])?\n" +
319-
" )*\n" +
320-
" (?:[\uFE00-\uFE0F])?\n" +
321-
")";
322-
}
323-
299+
// private static String generateGraphemeClusterRegex() {
300+
// return "(?x: # Free-spacing mode\n" +
301+
// " # Basic grapheme cluster\n" +
302+
// " \\P{M}\\p{M}*\n" +
303+
// " |\n" +
304+
//
305+
// " \\uD83D\\uDC4B\\uD83C\\uDFFB" + // Special case
306+
// " |\n" +
307+
// " \\uD83C \\uDDFA \\uD83C \\uDDF8" + // Special case
308+
// " |\n" +
309+
//
310+
// " # Regional indicators for flags\n" +
311+
// " (?:[\uD83C][\uDDE6-\uDDFF]){2}\n" +
312+
// " |\n" +
313+
// " # Emoji with modifiers and ZWJ sequences\n" +
314+
// " (?:[\uD83C-\uDBFF\uDC00-\uDFFF]|[\u2600-\u27BF])\n" +
315+
// " (?:[\uD83C][\uDFFB-\uDFFF])?\n" +
316+
// " (?:\u200D\n" +
317+
// " (?:[\uD83C-\uDBFF\uDC00-\uDFFF]|[\u2600-\u27BF])\n" +
318+
// " (?:[\uD83C][\uDFFB-\uDFFF])?\n" +
319+
// " )*\n" +
320+
// " (?:[\uFE00-\uFE0F])?\n" +
321+
// ")";
322+
// }
324323

325324
private static int handleEscapeSequences(String s, StringBuilder sb, int c, int offset) {
326325
sb.append(Character.toChars(c));
@@ -334,11 +333,12 @@ private static int handleEscapeSequences(String s, StringBuilder sb, int c, int
334333
// Note: \Q .. \E sequences are handled separately, in escapeQ()
335334

336335
char nextChar = s.charAt(offset);
337-
if (nextChar == 'X') {
338-
// Translate \X to a Java-compatible grapheme cluster pattern
339-
sb.setLength(sb.length() - 1); // Remove the backslash
340-
sb.append(generateGraphemeClusterRegex());
341-
} else if (nextChar == 'g' && offset + 1 < length && s.charAt(offset + 1) == '{') {
336+
// if (nextChar == 'X') {
337+
// // Translate \X to a Java-compatible grapheme cluster pattern
338+
// sb.setLength(sb.length() - 1); // Remove the backslash
339+
// sb.append(generateGraphemeClusterRegex());
340+
// } else
341+
if (nextChar == 'g' && offset + 1 < length && s.charAt(offset + 1) == '{') {
342342
// Handle \g{name} backreference
343343
offset += 2; // Skip past \g{
344344
int endBrace = s.indexOf('}', offset);

0 commit comments

Comments
 (0)