From 91ba3d670a5fe9515dc610749f9a027dedb5357e Mon Sep 17 00:00:00 2001 From: Tim Allison Date: Tue, 28 Jan 2025 11:41:47 -0500 Subject: [PATCH] TIKA-4361 -- follow on fix (#2108) (cherry picked from commit 05db89db3d24c13ee8794999c18af3a0b9a094e5) --- .../parser/microsoft/rtf/TextExtractor.java | 69 +++++++++++-------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/rtf/TextExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/rtf/TextExtractor.java index 5d57ca3620..1fe173259f 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/rtf/TextExtractor.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/rtf/TextExtractor.java @@ -941,27 +941,32 @@ private void processControlWord(int param, PushbackInputStream in) } else { // In document if (equals("b")) { - //TIKA-4361 -- need to make sure we're not in an href? // b0 assert param == 0; - if (groupState.bold) { - pushText(); - if (groupState.italic) { - end("i"); - } - end("b"); - if (groupState.italic) { - start("i"); + //only modify styles if we're not in a hyperlink + if (fieldState == 0) { + if (groupState.bold) { + pushText(); + if (groupState.italic) { + end("i"); + } + end("b"); + if (groupState.italic) { + start("i"); + } + groupState.bold = false; } - groupState.bold = false; } } else if (equals("i")) { // i0 assert param == 0; - if (groupState.italic) { - pushText(); - end("i"); - groupState.italic = false; + //only modify styles if we're not in a hyperlink + if (fieldState == 0) { + if (groupState.italic) { + pushText(); + end("i"); + groupState.italic = false; + } } } else if (equals("f")) { // Change current font @@ -1174,23 +1179,27 @@ private void processControlWord() throws IOException, SAXException, TikaExceptio inHeader = false; } } else { - if (equals("b")) { - if (!groupState.bold) { - pushText(); - lazyStartParagraph(); - if (groupState.italic) { - // Make sure nesting is always - end("i"); + //only modify styles if we're not in a hyperlink + if (fieldState == 0) { + if (equals("b")) { + if (!groupState.bold) { + pushText(); + lazyStartParagraph(); + if (groupState.italic) { + // Make sure nesting is always + end("i"); + } + groupState.bold = true; + startStyles(groupState); + } + } else if (equals("i")) { + //START I + if (!groupState.italic) { + pushText(); + lazyStartParagraph(); + groupState.italic = true; + start("i"); } - groupState.bold = true; - startStyles(groupState); - } - } else if (equals("i")) { - if (!groupState.italic) { - pushText(); - lazyStartParagraph(); - groupState.italic = true; - start("i"); } } }