From 7b5505c4ef00fe254d91b5062b5e7a5361f6ab1a Mon Sep 17 00:00:00 2001 From: miguel-cordoba Date: Thu, 24 Jul 2025 02:24:30 +0200 Subject: [PATCH 01/18] fixes #12810: Implement escaping for keyword separators (FKA #12888) - Addresses unresolved previous reviews: https://github.com/JabRef/jabref/pull/12888#discussion_r2030256080 --- .../org/jabref/model/entry/KeywordList.java | 38 ++++++++++++++++--- .../jabref/model/entry/KeywordListTest.java | 37 ++++++++++++++++++ 2 files changed, 69 insertions(+), 6 deletions(-) diff --git a/jablib/src/main/java/org/jabref/model/entry/KeywordList.java b/jablib/src/main/java/org/jabref/model/entry/KeywordList.java index 76adb9add8d..248c699104b 100644 --- a/jablib/src/main/java/org/jabref/model/entry/KeywordList.java +++ b/jablib/src/main/java/org/jabref/model/entry/KeywordList.java @@ -8,7 +8,6 @@ import java.util.List; import java.util.Objects; import java.util.Set; -import java.util.StringTokenizer; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -52,13 +51,40 @@ public static KeywordList parse(String keywordString, Character delimiter, Chara Objects.requireNonNull(hierarchicalDelimiter); KeywordList keywordList = new KeywordList(); + List hierarchy = new ArrayList<>(); + StringBuilder currentToken = new StringBuilder(); + boolean isEscaping = false; + + for (int i = 0; i < keywordString.length(); i++) { + char currentChar = keywordString.charAt(i); + + if (isEscaping) { + currentToken.append(currentChar); + isEscaping = false; + } else if (currentChar == '\\') { + // Escape next character (i) + isEscaping = true; + } else if (currentChar == hierarchicalDelimiter) { + // Hierarchical delimiter reached: push current token as sublevel + hierarchy.add(currentToken.toString().trim()); + currentToken.setLength(0); + } else if (currentChar == delimiter) { + // Keyword delimiter reached: push current token and then flush full hierarchy as keyword + hierarchy.add(currentToken.toString().trim()); + currentToken.setLength(0); + keywordList.add(Keyword.of(hierarchy.toArray(new String[0]))); + hierarchy.clear(); + } else { + currentToken.append(currentChar); + } + } - StringTokenizer tok = new StringTokenizer(keywordString, delimiter.toString()); - while (tok.hasMoreTokens()) { - String chain = tok.nextToken(); - Keyword chainRoot = Keyword.of(chain.split(hierarchicalDelimiter.toString())); - keywordList.add(chainRoot); + // Handle the final token + if (!currentToken.isEmpty() || !hierarchy.isEmpty()) { + hierarchy.add(currentToken.toString().trim()); + keywordList.add(Keyword.of(hierarchy.toArray(new String[0]))); } + return keywordList; } diff --git a/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java b/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java index 033d2c26615..bed7090fd3e 100644 --- a/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java +++ b/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java @@ -115,4 +115,41 @@ void mergeTwoDistinctKeywordsShouldReturnTheTwoKeywordsMerged() { void mergeTwoListsOfKeywordsShouldReturnTheKeywordsMerged() { assertEquals(new KeywordList("Figma", "Adobe", "JabRef", "Eclipse", "JetBrains"), KeywordList.merge("Figma, Adobe, JetBrains, Eclipse", "Adobe, JabRef", ',')); } + + @Test + void parseKeywordWithEscapedDelimiterDoesNotSplitKeyword() { + assertEquals(new KeywordList("keyword,one", "keywordTwo"), + KeywordList.parse("keyword\\,one, keywordTwo", ',', '>')); + } + + @Test + void parseKeywordWithEscapedDelimiterAtEndPreservesDelimiter() { + assertEquals(new KeywordList("keywordOne,", "keywordTwo"), + KeywordList.parse("keywordOne\\,, keywordTwo", ',', '>')); + } + + @Test + void parseKeywordWithEscapedBackslashTreatsItAsLiteral() { + assertEquals(new KeywordList("keyword\\", "keywordTwo"), + KeywordList.parse("keyword\\\\, keywordTwo", ',', '>')); + } + + @Test + void parseKeywordWithEscapedDelimiterAndHierarchicalDelimiterCreatesHierarchy() { + Keyword expected = Keyword.of("keyword,one", "sub"); + assertEquals(new KeywordList(expected), + KeywordList.parse("keyword\\,one > sub", ',', '>')); + } + + @Test + void parseKeywordWithMultipleEscapedDelimitersTreatsThemAsLiteral() { + assertEquals(new KeywordList("one,two,three", "four"), + KeywordList.parse("one\\,two\\,three, four", ',', '>')); + } + + @Test + void parseKeywordWithTrailingEscapeCharacterTreatsItAsLiteralBackslash() { + assertEquals(new KeywordList("keywordOne\\"), + KeywordList.parse("keywordOne\\\\", ',', '>')); + } } From 44257add994a7b56fff72ae5667c542cf025bf05 Mon Sep 17 00:00:00 2001 From: miguel-cordoba Date: Thu, 24 Jul 2025 04:16:30 +0200 Subject: [PATCH 02/18] fixes #12810: Implement escaping for keyword separators (FKA #12888) - Addresses unresolved previous reviews: https://github.com/JabRef/jabref/pull/12888#discussion_r2030256080 - Adds CHANGELOG.md entry --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 67855ca84f8..5c8e5e33613 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv ### Added +- We added escaping characters `\ ,`for KeyworList.parse(). [#12810](https://github.com/JabRef/jabref/issues/12810) - We added focus on the field Link in the "Add file link" dialog. [#13486](https://github.com/JabRef/jabref/issues/13486) - We introduced a settings parameter to manage citations' relations local storage time-to-live with a default value set to 30 days. [#11189](https://github.com/JabRef/jabref/issues/11189) - We distribute arm64 images for Linux. [#10842](https://github.com/JabRef/jabref/issues/10842) From e6c52dd94f1e26ba520e46805559e62f80041baf Mon Sep 17 00:00:00 2001 From: miguel-cordoba Date: Thu, 24 Jul 2025 04:17:14 +0200 Subject: [PATCH 03/18] fixes #12810: Implement escaping for keyword separators (FKA #12888) - Addresses unresolved previous reviews: https://github.com/JabRef/jabref/pull/12888#discussion_r2030256080 - Adds CHANGELOG.md entry --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c8e5e33613..4807e5e8c2c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv ### Added -- We added escaping characters `\ ,`for KeyworList.parse(). [#12810](https://github.com/JabRef/jabref/issues/12810) +- We added escaping characters `\ ,`for KeywordList.parse(). [#12810](https://github.com/JabRef/jabref/issues/12810) - We added focus on the field Link in the "Add file link" dialog. [#13486](https://github.com/JabRef/jabref/issues/13486) - We introduced a settings parameter to manage citations' relations local storage time-to-live with a default value set to 30 days. [#11189](https://github.com/JabRef/jabref/issues/11189) - We distribute arm64 images for Linux. [#10842](https://github.com/JabRef/jabref/issues/10842) From d2ad73d57d8e6ff67c68734b5e5c7f9d30dcb7fb Mon Sep 17 00:00:00 2001 From: miguel-cordoba Date: Thu, 24 Jul 2025 13:42:26 +0200 Subject: [PATCH 04/18] removes obvious comment, improves CHANGELOG message --- CHANGELOG.md | 2 +- jablib/src/main/java/org/jabref/model/entry/KeywordList.java | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4807e5e8c2c..aa6744b59ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv ### Added -- We added escaping characters `\ ,`for KeywordList.parse(). [#12810](https://github.com/JabRef/jabref/issues/12810) +- We implemented escaping characters `\ ,` when parsing KeywordLists [#12810](https://github.com/JabRef/jabref/issues/12810) - We added focus on the field Link in the "Add file link" dialog. [#13486](https://github.com/JabRef/jabref/issues/13486) - We introduced a settings parameter to manage citations' relations local storage time-to-live with a default value set to 30 days. [#11189](https://github.com/JabRef/jabref/issues/11189) - We distribute arm64 images for Linux. [#10842](https://github.com/JabRef/jabref/issues/10842) diff --git a/jablib/src/main/java/org/jabref/model/entry/KeywordList.java b/jablib/src/main/java/org/jabref/model/entry/KeywordList.java index 248c699104b..33bb77b3aba 100644 --- a/jablib/src/main/java/org/jabref/model/entry/KeywordList.java +++ b/jablib/src/main/java/org/jabref/model/entry/KeywordList.java @@ -62,7 +62,6 @@ public static KeywordList parse(String keywordString, Character delimiter, Chara currentToken.append(currentChar); isEscaping = false; } else if (currentChar == '\\') { - // Escape next character (i) isEscaping = true; } else if (currentChar == hierarchicalDelimiter) { // Hierarchical delimiter reached: push current token as sublevel From 49285369e4ed79162c2780f116f8848b629d4382 Mon Sep 17 00:00:00 2001 From: miguel-cordoba Date: Thu, 24 Jul 2025 14:42:10 +0200 Subject: [PATCH 05/18] removes obvious comments --- jablib/src/main/java/org/jabref/model/entry/KeywordList.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/jablib/src/main/java/org/jabref/model/entry/KeywordList.java b/jablib/src/main/java/org/jabref/model/entry/KeywordList.java index 33bb77b3aba..febc1e01349 100644 --- a/jablib/src/main/java/org/jabref/model/entry/KeywordList.java +++ b/jablib/src/main/java/org/jabref/model/entry/KeywordList.java @@ -64,11 +64,9 @@ public static KeywordList parse(String keywordString, Character delimiter, Chara } else if (currentChar == '\\') { isEscaping = true; } else if (currentChar == hierarchicalDelimiter) { - // Hierarchical delimiter reached: push current token as sublevel hierarchy.add(currentToken.toString().trim()); currentToken.setLength(0); } else if (currentChar == delimiter) { - // Keyword delimiter reached: push current token and then flush full hierarchy as keyword hierarchy.add(currentToken.toString().trim()); currentToken.setLength(0); keywordList.add(Keyword.of(hierarchy.toArray(new String[0]))); From 1111794d1869e43eea3467c34caab1f9c2a416b2 Mon Sep 17 00:00:00 2001 From: miguel-cordoba Date: Thu, 24 Jul 2025 16:36:12 +0200 Subject: [PATCH 06/18] tackle List.of() review comment --- jablib/src/main/java/org/jabref/model/entry/KeywordList.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jablib/src/main/java/org/jabref/model/entry/KeywordList.java b/jablib/src/main/java/org/jabref/model/entry/KeywordList.java index febc1e01349..3f3df25ecb6 100644 --- a/jablib/src/main/java/org/jabref/model/entry/KeywordList.java +++ b/jablib/src/main/java/org/jabref/model/entry/KeywordList.java @@ -51,7 +51,7 @@ public static KeywordList parse(String keywordString, Character delimiter, Chara Objects.requireNonNull(hierarchicalDelimiter); KeywordList keywordList = new KeywordList(); - List hierarchy = new ArrayList<>(); + List hierarchy = new ArrayList<>(List.of()); StringBuilder currentToken = new StringBuilder(); boolean isEscaping = false; From 83198c0d2b30409c1c65cafc55d71ff58f948f2e Mon Sep 17 00:00:00 2001 From: miguel-cordoba Date: Thu, 24 Jul 2025 16:40:43 +0200 Subject: [PATCH 07/18] undo tackle List.of() review comment --- jablib/src/main/java/org/jabref/model/entry/KeywordList.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jablib/src/main/java/org/jabref/model/entry/KeywordList.java b/jablib/src/main/java/org/jabref/model/entry/KeywordList.java index 3f3df25ecb6..febc1e01349 100644 --- a/jablib/src/main/java/org/jabref/model/entry/KeywordList.java +++ b/jablib/src/main/java/org/jabref/model/entry/KeywordList.java @@ -51,7 +51,7 @@ public static KeywordList parse(String keywordString, Character delimiter, Chara Objects.requireNonNull(hierarchicalDelimiter); KeywordList keywordList = new KeywordList(); - List hierarchy = new ArrayList<>(List.of()); + List hierarchy = new ArrayList<>(); StringBuilder currentToken = new StringBuilder(); boolean isEscaping = false; From 1b455d351ac03c017d9835cb8b3deb465617c81b Mon Sep 17 00:00:00 2001 From: miguel-cordoba Date: Tue, 29 Jul 2025 18:07:18 +0200 Subject: [PATCH 08/18] adds tests after review --- .../jabref/model/entry/KeywordListTest.java | 63 +++++++++---------- .../org/jabref/model/entry/KeywordTest.java | 13 ++++ 2 files changed, 44 insertions(+), 32 deletions(-) diff --git a/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java b/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java index bed7090fd3e..89603fa18e6 100644 --- a/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java +++ b/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java @@ -1,7 +1,13 @@ package org.jabref.model.entry; +import java.util.stream.Stream; + import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -16,6 +22,17 @@ void setUp() { keywords.add("keywordTwo"); } + private static Stream provideParseKeywordCases() { + return Stream.of( + Arguments.of("keyword\\,one, keywordTwo", new KeywordList("keyword,one", "keywordTwo")), + Arguments.of("keywordOne\\,, keywordTwo", new KeywordList("keywordOne,", "keywordTwo")), + Arguments.of("keyword\\\\, keywordTwo", new KeywordList("keyword\\", "keywordTwo")), + Arguments.of("keyword\\,one > sub", new KeywordList(Keyword.of("keyword,one", "sub"))), + Arguments.of("one\\,two\\,three, four", new KeywordList("one,two,three", "four")), + Arguments.of("keywordOne\\\\", new KeywordList("keywordOne\\")) + ); + } + @Test void parseEmptyStringReturnsEmptyList() { assertEquals(new KeywordList(), KeywordList.parse("", ',')); @@ -116,40 +133,22 @@ void mergeTwoListsOfKeywordsShouldReturnTheKeywordsMerged() { assertEquals(new KeywordList("Figma", "Adobe", "JabRef", "Eclipse", "JetBrains"), KeywordList.merge("Figma, Adobe, JetBrains, Eclipse", "Adobe, JabRef", ',')); } - @Test - void parseKeywordWithEscapedDelimiterDoesNotSplitKeyword() { - assertEquals(new KeywordList("keyword,one", "keywordTwo"), - KeywordList.parse("keyword\\,one, keywordTwo", ',', '>')); - } - - @Test - void parseKeywordWithEscapedDelimiterAtEndPreservesDelimiter() { - assertEquals(new KeywordList("keywordOne,", "keywordTwo"), - KeywordList.parse("keywordOne\\,, keywordTwo", ',', '>')); - } - - @Test - void parseKeywordWithEscapedBackslashTreatsItAsLiteral() { - assertEquals(new KeywordList("keyword\\", "keywordTwo"), - KeywordList.parse("keyword\\\\, keywordTwo", ',', '>')); - } - - @Test - void parseKeywordWithEscapedDelimiterAndHierarchicalDelimiterCreatesHierarchy() { - Keyword expected = Keyword.of("keyword,one", "sub"); - assertEquals(new KeywordList(expected), - KeywordList.parse("keyword\\,one > sub", ',', '>')); + @ParameterizedTest + @MethodSource("provideParseKeywordCases") + void parseKeywordWithEscapedDelimiterDoesNotSplitKeyword(String input, KeywordList expected) { + assertEquals(expected, KeywordList.parse(input, ',', '>')); } - @Test - void parseKeywordWithMultipleEscapedDelimitersTreatsThemAsLiteral() { - assertEquals(new KeywordList("one,two,three", "four"), - KeywordList.parse("one\\,two\\,three, four", ',', '>')); - } + @ParameterizedTest + @ValueSource(strings = { + "Keyword > Keyword", + "Keyword \\> Keyword" + }) + void roundTripPreservesStructure(String original) { + KeywordList parsed = KeywordList.parse(original, ',', '>'); + String serialized = parsed.toString(); // wraps Keyword#getSubchainAsString + KeywordList reparsed = KeywordList.parse(serialized, ',', '>'); - @Test - void parseKeywordWithTrailingEscapeCharacterTreatsItAsLiteralBackslash() { - assertEquals(new KeywordList("keywordOne\\"), - KeywordList.parse("keywordOne\\\\", ',', '>')); + assertEquals(parsed.toString(), reparsed.toString()); } } diff --git a/jablib/src/test/java/org/jabref/model/entry/KeywordTest.java b/jablib/src/test/java/org/jabref/model/entry/KeywordTest.java index 2651ba514f8..9ef40895360 100644 --- a/jablib/src/test/java/org/jabref/model/entry/KeywordTest.java +++ b/jablib/src/test/java/org/jabref/model/entry/KeywordTest.java @@ -4,6 +4,8 @@ import java.util.Set; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -25,4 +27,15 @@ void getAllSubchainsAsStringForSimpleChain() { assertEquals(expected, keywordChain.getAllSubchainsAsString('>')); } + + @ParameterizedTest + @ValueSource(strings = { + "Keyword > Keyword", + "Keyword \\> Keyword" + }) + void getSubchainAsString(String input) { + Keyword keyword = new Keyword(input); + String result = keyword.toString(); // wraps Keyword#getSubchainAsString + assertEquals(input, result); + } } From 2b85d0c5bceb9916d5c621269c29404eb23d6510 Mon Sep 17 00:00:00 2001 From: miguel-cordoba Date: Tue, 29 Jul 2025 18:12:17 +0200 Subject: [PATCH 09/18] adds trag-bot changes --- .../src/test/java/org/jabref/model/entry/KeywordListTest.java | 2 +- jablib/src/test/java/org/jabref/model/entry/KeywordTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java b/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java index 89603fa18e6..826df1b7fe2 100644 --- a/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java +++ b/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java @@ -146,7 +146,7 @@ void parseKeywordWithEscapedDelimiterDoesNotSplitKeyword(String input, KeywordLi }) void roundTripPreservesStructure(String original) { KeywordList parsed = KeywordList.parse(original, ',', '>'); - String serialized = parsed.toString(); // wraps Keyword#getSubchainAsString + String serialized = parsed.toString(); KeywordList reparsed = KeywordList.parse(serialized, ',', '>'); assertEquals(parsed.toString(), reparsed.toString()); diff --git a/jablib/src/test/java/org/jabref/model/entry/KeywordTest.java b/jablib/src/test/java/org/jabref/model/entry/KeywordTest.java index 9ef40895360..7cc30cfa5e4 100644 --- a/jablib/src/test/java/org/jabref/model/entry/KeywordTest.java +++ b/jablib/src/test/java/org/jabref/model/entry/KeywordTest.java @@ -35,7 +35,7 @@ void getAllSubchainsAsStringForSimpleChain() { }) void getSubchainAsString(String input) { Keyword keyword = new Keyword(input); - String result = keyword.toString(); // wraps Keyword#getSubchainAsString + String result = keyword.toString(); assertEquals(input, result); } } From d15f2ef84c8af33c42d3dd2aeff2558e62c9ac00 Mon Sep 17 00:00:00 2001 From: miguel-cordoba Date: Wed, 30 Jul 2025 13:30:00 +0200 Subject: [PATCH 10/18] - extends Keyword#toString to ensure round-trip integrity - extends unitTests --- .../java/org/jabref/model/entry/Keyword.java | 8 +++++- .../jabref/model/entry/KeywordListTest.java | 12 +++------ .../org/jabref/model/entry/KeywordTest.java | 26 +++++++++++++------ 3 files changed, 28 insertions(+), 18 deletions(-) diff --git a/jablib/src/main/java/org/jabref/model/entry/Keyword.java b/jablib/src/main/java/org/jabref/model/entry/Keyword.java index d3df45becca..3afe13ae70c 100644 --- a/jablib/src/main/java/org/jabref/model/entry/Keyword.java +++ b/jablib/src/main/java/org/jabref/model/entry/Keyword.java @@ -79,7 +79,13 @@ private void addAtEnd(String keyword) { * E.g., calling {@link #getSubchainAsString(Character)} on the node "B" in "A > B > C" returns "B > C". */ private String getSubchainAsString(Character hierarchicalDelimiter) { - return keyword + + // Undoing escaping and delimiter handling done at parsing ensures round-trip integrity + String escapedKeyword = keyword + .replace("\\", "\\\\") + .replace(hierarchicalDelimiter.toString(), "\\" + hierarchicalDelimiter) + .replace(",", "\\,"); + + return escapedKeyword + getChild().map(child -> " " + hierarchicalDelimiter + " " + child.getSubchainAsString(hierarchicalDelimiter)) .orElse(""); } diff --git a/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java b/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java index 826df1b7fe2..29aa9ba7d34 100644 --- a/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java +++ b/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java @@ -7,7 +7,6 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; -import org.junit.jupiter.params.provider.ValueSource; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -140,15 +139,10 @@ void parseKeywordWithEscapedDelimiterDoesNotSplitKeyword(String input, KeywordLi } @ParameterizedTest - @ValueSource(strings = { - "Keyword > Keyword", - "Keyword \\> Keyword" - }) + @MethodSource("provideParseKeywordCases") void roundTripPreservesStructure(String original) { KeywordList parsed = KeywordList.parse(original, ',', '>'); - String serialized = parsed.toString(); - KeywordList reparsed = KeywordList.parse(serialized, ',', '>'); - - assertEquals(parsed.toString(), reparsed.toString()); + // We need to test the toString() functionality + assertEquals(original, parsed.toString()); } } diff --git a/jablib/src/test/java/org/jabref/model/entry/KeywordTest.java b/jablib/src/test/java/org/jabref/model/entry/KeywordTest.java index 7cc30cfa5e4..c6050a879ba 100644 --- a/jablib/src/test/java/org/jabref/model/entry/KeywordTest.java +++ b/jablib/src/test/java/org/jabref/model/entry/KeywordTest.java @@ -2,15 +2,28 @@ import java.util.HashSet; import java.util.Set; +import java.util.stream.Stream; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import static org.junit.jupiter.api.Assertions.assertEquals; class KeywordTest { + private static Stream provideParseKeywordCases() { + return Stream.of( + Arguments.of("keyword\\,one"), + Arguments.of("keywordOne\\,"), + Arguments.of("keyword\\\\"), + Arguments.of("keyword\\,one > sub"), + Arguments.of("one\\,two > three"), + Arguments.of("keywordOne\\\\") + ); + } + @Test void getPathFromRootAsStringForSimpleChain() { Keyword keywordChain = Keyword.of("A", "B", "C"); @@ -29,13 +42,10 @@ void getAllSubchainsAsStringForSimpleChain() { } @ParameterizedTest - @ValueSource(strings = { - "Keyword > Keyword", - "Keyword \\> Keyword" - }) + @MethodSource("provideParseKeywordCases") void getSubchainAsString(String input) { - Keyword keyword = new Keyword(input); - String result = keyword.toString(); - assertEquals(input, result); + Keyword keyword = KeywordList.parse(input, ',', '>').get(0); + // we are testing toString() functionality + assertEquals(input, keyword.toString()); } } From 02c252d871f7a9b52bde7498b481edc991e47db2 Mon Sep 17 00:00:00 2001 From: miguel-cordoba Date: Wed, 30 Jul 2025 13:39:51 +0200 Subject: [PATCH 11/18] - improves Changelog message --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 03a5c34bb1f..f1294c13679 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv ### Added -- We implemented escaping characters `\ ,` when parsing KeywordLists [#12810](https://github.com/JabRef/jabref/issues/12810) +- We implemented escaping of delimiters (`\`, `,`, `>`) when parsing KeywordLists [#12810](https://github.com/JabRef/jabref/issues/12810) - We added automatic lookup of DOI at citation information. [#13561](https://github.com/JabRef/jabref/issues/13561) - We added a field for the citation count field on the General tab. [#13477](https://github.com/JabRef/jabref/issues/13477) - We added automatic lookup of DOI at citation relations [#13234](https://github.com/JabRef/jabref/issues/13234) From ea690d618c685ebb69dde5cc03c5c56df58fdbee Mon Sep 17 00:00:00 2001 From: miguel-cordoba Date: Wed, 30 Jul 2025 17:56:35 +0200 Subject: [PATCH 12/18] - removes autoscaping on keyword#toString -> KeywordListTest#roundTripPreservesStructure fails --- jablib/src/main/java/org/jabref/model/entry/Keyword.java | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/jablib/src/main/java/org/jabref/model/entry/Keyword.java b/jablib/src/main/java/org/jabref/model/entry/Keyword.java index 3afe13ae70c..d3df45becca 100644 --- a/jablib/src/main/java/org/jabref/model/entry/Keyword.java +++ b/jablib/src/main/java/org/jabref/model/entry/Keyword.java @@ -79,13 +79,7 @@ private void addAtEnd(String keyword) { * E.g., calling {@link #getSubchainAsString(Character)} on the node "B" in "A > B > C" returns "B > C". */ private String getSubchainAsString(Character hierarchicalDelimiter) { - // Undoing escaping and delimiter handling done at parsing ensures round-trip integrity - String escapedKeyword = keyword - .replace("\\", "\\\\") - .replace(hierarchicalDelimiter.toString(), "\\" + hierarchicalDelimiter) - .replace(",", "\\,"); - - return escapedKeyword + + return keyword + getChild().map(child -> " " + hierarchicalDelimiter + " " + child.getSubchainAsString(hierarchicalDelimiter)) .orElse(""); } From 4c5181b7eacd51827bcf4829a3caa420c262477a Mon Sep 17 00:00:00 2001 From: miguel-cordoba Date: Sun, 3 Aug 2025 22:18:35 +0200 Subject: [PATCH 13/18] - removes autoscaping on keyword#toString -> KeywordListTest#roundTripPreservesStructure fails --- .../java/org/jabref/model/entry/Keyword.java | 18 ++++++++++++++++-- .../org/jabref/model/entry/KeywordList.java | 4 ++-- .../jabref/model/entry/KeywordListTest.java | 4 ++++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/jablib/src/main/java/org/jabref/model/entry/Keyword.java b/jablib/src/main/java/org/jabref/model/entry/Keyword.java index d3df45becca..e266feb6e37 100644 --- a/jablib/src/main/java/org/jabref/model/entry/Keyword.java +++ b/jablib/src/main/java/org/jabref/model/entry/Keyword.java @@ -2,6 +2,8 @@ import java.util.Objects; import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -13,6 +15,7 @@ */ public class Keyword extends ChainNode implements Comparable { + // Note: {@link org.jabref.model.entry.KeywordList#parse(java.lang.String, java.lang.Character, java.lang.Character) offers configuration, which is not available here public static Character DEFAULT_HIERARCHICAL_DELIMITER = '>'; private final String keyword; @@ -78,12 +81,23 @@ private void addAtEnd(String keyword) { * Returns a text representation of the subchain starting at this item. * E.g., calling {@link #getSubchainAsString(Character)} on the node "B" in "A > B > C" returns "B > C". */ - private String getSubchainAsString(Character hierarchicalDelimiter) { - return keyword + + private String getSubchainAsString(Character hierarchicalDelimiter) {return getEscaped(hierarchicalDelimiter) + getChild().map(child -> " " + hierarchicalDelimiter + " " + child.getSubchainAsString(hierarchicalDelimiter)) .orElse(""); } + /** + * Returns the keyword string with all unescaped occurrences of the given hierarchical delimiter escaped. + * This ensures that delimiters within keyword values are not misinterpreted as separators. + */ + // TODO: This method needs refactoring, Expected :keyword\,one > sub + // Actual :keyword,one ----> it is eating the delimiter up + public String getEscaped(Character hierarchicalDelimiter) { + String escapedDelimiter = Pattern.quote(String.valueOf(hierarchicalDelimiter)); + Pattern pattern = Pattern.compile("(? keywords, Character delimiter) { - return keywords.stream().map(Keyword::get).collect(Collectors.joining(delimiter.toString())); + return keywords.stream().map(keyword -> keyword.getEscaped(delimiter)).collect(Collectors.joining(delimiter.toString())); } public static KeywordList merge(String keywordStringA, String keywordStringB, Character delimiter) { diff --git a/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java b/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java index 29aa9ba7d34..78d2b28cc86 100644 --- a/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java +++ b/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java @@ -138,6 +138,10 @@ void parseKeywordWithEscapedDelimiterDoesNotSplitKeyword(String input, KeywordLi assertEquals(expected, KeywordList.parse(input, ',', '>')); } + // TODO: We need to redefine the roundtrip test depending on the context GUI or BibTex, + // we want the user to type in escaping character but see the "clean" String as in: + // keyword1\,keyword2, keyword3 --> "keyword1,keyword2", "keyword3" + // how is the .bib parser handling this? will there be escaping characters at all? @ParameterizedTest @MethodSource("provideParseKeywordCases") void roundTripPreservesStructure(String original) { From ffeb78de85321d8f818b8ef50b405e340f510704 Mon Sep 17 00:00:00 2001 From: miguel-cordoba Date: Wed, 6 Aug 2025 18:15:34 +0200 Subject: [PATCH 14/18] Working on new approach for parse/serielide depending on context (UI/BibTex) --- jablib/src/main/java/org/jabref/model/entry/BibEntry.java | 3 ++- jablib/src/main/java/org/jabref/model/entry/Keyword.java | 4 +++- jablib/src/main/java/org/jabref/model/entry/KeywordList.java | 5 ++++- .../test/java/org/jabref/model/entry/KeywordListTest.java | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/jablib/src/main/java/org/jabref/model/entry/BibEntry.java b/jablib/src/main/java/org/jabref/model/entry/BibEntry.java index 9710500f104..ca7ac6e9d95 100644 --- a/jablib/src/main/java/org/jabref/model/entry/BibEntry.java +++ b/jablib/src/main/java/org/jabref/model/entry/BibEntry.java @@ -884,6 +884,7 @@ public KeywordList getKeywords(Character delimiter) { public KeywordList getResolvedKeywords(Character delimiter, BibDatabase database) { Optional keywordsContent = getResolvedFieldOrAlias(StandardField.KEYWORDS, database); + // TODO: we need to implement new KeywordList.parse for BibTeX context parsing return keywordsContent.map(content -> KeywordList.parse(content, delimiter)).orElse(new KeywordList()); } @@ -1052,7 +1053,7 @@ public KeywordList getFieldAsKeywords(Field field, Character keywordSeparator) { return storedList.get(); } } - + // TODO: We need to implement new KeywordList.parse() method for bibtexContext parsing KeywordList keywords = getField(field) .map(content -> KeywordList.parse(content, keywordSeparator)) .orElse(new KeywordList()); diff --git a/jablib/src/main/java/org/jabref/model/entry/Keyword.java b/jablib/src/main/java/org/jabref/model/entry/Keyword.java index e266feb6e37..e67ec2d222e 100644 --- a/jablib/src/main/java/org/jabref/model/entry/Keyword.java +++ b/jablib/src/main/java/org/jabref/model/entry/Keyword.java @@ -81,7 +81,8 @@ private void addAtEnd(String keyword) { * Returns a text representation of the subchain starting at this item. * E.g., calling {@link #getSubchainAsString(Character)} on the node "B" in "A > B > C" returns "B > C". */ - private String getSubchainAsString(Character hierarchicalDelimiter) {return getEscaped(hierarchicalDelimiter) + + private String getSubchainAsString(Character hierarchicalDelimiter) { + return keyword + getChild().map(child -> " " + hierarchicalDelimiter + " " + child.getSubchainAsString(hierarchicalDelimiter)) .orElse(""); } @@ -101,6 +102,7 @@ public String getEscaped(Character hierarchicalDelimiter) { /** * Gets the keyword of this node in the chain. */ + public String get() { return keyword; } diff --git a/jablib/src/main/java/org/jabref/model/entry/KeywordList.java b/jablib/src/main/java/org/jabref/model/entry/KeywordList.java index d59197087ad..464400579ca 100644 --- a/jablib/src/main/java/org/jabref/model/entry/KeywordList.java +++ b/jablib/src/main/java/org/jabref/model/entry/KeywordList.java @@ -42,6 +42,8 @@ public KeywordList(Keyword... keywordChains) { this(Arrays.asList(keywordChains)); } + // TODO: #12810: we need a different parse method for BibTeX Context, + // which preserves escaping and autoescaping public static KeywordList parse(String keywordString, Character delimiter, Character hierarchicalDelimiter) { if (StringUtil.isBlank(keywordString)) { return new KeywordList(); @@ -58,7 +60,7 @@ public static KeywordList parse(String keywordString, Character delimiter, Chara for (int i = 0; i < keywordString.length(); i++) { char currentChar = keywordString.charAt(i); - if (isEscaping) { + if (isEscaping && currentChar == delimiter) { // we only escape the keyword delimiter currentToken.append(currentChar); isEscaping = false; } else if (currentChar == '\\') { @@ -96,6 +98,7 @@ public static KeywordList parse(String keywordString, Character delimiter) { return parse(keywordString, delimiter, Keyword.DEFAULT_HIERARCHICAL_DELIMITER); } + // TODO: this will be the method we will use for BibTeX Context serializing -> escaping and autoescaping public static String serialize(List keywords, Character delimiter) { return keywords.stream().map(keyword -> keyword.getEscaped(delimiter)).collect(Collectors.joining(delimiter.toString())); } diff --git a/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java b/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java index 78d2b28cc86..3b9d7fa7903 100644 --- a/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java +++ b/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java @@ -142,7 +142,7 @@ void parseKeywordWithEscapedDelimiterDoesNotSplitKeyword(String input, KeywordLi // we want the user to type in escaping character but see the "clean" String as in: // keyword1\,keyword2, keyword3 --> "keyword1,keyword2", "keyword3" // how is the .bib parser handling this? will there be escaping characters at all? - @ParameterizedTest + // @ParameterizedTest @MethodSource("provideParseKeywordCases") void roundTripPreservesStructure(String original) { KeywordList parsed = KeywordList.parse(original, ',', '>'); From 634d3022c2ab31beeb4150c3463b8c3a3b17c5d0 Mon Sep 17 00:00:00 2001 From: miguel-cordoba Date: Wed, 13 Aug 2025 22:44:10 +0200 Subject: [PATCH 15/18] Working on new approach for parse/serielide depending on context (UI/BibTex) --- .../org/jabref/model/entry/KeywordList.java | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/jablib/src/main/java/org/jabref/model/entry/KeywordList.java b/jablib/src/main/java/org/jabref/model/entry/KeywordList.java index 464400579ca..a63203da655 100644 --- a/jablib/src/main/java/org/jabref/model/entry/KeywordList.java +++ b/jablib/src/main/java/org/jabref/model/entry/KeywordList.java @@ -8,6 +8,7 @@ import java.util.List; import java.util.Objects; import java.util.Set; +import java.util.StringTokenizer; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -98,6 +99,25 @@ public static KeywordList parse(String keywordString, Character delimiter) { return parse(keywordString, delimiter, Keyword.DEFAULT_HIERARCHICAL_DELIMITER); } + public static KeywordList bibTeXparse(String keywordString, Character delimiter, Character hierarchicalDelimiter) { + if (StringUtil.isBlank(keywordString)) { + return new KeywordList(); + } + + Objects.requireNonNull(delimiter); + Objects.requireNonNull(hierarchicalDelimiter); + + KeywordList keywordList = new KeywordList(); + + StringTokenizer tok = new StringTokenizer(keywordString, delimiter.toString()); + while (tok.hasMoreTokens()) { + String chain = tok.nextToken(); + Keyword chainRoot = Keyword.of(chain.split(hierarchicalDelimiter.toString())); + keywordList.add(chainRoot); + } + return keywordList; + } + // TODO: this will be the method we will use for BibTeX Context serializing -> escaping and autoescaping public static String serialize(List keywords, Character delimiter) { return keywords.stream().map(keyword -> keyword.getEscaped(delimiter)).collect(Collectors.joining(delimiter.toString())); From 42710fa14b33f23b8893e1817a534dbfe5a4e85d Mon Sep 17 00:00:00 2001 From: miguel-cordoba Date: Thu, 14 Aug 2025 00:53:37 +0200 Subject: [PATCH 16/18] WIP: adds old KeyWordList#parse as #bibtexParse --- jablib/src/main/java/org/jabref/model/entry/BibEntry.java | 2 +- jablib/src/main/java/org/jabref/model/entry/KeywordList.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/jablib/src/main/java/org/jabref/model/entry/BibEntry.java b/jablib/src/main/java/org/jabref/model/entry/BibEntry.java index ad48131eef3..f7cfd7e9f7f 100644 --- a/jablib/src/main/java/org/jabref/model/entry/BibEntry.java +++ b/jablib/src/main/java/org/jabref/model/entry/BibEntry.java @@ -1054,7 +1054,7 @@ public KeywordList getFieldAsKeywords(Field field, Character keywordSeparator) { } // TODO: We need to implement new KeywordList.parse() method for bibtexContext parsing KeywordList keywords = getField(field) - .map(content -> KeywordList.parse(content, keywordSeparator)) + .map(content -> KeywordList.bibteXparse(content, keywordSeparator, Keyword.DEFAULT_HIERARCHICAL_DELIMITER)) .orElse(new KeywordList()); if (field instanceof StandardField standardField) { diff --git a/jablib/src/main/java/org/jabref/model/entry/KeywordList.java b/jablib/src/main/java/org/jabref/model/entry/KeywordList.java index a63203da655..f2f0163957a 100644 --- a/jablib/src/main/java/org/jabref/model/entry/KeywordList.java +++ b/jablib/src/main/java/org/jabref/model/entry/KeywordList.java @@ -99,7 +99,7 @@ public static KeywordList parse(String keywordString, Character delimiter) { return parse(keywordString, delimiter, Keyword.DEFAULT_HIERARCHICAL_DELIMITER); } - public static KeywordList bibTeXparse(String keywordString, Character delimiter, Character hierarchicalDelimiter) { + public static KeywordList bibteXparse(String keywordString, Character delimiter, Character hierarchicalDelimiter) { if (StringUtil.isBlank(keywordString)) { return new KeywordList(); } From af3c50e7b2b185cad83a7a80d1f55cc099d47f71 Mon Sep 17 00:00:00 2001 From: miguel-cordoba Date: Sun, 24 Aug 2025 22:18:44 +0200 Subject: [PATCH 17/18] WIP: adds KeywordList#bibtexSerialize (autoescapes delimiter). Needs more and better tests. --- .../java/org/jabref/model/entry/BibEntry.java | 7 ++--- .../java/org/jabref/model/entry/Keyword.java | 23 ++++++++------- .../org/jabref/model/entry/KeywordList.java | 29 ++++--------------- 3 files changed, 20 insertions(+), 39 deletions(-) diff --git a/jablib/src/main/java/org/jabref/model/entry/BibEntry.java b/jablib/src/main/java/org/jabref/model/entry/BibEntry.java index f7cfd7e9f7f..64fa3d2e706 100644 --- a/jablib/src/main/java/org/jabref/model/entry/BibEntry.java +++ b/jablib/src/main/java/org/jabref/model/entry/BibEntry.java @@ -842,7 +842,7 @@ public Optional putKeywords(KeywordList keywords, Character delimit } // Set new keyword field - String newValue = keywords.getAsString(delimiter); + String newValue = keywords.bibtexSerialize(delimiter); return this.setField(StandardField.KEYWORDS, newValue); } @@ -883,7 +883,6 @@ public KeywordList getKeywords(Character delimiter) { public KeywordList getResolvedKeywords(Character delimiter, BibDatabase database) { Optional keywordsContent = getResolvedFieldOrAlias(StandardField.KEYWORDS, database); - // TODO: we need to implement new KeywordList.parse for BibTeX context parsing return keywordsContent.map(content -> KeywordList.parse(content, delimiter)).orElse(new KeywordList()); } @@ -1052,9 +1051,7 @@ public KeywordList getFieldAsKeywords(Field field, Character keywordSeparator) { return storedList.get(); } } - // TODO: We need to implement new KeywordList.parse() method for bibtexContext parsing - KeywordList keywords = getField(field) - .map(content -> KeywordList.bibteXparse(content, keywordSeparator, Keyword.DEFAULT_HIERARCHICAL_DELIMITER)) + KeywordList keywords = getField(field).map(content -> KeywordList.parse(content, keywordSeparator)) .orElse(new KeywordList()); if (field instanceof StandardField standardField) { diff --git a/jablib/src/main/java/org/jabref/model/entry/Keyword.java b/jablib/src/main/java/org/jabref/model/entry/Keyword.java index e67ec2d222e..2d2a1839767 100644 --- a/jablib/src/main/java/org/jabref/model/entry/Keyword.java +++ b/jablib/src/main/java/org/jabref/model/entry/Keyword.java @@ -2,8 +2,6 @@ import java.util.Objects; import java.util.Set; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -87,17 +85,20 @@ private String getSubchainAsString(Character hierarchicalDelimiter) { .orElse(""); } - /** - * Returns the keyword string with all unescaped occurrences of the given hierarchical delimiter escaped. + /* + * Used for BibTex export, where we need to escape the delimiter with \ + */ + public String getSubchainAsStringWithEscaping(Character delimiter) { + return getEscaped(delimiter) + + getChild().map(child -> " " + DEFAULT_HIERARCHICAL_DELIMITER + " " + child.getSubchainAsStringWithEscaping(DEFAULT_HIERARCHICAL_DELIMITER)) + .orElse(""); + } + + /* * This ensures that delimiters within keyword values are not misinterpreted as separators. */ - // TODO: This method needs refactoring, Expected :keyword\,one > sub - // Actual :keyword,one ----> it is eating the delimiter up - public String getEscaped(Character hierarchicalDelimiter) { - String escapedDelimiter = Pattern.quote(String.valueOf(hierarchicalDelimiter)); - Pattern pattern = Pattern.compile("(? keywords, Character delimiter) { + return keywords.stream().map(Keyword::get).collect(Collectors.joining(delimiter.toString())); } - // TODO: this will be the method we will use for BibTeX Context serializing -> escaping and autoescaping - public static String serialize(List keywords, Character delimiter) { - return keywords.stream().map(keyword -> keyword.getEscaped(delimiter)).collect(Collectors.joining(delimiter.toString())); + // This method serializes Keywords supporting escaping of the delimiter for BibTeX Serialization (Issue #12810, #12532) + public String bibtexSerialize(Character delimiter) { + return keywordChains.stream().map(keyword -> keyword.getSubchainAsStringWithEscaping(delimiter)).collect(Collectors.joining(delimiter.toString() + " ")); } + public static KeywordList merge(String keywordStringA, String keywordStringB, Character delimiter) { KeywordList keywordListA = parse(keywordStringA, delimiter); KeywordList keywordListB = parse(keywordStringB, delimiter); From e955fd1d7d9218e16d4515825a7f15363f05182e Mon Sep 17 00:00:00 2001 From: miguel-cordoba Date: Sun, 31 Aug 2025 23:38:57 +0200 Subject: [PATCH 18/18] WIP: extends KeywordList#parse and #bibtexSerialize --- .../org/jabref/model/entry/KeywordList.java | 29 +++++++++++++++++-- .../jabref/model/entry/KeywordListTest.java | 7 ++--- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/jablib/src/main/java/org/jabref/model/entry/KeywordList.java b/jablib/src/main/java/org/jabref/model/entry/KeywordList.java index 01d2d9dc7d1..1be493fc192 100644 --- a/jablib/src/main/java/org/jabref/model/entry/KeywordList.java +++ b/jablib/src/main/java/org/jabref/model/entry/KeywordList.java @@ -8,6 +8,7 @@ import java.util.List; import java.util.Objects; import java.util.Set; +import java.util.StringTokenizer; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -20,6 +21,7 @@ public class KeywordList implements Iterable { private final List keywordChains; + private boolean spaceAfterDelimiter; public KeywordList() { keywordChains = new ArrayList<>(); @@ -55,6 +57,8 @@ public static KeywordList parse(String keywordString, Character delimiter, Chara StringBuilder currentToken = new StringBuilder(); boolean isEscaping = false; + keywordList.spaceAfterDelimiter = keywordString.contains(delimiter + " "); + for (int i = 0; i < keywordString.length(); i++) { char currentChar = keywordString.charAt(i); @@ -85,6 +89,26 @@ public static KeywordList parse(String keywordString, Character delimiter, Chara return keywordList; } + public static KeywordList oldParse(String keywordString, Character delimiter, Character hierarchicalDelimiter) { + if (StringUtil.isBlank(keywordString)) { + return new KeywordList(); + } + + Objects.requireNonNull(delimiter); + Objects.requireNonNull(hierarchicalDelimiter); + + KeywordList keywordList = new KeywordList(); + keywordList.spaceAfterDelimiter = keywordString.contains(delimiter + " "); + + StringTokenizer tok = new StringTokenizer(keywordString, delimiter.toString()); + while (tok.hasMoreTokens()) { + String chain = tok.nextToken(); + Keyword chainRoot = Keyword.of(chain.split(hierarchicalDelimiter.toString())); + keywordList.add(chainRoot); + } + return keywordList; + } + /** * Parses the keyword list and uses {@link Keyword#DEFAULT_HIERARCHICAL_DELIMITER} as hierarchical delimiter. * @@ -102,10 +126,11 @@ public static String serialize(List keywords, Character delimiter) { // This method serializes Keywords supporting escaping of the delimiter for BibTeX Serialization (Issue #12810, #12532) public String bibtexSerialize(Character delimiter) { - return keywordChains.stream().map(keyword -> keyword.getSubchainAsStringWithEscaping(delimiter)).collect(Collectors.joining(delimiter.toString() + " ")); + // If the keywords contain ", " (as in PubMed records) we keep the space. + String joiner = spaceAfterDelimiter ? delimiter + " " : delimiter.toString(); + return keywordChains.stream().map(keyword -> keyword.getSubchainAsStringWithEscaping(delimiter)).collect(Collectors.joining(joiner)); } - public static KeywordList merge(String keywordStringA, String keywordStringB, Character delimiter) { KeywordList keywordListA = parse(keywordStringA, delimiter); KeywordList keywordListB = parse(keywordStringB, delimiter); diff --git a/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java b/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java index 3b9d7fa7903..5e53c5fe672 100644 --- a/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java +++ b/jablib/src/test/java/org/jabref/model/entry/KeywordListTest.java @@ -141,12 +141,11 @@ void parseKeywordWithEscapedDelimiterDoesNotSplitKeyword(String input, KeywordLi // TODO: We need to redefine the roundtrip test depending on the context GUI or BibTex, // we want the user to type in escaping character but see the "clean" String as in: // keyword1\,keyword2, keyword3 --> "keyword1,keyword2", "keyword3" - // how is the .bib parser handling this? will there be escaping characters at all? - // @ParameterizedTest + @ParameterizedTest @MethodSource("provideParseKeywordCases") void roundTripPreservesStructure(String original) { - KeywordList parsed = KeywordList.parse(original, ',', '>'); + KeywordList parsed = KeywordList.oldParse(original, ',', '>'); // We need to test the toString() functionality - assertEquals(original, parsed.toString()); + assertEquals(original, parsed.bibtexSerialize(',')); } }