Skip to content

Commit 4bc5c46

Browse files
Akronmargaretha
authored and committed
Fix casefolding for case-insensitive queries
Change-Id: I23db7454c7ab0a54fee4c9c450665b294ccc1324
1 parent 2da6b4c commit 4bc5c46

File tree

3 files changed

+47
-2
lines changed

3 files changed

+47
-2
lines changed

Changes

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
0.61.3 2023-07-11
1+
0.61.3 2023-07-17
22
- Add totalResources to results (diewald)
3+
- [bugfix] Fix casefolding for case-insensitive queries
4+
(diewald).
35

46
0.61.2 2023-04-05
57
- [bugfix] Fix pagebreak retrieval (margaretha, diewald)

src/main/java/de/ids_mannheim/korap/KrillQuery.java

+16-1
Original file line numberDiff line numberDiff line change
@@ -1309,7 +1309,22 @@ else if (layer.equals("i")) {
13091309
value.setLength(offset);
13101310

13111311
// Add key to value
1312-
value.append(isCaseInsensitive ? key.toLowerCase() : key);
1312+
1313+
if (isCaseInsensitive) {
1314+
1315+
// This supports both legacy search and locale-dependent case-folding.
1316+
// It mimics the Perl fc behaviour probably better than icu4j.
1317+
if (key.toLowerCase().equals(key.toUpperCase().toLowerCase())) {
1318+
value.append(key.toLowerCase());
1319+
} else {
1320+
value.append(key.toLowerCase());
1321+
values.push(value.toString());
1322+
value.setLength(offset);
1323+
value.append(key.toUpperCase().toLowerCase());
1324+
};
1325+
} else {
1326+
value.append(key);
1327+
};
13131328

13141329
// TODO:
13151330
// This should iterate over all values as well

src/test/java/de/ids_mannheim/korap/search/TestKrill.java

+28
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,34 @@ public void searchIndex () throws IOException {
154154
assertTrue(res.at("/matches/0/snippet").isMissingNode());
155155
assertEquals("dem", res.at("/matches/0/tokens/left/0").asText());
156156
assertEquals("Buchstaben", res.at("/matches/0/tokens/match/0").asText());
157+
158+
// The test-data is old and therefore precedes the correct casefolding.
159+
// However, we can check the correct behaviour nonetheless.
160+
String json = "{\"query\":{\"@type\":\"koral:token\",\"wrap\":{\"@type\":\"koral:term\",\"flags\": [\"flags:caseInsensitive\"],\"key\": \"Grösstenteils\",\"layer\":\"orth\",\"match\": \"match:eq\"}}}";
161+
162+
ObjectMapper mapper = new ObjectMapper();
163+
164+
ks = new Krill(json);
165+
kr = ks.apply(ki);
166+
assertEquals(kr.getTotalResults(), 0);
167+
assertEquals(kr.getItemsPerPage(), 25);
168+
assertEquals(kr.getMatches().size(), 0);
169+
170+
res = mapper.readTree(kr.toJsonString());
171+
assertEquals(res.at("/meta/serialQuery").asText(),"tokens:i:grösstenteils");
172+
173+
json = "{\"query\":{\"@type\":\"koral:token\",\"wrap\":{\"@type\":\"koral:term\",\"flags\": [\"flags:caseInsensitive\"],\"key\": \"Größtenteils\",\"layer\":\"orth\",\"match\": \"match:eq\"}}}";
174+
175+
ks = new Krill(json);
176+
kr = ks.apply(ki);
177+
178+
assertEquals(kr.getTotalResults(), 2);
179+
assertEquals(kr.getItemsPerPage(), 25);
180+
assertEquals(kr.getMatches().size(), 2);
181+
182+
res = mapper.readTree(kr.toJsonString());
183+
assertEquals(res.at("/meta/serialQuery").asText(),
184+
"spanOr([tokens:i:grösstenteils, tokens:i:größtenteils])");
157185
};
158186

159187

0 commit comments

Comments
 (0)