Skip to content

Commit e2cc49f

Browse files
committed
Remove maxTokenMatchSize variable from KrillIndex and getMatchInfo.
Allow requesting maxTokenMatchSize via Krill, with the value in KrillProperties as the upper limit. Change-Id: I82a1ad2c3a81abf69168d7cc9f9a6972fb9ba49e
1 parent 7695559 commit e2cc49f

File tree

4 files changed

+40
-66
lines changed

4 files changed

+40
-66
lines changed

src/main/java/de/ids_mannheim/korap/KrillIndex.java

+11-30
Original file line numberDiff line numberDiff line change
@@ -175,8 +175,6 @@ public final class KrillIndex implements IndexInfo {
175175
private HashMap termContexts;
176176
private ObjectMapper mapper = new ObjectMapper();
177177

178-
private int maxTokenMatchSize;
179-
180178
// private ByteBuffer bbTerm;
181179

182180
// Some initializations ...
@@ -193,7 +191,6 @@ public final class KrillIndex implements IndexInfo {
193191
String autoCommitStr = null;
194192
if (prop != null) {
195193
autoCommitStr = prop.getProperty("krill.index.commit.auto");
196-
this.maxTokenMatchSize = KrillProperties.maxTokenMatchSize;
197194
}
198195

199196
if (autoCommitStr != null) {
@@ -243,7 +240,6 @@ public KrillIndex (Path path) throws IOException {
243240
this.directory = new MMapDirectory(path);
244241
};
245242

246-
247243
/**
248244
* Get the version number of the index.
249245
*
@@ -435,14 +431,6 @@ public void setAutoCommit (int value) {
435431
this.autoCommit = value;
436432
};
437433

438-
public int getMaxTokenMatchSize () {
439-
return maxTokenMatchSize;
440-
}
441-
442-
public void setMaxTokenMatchSize (int maxMatchTokens) {
443-
this.maxTokenMatchSize = maxMatchTokens;
444-
}
445-
446434
/**
447435
* Update a document in the index as a {@link FieldDocument}
448436
* if it already exists (based on the textSigle), otherwise
@@ -984,20 +972,12 @@ public Match getMatchInfo (String idString, String field, boolean info,
984972
boolean includeSnippets, boolean includeTokens,
985973
boolean includeHighlights, boolean extendToSentence)
986974
throws QueryException {
987-
return getMatchInfo(idString, field, info, foundry, layer, includeSpans,
988-
includeSnippets, includeTokens, includeHighlights,
989-
extendToSentence, maxTokenMatchSize);
990-
};
991-
992-
public Match getMatchInfo (String idString, String field, boolean info,
993-
List<String> foundry, List<String> layer, boolean includeSpans,
994-
boolean includeSnippets, boolean includeTokens,
995-
boolean includeHighlights, boolean extendToSentence,
996-
int maxMatchTokens) throws QueryException {
975+
997976
if (DEBUG)
998977
log.trace("Get info on {}", idString);
999978

1000-
Match match = new Match(maxMatchTokens, idString, includeHighlights);
979+
int maxTokenMatchSize = KrillProperties.maxTokenMatchSize;
980+
Match match = new Match(maxTokenMatchSize, idString, includeHighlights);
1001981

1002982
if (this.getVersion() != null)
1003983
match.setVersion(this.getVersion());
@@ -1223,8 +1203,8 @@ else if (includeSpans) {
12231203
&& spanContext[0] < spanContext[1]) {
12241204

12251205
// Match needs to be cut!
1226-
if ((spanContext[1] - spanContext[0]) > maxMatchTokens) {
1227-
int contextLength = maxMatchTokens - match.getLength();
1206+
if ((spanContext[1] - spanContext[0]) > maxTokenMatchSize) {
1207+
int contextLength = maxTokenMatchSize - match.getLength();
12281208
int halfContext = contextLength / 2;
12291209

12301210
// This is the extended context calculated
@@ -1237,8 +1217,8 @@ else if (includeSpans) {
12371217
}
12381218
}
12391219

1240-
match.setStartPos(maxMatchTokens,spanContext[0]);
1241-
match.setEndPos(maxMatchTokens,spanContext[1]);
1220+
match.setStartPos(maxTokenMatchSize,spanContext[0]);
1221+
match.setEndPos(maxTokenMatchSize,spanContext[1]);
12421222
match.potentialStartPosChar = spanContext[2];
12431223
match.potentialEndPosChar = spanContext[3];
12441224
match.startMore = false;
@@ -1591,9 +1571,10 @@ public Result search (Krill ks) {
15911571
? lreader.document(localDocID, fieldsSet)
15921572
: lreader.document(localDocID);
15931573

1594-
int maxMatchSize = maxTokenMatchSize;
1595-
if (ks.getMaxTokenMatchSize() > 0) {
1596-
maxMatchSize = ks.getMaxTokenMatchSize();
1574+
int maxMatchSize = ks.getMaxTokenMatchSize();
1575+
if (maxMatchSize <= 0
1576+
|| maxMatchSize > KrillProperties.maxTokenMatchSize) {
1577+
maxMatchSize = KrillProperties.maxTokenMatchSize;
15971578
};
15981579

15991580
// Create new Match

src/test/java/de/ids_mannheim/korap/index/TestMaxMatchTokens.java

+21-35
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import java.io.IOException;
88
import java.util.ArrayList;
99

10+
import org.junit.AfterClass;
1011
import org.junit.Test;
1112

1213
import de.ids_mannheim.korap.Krill;
@@ -34,35 +35,22 @@ public TestMaxMatchTokens () throws IOException {
3435
.getResource("/queries/position/sentence-contain-token.json")
3536
.getFile());
3637
}
38+
39+
@AfterClass
40+
public static void resetMaxTokenMatchSize() {
41+
KrillProperties.maxTokenMatchSize = 50;
42+
}
3743

3844
@Test
3945
public void testLimitingMatchWithProperties () throws IOException {
40-
// from properties
41-
assertEquals(50, ki.getMaxTokenMatchSize());
42-
46+
// default properties file
4347
Krill ks = new Krill(json);
4448
Result kr = ks.apply(ki);
4549
Match km = kr.getMatch(0);
46-
assertTrue(km.getLength()<ki.getMaxTokenMatchSize());
50+
assertEquals(40, KrillProperties.maxTokenMatchSize);
51+
assertTrue(km.getLength() < 40);
4752
};
4853

49-
@Test
50-
public void testLimitingMatchInKrillIndex () throws IOException {
51-
// Limiting default match token size in KrillIndex
52-
ki.setMaxTokenMatchSize(2);
53-
54-
Krill ks = new Krill(json);
55-
Result kr = ks.apply(ki);
56-
assertEquals(78, kr.getTotalResults());
57-
58-
assertEquals(
59-
"... sechsthäufigste Buchstabe in deutschen Texten. [[Mit Ausnahme]<!>] von Fremdwörtern und Namen ist ...",
60-
kr.getMatch(0).getSnippetBrackets());
61-
assertEquals(
62-
"<span class=\"context-left\"><span class=\"more\"></span>sechsthäufigste Buchstabe in deutschen Texten. </span><span class=\"match\"><mark>Mit Ausnahme</mark><span class=\"cutted\"></span></span><span class=\"context-right\"> von Fremdwörtern und Namen ist<span class=\"more\"></span></span>",
63-
kr.getMatch(0).getSnippetHTML());
64-
}
65-
6654
@Test
6755
public void testLimitingMatchInKrill () throws IOException {
6856
// Change limit via Krill
@@ -80,7 +68,7 @@ public void testLimitingMatchInKrill () throws IOException {
8068
};
8169

8270
@Test
83-
public void testMatchInfoWithKrillConfig ()
71+
public void testMatchInfo ()
8472
throws IOException, QueryException {
8573
KrillIndex ki = new KrillIndex();
8674
// Indexing test files
@@ -95,27 +83,25 @@ public void testMatchInfoWithKrillConfig ()
9583
ArrayList<String> layer = new ArrayList<String>();
9684
layer.add("opennlp");
9785

98-
// maxMatchTokens from properties = 5
86+
// maxMatchTokens from properties = 40
9987
km = ki.getMatchInfo("match-WUD17/C94/39360-p390-396", "tokens", false,
10088
foundry, layer, false, false, false, false, false);
10189

10290
assertEquals("... [[g. Artikel vornimmst, wäre es fein]] ...",
10391
km.getSnippetBrackets());
10492

105-
// lower than limit
106-
int maxMatchTokens = 2;
107-
km = ki.getMatchInfo("match-WUD17/C94/39360-p390-396", "tokens",
108-
false, foundry, layer, false, false, false, false, true, // extendToSentence
109-
maxMatchTokens);
93+
// request lower than limit
94+
// int maxMatchTokens = 2;
95+
km = ki.getMatchInfo("match-WUD17/C94/39360-p390-392", "tokens",
96+
false, foundry, layer, false, false, false, false, true);
11097

111-
assertTrue(km.endCutted);
112-
assertEquals("... [[g. Artikel]<!>] ...", km.getSnippetBrackets());
98+
assertEquals("... [[g. Artikel]] ...", km.getSnippetBrackets());
11399

114-
// more than limit
115-
maxMatchTokens = 51;
100+
// request more than limit
101+
// maxMatchTokens = 51;
116102
km = ki.getMatchInfo("match-WUD17/C94/39360-p380-431", "tokens",
117-
false, foundry, layer, false, false, false, false, true, // extendToSentence
118-
maxMatchTokens);
119-
assertEquals(KrillProperties.maxTokenMatchSize, (km.getSnippetBrackets().split(" ").length -2));
103+
false, foundry, layer, false, false, false, false, false);
104+
assertTrue(km.endCutted);
105+
assertEquals(420, km.getEndPos());
120106
}
121107
}

src/test/java/de/ids_mannheim/korap/response/TestMatch.java

+7
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,17 @@
66
import org.junit.runner.RunWith;
77
import org.junit.runners.JUnit4;
88

9+
import de.ids_mannheim.korap.util.KrillProperties;
10+
911
@RunWith(JUnit4.class)
1012
public class TestMatch {
1113

1214
int maxMatchTokens = 50;
15+
16+
public TestMatch () {
17+
KrillProperties.maxTokenMatchSize = 50;
18+
}
19+
1320

1421
@Test
1522
public void testNoMatch () {

src/test/resources/krill.properties

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,5 @@ krill.index.commit.count = 15
66
krill.namedVC = queries/collections/named-vcs/
77
krill.test = true
88

9-
krill.match.max.token=50
9+
krill.match.max.token=40
1010
krill.context.max.token=25

0 commit comments

Comments
 (0)