Skip to content

Commit ee0f53c

Browse files
committed
intrinsic evaluation for Europarl v3
1 parent 4a3ffd1 commit ee0f53c

File tree

4 files changed

+284
-165
lines changed

4 files changed

+284
-165
lines changed

src/main/java/lineup/StatAlign.java

+35-12
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ public class StatAlign<T extends NtoNTranslation> implements Aligner {
3636
private WordParser wordParser;
3737
private CognateModel cognateModel = new CognateModel(4, 0.10);
3838

39+
private int candidateLimit = 6;
40+
private int pruneMatches = 3;
41+
3942
ExecutorService exec = Executors.newFixedThreadPool(
4043
Runtime.getRuntime().availableProcessors(),
4144
new ThreadFactory() {
@@ -93,10 +96,10 @@ public Tuple<Sentences, Sentences> getSentences(int startIndex, int length) {
9396
* @param maxTranslationDistance Maximum token distance before discarding suggested translations due to wrapping infeasibility.
9497
*/
9598
public Tuple<Sentences, Sentences> getSentences(int startIndex, int length, double maxTranslationDistance) {
96-
List<PossibleTranslations> pts = associate(startIndex, 6);
99+
List<PossibleTranslations> pts = associate(startIndex, getCandidateLimit());
97100

98101
for (int i = 1; i < length; ++i) {
99-
pts.addAll(associate(startIndex + i, 6));
102+
pts.addAll(associate(startIndex + i, getCandidateLimit()));
100103
try {
101104
Thread.sleep(250); // concurrency bug (during evaluation) workaround
102105
} catch (InterruptedException e) {
@@ -121,10 +124,6 @@ public Tuple<Sentences, Sentences> getSentences(int startIndex, int length, doub
121124
return Sentences.wire(de.toString(), en.toString(), pts, maxTranslationDistance, getWordParser());
122125
}
123126

124-
public List<PossibleTranslations> associate(NtoNTranslation translation) {
125-
return associate(translation, 6, 3, true);
126-
}
127-
128127
/**
129128
* Computes a word alignment for a given translation. Each word of the source sentence is associated with a number
130129
* of candidates that are possible translations in the target sentence. The alignment is based both on the basic
@@ -180,16 +179,24 @@ public List<PossibleTranslations> associate(NtoNTranslation translation, int lim
180179
return matches;
181180
}
182181

182+
/**
 * Convenience overload: computes the word alignment for the given translation using this
 * aligner's configured settings instead of explicit arguments.
 *
 * Delegates to the four-argument overload with {@link #getCandidateLimit()},
 * {@link #getPruneMatches()} and {@code true}.
 * NOTE(review): the delegate's parameter list is not fully visible here; presumably the
 * arguments are the candidate limit, the prune count and a "retain most likely" flag - confirm.
 *
 * @param translation the translation to align
 * @return whatever the four-argument overload returns for these settings
 */
public List<PossibleTranslations> associate(NtoNTranslation translation) {
183+
return associate(translation, getCandidateLimit(), getPruneMatches(), true);
184+
}
185+
186+
/**
 * Convenience overload: like {@code associate(translation)} but with an explicit limit
 * in place of {@link #getCandidateLimit()}.
 *
 * Delegates to the four-argument overload with {@code limit}, {@link #getPruneMatches()}
 * and {@code true}.
 * NOTE(review): the meaning of the last two delegate arguments is not visible in this
 * hunk; presumably prune count and a "retain most likely" flag - confirm.
 *
 * @param translation the translation to align
 * @param limit value forwarded as the delegate's second argument
 * @return whatever the four-argument overload returns for these settings
 */
public List<PossibleTranslations> associate(NtoNTranslation translation, int limit) {
187+
return associate(translation, limit, getPruneMatches(), true);
188+
}
189+
183190
/**
 * Convenience overload of {@code associate(int index, int limit)}.
 *
 * After this change the limit is taken from {@link #getCandidateLimit()} rather than the
 * previously hard-coded {@code 6}.
 *
 * @param index forwarded unchanged to {@code associate(index, limit)}
 * @return the result of {@code associate(index, getCandidateLimit())}
 */
public List<PossibleTranslations> associate(int index) {
184-
return associate(index, 6);
191+
return associate(index, getCandidateLimit());
185192
}
186193

187194
/**
 * Calls {@code associate(index, limit, prune, retainMostLikely)} with
 * {@code retainMostLikely = false}.
 *
 * After this change the limit and prune arguments come from {@link #getCandidateLimit()}
 * and {@link #getPruneMatches()} rather than the previously hard-coded {@code 6} and {@code 3}.
 *
 * @param index forwarded unchanged to the four-argument overload
 * @return the result of the four-argument overload
 */
public List<PossibleTranslations> associateRetainingAll(int index) {
188-
return associate(index, 6, 3, false);
195+
return associate(index, getCandidateLimit(), getPruneMatches(), false);
189196
}
190197

191198
/**
 * Calls {@code associate(index, limit, prune, retainMostLikely)} with
 * {@code retainMostLikely = true}.
 *
 * After this change the prune argument comes from {@link #getPruneMatches()} rather than
 * the previously hard-coded {@code 3}.
 *
 * @param index forwarded unchanged to the four-argument overload
 * @param limit forwarded unchanged to the four-argument overload
 * @return the result of the four-argument overload
 */
public List<PossibleTranslations> associate(int index, int limit) {
192-
return associate(index, limit, 3, true);
199+
return associate(index, limit, getPruneMatches(), true);
193200
}
194201

195202
public List<PossibleTranslations> associate(int index, int limit, int prune, boolean retainMostLikely) {
@@ -210,8 +217,8 @@ public List<PossibleTranslations> associate(int index, int limit, int prune, boo
210217
*/
211218
public List<PossibleTranslations> matches(NtoNTranslation translation, int limit) {
212219
List<PossibleTranslations> result = new LinkedList<PossibleTranslations>();
213-
List<PossibleTranslations> forth = possibleTranslations(translation, limit != -1 ? limit : 3);
214-
List<PossibleTranslations> back = reversePossibleTranslations(translation, limit != -1 ? limit : 3);
220+
List<PossibleTranslations> forth = possibleTranslations(translation, limit != -1 ? limit : getCandidateLimit());
221+
List<PossibleTranslations> back = reversePossibleTranslations(translation, limit != -1 ? limit : getCandidateLimit());
215222

216223
for (PossibleTranslations ptForth : forth) {
217224
List<Candidate> candidates = new LinkedList<Candidate>();
@@ -238,7 +245,7 @@ public List<PossibleTranslations> matches(NtoNTranslation translation, int limit
238245
}
239246

240247
/**
 * Convenience overload of {@code matches(int index, int limit)}.
 *
 * NOTE(review): this change replaces the hard-coded limit {@code 3} with
 * {@link #getCandidateLimit()}, whose initial value is 6, so the effective limit for
 * existing callers changes unless they configure it - confirm this is intended.
 *
 * @param index forwarded unchanged to {@code matches(index, limit)}
 * @return the result of {@code matches(index, getCandidateLimit())}
 */
public List<PossibleTranslations> matches(int index) {
241-
return matches(index, 3);
248+
return matches(index, getCandidateLimit());
242249
}
243250

244251
public List<PossibleTranslations> matches(int index, int limit) {
@@ -699,6 +706,22 @@ public int getTargetWordCount() {
699706
return targetWordCount;
700707
}
701708

709+
/**
 * Sets the candidate limit returned by {@link #getCandidateLimit()}.
 *
 * The value is stored as given, with no range check.
 * NOTE(review): {@code matches(NtoNTranslation, int)} treats a limit of {@code -1} as
 * "use the candidate limit"; storing {@code -1} or another negative value here is
 * presumably not meaningful - confirm whether validation is wanted.
 *
 * @param limit the new candidate limit
 */
public void setCandidateLimit(int limit) {
710+
this.candidateLimit = limit;
711+
}
712+
713+
/**
 * Returns the candidate limit that the convenience overloads of {@code associate} and
 * {@code matches} pass as their limit argument.
 *
 * @return the current candidate limit (6 unless changed via {@link #setCandidateLimit(int)})
 */
public int getCandidateLimit() {
714+
return candidateLimit;
715+
}
716+
717+
/**
 * Sets the prune value returned by {@link #getPruneMatches()}.
 *
 * The value is stored as given, with no range check.
 *
 * @param pruneTo the new prune value
 */
public void setPruneMatches(int pruneTo) {
718+
this.pruneMatches = pruneTo;
719+
}
720+
721+
/**
 * Returns the prune value that the convenience overloads of {@code associate} pass as
 * their prune argument.
 *
 * @return the current prune value (3 unless changed via {@link #setPruneMatches(int)})
 */
public int getPruneMatches() {
722+
return pruneMatches;
723+
}
724+
702725
/**
 * Replaces the word parser held by this aligner.
 *
 * The reference is stored as given, with no null check.
 *
 * @param wordParser the parser to store
 */
public void setWordParser(WordParser wordParser) {
703726
this.wordParser = wordParser;
704727
}

0 commit comments

Comments
 (0)