Skip to content

Commit 7fdfdb0

Browse files
committed
basic linebreaking implemented
1 parent d2ac41b commit 7fdfdb0

19 files changed

+7151
-209
lines changed

.gitignore

+7
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
11
.gradle
22
build
33
target
4+
.cache
5+
.classpath
6+
.history
7+
.project
8+
lib
9+
out
10+
META-INF

src/main/java/lineup/Alignment.java

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package lineup;
2+
3+
import java.util.List;
4+
5+
public class Alignment {
6+
public static DistAlign byWordDistribution(List<Translation> corpus) {
7+
if (corpus == null || corpus.isEmpty())
8+
throw new IllegalArgumentException("DistAlign requires a non-empty corpus.");
9+
10+
DistAlign dist = new DistAlign(corpus);
11+
String srcLang = corpus.get(0).getSourceLanguage();
12+
String tgtLang = corpus.get(0).getTargetLanguage();
13+
14+
if ("en".equals(tgtLang)) {
15+
dist.getTargetBlacklist().add("s"); // s is only a particle indicating genitive
16+
}
17+
18+
return dist;
19+
}
20+
}

src/main/java/lineup/Candidate.java

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package lineup;
2+
3+
public class Candidate {
4+
private String word;
5+
private double probability;
6+
7+
public Candidate(String word, double probability) {
8+
this.word = word;
9+
this.probability = probability;
10+
}
11+
12+
public String getWord() {
13+
return word;
14+
}
15+
16+
public void setProbability(double probability) {
17+
this.probability = probability;
18+
}
19+
20+
public double getProbability() {
21+
return probability;
22+
}
23+
24+
@Override
25+
public boolean equals(Object o) {
26+
if (this == o) return true;
27+
if (o == null || getClass() != o.getClass()) return false;
28+
29+
Candidate candidate = (Candidate) o;
30+
31+
if (!word.equals(candidate.word)) return false;
32+
33+
return true;
34+
}
35+
36+
@Override
37+
public int hashCode() {
38+
return word.hashCode();
39+
}
40+
41+
@Override
42+
public String toString() {
43+
return "Candidate('" + word + "', " + probability + ")";
44+
}
45+
}

src/main/java/lineup/Demo.java

+194
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
package lineup;
2+
3+
import java.io.*;
4+
import java.util.Collection;
5+
import java.util.LinkedList;
6+
import java.util.List;
7+
import java.util.Scanner;
8+
9+
import static lineup.util.Fun.*;
10+
11+
/**
12+
* Demo of functionality so far.
13+
*
14+
* @author Markus Kahl
15+
*/
16+
public class Demo {
17+
18+
private List<Translation> corpus;
19+
private DistAlign dist;
20+
private BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
21+
private Collection<Command> commands = new LinkedList<Command>();
22+
private PrintStream out;
23+
24+
public Demo() throws UnsupportedEncodingException {
25+
corpus = loadCorpus();
26+
dist = new DistAlign(corpus);
27+
28+
commands.addAll(List(new Exit(), new Help(), new Corpus(), new Show(), new Break(), new Details()));
29+
dist.setOut(new PrintStream(System.out, true, "UTF8"));
30+
31+
out = dist.getOut();
32+
}
33+
34+
protected List<Translation> loadCorpus() throws UnsupportedEncodingException {
35+
try {
36+
return new LineupCorpusReader().readCorpus("src/main/resources/europarl3.txt");
37+
} catch (FileNotFoundException e) {
38+
InputStream in = getClass().getClassLoader().getResourceAsStream("europarl3.txt");
39+
40+
if (in != null) {
41+
return new LineupCorpusReader().readCorpus(new InputStreamReader(in, "UTF8"));
42+
} else {
43+
System.err.println("Could not find corpus: " + e.getMessage());
44+
System.exit(1);
45+
}
46+
}
47+
return null;
48+
}
49+
50+
public static void main(String[] args) throws UnsupportedEncodingException {
51+
new Demo().run(args);
52+
}
53+
54+
public void run(String[] args) {
55+
showCommands();
56+
repl: while (true) {
57+
prompt();
58+
59+
String input = nextCommand();
60+
for (Command cmd : commands) {
61+
if (cmd.respondTo(input)) {
62+
cmd.perform(input);
63+
continue repl;
64+
}
65+
}
66+
out.println("Unknown command");
67+
}
68+
}
69+
70+
public void showCommands() {
71+
out.println("+-----------------------------------------------------------------------------+");
72+
out.println("| line-up ==== Commands ===== 12.07.2013 |");
73+
out.println("+-----------------------------------------------------------------------------+");
74+
out.println("| |");
75+
out.println("| help - all this |");
76+
out.println("| exit - Quit the Demo |");
77+
out.println("| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |");
78+
out.println("| corpus - Info about corpus |");
79+
out.println("| show n - Show sentence n |");
80+
out.println("| break n - Break sentence(s) n [breaks random sentence(s) if no n is given] |");
81+
out.println("| details - Shows the relation table for the last shown sentence(s) |");
82+
out.println("| |");
83+
out.println("+-----------------------------------------------------------------------------+");
84+
}
85+
86+
public void prompt() {
87+
out.println();
88+
out.print("demo> ");
89+
}
90+
91+
public String nextCommand() {
92+
try {
93+
return in.readLine();
94+
} catch (IOException e) {
95+
System.err.println("Unexpected error while reading stdin: " + e.getMessage());
96+
System.exit(1); // I could try opening a new input stream on std in but you could also just rerun the demo
97+
}
98+
99+
return null;
100+
}
101+
102+
interface Command {
103+
public abstract boolean respondTo(String input);
104+
public abstract void perform(String input);
105+
}
106+
107+
class Exit implements Command {
108+
public boolean respondTo(String input) {
109+
return "exit".equals(input);
110+
}
111+
112+
public void perform(String input) {
113+
System.exit(0);
114+
}
115+
}
116+
117+
class Corpus implements Command {
118+
public boolean respondTo(String input) {
119+
return "corpus".equals(input);
120+
}
121+
122+
public void perform(String input) {
123+
out.println("Europarl 3 Corpus - German to English - " + corpus.size() + " sentence-aligned pairs");
124+
}
125+
}
126+
127+
class Show implements Command {
128+
public boolean respondTo(String input) {
129+
return input != null && input.startsWith("show");
130+
}
131+
132+
public void perform(String input) {
133+
List<String> args = drop(1, List(input.split("\\s+")));
134+
135+
if (args.size() == 1) {
136+
Scanner scanner = new Scanner(head(args));
137+
if (scanner.hasNextInt()) {
138+
int index = scanner.nextInt() - 1;
139+
if (index >= 0 && index < corpus.size()) {
140+
dist.printSentence(index);
141+
} else {
142+
out.println("n out of range");
143+
}
144+
return;
145+
}
146+
}
147+
out.println("I was expecting a number, you know?");
148+
}
149+
}
150+
151+
class Break implements Command {
152+
public boolean respondTo(String input) {
153+
return input != null && input.startsWith("break");
154+
}
155+
156+
public void perform(String input) {
157+
List<String> args = drop(1, List(input.split("\\s+")));
158+
159+
if (args.size() == 1) {
160+
Scanner scanner = new Scanner(head(args));
161+
if (scanner.hasNextInt()) {
162+
int index = scanner.nextInt() - 1;
163+
if (index >= 0 && index < corpus.size()) {
164+
dist.show(index);
165+
} else {
166+
out.println("n out of range");
167+
}
168+
return;
169+
}
170+
}
171+
dist.showRandom();
172+
}
173+
}
174+
175+
class Details implements Command {
176+
public boolean respondTo(String input) {
177+
return "details".equals(input);
178+
}
179+
180+
public void perform(String input) {
181+
dist.details();
182+
}
183+
}
184+
185+
class Help implements Command {
186+
public boolean respondTo(String input) {
187+
return "help".equals(input);
188+
}
189+
190+
public void perform(String input) {
191+
showCommands();
192+
}
193+
}
194+
}

0 commit comments

Comments
 (0)