-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathMakefile
154 lines (133 loc) · 4.96 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# Build configuration.
#
# Everything here is assigned with `:=` so that it is expanded exactly once,
# when the Makefile is parsed. This matters for the $(shell ...) values: with
# a recursive `=` the `date` and `git` commands were re-run on every expansion,
# so the release timestamp could differ between the generated files.
# Note: the shell commands now run on every invocation of make, including
# `make clean`.

# Release number; used in the names of the generated text file and the ZIP.
version_number := 2.1
# Human-readable version string substituted for $version in the templates.
version := $(version_number) PoliMorf
# Values substituted for $release_date, $copyright_date and $githash.
release_date := $(shell date --rfc-3339=seconds)
copyright_date := $(shell date +%Y)
githash := $(shell git log --pretty=format:'%h' -n 1)

# Raw input file.
input := eksport.tab
# morfologik-tools JAR, run with `java -jar` by the dictionary rules.
morfologik := lib/target/morfologik-tools-2.1.0.jar
# Extra options passed to sort(1) and to the JVM, respectively.
sortopts := --buffer-size=1G
javaopts := -ea -Xmx1G
# Preprocessed text file that the dictionary rules consume.
polimorfologik.txt := build/polimorfologik-$(version_number).txt
#
# Aggregate targets.
#
# `all` is the first rule in the file and therefore the default goal. It only
# groups other targets and produces no file of its own, so it is declared
# phony; otherwise a stray file named `all` would make it a no-op.
.PHONY: all
all: compile \
     compile-fsamorph \
     build/polish_tags.txt \
     test \
     zip
# Builds both binary dictionaries, making sure the raw input is present.
# This is a grouping target with no file of its own, hence phony.
.PHONY: compile
compile: eksport.tab \
         build/polish.dict \
         build/polish_synth.dict
#
# Obtain the morfologik-tools JAR (the FSA compilers) through Apache Maven.
#
# Maven is run from inside lib/. The JAR is expected to appear at the path
# held in $(morfologik) -- presumably configured by the Maven project in lib/;
# verify there.
$(morfologik):
	cd lib && mvn dependency:copy-dependencies
#
# Make sure the raw input file is available.
#
# Step 1: if the file is missing or empty, download the gzipped export and
#         unpack it in place.
# Step 2: fail the build with an explicit message if it is still missing
#         or empty.
eksport.tab:
	@[ -s $@ ] || { wget --continue --progress=bar:force http://marcinmilkowski.pl/downloads/eksport.tab.gz && gzip -d $@.gz; }
	@[ -s $@ ] || { echo "ERROR: eksport.tab not found."; exit 1; }
#
# Preprocess the raw input.
#
# Sorts and de-duplicates $(input) together with eksport.quickfix.tab, then
# pipes the result through awk/join_tags_reverse.awk.
#
# Every file the recipe reads is declared as a prerequisite, so the output is
# rebuilt when any of them changes and make can order this rule correctly
# after the eksport.tab download, including under `make -j`.
#
# The result is written to a temporary file and renamed into place, so a
# failed gawk run never leaves a truncated file that looks up to date.
# NOTE(review): a failure of `sort` alone is still not detected, because the
# default /bin/sh reports only the exit status of the last pipeline command.
$(polimorfologik.txt): $(input) eksport.quickfix.tab awk/join_tags_reverse.awk
	mkdir -p $(@D)
	LANG=C sort $(sortopts) -u $(input) eksport.quickfix.tab | gawk -f awk/join_tags_reverse.awk > $@.tmp
	mv -f $@.tmp $@
#
# Stemming dictionary.
#
# The preprocessed text is copied to build/polish.input and compiled by
# morfologik-tools' dict_compile in CFSA2 format. The automaton is then dumped
# to build/polish.dump.
# NOTE(review): dict_compile is expected to write build/polish.dict next to its
# input and presumably reads build/polish.info as metadata -- confirm against
# the morfologik-tools documentation.
# $(basename $@) expands to build/polish.
build/polish.dict: $(morfologik) $(polimorfologik.txt) build/polish.info
	cp $(polimorfologik.txt) $(basename $@).input
	@echo "### Building CFSA2 (morfologik-stemming, LT) polish.dict"
	java $(javaopts) -jar $(morfologik) dict_compile --format cfsa2 -i $(basename $@).input --overwrite
	@echo "### Dumping raw automaton for polish.dict -> build/polish.dump"
	java $(javaopts) -jar $(morfologik) fsa_dump -i $@ -o $(basename $@).dump
#
# Synthesis dictionary.
#
# Compiles build/polish_synth.input with dict_compile in CFSA2 format, then
# dumps the automaton to build/polish_synth.dump.
# $(basename $@) expands to build/polish_synth.
build/polish_synth.dict: $(morfologik) build/polish_synth.input build/polish_synth.info
	@echo "### Building CFSA2 (morfologik-stemming, LT) polish_synth.dict"
	java $(javaopts) -jar $(morfologik) dict_compile --format cfsa2 -i $(basename $@).input --overwrite
	@echo "### Dumping raw automaton for polish_synth.dict -> build/polish_synth.dump"
	java $(javaopts) -jar $(morfologik) fsa_dump -i $@ -o $(basename $@).dump
# Derive the synthesis dictionary's input from the preprocessed text by
# running it through awk/combined-to-synth.awk.
# $< is the preprocessed text file, $@ is build/polish_synth.input.
build/polish_synth.input: $(polimorfologik.txt)
	gawk -f awk/combined-to-synth.awk $< > $@
#
# fsa_morph backwards-compatible dictionaries.
#
# Grouping target for the two FSA5 dictionaries. It produces no file of its
# own, so it is declared phony.
.PHONY: compile-fsamorph
compile-fsamorph: build/fsa_morph/polish.dict \
                  build/fsa_morph/polish_synth.dict
# FSA5 build of the stemming dictionary.
#
# build/polish.input is produced as a side effect of the build/polish.dict
# recipe, which is why that dictionary is a prerequisite here.
# src/fsa_morph.info is copied by this recipe and is therefore declared as a
# prerequisite too, so editing it triggers a rebuild.
# `tr` rewrites ';' to '+' and '+' to '|' in the input before compiling.
# $(basename $@) expands to build/fsa_morph/polish.
build/fsa_morph/polish.dict: build/polish.dict src/fsa_morph.info
	mkdir -p $(@D)
	@echo "### Building FSA5 (fsa_morph-compatible) polish.dict"
	tr ';+' '+|' < build/polish.input > $(basename $@).input
	cp src/fsa_morph.info $(basename $@).info
	java $(javaopts) -jar $(morfologik) dict_compile --format fsa5 -i $(basename $@).input --overwrite
# FSA5 build of the synthesis dictionary.
#
# The recipe reads build/polish_synth.input and src/fsa_morph.info, so both
# are declared as prerequisites alongside the CFSA2 dictionary.
# `tr` rewrites ';' to '+' and '+' to '|' in the input before compiling.
# $(basename $@) expands to build/fsa_morph/polish_synth.
build/fsa_morph/polish_synth.dict: build/polish_synth.dict build/polish_synth.input src/fsa_morph.info
	mkdir -p $(@D)
	@echo "### Building FSA5 (fsa_morph-compatible) polish_synth.dict"
	tr ';+' '+|' < build/polish_synth.input > $(basename $@).input
	cp src/fsa_morph.info $(basename $@).info
	java $(javaopts) -jar $(morfologik) dict_compile --format fsa5 -i $(basename $@).input --overwrite
#
# List of unique tags.
#
# Runs awk/tags.awk over the preprocessed text and de-duplicates its output
# with `sort -u`.
# LANG=C applies to gawk only, not to sort.
# $< is the preprocessed text file, $@ is build/polish_tags.txt.
build/polish_tags.txt: $(polimorfologik.txt)
	LANG=C gawk -f awk/tags.awk $< | sort -u > $@
#
# Sanity checks.
#
# Runs the Maven test suite in lib/, passing it the two compiled dictionaries
# and the preprocessed text file.
#
# These three files are declared as prerequisites so that `make test` on a
# clean tree builds them first, and so that `make -j all` cannot start the
# tests before the dictionaries exist.
.PHONY: test
test: build/polish.dict build/polish_synth.dict $(polimorfologik.txt)
	cd lib && mvn test -Dpolish.dict=../build/polish.dict \
	                   -Dpolish_synth.dict=../build/polish_synth.dict \
	                   -Dcombined.input=../$(polimorfologik.txt)
#
# Substitute variables in template files.
#
# Each src/*.txt template is copied to build/, replacing the literal
# placeholders $version, $release_date, $copyright_date and $githash with the
# corresponding make variables. `$$` passes a literal `$` through to sed.
#
# The output directory is created first, so a template can be built on its
# own on a clean tree (e.g. `make build/README.txt`) and under `make -j`.
TXT_FILES := $(wildcard src/*.txt)
build/%.txt: src/%.txt
	@mkdir -p $(@D)
	sed -e 's/$$version/$(version)/g' \
	    -e 's/$$release_date/$(release_date)/g' \
	    -e 's/$$copyright_date/$(copyright_date)/g' \
	    -e 's/$$githash/$(githash)/g' \
	    $< >$@
# Same placeholder substitution for the src/*.info files: $version,
# $release_date, $copyright_date and $githash are replaced with the
# corresponding make variables. `$$` passes a literal `$` through to sed.
#
# The output directory is created first, so the rule does not depend on some
# other recipe having already created build/.
INFO_FILES := $(wildcard src/*.info)
build/%.info: src/%.info
	@mkdir -p $(@D)
	sed -e 's/$$version/$(version)/g' \
	    -e 's/$$release_date/$(release_date)/g' \
	    -e 's/$$copyright_date/$(copyright_date)/g' \
	    -e 's/$$githash/$(githash)/g' \
	    $< >$@
#
# Create a ZIP distribution.
#
# Removes any previous archive, copies CHANGES.md into build/, then zips the
# dictionaries, their .info files, the generated README/LICENSE texts, the
# preprocessed text file, the change log and the fsa_morph dictionaries.
# The zip command runs inside build/ so that archive paths are relative to it.
#
# The final message uses printf rather than `echo -e`: make runs recipes with
# /bin/sh, and not every sh accepts `-e` (dash prints it literally).
.PHONY: zip
zip: compile compile-fsamorph \
     build/README.txt \
     build/README.Polish.txt \
     build/LICENSE.txt \
     build/LICENSE.Polish.txt
	rm -f build/polimorfologik-$(version_number).zip
	cp CHANGES.md build
	(cd build && zip -9 polimorfologik-$(version_number).zip \
	     polish.info \
	     polish.dict \
	     polish_synth.info \
	     polish_synth.dict \
	     README.* \
	     LICENSE.* \
	     polimorfologik*.txt \
	     CHANGES.md \
	     fsa_morph/*.dict )
	@printf '\n\n### Distribution ZIP ready: %s\n' 'build/polimorfologik-$(version_number).zip'
#
# Remove everything the build produced: the build/ output tree and Maven's
# lib/target directory.
#
.PHONY: clean
clean:
	rm -rf build lib/target