diff --git a/src/Makefile b/src/Makefile index f1ee17c..4c2aff4 100644 --- a/src/Makefile +++ b/src/Makefile @@ -14,14 +14,17 @@ # export MATEPLUS_HOME=~/u/sandboxes/mateplus MATEPLUS_HOME ?= ~/u/tools/MATE/mateplus-master/src +MATETOOLS_HOME ?= $(MATEPLUS_HOME) + JAVA_HOME ?= /space/group/nrc_ict/pkgs/centos6/gcc-4.9.3/jdk1.8.0_131 MATEPLUS_PATH ?= $(MATEPLUS_HOME)/mateplus.jar +MATETOOLS_SRL_PATH ?= $(MATETOOLS_HOME)/srl.jar -ifneq ("$(wildcard $(MATEPLUS_PATH))","") +ifneq ("$(wildcard $(MATEPLUS_PATH))$(wildcard $(MATETOOLS_SRL_PATH))","") WITH_SRLMATE ?= True else - $(info *** mateplus.jar not found) + $(info *** mateplus.jar and srl.jar not found) endif ifneq (clean, $(MAKECMDGOALS)) @@ -107,7 +110,10 @@ scripts: | ../bin cd ../obj/java && jar -cvf ../srlmate.jar * en.mplsconfig de.mplsconfig es.mplsconfig zh.mplsconfig: %: %.template - sed -e "s#/#$(dir $(CURDIR))#g; s##$(MATEPLUS_HOME)#g;" < $< > $@ + sed -e "s##$(dir $(CURDIR))#g; s#//#/#g;" \ + -e "s##$(MATEPLUS_HOME)#g;" \ + -e "s##$(MATETOOLS_HOME)#g;" \ + < $< > $@ $(SRLMATE_BINS): LDFLAGS += -L${JAVA_HOME}/jre/lib/amd64/server $(SRLMATE_BINS): LIBRARIES += -ljvm diff --git a/src/Mate.java b/src/Mate.java index 70140aa..fc8342d 100644 --- a/src/Mate.java +++ b/src/Mate.java @@ -14,6 +14,7 @@ package yisi; import se.lth.cs.srl.corpus.Sentence; import se.lth.cs.srl.options.CompletePipelineCMDLineOptions; +import se.lth.cs.srl.options.FullPipelineOptions; import se.lth.cs.srl.util.FileExistenceVerifier; import se.lth.cs.srl.CompletePipeline; import se.lth.cs.srl.languages.Language; @@ -21,11 +22,19 @@ import java.io.IOException; import java.io.PrintStream; import java.util.ArrayList; +import java.net.URL; +import java.net.URLClassLoader; +import java.lang.Class; +import java.lang.reflect.Method; public class Mate { - protected CompletePipeline pipeline = null; + // protected CompletePipeline pipeline = null; + protected Object pipeline = null; + protected URLClassLoader classLoader = null; + Class class_CompletePipeline = null; - public String init(String lang, + public String init(String mate_jars, + String lang, boolean rerank, boolean hybrid, String token, @@ -37,6 +46,16 @@ public String init(String lang, String result= new String(); try { System.setOut(System.err); + + // Each Mate object has it's own class loader with it's own "classpath". + String[] mateJars = mate_jars.split(":"); + URL[] urls = new URL[mateJars.length]; + for (int i = 0; i < mateJars.length; ++i) { + urls[i] = new File(mateJars[i]).toURI().toURL(); +// System.err.println("URL: " + urls[i]); + } + classLoader = new URLClassLoader(urls, Thread.currentThread().getContextClassLoader()); + ArrayList argsBuilder = new ArrayList(); argsBuilder.add(lang); argsBuilder.add("-tokenize"); @@ -73,13 +92,31 @@ public String init(String lang, String[] args = new String[argsBuilder.size()]; argsBuilder.toArray(args); System.err.println(java.util.Arrays.toString(args)); - CompletePipelineCMDLineOptions options = new CompletePipelineCMDLineOptions(); - options.parseCmdLineArgs(args); - String error = FileExistenceVerifier.verifyCompletePipelineAllNecessaryModelFiles(options); + + Class class_FullPipelineOptions = classLoader.loadClass("se.lth.cs.srl.options.FullPipelineOptions"); + // CompletePipelineCMDLineOptions options = new CompletePipelineCMDLineOptions(); + Class class_CompletePipelineCMDLineOptions = classLoader.loadClass("se.lth.cs.srl.options.CompletePipelineCMDLineOptions"); + Object options = class_CompletePipelineCMDLineOptions.newInstance(); + // options.parseCmdLineArgs(args); + Method method_parseCmdLineArgs = class_CompletePipelineCMDLineOptions.getMethod("parseCmdLineArgs", String[].class); +// System.err.println("Got Method " + method_parseCmdLineArgs); + method_parseCmdLineArgs.invoke(options, (Object)args); + + // String error = FileExistenceVerifier.verifyCompletePipelineAllNecessaryModelFiles(options); + Class class_FileExistenceVerifier = classLoader.loadClass("se.lth.cs.srl.util.FileExistenceVerifier"); + Method method_verifyCompletePipelineAllNecessaryModelFiles = + class_FileExistenceVerifier.getMethod("verifyCompletePipelineAllNecessaryModelFiles", class_FullPipelineOptions); +// System.err.println("Got Method " + method_verifyCompletePipelineAllNecessaryModelFiles); + String error = (String) method_verifyCompletePipelineAllNecessaryModelFiles.invoke(null, options); + if (error != null){ result += error + "\n"; } else { - pipeline = CompletePipeline.getCompletePipeline(options); + // pipeline = CompletePipeline.getCompletePipeline(options); + class_CompletePipeline = classLoader.loadClass("se.lth.cs.srl.CompletePipeline"); + Method method_getCompletePipeline = class_CompletePipeline.getMethod("getCompletePipeline", class_FullPipelineOptions); +// System.err.println("Got Method " + method_getCompletePipeline); + pipeline = method_getCompletePipeline.invoke(null, options); } } catch (Exception e){ result += e.getMessage(); @@ -90,7 +127,10 @@ public String init(String lang, public String parse(String sentence) { String result = null; try { - result = pipeline.parse(sentence).toString(); + // result = pipeline.parse(sentence).toString(); + Method method_parse = class_CompletePipeline.getMethod("parse", String.class); +// System.err.println("Got Method " + method_parse); + result = method_parse.invoke(pipeline, sentence).toString(); } catch (Exception e) { e.printStackTrace(); System.err.println(sentence); diff --git a/src/de.mplsconfig.template b/src/de.mplsconfig.template index a0d91a5..d7fc3c2 100644 --- a/src/de.mplsconfig.template +++ b/src/de.mplsconfig.template @@ -1,4 +1,8 @@ -matejar=/obj/srlmate.jar:/mateplus.jar:/lib/anna-3.3.jar:/lib/opennlp-maxent-3.0.3.jar:/lib/opennlp-tools-1.5.3.jar:/lib/liblinear-1.92.jar:/lib/stanford-corenlp-3.9.1.jar:/lib/transition-1.30.jar +# yisi_home is needed to locate srlmate.jar within YiSi. +yisi_home= +# mate_jars is a colon (:) separated list of all jar files needed to run +# mateplus or mate-tools for this language. +mate_jars=/mateplus.jar:/lib/opennlp-maxent-3.0.3.jar:/lib/opennlp-tools-1.5.3.jar:/lib/transition-1.30.jar:/lib/stanford-corenlp-3.9.1.jar lang=ger rerank=1 hybrid=1 diff --git a/src/en.mplsconfig.template b/src/en.mplsconfig.template index ac73f51..be98d14 100644 --- a/src/en.mplsconfig.template +++ b/src/en.mplsconfig.template @@ -1,4 +1,8 @@ -matejar=/obj/srlmate.jar:/mateplus.jar:/lib/anna-3.3.jar:/lib/liblinear-1.92.jar:/lib/stanford-corenlp-3.9.1.jar +# yisi_home is needed to locate srlmate.jar within YiSi. +yisi_home= +# mate_jars is a colon (:) separated list of all jar files needed to run +# mateplus or mate-tools for this language. +mate_jars=/mateplus.jar:/lib/anna-3.3.jar:/lib/liblinear-1.92.jar:/lib/stanford-corenlp-3.9.1.jar lang=eng rerank=1 hybrid=0 diff --git a/src/es.mplsconfig.template b/src/es.mplsconfig.template index 07e029b..491b2d3 100644 --- a/src/es.mplsconfig.template +++ b/src/es.mplsconfig.template @@ -1,10 +1,15 @@ -matejar=/obj/srlmate.jar:/lib/srl.jar:/lib/anna-3.3.jar:/lib/opennlp-maxent-3.0.3.jar:/lib/opennlp-tools-1.5.3.jar:/lib/liblinear-1.92.jar:/lib/stanford-corenlp-3.9.1.jar:/lib/transition-1.30.jar:/lib/whatswrong-0.2.3.jar:/lib/seg.jar:/lib/slf4j-api-1.7.25.jar:/lib/slf4j-jdk14-1.7.25.jar +# yisi_home is needed to locate srlmate.jar within YiSi. +yisi_home= +# mate_jars is a colon (:) separated list of all jar files needed to run +# mateplus or mate-tools for this language. +mate_jars=/lib/srl.jar:/lib/anna-3.3.jar:/lib/opennlp-maxent-3.0.2-incubating.jar:/lib/opennlp-tools-1.5.2-incubating.jar:/lib/liblinear-1.51-with-deps.jar:/lib/whatswrong-0.2.3.jar +#mate_jars=/lib/srl.jar:/lib/anna-3.3.jar:/lib/opennlp-maxent-3.0.3.jar:/lib/opennlp-tools-1.5.3.jar:/lib/liblinear-1.92.jar:/lib/stanford-corenlp-3.9.1.jar:/lib/whatswrong-0.2.3.jar://lib/slf4j-api-1.7.25.jar:/lib/slf4j-jdk14-1.7.25.jar lang=spa rerank=0 hybrid=0 token= -morph=MATEPLUS_HOME>/models/CoNLL2009-ST-Spanish-ALL.anna-3.3.morphtagger.model -lemma=MATEPLUS_HOME>/models/CoNLL2009-ST-Spanish-ALL.anna-3.3.lemmatizer.model -tagger=MATEPLUS_HOME>/models/CoNLL2009-ST-Spanish-ALL.anna-3.3.postagger.model -parser=MATEPLUS_HOME>/models/CoNLL2009-ST-Spanish-ALL.anna-3.3.parser.model -srl=MATEPLUS_HOME>/models/CoNLL2009-ST-Spanish-ALL.anna-3.3.srl-4.21.srl-rr.model +morph=/models/CoNLL2009-ST-Spanish-ALL.anna-3.3.morphtagger.model +lemma=/models/CoNLL2009-ST-Spanish-ALL.anna-3.3.lemmatizer.model +tagger=/models/CoNLL2009-ST-Spanish-ALL.anna-3.3.postagger.model +parser=/models/CoNLL2009-ST-Spanish-ALL.anna-3.3.parser.model +srl=/models/CoNLL2009-ST-Spanish-ALL.anna-3.3.srl-4.21.srl-rr.model diff --git a/src/srlmate.cpp b/src/srlmate.cpp index 025c39e..8bc9bf7 100644 --- a/src/srlmate.cpp +++ b/src/srlmate.cpp @@ -39,6 +39,10 @@ auto name = jni_env_ptr->expr; \ } \ } \ +JavaVM* srlmate_t::jvm_m = NULL; +JNIEnv* srlmate_t::jen_m = NULL; +int srlmate_t::obj_cnt_m = 0; + srlmate_t::srlmate_t(string path) { cerr << "Setting up MATE ..."; @@ -48,7 +52,8 @@ srlmate_t::srlmate_t(string path) { exit(1); } - string matejar = ""; + string yisi_home = ""; + string mate_jars = ""; string lang = ""; string token = ""; string morph = ""; @@ -66,76 +71,83 @@ srlmate_t::srlmate_t(string path) { string cfgn, cfgv; getline(iss, cfgn, '='); getline(iss, cfgv); - if (cfgn == "matejar") { - matejar = cfgv; + if (cfgn == "yisi_home") { + yisi_home = cfgv; + } + else if (cfgn == "mate_jars") { + mate_jars = cfgv; } - if (cfgn == "lang") { + else if (cfgn == "lang") { lang = cfgv; } - if (cfgn == "rerank") { + else if (cfgn == "rerank") { if ((cfgv.compare("0") == 0) || (cfgv.compare("false") == 0)) { rerank = false; } else { rerank = true; } } - if (cfgn == "hybrid") { + else if (cfgn == "hybrid") { if ((cfgv.compare("0") == 0) || (cfgv.compare("false") == 0)) { hybrid = false; } else { hybrid = true; } } - if (cfgn == "token") { + else if (cfgn == "token") { token = cfgv; } - if (cfgn == "morph") { + else if (cfgn == "morph") { morph = cfgv; } - if (cfgn == "lemma") { + else if (cfgn == "lemma") { lemma = cfgv; } - if (cfgn == "tagger") { + else if (cfgn == "tagger") { tagger = cfgv; } - if (cfgn == "parser") { + else if (cfgn == "parser") { parser = cfgv; } - if (cfgn == "srl") { + else if (cfgn == "srl") { srl = cfgv; } } // init JVM - size_t opt_count = 2; - JavaVMOption* vm_opts = new JavaVMOption[opt_count]; - string opt0 = "-Djava.class.path=" + matejar; - string opt1 = "-Xmx6g"; - vm_opts[0].optionString = const_cast(opt0.c_str()); - vm_opts[1].optionString = const_cast(opt1.c_str()); - - JavaVMInitArgs vm_args; - vm_args.version = JNI_VERSION_1_6; - vm_args.options = vm_opts; - vm_args.nOptions = opt_count; - - jint result = JNI_CreateJavaVM(&jvm_m, (void**)&jen_m, &vm_args); - delete vm_opts; - if (result != JNI_OK) { - cerr << "ERROR: Failed to create Java VM. (error code = " << result << "). Exiting..." << endl; - exit(1); - jvm_m = NULL; + if (jvm_m == NULL) { + size_t opt_count = 2; + JavaVMOption* vm_opts = new JavaVMOption[opt_count]; + string opt0 = "-Djava.class.path=" + yisi_home + "/obj/srlmate.jar"; + string opt1 = "-Xmx12g"; + vm_opts[0].optionString = const_cast(opt0.c_str()); + vm_opts[1].optionString = const_cast(opt1.c_str()); + + JavaVMInitArgs vm_args; + vm_args.version = JNI_VERSION_1_6; + vm_args.options = vm_opts; + vm_args.nOptions = opt_count; + + jint result = JNI_CreateJavaVM(&jvm_m, (void**)&jen_m, &vm_args); + delete vm_opts; + if (result != JNI_OK) { + cerr << "ERROR: Failed to create Java VM. (error code = " << result << "). Exiting..." << endl; + exit(1); + jvm_m = NULL; + } } + ++obj_cnt_m; JNI_SAFE_CALL(mcls, jen_m, FindClass("yisi/Mate")); JNI_SAFE_CALL(ctor, jen_m, GetMethodID(mcls, "", "()V")); JNI_SAFE_CALL(mobj, jen_m, NewObject(mcls, ctor)); JNI_SAFE_CALL(init, jen_m, GetMethodID(mcls, "init", - "(Ljava/lang/String;ZZLjava/lang/String;Ljava/lang/String;Ljava/lang/String;" + "(Ljava/lang/String;Ljava/lang/String;ZZLjava/lang/String;Ljava/lang/String;Ljava/lang/String;" "Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;")); JNI_SAFE_CALL(jerr, jen_m, CallObjectMethod(mobj, init, + jen_m->NewStringUTF(mate_jars.c_str()), jen_m->NewStringUTF(lang.c_str()), rerank ? JNI_TRUE : JNI_FALSE, hybrid ? JNI_TRUE : JNI_FALSE, @@ -157,8 +169,11 @@ srlmate_t::srlmate_t(string path) { } // srlmate_t srlmate_t::~srlmate_t() { - if (jvm_m != NULL) { + --obj_cnt_m; + if (obj_cnt_m == 0 && jvm_m != NULL) { jvm_m->DestroyJavaVM(); + jvm_m = NULL; + jen_m = NULL; } } diff --git a/src/srlmate.h b/src/srlmate.h index 206b736..0201287 100644 --- a/src/srlmate.h +++ b/src/srlmate.h @@ -40,8 +40,9 @@ namespace yisi { virtual std::vector parse(std::vector sents); private: std::string noparse(std::vector tokens); - JavaVM* jvm_m; - JNIEnv* jen_m; + static JavaVM* jvm_m; + static JNIEnv* jen_m; + static int obj_cnt_m; jclass mate_class_m; jobject mate_object_m; }; diff --git a/src/zh.mplsconfig.template b/src/zh.mplsconfig.template index 0ec1562..6309669 100644 --- a/src/zh.mplsconfig.template +++ b/src/zh.mplsconfig.template @@ -1,10 +1,15 @@ -matejar=/obj/srlmate.jar:/lib/srl.jar:/lib/anna-3.3.jar:/lib/opennlp-maxent-3.0.3.jar:/lib/opennlp-tools-1.5.3.jar:/lib/liblinear-1.92.jar:/lib/stanford-corenlp-3.9.1.jar:/lib/transition-1.30.jar:/lib/whatswrong-0.2.3.jar:/lib/seg.jar:/lib/slf4j-api-1.7.25.jar:/lib/slf4j-jdk14-1.7.25.jar +# yisi_home is needed to locate srlmate.jar within YiSi. +yisi_home= +# mate_jars is a colon (:) separated list of all jar files needed to run +# mateplus or mate-tools for this language. +mate_jars=/lib/srl.jar:/lib/anna-3.3.jar:/lib/opennlp-maxent-3.0.2-incubating.jar:/lib/opennlp-tools-1.5.2-incubating.jar:/lib/liblinear-1.51-with-deps.jar:/lib/whatswrong-0.2.3.jar:/lib/seg.jar +#mate_jars=/lib/srl.jar:/lib/anna-3.3.jar:/lib/opennlp-maxent-3.0.3.jar:/lib/opennlp-tools-1.5.3.jar:/lib/liblinear-1.92.jar:/lib/whatswrong-0.2.3.jar:/lib/stanford-segmenter-3.6.0.jar://lib/slf4j-api-1.7.25.jar:/lib/slf4j-jdk14-1.7.25.jar lang=chi rerank=0 hybrid=0 -token=/models/stanford-segmenter-2015-12-09/data +token=/models/stanford-segmenter-2015-12-09/data morph= lemma= -tagger=/models/CoNLL2009-ST-Chinese-ALL.anna-3.3.postagger.model -parser=/models/CoNLL2009-ST-Chinese-ALL.anna-3.3.parser.model -srl=/models/CoNLL2009-ST-Chinese-ALL.anna-3.3.srl-4.1.srl.model +tagger=/models/CoNLL2009-ST-Chinese-ALL.anna-3.3.postagger.model +parser=/models/CoNLL2009-ST-Chinese-ALL.anna-3.3.parser.model +srl=/models/CoNLL2009-ST-Chinese-ALL.anna-3.3.srl-4.1.srl.model diff --git a/test/Makefile b/test/Makefile index c303606..66eb343 100644 --- a/test/Makefile +++ b/test/Makefile @@ -49,11 +49,11 @@ SIMPLE_TEST_PROGS += srlgraph_test SIMPLE_TEST_PROGS += yisiscorer_test ifdef WITH_SRLMATE - SIMPLE2_TEST_PROGS += srl_test - SIMPLE2_TEST_PROGS += srlmate_test + SIMPLE_SRL_TEST_PROGS += srl_test + SIMPLE_SRL_TEST_PROGS += srlmate_test endif -ALL_SIMPLE_TEST_PROGS := $(CMDLP_TEST_PROGS) $(SIMPLE_TEST_PROGS) $(SIMPLE2_TEST_PROGS) +ALL_SIMPLE_TEST_PROGS := $(CMDLP_TEST_PROGS) $(SIMPLE_TEST_PROGS) $(SIMPLE_SRL_TEST_PROGS) all: $(ALL_SIMPLE_TEST_PROGS) all: test_yisi @@ -67,13 +67,18 @@ compare.%: % SUB:="s/^[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]* */00.00.000 /" -compare2.%: % +compare_w_java_log.%: % diff <(cat $< | sed -e $(SUB)) <(cat ref/$< | sed -e $(SUB)) -q +compare_w_alt.%: % + diff $< ref/$< -q || diff -s $< ref/$<.alt -q + +# Simple tests + .PHONY: $(ALL_SIMPLE_TEST_PROGS) $(CMDLP_TEST_PROGS): %: compare.%.out $(SIMPLE_TEST_PROGS): %: compare.%.out -$(SIMPLE2_TEST_PROGS): %: compare2.%.out +$(SIMPLE_SRL_TEST_PROGS): %: compare_w_java_log.%.out cmdlp_test.1.out: ARGS = --beta 0 --settings <(echo hello) --refs test_ref.en --help cmdlp_test.2.out: ARGS = --beta 0 --settings <(echo hello) --refs /etc/timezone @@ -93,18 +98,19 @@ srl_test.out: ARGS = mate ../src/en.mplsconfig test_ref.en test_ref.en.srl srlmate_test.out: ARGS = ../src/en.mplsconfig <<<'Hello there' $(CMDLP_TEST_PROGS): BIN = ../src/cmdlp/build/bin -$(SIMPLE_TEST_PROGS) $(SIMPLE2_TEST_PROGS): BIN = ../bin +$(SIMPLE_TEST_PROGS) $(SIMPLE_SRL_TEST_PROGS): BIN = ../bin -compare2.srl_test.out: compare.test_ref.en.srl +compare_w_java_log.srl_test.out: compare_w_alt.test_ref.en.srl test_ref.en.srl: srl_test.out ; $(addsuffix .out, $(ALL_SIMPLE_TEST_PROGS)): %.out: $(BIN)/$(firstword $(subst ., ,$*)) $(ARGS) &> $@ +# YiSi tests + YSFX_NOSRL := 0 1 2 -#YSFX_SRL := 1_srl 2_srl -YSFX_SRL := 1_srl +YSFX_SRL := 1_srl 2_srl .PHONY: test_yisi .PHONY: $(foreach n,$(YSFX_NOSRL),test_yisi_$n) @@ -118,8 +124,11 @@ ifdef WITH_SRLMATE test_yisi: $(foreach n,$(YSFX_SRL),test_yisi_$n) endif -$(foreach n,$(YSFX_SRL),test_yisi_$n): test_yisi_%: compare2.test_yisi_%.out -$(foreach n,$(YSFX_SRL),compare2.test_yisi_$n.out): compare2.test_yisi_%.out: compare.test_hyp.docyisi% compare.test_hyp.sntyisi% +# YiSi SRL tests have alternate reference files too because mateplus may return +# an alternate parse. + +$(foreach n,$(YSFX_SRL),test_yisi_$n): test_yisi_%: compare_w_java_log.test_yisi_%.out +$(foreach n,$(YSFX_SRL),compare_w_java_log.test_yisi_$n.out): compare_w_java_log.test_yisi_%.out: compare_w_alt.test_hyp.docyisi% compare_w_alt.test_hyp.sntyisi% test_yisi_%.out: yisi-%.config ../bin/yisi --config $< &> $@ diff --git a/test/ref/test_hyp.docyisi1_srl.alt b/test/ref/test_hyp.docyisi1_srl.alt new file mode 100644 index 0000000..a9768df --- /dev/null +++ b/test/ref/test_hyp.docyisi1_srl.alt @@ -0,0 +1 @@ +0.639393 diff --git a/test/ref/test_hyp.docyisi2_srl b/test/ref/test_hyp.docyisi2_srl new file mode 100644 index 0000000..1ffef82 --- /dev/null +++ b/test/ref/test_hyp.docyisi2_srl @@ -0,0 +1 @@ +0.0652749 diff --git a/test/ref/test_hyp.docyisi2_srl.alt b/test/ref/test_hyp.docyisi2_srl.alt new file mode 100644 index 0000000..462088b --- /dev/null +++ b/test/ref/test_hyp.docyisi2_srl.alt @@ -0,0 +1 @@ +0.0641709 diff --git a/test/ref/test_hyp.sntyisi1_srl.alt b/test/ref/test_hyp.sntyisi1_srl.alt new file mode 100644 index 0000000..f9eeee9 --- /dev/null +++ b/test/ref/test_hyp.sntyisi1_srl.alt @@ -0,0 +1,10 @@ +0.859824 +0.691795 +0.645973 +0.633111 +0.455921 +0.59255 +0.557174 +0.54644 +0.546505 +0.864636 diff --git a/test/ref/test_hyp.sntyisi2_srl b/test/ref/test_hyp.sntyisi2_srl new file mode 100644 index 0000000..ecaa858 --- /dev/null +++ b/test/ref/test_hyp.sntyisi2_srl @@ -0,0 +1,10 @@ +0.0464296 +0.0116361 +0.0696774 +0.0665215 +0.0274319 +0.0927175 +0.00336682 +0.0519643 +0.141262 +0.141742 diff --git a/test/ref/test_hyp.sntyisi2_srl.alt b/test/ref/test_hyp.sntyisi2_srl.alt new file mode 100644 index 0000000..ccab905 --- /dev/null +++ b/test/ref/test_hyp.sntyisi2_srl.alt @@ -0,0 +1,10 @@ +0.0354018 +0.0116361 +0.0696774 +0.0665215 +0.0274319 +0.0927175 +0.00336682 +0.0519643 +0.141262 +0.14173 diff --git a/test/ref/test_ref.en.srl.alt b/test/ref/test_ref.en.srl.alt new file mode 100644 index 0000000..445d76e --- /dev/null +++ b/test/ref/test_ref.en.srl.alt @@ -0,0 +1,45 @@ +0: A [A0 Republican] [V strategy] [A1 to counter the re - election of Obama] +0: A Republican strategy to [V counter] [A1 the re - election of Obama] +0: A Republican strategy to counter the re - [V election] [A1 of Obama] +1: [A0 [A2 Republican] [V leaders]] justified their policy by the need to combat electoral fraud . +1: [A0 Republican leaders] [V justified] [A1 their policy] [A2 by the need to combat electoral fraud] . +1: Republican leaders justified [A0 their] [V policy] by the need to combat electoral fraud . +1: Republican leaders justified their policy by the [V need] [A1 to combat electoral fraud] . +1: [A0 Republican leaders] justified their policy by the need to [V combat] [A1 electoral fraud] . +1: Republican leaders justified their policy by the need to combat [A1 electoral] [V fraud] . +2: [AM-DIS However] , [A0 the Brennan Centre] [V considers] [A1 this] [A2 a myth] , [AM-ADV stating that electoral fraud is rarer in the United States than the number of people killed by lightning] . +2: However , [A0 the Brennan Centre] considers this a myth , [V stating] [A1 that electoral fraud is rarer in the United States than the number of people killed by lightning] . +2: However , the Brennan Centre considers this a myth , stating that [A1 electoral] [V fraud] is rarer in the United States than the number of people killed by lightning . +2: However , the Brennan Centre considers this a myth , stating that electoral fraud is rarer in the United States than the [V number] [A1 of people killed by lightning] . +2: However , the Brennan Centre considers this a myth , stating that electoral fraud is rarer in the United States than the number of [A1 people] [V killed] [A0 by lightning] . +3: Indeed , [A0 [A2 Republican] [V lawyers]] identified only 300 cases of electoral fraud in the United States in a decade . +3: [AM-DIS Indeed] , [A0 Republican lawyers] [V identified] [A1 only 300 cases of electoral fraud in the United States] [AM-TMP in a decade] . +3: Indeed , Republican lawyers identified only 300 [V cases] [A1 of electoral fraud] in the United States in a decade . +3: Indeed , Republican lawyers identified only 300 cases of [A1 electoral] [V fraud] in the United States in a decade . +4: One thing is certain : [A0 these new provisions] [AM-MOD will] [V have] [A1 a negative impact on voter turn - out] . +4: One thing is certain : these new provisions will have a [AM-MNR negative] [V impact] [A1 on voter turn - out] . +4: One thing is certain : these new provisions will have a negative impact on [A1 voter] [A1 turn -] [V out] . +5: [AM-ADV In this sense] , [A0 the measures] [AM-MOD will] [AM-MNR partially] [V undermine] [A1 the American democratic system] . +5: In this sense , the measures will partially undermine the American [A1 democratic] [V system] . +6: Unlike in Canada , the American States are responsible for the [V organisation] [A1 of federal elections in the United States] . +6: Unlike in Canada , the American States are responsible for the organisation of [A2 federal] [V elections] [AM-LOC in the United States] . +7: It is in this spirit that a [V majority] [A1 of American governments] have passed new laws since 2009 making the registration or voting process more difficult . +7: It is in this spirit that a majority of [A0 [A2 American] [V governments]] have passed new laws since 2009 making the registration or voting process more difficult . +7: It is in this spirit that [A0 a majority of American governments] have [V passed] [A1 new laws] [AM-TMP since 2009] making the registration or voting process more difficult . +7: It is in this spirit that [A0 a majority of American governments] have passed [A1 new [V laws]] since 2009 making the registration or voting process more difficult . +7: It is in this spirit that [A0 a majority of American governments] have passed new laws since 2009 [V making] [A1 the registration or voting process] [A2 more difficult] . +7: It is in this spirit that a majority of American governments have passed new laws since 2009 making the [V registration] or voting process more difficult . +7: It is in this spirit that a majority of American governments have passed new laws since 2009 making the registration or [V voting] process more difficult . +7: It is in this spirit that a majority of American governments have passed new laws since 2009 making the registration or [A1 voting] [V process] more difficult . +8: [A1 This phenomenon] [V gained] [A2 momentum] [AM-TMP following the November 2010 elections , which saw 675 new Republican representatives added in 26 States] . +8: [A1 This phenomenon] gained [A2 [V momentum]] following the November 2010 elections , which saw 675 new Republican representatives added in 26 States . +8: This phenomenon gained momentum [V following] [A2 the November 2010 elections , which saw 675 new Republican representatives added in 26 States] . +8: This phenomenon gained momentum following the November 2010 [A0 elections] , [R-A0 which] [V saw] [A1 675 new Republican representatives] [C-A1 added in 26 States] . +8: This phenomenon gained momentum following the November 2010 elections , which saw [A0 675 new [A4 Republican] [V representatives]] added in 26 States . +8: This phenomenon gained momentum following the November 2010 elections , which saw [A1 675 new Republican representatives] [V added] [AM-LOC in 26 States] . +9: [A2 As a [V result] , 180 bills restricting the exercise of the right to vote in 41 States were introduced in 2011 alone .] +9: As a result , 180 [A0 bills] [V restricting] [A1 the exercise of the right to vote in 41 States] were introduced in 2011 alone . +9: As a result , 180 bills restricting the [V exercise] [A1 of the right to vote in 41 States] were introduced in 2011 alone . +9: As a result , 180 bills restricting the exercise of the [V right] [A1 to vote in 41 States] were introduced in 2011 alone . +9: As a result , 180 bills restricting the exercise of the right to [V vote] [AM-LOC in 41 States] were introduced in 2011 alone . +9: [AM-DIS As a result] , [A1 180 bills restricting the exercise of the right to vote in 41 States] were [V introduced] [AM-TMP in 2011 alone] . diff --git a/test/ref/test_yisi_2_srl.out b/test/ref/test_yisi_2_srl.out new file mode 100644 index 0000000..c45f324 --- /dev/null +++ b/test/ref/test_yisi_2_srl.out @@ -0,0 +1,71 @@ +Reading labelconfig from ../src/yisi_srl.labelconfig ... Done. +Reading w2v text model from mini.d300.en +Size of voc: 500 Dimension: 300 +Finished reading w2v model. +Reading w2v text model from mini.d300.de +Size of voc: 500 Dimension: 300 +Finished reading w2v model. +Setting up MATE ...[eng, -tokenize, -reranker, -lemma, /home/das011/u/sandboxes/mateplus/models/CoNLL2009-ST-English-ALL.anna-3.3.lemmatizer.model, -tagger, /home/das011/u/sandboxes/mateplus/models/CoNLL2009-ST-English-ALL.anna-3.3.postagger.model, -parser, /home/das011/u/sandboxes/mateplus/models/CoNLL2009-ST-English-ALL.anna-3.3.parser.model, -srl, /home/das011/u/sandboxes/mateplus/models/srl-EMNLP14+fs-eng.model] +34.12.536 is2.data.ParametersFloat 121:read -> read parameters 134217727 not zero 296071 +34.12.541 is2.data.Cluster 113: -> Read cluster with 0 words +34.12.541 is2.lemmatizer.Lemmatizer 192:readModel -> Loading data finished. +34.12.542 is2.lemmatizer.Lemmatizer 194:readModel -> number of params 134217727 +34.12.542 is2.lemmatizer.Lemmatizer 195:readModel -> number of classes 92 +34.18.323 is2.data.ParametersFloat 121:read -> read parameters 134217727 not zero 1613201 +34.18.324 is2.data.Cluster 113: -> Read cluster with 0 words +34.18.325 is2.tag.Lexicon 103: -> Read lexicon with 0 words +34.18.326 is2.tag.Tagger 141:readModel -> Loading data finished. +34.18.336 is2.parser.Parser 188:readModel -> Reading data started +34.18.364 is2.data.Cluster 113: -> Read cluster with 0 words +34.25.37 is2.parser.ParametersFloat 101:read -> read parameters 134217727 not zero 19957525 +34.25.37 is2.parser.Parser 201:readModel -> parsing -- li size 134217727 +34.25.44 is2.parser.Parser 211:readModel -> Stacking false +34.25.44 is2.parser.Extractor 56:initStat -> mult (d4) +Used parser class is2.parser.Parser +Creation date 2012.11.02 14:33:53 +Training data CoNLL2009-ST-English-ALL.txt.crossannotated +Iterations 10 Used sentences 10000000 +Cluster null +34.25.46 is2.parser.Parser 240:readModel -> Reading data finnished +34.25.47 is2.parser.Extractor 56:initStat -> mult (d4) +Loading pipeline from /home/das011/u/sandboxes/mateplus/models/srl-EMNLP14+fs-eng.model +Loading reranker from /home/das011/u/sandboxes/mateplus/models/srl-EMNLP14+fs-eng.model +Done. +Setting up MATE ...[ger, -tokenize, -reranker, -hybrid, -token, /home/das011/u/sandboxes/mateplus/models/de-token.bin, -lemma, /home/das011/u/sandboxes/mateplus/models/lemma-ger-3.6.model, -tagger, /home/das011/u/sandboxes/mateplus/models/tag-ger-3.6.model, -parser, /home/das011/u/sandboxes/mateplus/models/pet-ger-S2a-40-0.25-0.1-2-2-ht4-hm4-kk0, -srl, /home/das011/u/sandboxes/mateplus/models/srl-EMNLP14+fs-ger.model] +34.47.11 is2.data.ParametersFloat 142:read -> read parameters 50000000 not zero 840044 +34.47.16 is2.data.Cluster 111: -> Read cluster with 0 words +34.47.17 is2.lemmatizer.Lemmatizer 185:readModel -> Loading data finished. +34.47.17 is2.lemmatizer.Lemmatizer 187:readModel -> number of params 50000000 +34.47.17 is2.lemmatizer.Lemmatizer 188:readModel -> number of classes 336 +34.47.25 is2.transitionS2a.Parser 720:readModel -> Reading data started +34.47.196 is2.data.Cluster 111: -> Read cluster with 0 words +34.47.196 is2.transitionS2a.Lexicon 296:read -> read 0 lexicon entries. +35.2.492 is2.transitionS2a.ParametersFloat 197:read -> read parameters 300000001 not zero 64569303 +35.2.493 is2.transitionS2a.Parser 734:readModel -> parsing -- li size 300000001 +35.2.499 is2.transitionS2a.ExtractorR 82:initStat -> values 1824 +35.5.298 is2.transitionS2a.ParametersFloat 197:read -> read parameters 70000001 not zero 1456038 +35.8.436 is2.transitionS2a.ParametersFloat 197:read -> read parameters 70000001 not zero 5355132 +35.8.437 is2.transitionS2a.Parser 776:readModel -> projective false +config {tags-considered=2.0, threshold-tag=0.25, beam=40.0, tagger-iterations=2.0, mthreshold-tag=0.1, morph-tags-considered=2.0, beam-tag=4.0, beam-morph=4.0} +1st 2nd abcd 3rd ab +Used parser class is2.transitionS2apply.Parser +Creation date 2014.02.23 18:43:23 +Training data corpora/Tiger2Dep//tiger2dep.standard.train.ltm +Iterations 25 Used sentences 10000000 +Cluster null +35.8.444 is2.transitionS2a.Parser 813:readModel -> Reading data finnished +Loading pipeline from /home/das011/u/sandboxes/mateplus/models/srl-EMNLP14+fs-ger.model +Loading reranker from /home/das011/u/sandboxes/mateplus/models/srl-EMNLP14+fs-ger.model +Done. +Tokenizing/SRL-ing hyp ... Done. +Tokenizing/SRL-ing inp ... Done. +Evaluating line 1 +Evaluating line 2 +Evaluating line 3 +Evaluating line 4 +Evaluating line 5 +Evaluating line 6 +Evaluating line 7 +Evaluating line 8 +Evaluating line 9 +Evaluating line 10