Skip to content

Commit

Permalink
Fully support YiSi-2 by facilitating SRL calls for both languages.
Browse files Browse the repository at this point in the history
YiSi now supports calling mateplus/mate for both the input and the
hypotheses in a single JVM using separate class loaders within each
Mate object.

Squashed merge commit of the following:

commit 0ede584117de5f6453781d653374f009ff4f267f
Author: Darlene Stewart <[email protected]>
Date:   Wed May 22 12:13:52 2019 -0400

    Added alternate reference for srl_test.

commit 9ce21f9ad0895f0818ae205518fb649bdfab5920
Author: Darlene Stewart <[email protected]>
Date:   Tue May 21 14:28:58 2019 -0400

    Updated es.mplsconfig.template.

commit a9c328c0fc504c35da22604ac658647cd199a74f
Merge: d578e94 6a6a217
Author: Darlene Stewart <[email protected]>
Date:   Mon May 6 14:51:15 2019 -0400

    Merge branch 'dev.yisi_2_srl' into master.

    YiSi now supports calling mateplus/mate for both the input and the
    hypotheses in a single JVM using separate class loaders within each
    Mate object.

commit 6a6a21770b616ca22810f272fed4c350f3f1ac1c
Author: Darlene Stewart <[email protected]>
Date:   Mon May 6 14:41:16 2019 -0400

    Added MATETOOLS_HOME and updated zh.mplsconfig generation.

commit 3c2556b4c06bf2f2b1a2a660342eec533c9f00e7
Author: Darlene Stewart <[email protected]>
Date:   Mon May 6 12:04:26 2019 -0400

    Support alternate reference output files for YiSi SRL tests.

commit aca412c8de0ac1098e796713a2b50ba9030d6de4
Author: Darlene Stewart <[email protected]>
Date:   Fri May 3 17:48:41 2019 -0400

    Enabled test_yisi_2_srl.

commit 346ec683cc1732f7e283cdfe1f27e5db8c981453
Author: Darlene Stewart <[email protected]>
Date:   Fri May 3 17:45:24 2019 -0400

    Have each Mate object use a separate class loader to call mateplus.

    This is needed because different versions of the same underlying
    mate-tools classes are included for different languages.

commit b4baa0c280fe5b69d9440f34672bbc7143425f9e
Author: Darlene Stewart <[email protected]>
Date:   Thu Apr 18 14:30:50 2019 -0400

    Use a singleton for the JVM.
  • Loading branch information
DarleneStewart committed May 28, 2019
1 parent 7788b1f commit f487c4e
Show file tree
Hide file tree
Showing 17 changed files with 306 additions and 68 deletions.
12 changes: 9 additions & 3 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,17 @@
# export MATEPLUS_HOME=~/u/sandboxes/mateplus
MATEPLUS_HOME ?= ~/u/tools/MATE/mateplus-master/src

MATETOOLS_HOME ?= $(MATEPLUS_HOME)

JAVA_HOME ?= /space/group/nrc_ict/pkgs/centos6/gcc-4.9.3/jdk1.8.0_131

MATEPLUS_PATH ?= $(MATEPLUS_HOME)/mateplus.jar
MATETOOLS_SRL_PATH ?= $(MATETOOLS_HOME)/srl.jar

ifneq ("$(wildcard $(MATEPLUS_PATH))","")
ifneq ("$(wildcard $(MATEPLUS_PATH))$(wildcard $(MATETOOLS_SRL_PATH))","")
WITH_SRLMATE ?= True
else
$(info *** mateplus.jar not found)
$(info *** mateplus.jar and srl.jar not found)
endif

ifneq (clean, $(MAKECMDGOALS))
Expand Down Expand Up @@ -107,7 +110,10 @@ scripts: | ../bin
cd ../obj/java && jar -cvf ../srlmate.jar *

en.mplsconfig de.mplsconfig es.mplsconfig zh.mplsconfig: %: %.template
sed -e "s#<YISI_HOME>/#$(dir $(CURDIR))#g; s#<MATEPLUS_HOME>#$(MATEPLUS_HOME)#g;" < $< > $@
sed -e "s#<YISI_HOME>#$(dir $(CURDIR))#g; s#//#/#g;" \
-e "s#<MATEPLUS_HOME>#$(MATEPLUS_HOME)#g;" \
-e "s#<MATETOOLS_HOME>#$(MATETOOLS_HOME)#g;" \
< $< > $@

$(SRLMATE_BINS): LDFLAGS += -L${JAVA_HOME}/jre/lib/amd64/server
$(SRLMATE_BINS): LIBRARIES += -ljvm
Expand Down
54 changes: 47 additions & 7 deletions src/Mate.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,27 @@
package yisi;
import se.lth.cs.srl.corpus.Sentence;
import se.lth.cs.srl.options.CompletePipelineCMDLineOptions;
import se.lth.cs.srl.options.FullPipelineOptions;
import se.lth.cs.srl.util.FileExistenceVerifier;
import se.lth.cs.srl.CompletePipeline;
import se.lth.cs.srl.languages.Language;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.net.URL;
import java.net.URLClassLoader;
import java.lang.Class;
import java.lang.reflect.Method;

public class Mate {
protected CompletePipeline pipeline = null;
// protected CompletePipeline pipeline = null;
protected Object pipeline = null;
protected URLClassLoader classLoader = null;
Class<?> class_CompletePipeline = null;

public String init(String lang,
public String init(String mate_jars,
String lang,
boolean rerank,
boolean hybrid,
String token,
Expand All @@ -37,6 +46,16 @@ public String init(String lang,
String result= new String();
try {
System.setOut(System.err);

// Each Mate object has its own class loader with its own "classpath".
String[] mateJars = mate_jars.split(":");
URL[] urls = new URL[mateJars.length];
for (int i = 0; i < mateJars.length; ++i) {
urls[i] = new File(mateJars[i]).toURI().toURL();
// System.err.println("URL: " + urls[i]);
}
classLoader = new URLClassLoader(urls, Thread.currentThread().getContextClassLoader());

ArrayList<String> argsBuilder = new ArrayList<String>();
argsBuilder.add(lang);
argsBuilder.add("-tokenize");
Expand Down Expand Up @@ -73,13 +92,31 @@ public String init(String lang,
String[] args = new String[argsBuilder.size()];
argsBuilder.toArray(args);
System.err.println(java.util.Arrays.toString(args));
CompletePipelineCMDLineOptions options = new CompletePipelineCMDLineOptions();
options.parseCmdLineArgs(args);
String error = FileExistenceVerifier.verifyCompletePipelineAllNecessaryModelFiles(options);

Class<?> class_FullPipelineOptions = classLoader.loadClass("se.lth.cs.srl.options.FullPipelineOptions");
// CompletePipelineCMDLineOptions options = new CompletePipelineCMDLineOptions();
Class<?> class_CompletePipelineCMDLineOptions = classLoader.loadClass("se.lth.cs.srl.options.CompletePipelineCMDLineOptions");
Object options = class_CompletePipelineCMDLineOptions.newInstance();
// options.parseCmdLineArgs(args);
Method method_parseCmdLineArgs = class_CompletePipelineCMDLineOptions.getMethod("parseCmdLineArgs", String[].class);
// System.err.println("Got Method " + method_parseCmdLineArgs);
method_parseCmdLineArgs.invoke(options, (Object)args);

// String error = FileExistenceVerifier.verifyCompletePipelineAllNecessaryModelFiles(options);
Class<?> class_FileExistenceVerifier = classLoader.loadClass("se.lth.cs.srl.util.FileExistenceVerifier");
Method method_verifyCompletePipelineAllNecessaryModelFiles =
class_FileExistenceVerifier.getMethod("verifyCompletePipelineAllNecessaryModelFiles", class_FullPipelineOptions);
// System.err.println("Got Method " + method_verifyCompletePipelineAllNecessaryModelFiles);
String error = (String) method_verifyCompletePipelineAllNecessaryModelFiles.invoke(null, options);

if (error != null){
result += error + "\n";
} else {
pipeline = CompletePipeline.getCompletePipeline(options);
// pipeline = CompletePipeline.getCompletePipeline(options);
class_CompletePipeline = classLoader.loadClass("se.lth.cs.srl.CompletePipeline");
Method method_getCompletePipeline = class_CompletePipeline.getMethod("getCompletePipeline", class_FullPipelineOptions);
// System.err.println("Got Method " + method_getCompletePipeline);
pipeline = method_getCompletePipeline.invoke(null, options);
}
} catch (Exception e){
result += e.getMessage();
Expand All @@ -90,7 +127,10 @@ public String init(String lang,
public String parse(String sentence) {
String result = null;
try {
result = pipeline.parse(sentence).toString();
// result = pipeline.parse(sentence).toString();
Method method_parse = class_CompletePipeline.getMethod("parse", String.class);
// System.err.println("Got Method " + method_parse);
result = method_parse.invoke(pipeline, sentence).toString();
} catch (Exception e) {
e.printStackTrace();
System.err.println(sentence);
Expand Down
6 changes: 5 additions & 1 deletion src/de.mplsconfig.template
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
matejar=<YISI_HOME>/obj/srlmate.jar:<MATEPLUS_HOME>/mateplus.jar:<MATEPLUS_HOME>/lib/anna-3.3.jar:<MATEPLUS_HOME>/lib/opennlp-maxent-3.0.3.jar:<MATEPLUS_HOME>/lib/opennlp-tools-1.5.3.jar:<MATEPLUS_HOME>/lib/liblinear-1.92.jar:<MATEPLUS_HOME>/lib/stanford-corenlp-3.9.1.jar:<MATEPLUS_HOME>/lib/transition-1.30.jar
# yisi_home is needed to locate srlmate.jar within YiSi.
yisi_home=<YISI_HOME>
# mate_jars is a colon (:) separated list of all jar files needed to run
# mateplus or mate-tools for this language.
mate_jars=<MATEPLUS_HOME>/mateplus.jar:<MATEPLUS_HOME>/lib/opennlp-maxent-3.0.3.jar:<MATEPLUS_HOME>/lib/opennlp-tools-1.5.3.jar:<MATEPLUS_HOME>/lib/transition-1.30.jar:<MATEPLUS_HOME>/lib/stanford-corenlp-3.9.1.jar
lang=ger
rerank=1
hybrid=1
Expand Down
6 changes: 5 additions & 1 deletion src/en.mplsconfig.template
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
matejar=<YISI_HOME>/obj/srlmate.jar:<MATEPLUS_HOME>/mateplus.jar:<MATEPLUS_HOME>/lib/anna-3.3.jar:<MATEPLUS_HOME>/lib/liblinear-1.92.jar:<MATEPLUS_HOME>/lib/stanford-corenlp-3.9.1.jar
# yisi_home is needed to locate srlmate.jar within YiSi.
yisi_home=<YISI_HOME>
# mate_jars is a colon (:) separated list of all jar files needed to run
# mateplus or mate-tools for this language.
mate_jars=<MATEPLUS_HOME>/mateplus.jar:<MATEPLUS_HOME>/lib/anna-3.3.jar:<MATEPLUS_HOME>/lib/liblinear-1.92.jar:<MATEPLUS_HOME>/lib/stanford-corenlp-3.9.1.jar
lang=eng
rerank=1
hybrid=0
Expand Down
17 changes: 11 additions & 6 deletions src/es.mplsconfig.template
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
matejar=<YISI_HOME>/obj/srlmate.jar:<MATEPLUS_HOME>/lib/srl.jar:<MATEPLUS_HOME>/lib/anna-3.3.jar:<MATEPLUS_HOME>/lib/opennlp-maxent-3.0.3.jar:<MATEPLUS_HOME>/lib/opennlp-tools-1.5.3.jar:<MATEPLUS_HOME>/lib/liblinear-1.92.jar:<MATEPLUS_HOME>/lib/stanford-corenlp-3.9.1.jar:<MATEPLUS_HOME>/lib/transition-1.30.jar:<MATEPLUS_HOME>/lib/whatswrong-0.2.3.jar:<MATEPLUS_HOME>/lib/seg.jar:<MATEPLUS_HOME>/lib/slf4j-api-1.7.25.jar:<MATEPLUS_HOME>/lib/slf4j-jdk14-1.7.25.jar
# yisi_home is needed to locate srlmate.jar within YiSi.
yisi_home=<YISI_HOME>
# mate_jars is a colon (:) separated list of all jar files needed to run
# mateplus or mate-tools for this language.
mate_jars=<MATETOOLS_HOME>/lib/srl.jar:<MATETOOLS_HOME>/lib/anna-3.3.jar:<MATETOOLS_HOME>/lib/opennlp-maxent-3.0.2-incubating.jar:<MATETOOLS_HOME>/lib/opennlp-tools-1.5.2-incubating.jar:<MATETOOLS_HOME>/lib/liblinear-1.51-with-deps.jar:<MATETOOLS_HOME>/lib/whatswrong-0.2.3.jar
#mate_jars=<MATETOOLS_HOME>/lib/srl.jar:<MATETOOLS_HOME>/lib/anna-3.3.jar:<MATETOOLS_HOME>/lib/opennlp-maxent-3.0.3.jar:<MATETOOLS_HOME>/lib/opennlp-tools-1.5.3.jar:<MATETOOLS_HOME>/lib/liblinear-1.92.jar:<MATETOOLS_HOME>/lib/stanford-corenlp-3.9.1.jar:<MATETOOLS_HOME>/lib/whatswrong-0.2.3.jar:<MATETOOLS_HOME>/lib/slf4j-api-1.7.25.jar:<MATETOOLS_HOME>/lib/slf4j-jdk14-1.7.25.jar
lang=spa
rerank=0
hybrid=0
token=
morph=MATEPLUS_HOME>/models/CoNLL2009-ST-Spanish-ALL.anna-3.3.morphtagger.model
lemma=MATEPLUS_HOME>/models/CoNLL2009-ST-Spanish-ALL.anna-3.3.lemmatizer.model
tagger=MATEPLUS_HOME>/models/CoNLL2009-ST-Spanish-ALL.anna-3.3.postagger.model
parser=MATEPLUS_HOME>/models/CoNLL2009-ST-Spanish-ALL.anna-3.3.parser.model
srl=MATEPLUS_HOME>/models/CoNLL2009-ST-Spanish-ALL.anna-3.3.srl-4.21.srl-rr.model
morph=<MATETOOLS_HOME>/models/CoNLL2009-ST-Spanish-ALL.anna-3.3.morphtagger.model
lemma=<MATETOOLS_HOME>/models/CoNLL2009-ST-Spanish-ALL.anna-3.3.lemmatizer.model
tagger=<MATETOOLS_HOME>/models/CoNLL2009-ST-Spanish-ALL.anna-3.3.postagger.model
parser=<MATETOOLS_HOME>/models/CoNLL2009-ST-Spanish-ALL.anna-3.3.parser.model
srl=<MATETOOLS_HOME>/models/CoNLL2009-ST-Spanish-ALL.anna-3.3.srl-4.21.srl-rr.model
79 changes: 47 additions & 32 deletions src/srlmate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ auto name = jni_env_ptr->expr; \
} \
} \

JavaVM* srlmate_t::jvm_m = NULL;
JNIEnv* srlmate_t::jen_m = NULL;
int srlmate_t::obj_cnt_m = 0;

srlmate_t::srlmate_t(string path) {
cerr << "Setting up MATE ...";

Expand All @@ -48,7 +52,8 @@ srlmate_t::srlmate_t(string path) {
exit(1);
}

string matejar = "";
string yisi_home = "";
string mate_jars = "";
string lang = "";
string token = "";
string morph = "";
Expand All @@ -66,76 +71,83 @@ srlmate_t::srlmate_t(string path) {
string cfgn, cfgv;
getline(iss, cfgn, '=');
getline(iss, cfgv);
if (cfgn == "matejar") {
matejar = cfgv;
if (cfgn == "yisi_home") {
yisi_home = cfgv;
}
else if (cfgn == "mate_jars") {
mate_jars = cfgv;
}
if (cfgn == "lang") {
else if (cfgn == "lang") {
lang = cfgv;
}
if (cfgn == "rerank") {
else if (cfgn == "rerank") {
if ((cfgv.compare("0") == 0) || (cfgv.compare("false") == 0)) {
rerank = false;
} else {
rerank = true;
}
}
if (cfgn == "hybrid") {
else if (cfgn == "hybrid") {
if ((cfgv.compare("0") == 0) || (cfgv.compare("false") == 0)) {
hybrid = false;
} else {
hybrid = true;
}
}
if (cfgn == "token") {
else if (cfgn == "token") {
token = cfgv;
}
if (cfgn == "morph") {
else if (cfgn == "morph") {
morph = cfgv;
}
if (cfgn == "lemma") {
else if (cfgn == "lemma") {
lemma = cfgv;
}
if (cfgn == "tagger") {
else if (cfgn == "tagger") {
tagger = cfgv;
}
if (cfgn == "parser") {
else if (cfgn == "parser") {
parser = cfgv;
}
if (cfgn == "srl") {
else if (cfgn == "srl") {
srl = cfgv;
}
}

// init JVM
size_t opt_count = 2;
JavaVMOption* vm_opts = new JavaVMOption[opt_count];
string opt0 = "-Djava.class.path=" + matejar;
string opt1 = "-Xmx6g";
vm_opts[0].optionString = const_cast<char*>(opt0.c_str());
vm_opts[1].optionString = const_cast<char*>(opt1.c_str());

JavaVMInitArgs vm_args;
vm_args.version = JNI_VERSION_1_6;
vm_args.options = vm_opts;
vm_args.nOptions = opt_count;

jint result = JNI_CreateJavaVM(&jvm_m, (void**)&jen_m, &vm_args);
delete vm_opts;
if (result != JNI_OK) {
cerr << "ERROR: Failed to create Java VM. (error code = " << result << "). Exiting..." << endl;
exit(1);
jvm_m = NULL;
if (jvm_m == NULL) {
size_t opt_count = 2;
JavaVMOption* vm_opts = new JavaVMOption[opt_count];
string opt0 = "-Djava.class.path=" + yisi_home + "/obj/srlmate.jar";
string opt1 = "-Xmx12g";
vm_opts[0].optionString = const_cast<char*>(opt0.c_str());
vm_opts[1].optionString = const_cast<char*>(opt1.c_str());

JavaVMInitArgs vm_args;
vm_args.version = JNI_VERSION_1_6;
vm_args.options = vm_opts;
vm_args.nOptions = opt_count;

jint result = JNI_CreateJavaVM(&jvm_m, (void**)&jen_m, &vm_args);
delete vm_opts;
if (result != JNI_OK) {
cerr << "ERROR: Failed to create Java VM. (error code = " << result << "). Exiting..." << endl;
exit(1);
jvm_m = NULL;
}
}
++obj_cnt_m;

JNI_SAFE_CALL(mcls, jen_m, FindClass("yisi/Mate"));
JNI_SAFE_CALL(ctor, jen_m, GetMethodID(mcls, "<init>", "()V"));
JNI_SAFE_CALL(mobj, jen_m, NewObject(mcls, ctor));
JNI_SAFE_CALL(init, jen_m, GetMethodID(mcls, "init",
"(Ljava/lang/String;ZZLjava/lang/String;Ljava/lang/String;Ljava/lang/String;"
"(Ljava/lang/String;Ljava/lang/String;ZZLjava/lang/String;Ljava/lang/String;Ljava/lang/String;"
"Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;"));
JNI_SAFE_CALL(jerr, jen_m,
CallObjectMethod(mobj,
init,
jen_m->NewStringUTF(mate_jars.c_str()),
jen_m->NewStringUTF(lang.c_str()),
rerank ? JNI_TRUE : JNI_FALSE,
hybrid ? JNI_TRUE : JNI_FALSE,
Expand All @@ -157,8 +169,11 @@ srlmate_t::srlmate_t(string path) {
} // srlmate_t

srlmate_t::~srlmate_t() {
if (jvm_m != NULL) {
--obj_cnt_m;
if (obj_cnt_m == 0 && jvm_m != NULL) {
jvm_m->DestroyJavaVM();
jvm_m = NULL;
jen_m = NULL;
}
}

Expand Down
5 changes: 3 additions & 2 deletions src/srlmate.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ namespace yisi {
virtual std::vector<srlgraph_t> parse(std::vector<std::string> sents);
private:
std::string noparse(std::vector<std::string> tokens);
JavaVM* jvm_m;
JNIEnv* jen_m;
static JavaVM* jvm_m;
static JNIEnv* jen_m;
static int obj_cnt_m;
jclass mate_class_m;
jobject mate_object_m;
};
Expand Down
15 changes: 10 additions & 5 deletions src/zh.mplsconfig.template
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
matejar=<YISI_HOME>/obj/srlmate.jar:<MATEPLUS_HOME>/lib/srl.jar:<MATEPLUS_HOME>/lib/anna-3.3.jar:<MATEPLUS_HOME>/lib/opennlp-maxent-3.0.3.jar:<MATEPLUS_HOME>/lib/opennlp-tools-1.5.3.jar:<MATEPLUS_HOME>/lib/liblinear-1.92.jar:<MATEPLUS_HOME>/lib/stanford-corenlp-3.9.1.jar:<MATEPLUS_HOME>/lib/transition-1.30.jar:<MATEPLUS_HOME>/lib/whatswrong-0.2.3.jar:<MATEPLUS_HOME>/lib/seg.jar:<MATEPLUS_HOME>/lib/slf4j-api-1.7.25.jar:<MATEPLUS_HOME>/lib/slf4j-jdk14-1.7.25.jar
# yisi_home is needed to locate srlmate.jar within YiSi.
yisi_home=<YISI_HOME>
# mate_jars is a colon (:) separated list of all jar files needed to run
# mateplus or mate-tools for this language.
mate_jars=<MATETOOLS_HOME>/lib/srl.jar:<MATETOOLS_HOME>/lib/anna-3.3.jar:<MATETOOLS_HOME>/lib/opennlp-maxent-3.0.2-incubating.jar:<MATETOOLS_HOME>/lib/opennlp-tools-1.5.2-incubating.jar:<MATETOOLS_HOME>/lib/liblinear-1.51-with-deps.jar:<MATETOOLS_HOME>/lib/whatswrong-0.2.3.jar:<MATETOOLS_HOME>/lib/seg.jar
#mate_jars=<MATETOOLS_HOME>/lib/srl.jar:<MATETOOLS_HOME>/lib/anna-3.3.jar:<MATETOOLS_HOME>/lib/opennlp-maxent-3.0.3.jar:<MATETOOLS_HOME>/lib/opennlp-tools-1.5.3.jar:<MATETOOLS_HOME>/lib/liblinear-1.92.jar:<MATETOOLS_HOME>/lib/whatswrong-0.2.3.jar:<MATETOOLS_HOME>/lib/stanford-segmenter-3.6.0.jar:<MATETOOLS_HOME>/lib/slf4j-api-1.7.25.jar:<MATETOOLS_HOME>/lib/slf4j-jdk14-1.7.25.jar
lang=chi
rerank=0
hybrid=0
token=<MATEPLUS_HOME>/models/stanford-segmenter-2015-12-09/data
token=<MATETOOLS_HOME>/models/stanford-segmenter-2015-12-09/data
morph=
lemma=
tagger=<MATEPLUS_HOME>/models/CoNLL2009-ST-Chinese-ALL.anna-3.3.postagger.model
parser=<MATEPLUS_HOME>/models/CoNLL2009-ST-Chinese-ALL.anna-3.3.parser.model
srl=<MATEPLUS_HOME>/models/CoNLL2009-ST-Chinese-ALL.anna-3.3.srl-4.1.srl.model
tagger=<MATETOOLS_HOME>/models/CoNLL2009-ST-Chinese-ALL.anna-3.3.postagger.model
parser=<MATETOOLS_HOME>/models/CoNLL2009-ST-Chinese-ALL.anna-3.3.parser.model
srl=<MATETOOLS_HOME>/models/CoNLL2009-ST-Chinese-ALL.anna-3.3.srl-4.1.srl.model
Loading

0 comments on commit f487c4e

Please sign in to comment.