Skip to content

Commit d7ece93

Browse files
Grace MuznyStanford NLP
Grace Muzny
authored and
Stanford NLP
committed
working on some propernoun chain creation
1 parent 04dc7f3 commit d7ece93

File tree

1,009 files changed

+127600
-259592
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,009 files changed

+127600
-259592
lines changed

JavaNLP-core.eml

+53-21
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
<?xml version="1.0" encoding="UTF-8"?>
2-
<component LANGUAGE_LEVEL="JDK_1_6" inheritJdk="true">
3-
<output-test url="file://$MODULE_DIR$/classes"/>
2+
<component inheritJdk="true">
43
<exclude-output/>
54
<contentEntry url="file://$MODULE_DIR$">
65
<testFolder url="file://$MODULE_DIR$/test/src"/>
@@ -20,11 +19,6 @@
2019
<relative-module-src project-related="jar://$PROJECT_DIR$/projects/core/libsrc/tomcat/apache-tomcat-7.0.12-src.zip!/"/>
2120
<relative-module-cls project-related="jar://$PROJECT_DIR$/projects/core/lib/tomcat/jasper.jar!/"/>
2221
</lib>
23-
<lib name="servlet-api.jar" scope="COMPILE">
24-
<srcroot url="jar://$MODULE_DIR$/libsrc/tomcat/apache-tomcat-7.0.12-src.zip!/"/>
25-
<relative-module-src project-related="jar://$PROJECT_DIR$/projects/core/libsrc/tomcat/apache-tomcat-7.0.12-src.zip!/"/>
26-
<relative-module-cls project-related="jar://$PROJECT_DIR$/projects/core/lib/tomcat/servlet-api.jar!/"/>
27-
</lib>
2822
<lib name="tomcat-juli.jar" scope="COMPILE">
2923
<srcroot url="jar://$MODULE_DIR$/libsrc/tomcat/apache-tomcat-7.0.12-src.zip!/"/>
3024
<relative-module-src project-related="jar://$PROJECT_DIR$/projects/core/libsrc/tomcat/apache-tomcat-7.0.12-src.zip!/"/>
@@ -56,20 +50,21 @@
5650
<relative-module-cls project-related="jar://$PROJECT_DIR$/projects/core/lib/junit.jar!/"/>
5751
</lib>
5852
<lib name="javax.servlet.jar" scope="COMPILE">
53+
<srcroot url="jar://$MODULE_DIR$/libsrc/tomcat/apache-tomcat-7.0.12-src.zip!/"/>
54+
<relative-module-src project-related="jar://$PROJECT_DIR$/projects/core/libsrc/tomcat/apache-tomcat-7.0.12-src.zip!/"/>
5955
<relative-module-cls project-related="jar://$PROJECT_DIR$/projects/core/lib/javax.servlet.jar!/"/>
6056
</lib>
6157
<lib name="AppleJavaExtensions.jar" scope="COMPILE">
6258
<relative-module-cls project-related="jar://$PROJECT_DIR$/projects/core/lib/AppleJavaExtensions.jar!/"/>
6359
</lib>
64-
<lib name="JFlex.jar" scope="COMPILE">
65-
<srcroot url="jar://$MODULE_DIR$/libsrc/jflex-1.4.3-src.zip!/"/>
66-
<relative-module-src project-related="jar://$PROJECT_DIR$/projects/core/libsrc/jflex-1.4.3-src.zip!/"/>
67-
<relative-module-cls project-related="jar://$PROJECT_DIR$/projects/core/lib/JFlex.jar!/"/>
60+
<lib name="jflex-1.5.1.jar" scope="COMPILE">
61+
<srcroot url="jar://$MODULE_DIR$/libsrc/jflex-1.5.1-src.zip!/"/>
62+
<relative-module-src project-related="jar://$PROJECT_DIR$/projects/core/libsrc/jflex-1.5.1-src.zip!/"/>
63+
<relative-module-cls project-related="jar://$PROJECT_DIR$/projects/core/lib/jflex-1.5.1.jar!/"/>
6864
</lib>
69-
<lib name="xom-1.2.8.jar" scope="COMPILE">
70-
<srcroot url="jar://$MODULE_DIR$/libsrc/xom-src-1.2.8.zip!/"/>
71-
<relative-module-src project-related="jar://$PROJECT_DIR$/projects/core/libsrc/xom-src-1.2.8.zip!/"/>
72-
<relative-module-cls project-related="jar://$PROJECT_DIR$/projects/core/lib/xom-1.2.8.jar!/"/>
65+
<lib name="xom-1.2.10.jar" scope="COMPILE">
66+
<srcroot url="file://libsrc/xom-1.2.10-src.zip"/>
67+
<relative-module-cls project-related="jar://$PROJECT_DIR$/projects/core/lib/xom-1.2.10.jar!/"/>
7368
</lib>
7469
<lib name="joda-time.jar" scope="COMPILE">
7570
<srcroot url="jar://$MODULE_DIR$/libsrc/joda-time-2.1-sources.jar!/"/>
@@ -81,14 +76,51 @@
8176
<relative-module-src project-related="jar://$PROJECT_DIR$/projects/core/libsrc/jollyday-0.4.7-sources.jar!/"/>
8277
<relative-module-cls project-related="jar://$PROJECT_DIR$/projects/core/lib/jollyday-0.4.7.jar!/"/>
8378
</lib>
84-
<lib name="commons-lang-2.5.jar" scope="COMPILE">
85-
<srcroot url="jar://$MODULE_DIR$/libsrc/commons-lang-2.5-sources.jar!/"/>
86-
<relative-module-src project-related="jar://$PROJECT_DIR$/projects/core/libsrc/commons-lang-2.5-sources.jar!/"/>
87-
<relative-module-cls project-related="jar://$PROJECT_DIR$/projects/core/lib/commons-lang-2.5.jar!/"/>
88-
</lib>
8979
<lib name="log4j-1.2.16.jar" scope="COMPILE">
9080
<srcroot url="jar://$MODULE_DIR$/libsrc/log4j-1.2.16-src.zip!/"/>
9181
<relative-module-src project-related="jar://$PROJECT_DIR$/projects/core/libsrc/log4j-1.2.16-src.zip!/"/>
9282
<relative-module-cls project-related="jar://$PROJECT_DIR$/projects/core/lib/log4j-1.2.16.jar!/"/>
9383
</lib>
94-
</component>
84+
<lib name="ejml-0.23.jar" scope="COMPILE">
85+
<srcroot url="jar://$MODULE_DIR$/libsrc/ejml-0.23-src.zip!/"/>
86+
<relative-module-src project-related="jar://$PROJECT_DIR$/projects/core/libsrc/ejml-0.23-src.zip!/"/>
87+
<relative-module-cls project-related="jar://$PROJECT_DIR$/projects/core/lib/ejml-0.23.jar!/"/>
88+
</lib>
89+
<lib name="commons-lang3-3.1.jar" scope="COMPILE">
90+
<srcroot url="jar://$MODULE_DIR$/libsrc/commons-lang3-3.1-sources.jar!/"/>
91+
<relative-module-src project-related="jar://$PROJECT_DIR$/projects/core/libsrc/commons-lang3-3.1-sources.jar!/"/>
92+
<relative-module-cls project-related="jar://$PROJECT_DIR$/projects/core/lib/commons-lang3-3.1.jar!/"/>
93+
</lib>
94+
<lib name="protobuf.jar" scope="COMPILE">
95+
<srcroot url="jar://$MODULE_DIR$/libsrc/protobuf-src.jar!/"/>
96+
<relative-module-src project-related="jar://$PROJECT_DIR$/projects/core/libsrc/protobuf-src.jar!/"/>
97+
<relative-module-cls project-related="jar://$PROJECT_DIR$/projects/core/lib/protobuf.jar!/"/>
98+
</lib>
99+
<lib name="javacc.jar" scope="COMPILE">
100+
<relative-module-cls project-related="jar://$PROJECT_DIR$/projects/core/lib/javacc.jar!/"/>
101+
</lib>
102+
<lib name="javax.json.jar" scope="COMPILE">
103+
<srcroot url="jar://$MODULE_DIR$/libsrc/javax.json-api-1.0-sources.jar!/"/>
104+
<relative-module-src project-related="jar://$PROJECT_DIR$/projects/core/libsrc/javax.json-api-1.0-sources.jar!/"/>
105+
<relative-module-cls project-related="jar://$PROJECT_DIR$/projects/core/lib/javax.json.jar!/"/>
106+
</lib>
107+
<lib name="lucene-core-4.10.3.jar" scope="COMPILE">
108+
<srcroot url="file://$MODULE_DIR$/libsrc/lucene-4.10.3-src.tgz"/>
109+
<relative-module-src project-related="file://$PROJECT_DIR$/projects/core/libsrc/lucene-4.10.3-src.tgz"/>
110+
<relative-module-cls project-related="jar://$PROJECT_DIR$/projects/core/lib/lucene-core-4.10.3.jar!/"/>
111+
</lib>
112+
<lib name="lucene-demo-4.10.3.jar" scope="COMPILE">
113+
<srcroot url="file://$MODULE_DIR$/libsrc/lucene-4.10.3-src.tgz"/>
114+
<relative-module-src project-related="file://$PROJECT_DIR$/projects/core/libsrc/lucene-4.10.3-src.tgz"/>
115+
<relative-module-cls project-related="jar://$PROJECT_DIR$/projects/core/lib/lucene-demo-4.10.3.jar!/"/>
116+
</lib>
117+
<lib name="lucene-analyzers-common-4.10.3.jar" scope="COMPILE">
118+
<relative-module-cls project-related="jar://$PROJECT_DIR$/projects/core/lib/lucene-analyzers-common-4.10.3.jar!/"/>
119+
</lib>
120+
<lib name="lucene-queryparser-4.10.3.jar" scope="COMPILE">
121+
<relative-module-cls project-related="jar://$PROJECT_DIR$/projects/core/lib/lucene-queryparser-4.10.3.jar!/"/>
122+
</lib>
123+
<lib name="lucene-queries-4.10.3.jar" scope="COMPILE">
124+
<relative-module-cls project-related="jar://$PROJECT_DIR$/projects/core/lib/lucene-queries-4.10.3.jar!/"/>
125+
</lib>
126+
</component>

JavaNLP-core.iml

+1-61
Original file line numberDiff line numberDiff line change
@@ -1,62 +1,2 @@
11
<?xml version="1.0" encoding="UTF-8"?>
2-
<module classpath="eclipse" classpath-dir="$MODULE_DIR$" type="JAVA_MODULE" version="4">
3-
<component name="FacetManager">
4-
<facet type="web" name="Web">
5-
<configuration>
6-
<descriptors>
7-
<deploymentDescriptor name="web.xml" url="file://$MODULE_DIR$/src/edu/stanford/nlp/parser/webapp/WEB-INF/web.xml" />
8-
</descriptors>
9-
<webroots>
10-
<root url="file://$MODULE_DIR$/src/edu/stanford/nlp/parser/webapp" relative="/" />
11-
</webroots>
12-
<sourceRoots>
13-
<root url="file://$MODULE_DIR$/itest/src" />
14-
<root url="file://$MODULE_DIR$/src" />
15-
</sourceRoots>
16-
</configuration>
17-
</facet>
18-
<facet type="web" name="Web2">
19-
<configuration>
20-
<descriptors>
21-
<deploymentDescriptor name="web.xml" url="file://$MODULE_DIR$/src/edu/stanford/nlp/time/suservlet/WEB-INF/web.xml" />
22-
</descriptors>
23-
<webroots>
24-
<root url="file://$MODULE_DIR$/src/edu/stanford/nlp/time/suservlet" relative="/" />
25-
</webroots>
26-
<sourceRoots>
27-
<root url="file://$MODULE_DIR$/itest/src" />
28-
<root url="file://$MODULE_DIR$/src" />
29-
</sourceRoots>
30-
</configuration>
31-
</facet>
32-
<facet type="web" name="Web3">
33-
<configuration>
34-
<descriptors>
35-
<deploymentDescriptor name="web.xml" url="file://$MODULE_DIR$/src/edu/stanford/nlp/ie/ner/webapp/WEB-INF/web.xml" />
36-
</descriptors>
37-
<webroots>
38-
<root url="file://$MODULE_DIR$/src/edu/stanford/nlp/ie/ner/webapp" relative="/" />
39-
</webroots>
40-
<sourceRoots>
41-
<root url="file://$MODULE_DIR$/itest/src" />
42-
<root url="file://$MODULE_DIR$/src" />
43-
</sourceRoots>
44-
</configuration>
45-
</facet>
46-
<facet type="web" name="Web4">
47-
<configuration>
48-
<descriptors>
49-
<deploymentDescriptor name="web.xml" url="file://$MODULE_DIR$/src/edu/stanford/nlp/pipeline/webapp/WEB-INF/web.xml" />
50-
</descriptors>
51-
<webroots>
52-
<root url="file://$MODULE_DIR$/src/edu/stanford/nlp/pipeline/webapp" relative="/" />
53-
</webroots>
54-
<sourceRoots>
55-
<root url="file://$MODULE_DIR$/itest/src" />
56-
<root url="file://$MODULE_DIR$/src" />
57-
</sourceRoots>
58-
</configuration>
59-
</facet>
60-
</component>
61-
</module>
62-
2+
<module classpath="eclipse" classpath-dir="$MODULE_DIR$" type="JAVA_MODULE" version="4" />

README.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
Stanford CoreNLP
22
================
33

4-
Stanford CoreNLP provides a set of natural language analysis tools written in Java. It can take raw human language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize and interpret dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases or word dependencies, and indicate which noun phrases refer to the same entities. It was originally developed for English, but now also provides varying levels of support for (Modern Standard) Arabic, (mainland) Chinese, French, German, and Spanish. Stanford CoreNLP is an integrated framework, which makes it very easy to apply a bunch of language analysis tools to a piece of text. Starting from plain text, you can run all the tools with just two lines of code. Its analyses provide the foundational building blocks for higher-level and domain-specific text understanding applications. Stanford CoreNLP is a set of stable and well-tested natural language processing tools, widely used by various groups in academia, industry, and government. The tools variously use rule-based, probabilistic machine learning, and deep learning components.
4+
Stanford CoreNLP provides a set of natural language analysis tools written in Java. It can take raw human language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, and mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It was originally developed for English, but now also provides varying levels of support for Arabic, (mainland) Chinese, French, German, and Spanish. Stanford CoreNLP is an integrated framework, which makes it very easy to apply a bunch of language analysis tools to a piece of text. Starting from plain text, you can run all the tools on it with just two lines of code. Its analyses provide the foundational building blocks for higher-level and domain-specific text understanding applications. Stanford CoreNLP is a set of stable and well-tested natural language processing tools, widely used by various groups in academia, government, and industry.
55

6-
The Stanford CoreNLP code is written in Java and licensed under the GNU General Public License (v3 or later). Note that this is the full GPL, which allows many free uses, but not its use in proprietary software that you distribute to others.
6+
The Stanford CoreNLP code is written in Java and licensed under the GNU General Public License (v3 or later). Note that this is the full GPL, which allows many free uses, but not its use in proprietary software that you distribute.
77

88
You can find releases of Stanford CoreNLP on [Maven Central](http://search.maven.org/#browse%7C11864822).
99

1010
You can find more explanation and documentation on [the Stanford CoreNLP homepage](http://nlp.stanford.edu/software/corenlp.shtml#Demo).
1111

1212
The most recent models associated with the code in the HEAD of this repository can be found [here](http://nlp.stanford.edu/software/stanford-corenlp-models-current.jar).
1313

14-
For information about making contributions to Stanford CoreNLP, see the file [CONTRIBUTING.md](CONTRIBUTING.md).
14+
For information about making contributions to Stanford CoreNLP, see the file `CONTRIBUTING.md`.
1515

1616
Questions about CoreNLP can either be posted on StackOverflow with the tag [stanford-nlp](http://stackoverflow.com/questions/tagged/stanford-nlp),
1717
or on the [mailing lists](http://nlp.stanford.edu/software/corenlp.shtml#Mail).

build.gradle

+1-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ sourceCompatibility = 1.8
1111
targetCompatibility = 1.8
1212
compileJava.options.encoding = 'UTF-8'
1313

14-
version = '3.6.0'
14+
version = '3.4.1'
1515

1616
// Gradle application plugin
1717
mainClassName = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
@@ -47,7 +47,6 @@ task listDeps << {
4747

4848
dependencies {
4949
compile fileTree(dir: 'lib', include: '*.jar')
50-
testCompile fileTree(dir: 'liblocal', include: '*.jar')
5150
}
5251

5352
// Eclipse plugin setup

build.xml

-9
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,6 @@
2626
<include name="*.jar"/>
2727
<exclude name="javanlp*"/>
2828
</fileset>
29-
<fileset dir="${basedir}/liblocal">
30-
<include name="*.jar"/>
31-
<exclude name="javanlp*"/>
32-
</fileset>
3329
</path>
3430
</target>
3531

@@ -128,11 +124,6 @@
128124
<compilerarg value="-Xmaxwarns"/>
129125
<compilerarg value="10000"/> -->
130126
</javac>
131-
<copy todir="${build.path}/edu/stanford/nlp/pipeline/demo">
132-
<fileset dir="${source.path}/edu/stanford/nlp/pipeline/demo">
133-
<exclude name="**/*.java"/>
134-
</fileset>
135-
</copy>
136127
</target>
137128

138129
<target name="test" depends="classpath,compile"

data/edu/stanford/nlp/patterns/surface/example.properties

+5-5
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,16 @@ outDir=SPIEDPatternsout
1717
#Number of threads available on the machine
1818
numThreads=1
1919
#***Use these options if you are limited by memory
20-
batchProcessSents = false
20+
batchProcessSents = true
2121
#This name is a misnomer. Max number of *lines* per batch file. Works only for text file format; ser files cannot be broken down
2222
numMaxSentencesPerBatchFile=100
23-
saveInvertedIndex=false
23+
saveInvertedIndex=true
2424
invertedIndexDirectory=${outDir}/invertedIndex
2525
#Loading index from invertedIndexDirectory
2626
#loadInvertedIndex=true
2727

2828
#Useful for memory heavy apps.
29-
#invertedIndexClass=edu.stanford.nlp.patterns.LuceneSentenceIndex
29+
invertedIndexClass=edu.stanford.nlp.patterns.LuceneSentenceIndex
3030

3131

3232
### Example for running it on presidents biographies. For more data examples, see the bottom of this file
@@ -43,7 +43,7 @@ saveSentencesSerDir=${outDir}/sents
4343
#fileFormat=ser
4444
#file=${outDir}/sents
4545

46-
#We are learning names of presidential candidates, places, and other names. In each line, all text after tabs are ignored in these seed files
46+
#We are learning names of presidential candidates, places, and other names
4747
seedWordsFiles=NAME,${DIR}/names.txt;PLACE,${DIR}/places.txt;OTHER,${DIR}/otherpeople.txt
4848
#Useful for matching lemmas or spelling mistakes
4949
fuzzyMatch=false
@@ -103,7 +103,7 @@ targetAllowedTagsInitialsStr=NAME,N;OTHER,N
103103
computeAllPatterns = true
104104

105105
#Options: MEMORY, DB, LUCENE. If using SQL for storing patterns for each token, populate the SQLConnection class (that is, provide those properties)!
106-
storePatsForEachToken=MEMORY
106+
storePatsForEachToken=LUCENE
107107
#***If your code is running too slow, try to reduce this number. Samples % of sentences for learning patterns
108108
sampleSentencesForSufficientStats=1.0
109109

data/edu/stanford/nlp/process/ptblexer.gold

+1-1
Original file line numberDiff line numberDiff line change
@@ -885,7 +885,7 @@ origins
885885
''
886886
Libyan
887887
ruler
888-
Mu`ammar
888+
Muammar
889889
al-Qaddafi
890890
referred
891891
to

data/edu/stanford/nlp/ud/feature_map.txt

-106
This file was deleted.

0 commit comments

Comments
 (0)