From afd443972c5d5a26a856a932839b85ef2aba8c7d Mon Sep 17 00:00:00 2001 From: Casey Hilland <casey.hilland@gmail.com> Date: Fri, 30 Mar 2018 22:12:08 -0400 Subject: [PATCH 1/2] Update corenlp and add shift reduce parser --- Dockerfile | 24 +++++++++++++++++------- README.md | 11 +++++++++-- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2e8cb8b..b77c27e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,24 @@ -FROM java:8 +FROM maven:alpine -MAINTAINER Casey Hilland <casey dot hilland at gmail dot com> +RUN apk add --update --no-cache \ + unzip wget -RUN wget http://nlp.stanford.edu/software/stanford-corenlp-full-2015-12-09.zip -RUN unzip stanford-corenlp-full-2015-12-09.zip && rm stanford-corenlp-full-2015-12-09.zip +RUN wget http://nlp.stanford.edu/software/stanford-corenlp-full-2018-02-27.zip +RUN unzip stanford-corenlp-full-2018-02-27.zip && \ + rm stanford-corenlp-full-2018-02-27.zip -WORKDIR stanford-corenlp-full-2015-12-09 +WORKDIR stanford-corenlp-full-2018-02-27 + +RUN wget https://nlp.stanford.edu/software/stanford-srparser-2014-10-23-models.jar +RUN mvn install:install-file -Dfile=stanford-srparser-2014-10-23-models.jar \ + -DgroupId=edu.stanford.nlp -DartifactId=stanford-srparser \ + -Dversion=3.5.2 -Dpackaging=jar RUN export CLASSPATH="`find . -name '*.jar'`" -EXPOSE 9000 +ENV PORT 9000 + +EXPOSE $PORT -CMD java -cp "*" -mx4g edu.stanford.nlp.pipeline.StanfordCoreNLPServer +CMD java -cp "*" -mx4g edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port \ + $PORT -parse.model edu/stanford/nlp/models/srparser/englishSR.ser.gz diff --git a/README.md b/README.md index ea39624..8a0f8cf 100644 --- a/README.md +++ b/README.md @@ -3,5 +3,12 @@ Dockerfile for Stanford CoreNLP Server --------- This Dockerfile builds the [Stanford CoreNLP -Server](http://stanfordnlp.github.io/CoreNLP/corenlp-server.html) and exposes -the endpoint on port 9000. Requests are made as covered in the documentation. +Server](http://stanfordnlp.github.io/CoreNLP/corenlp-server.html) and +shift-reduce parser. It exposes the endpoint on port 9000. Requests +are made as covered in the documentation. + +## Build + +```shell +docker build -t corenlp:3.9.1 . +``` From 8b48000756efb1bb34fcc6480fc51015b80b4d6b Mon Sep 17 00:00:00 2001 From: Casey Hilland <casey.hilland@gmail.com> Date: Thu, 10 Dec 2020 17:56:54 -0500 Subject: [PATCH 2/2] Catch up versions, add CI --- .github/workflows/docker.yml | 18 ++++++++++++++++++ Dockerfile | 17 ++++++++--------- README.md | 14 ++++++++++++-- 3 files changed, 38 insertions(+), 11 deletions(-) create mode 100644 .github/workflows/docker.yml diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml new file mode 100644 index 0000000..5dfb3b3 --- /dev/null +++ b/.github/workflows/docker.yml @@ -0,0 +1,18 @@ +name: Publish Docker image +on: + release: + types: [published] +jobs: + push_to_registry: + name: Push Docker image to Docker Hub + runs-on: ubuntu-latest + steps: + - name: Check out the repo + uses: actions/checkout@v2 + - name: Push to Docker Hub + uses: docker/build-push-action@v1 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + repository: chilland/corenlp + tag_with_ref: true diff --git a/Dockerfile b/Dockerfile index b77c27e..37c20ba 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,11 +3,13 @@ FROM maven:alpine RUN apk add --update --no-cache \ unzip wget -RUN wget http://nlp.stanford.edu/software/stanford-corenlp-full-2018-02-27.zip -RUN unzip stanford-corenlp-full-2018-02-27.zip && \ - rm stanford-corenlp-full-2018-02-27.zip +ARG CORENLP_VERSION="4.2.0" -WORKDIR stanford-corenlp-full-2018-02-27 +RUN wget http://nlp.stanford.edu/software/stanford-corenlp-${CORENLP_VERSION}.zip +RUN unzip stanford-corenlp-${CORENLP_VERSION}.zip && \ + rm stanford-corenlp-${CORENLP_VERSION}.zip + +WORKDIR stanford-corenlp-${CORENLP_VERSION} RUN wget https://nlp.stanford.edu/software/stanford-srparser-2014-10-23-models.jar RUN mvn install:install-file -Dfile=stanford-srparser-2014-10-23-models.jar \ @@ -16,9 +18,6 @@ RUN mvn install:install-file -Dfile=stanford-srparser-2014-10-23-models.jar \ RUN export CLASSPATH="`find . -name '*.jar'`" -ENV PORT 9000 - -EXPOSE $PORT +EXPOSE 9000 -CMD java -cp "*" -mx4g edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port \ - $PORT -parse.model edu/stanford/nlp/models/srparser/englishSR.ser.gz +CMD java -cp "*" -mx4g edu.stanford.nlp.pipeline.StanfordCoreNLPServer -parse.model edu/stanford/nlp/models/srparser/englishSR.ser.gz diff --git a/README.md b/README.md index 8a0f8cf..2454cb4 100644 --- a/README.md +++ b/README.md @@ -5,10 +5,20 @@ Dockerfile for Stanford CoreNLP Server This Dockerfile builds the [Stanford CoreNLP Server](http://stanfordnlp.github.io/CoreNLP/corenlp-server.html) and shift-reduce parser. It exposes the endpoint on port 9000. Requests -are made as covered in the documentation. +are made as covered in the documentation. Including the shift-reduce parser +makes the image rather large (2.5gb+). You can specify the version you want to +build via `--build-arg`. ## Build ```shell -docker build -t corenlp:3.9.1 . +docker build --build-arg CORENLP_VERSION=${CORENLP_VERSION} -t corenlp:${CORENLP_VERSION} . +``` + +## Run +The container runs the server with some simple defaults and runs the jar with +4gb of memory. The command can be overriden when you start the container. For +example: +```shell +docker run -p 9000:9000 -d corenlp java -cp "*" -mx15g edu.stanford.nlp.pipeline.StanfordCoreNLPServer -parse.model edu/stanford/nlp/models/srparser/englishSR.ser.gz -preload tokenize,ssplit,pos,lemma,depparse,ner,kbp,relation,coref,quote,sentiment -quiet ```