Skip to content

Commit d4d31af

Browse files
authored
Merge pull request #1255 from kermitt2/docker-evaluation-image
Enable running end 2 end evaluation via a docker container
2 parents 8b9d113 + 419715e commit d4d31af

File tree

3 files changed

+480
-0
lines changed

3 files changed

+480
-0
lines changed
+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
name: Build and push a full docker image
2+
3+
on:
4+
workflow_dispatch:
5+
inputs:
6+
custom_tag:
7+
type: string
8+
description: Docker image tag
9+
required: true
10+
default: "latest"
11+
12+
jobs:
13+
build:
14+
runs-on: ubuntu-latest
15+
16+
steps:
17+
- uses: actions/checkout@v4
18+
- name: Set up JDK 17
19+
uses: actions/setup-java@v4
20+
with:
21+
java-version: '17.0.10+7'
22+
distribution: 'temurin'
23+
cache: 'gradle'
24+
- name: Build with Gradle
25+
run: ./gradlew build -x test
26+
27+
docker-build-full:
28+
needs: [ build ]
29+
runs-on: ubuntu-latest
30+
31+
steps:
32+
- name: Create more disk space
33+
run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY"
34+
- uses: actions/checkout@v4
35+
- name: Build and push
36+
id: docker_build
37+
uses: mr-smithers-excellent/docker-build-push@v5
38+
with:
39+
username: ${{ secrets.DOCKERHUB_USERNAME_LFOPPIANO }}
40+
password: ${{ secrets.DOCKERHUB_TOKEN_LFOPPIANO }}
41+
image: lfoppiano/grobid-evaluation
42+
registry: docker.io
43+
pushImage: true
44+
tags: latest, ${{ github.event.inputs.custom_tag}}
45+
dockerfile: Dockerfile.evaluation
46+
- name: Image digest
47+
# docker-build-push does not publish a "digest" output; print the full image reference it built instead.
run: echo "${{ steps.docker_build.outputs.imageFullName }}"

Dockerfile.evaluation

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
## Grobid evaluation image
2+
## ------
3+
# https://grobid.readthedocs.io/en/latest/End-to-end-evaluation/
4+
# NOTE: To match the exact evaluation published in the Grobid documentation, it is necessary to have a running Biblio-glutton instance
5+
6+
# Base image: the published "full" Grobid image; the evaluation sources and datasets are layered on top of it.
FROM lfoppiano/grobid:0.8.2-RC1-full AS runtime
7+
8+
# setting locale is likely useless but to be sure
9+
ENV LANG=C.UTF-8
10+
11+
USER root
12+
13+
# Install the tools used below to download and unpack the evaluation datasets.
# The apt package lists are removed in the same layer so they do not end up in the image.
RUN apt-get update && \
    apt-get -y --no-install-recommends install unzip wget && \
    rm -rf /var/lib/apt/lists/*
15+
16+
WORKDIR /opt/grobid
17+
18+
# gradle
19+
COPY gradle/ ./gradle/
20+
COPY gradlew ./
21+
COPY gradle.properties ./
22+
COPY build.gradle ./
23+
COPY settings.gradle ./
24+
25+
# source
26+
COPY grobid-core/ ./grobid-core/
27+
COPY grobid-service/ ./grobid-service/
28+
COPY grobid-trainer/ ./grobid-trainer/
29+
30+
# Setting DL-powered configuration
31+
COPY grobid-home/config/grobid-evaluation.yaml grobid-home/config/config.yaml
32+
33+
RUN rm -rf /opt/grobid/grobid-home/models/*-with_ELMo \
34+
&& mkdir /opt/grobid/evaluation
35+
36+
# Download evaluation data
37+
WORKDIR /opt/grobid/evaluation
38+
RUN wget https://zenodo.org/records/3873702/files/biorxiv-10k-test-2000.zip -O biorxiv-10k-test-2000.zip \
39+
&& unzip biorxiv-10k-test-2000.zip -d biorxiv-10k-test-2000 \
40+
&& wget https://zenodo.org/records/7708580/files/eLife_984.zip -O eLife_984.zip \
41+
&& unzip eLife_984.zip -d eLife_984 \
42+
&& wget https://zenodo.org/records/7708580/files/PLOS_1000.zip -O PLOS_1000.zip \
43+
&& unzip PLOS_1000.zip -d PLOS_1000 \
44+
&& wget https://zenodo.org/records/7708580/files/PMC_sample_1943.zip -O PMC_sample_1943.zip \
45+
&& unzip PMC_sample_1943.zip -d PMC_sample_1943 \
46+
&& rm *.zip
47+
48+
#RUN wget -q https://zenodo.org/records/7708580/files/PMC_sample_1943.zip -O PMC_sample_1943.zip \
49+
# && unzip PMC_sample_1943.zip -d PMC_sample_1943 \
50+
# && rm *.zip
51+
52+
VOLUME ["/opt/grobid/grobid-home/tmp"]
53+
54+
WORKDIR /opt/grobid
55+
56+
# Default command: run the jatsEval Gradle task once per evaluation dataset
# (PMC_sample_1943, biorxiv-10k-test-2000, eLife_984, PLOS_1000), in that order.
# The commands are joined with ';', so each one is started regardless of
# whether the previous one succeeded.
CMD ["/bin/bash", "-c", "./gradlew jatsEval -Pp2t=/opt/grobid/evaluation/PMC_sample_1943 -Prun=1 -PfileRatio=1; ./gradlew jatsEval -Pp2t=/opt/grobid/evaluation/biorxiv-10k-test-2000 -Prun=1 -PfileRatio=1; ./gradlew jatsEval -Pp2t=/opt/grobid/evaluation/eLife_984 -Prun=1 -PfileRatio=1; ./gradlew jatsEval -Pp2t=/opt/grobid/evaluation/PLOS_1000 -Prun=1 -PfileRatio=1;"]
57+
58+
# GROBID_VERSION is not declared anywhere else in this Dockerfile, so declare it
# here as a build argument. Override it with `--build-arg GROBID_VERSION=<version>`;
# the default keeps the version label from being silently empty.
ARG GROBID_VERSION=unknown

# Image metadata.
LABEL \
    authors="The contributors" \
    org.label-schema.name="Grobid" \
    org.label-schema.description="Image running the Grobid End 2 end evaluation" \
    org.label-schema.url="https://github.com/kermitt2/Grobid" \
    org.label-schema.version="${GROBID_VERSION}"

0 commit comments

Comments
 (0)