Skip to content

Commit 020758d

Browse files
authored
Merge pull request #1267 from kermitt2/feature/docker-evaluation-image
End to end evaluation docker image follow-up
2 parents 989b8d5 + 0837eaf commit 020758d

File tree

3 files changed

+16
-20
lines changed

3 files changed

+16
-20
lines changed

.github/workflows/ci-build-manual-eval.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Build and push a full docker image
1+
name: Build and push the Grobid end-to-end evaluation docker image
22

33
on:
44
workflow_dispatch:

Dockerfile.evaluation

+14-18
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
## Grobid evaluation image
2-
## ------
2+
# ------
33
# https://grobid.readthedocs.io/en/latest/End-to-end-evaluation/
4-
# NOTE: To match the exact evaluation published in the Grobid documentation is necessary to have a running Biblio-glutton instance
4+
# NOTE: To match the exact evaluation published in the Grobid documentation, it is necessary to have a
5+
# running Biblio-glutton instance
6+
#
7+
# A project using this image can be found here: https://huggingface.co/spaces/lfoppiano/grobid-evaluation
8+
# Please note that the evaluation is run through a Python script that runs all the needed commands
9+
# TODO: upload the evaluation in Markdown somewhere
510

611
FROM lfoppiano/grobid:0.8.2-RC1-full as runtime
712

@@ -11,7 +16,7 @@ ENV LANG C.UTF-8
1116
USER root
1217

1318
RUN apt-get update && \
14-
apt-get -y --no-install-recommends install unzip wget
19+
apt-get -y --no-install-recommends install unzip wget git git-lfs
1520

1621
WORKDIR /opt/grobid
1722

@@ -27,33 +32,24 @@ COPY grobid-core/ ./grobid-core/
2732
COPY grobid-service/ ./grobid-service/
2833
COPY grobid-trainer/ ./grobid-trainer/
2934

30-
# Setting DL-powered configuration
35+
# Setting DL-powered configuration + biblio-glutton
3136
COPY grobid-home/config/grobid-evaluation.yaml grobid-home/config/config.yaml
3237

3338
RUN rm -rf /opt/grobid/grobid-home/models/*-with_ELMo \
3439
&& mkdir /opt/grobid/evaluation
3540

36-
# Download evaluation data
41+
# Download evaluation data (for space reasons, the evaluation data is not downloaded into this image)
42+
# See https://huggingface.co/spaces/lfoppiano/grobid-evaluation/blob/main/Dockerfile
3743
WORKDIR /opt/grobid/evaluation
38-
RUN wget https://zenodo.org/records/3873702/files/biorxiv-10k-test-2000.zip -O biorxiv-10k-test-2000.zip \
39-
&& unzip biorxiv-10k-test-2000.zip -d biorxiv-10k-test-2000 \
40-
&& wget https://zenodo.org/records/7708580/files/eLife_984.zip -O eLife_984.zip \
41-
&& unzip eLife_984.zip -d eLife_984 \
42-
&& wget https://zenodo.org/records/7708580/files/PLOS_1000.zip -O PLOS_1000.zip \
43-
&& unzip PLOS_1000.zip -d PLOS_1000 \
44-
&& wget https://zenodo.org/records/7708580/files/PMC_sample_1943.zip -O PMC_sample_1943.zip \
45-
&& unzip PMC_sample_1943.zip -d PMC_sample_1943 \
46-
&& rm *.zip
47-
48-
#RUN wget -q https://zenodo.org/records/7708580/files/PMC_sample_1943.zip -O PMC_sample_1943.zip \
49-
# && unzip PMC_sample_1943.zip -d PMC_sample_1943 \
50-
# && rm *.zip
44+
#RUN git lfs install && git clone --depth 1 https://huggingface.co/datasets/sciencialab/grobid-evaluation evaluation
45+
#RUN chmod -R uog+rw /opt/grobid/evaluation
5146

5247
VOLUME ["/opt/grobid/grobid-home/tmp"]
5348

5449
WORKDIR /opt/grobid
5550

5651
CMD ["/bin/bash", "-c", "./gradlew jatsEval -Pp2t=/opt/grobid/evaluation/PMC_sample_1943 -Prun=1 -PfileRatio=1; ./gradlew jatsEval -Pp2t=/opt/grobid/evaluation/biorxiv-10k-test-2000 -Prun=1 -PfileRatio=1; ./gradlew jatsEval -Pp2t=/opt/grobid/evaluation/eLife_984 -Prun=1 -PfileRatio=1; ./gradlew jatsEval -Pp2t=/opt/grobid/evaluation/PLOS_1000 -Prun=1 -PfileRatio=1;"]
52+
#CMD ["/bin/bash", "-c", "./gradlew jatsEval -Pp2t=/opt/grobid/evaluation/PMC_sample_1943 -Prun=1 -PfileRatio=1;"]
5753

5854
LABEL \
5955
authors="The contributors" \

grobid-home/config/grobid-evaluation.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ grobid:
6767
# DeLFT global parameters
6868
# delft installation path if Deep Learning architectures are used to implement one of the sequence labeling model,
6969
# embeddings are usually compiled as lmdb under delft/data (this parameter is ignored if only featured-engineered CRF are used)
70-
install: "../delft"
70+
install: "/opt/delft"
7171
pythonVirtualEnv:
7272

7373
wapiti:

0 commit comments

Comments
 (0)