1
1
## Grobid evaluation image
2
- ## ------
2
+ # ------
3
3
# https://grobid.readthedocs.io/en/latest/End-to-end-evaluation/
4
- # NOTE: To match the exact evaluation published in the Grobid documentation is necessary to have a running Biblio-glutton instance
4
+ # NOTE: To match the exact evaluation published in the Grobid documentation, it is necessary to have a
5
+ # running Biblio-glutton instance
6
+ #
7
+ # A project using this image can be found here: https://huggingface.co/spaces/lfoppiano/grobid-evaluation
8
+ # Please note that the evaluation is run through a Python script that runs all the needed commands
9
+ # TODO: upload the evaluation in Markdown somewhere
5
10
6
11
FROM lfoppiano/grobid:0.8.2-RC1-full as runtime
7
12
@@ -11,7 +16,7 @@ ENV LANG C.UTF-8
11
16
USER root
12
17
13
18
RUN apt-get update && \
14
- apt-get -y --no-install-recommends install unzip wget
19
+ apt-get -y --no-install-recommends install unzip wget git git-lfs
15
20
16
21
WORKDIR /opt/grobid
17
22
@@ -27,33 +32,24 @@ COPY grobid-core/ ./grobid-core/
27
32
COPY grobid-service/ ./grobid-service/
28
33
COPY grobid-trainer/ ./grobid-trainer/
29
34
30
- # Setting DL-powered configuration
35
+ # Setting DL-powered configuration + biblio-glutton
31
36
COPY grobid-home/config/grobid-evaluation.yaml grobid-home/config/config.yaml
32
37
33
38
RUN rm -rf /opt/grobid/grobid-home/models/*-with_ELMo \
34
39
&& mkdir /opt/grobid/evaluation
35
40
36
- # Download evaluation data
41
+ # Download evaluation data (for space reasons, the evaluation data is not downloaded here)
42
+ # See https://huggingface.co/spaces/lfoppiano/grobid-evaluation/blob/main/Dockerfile
37
43
WORKDIR /opt/grobid/evaluation
38
- RUN wget https://zenodo.org/records/3873702/files/biorxiv-10k-test-2000.zip -O biorxiv-10k-test-2000.zip \
39
- && unzip biorxiv-10k-test-2000.zip -d biorxiv-10k-test-2000 \
40
- && wget https://zenodo.org/records/7708580/files/eLife_984.zip -O eLife_984.zip \
41
- && unzip eLife_984.zip -d eLife_984 \
42
- && wget https://zenodo.org/records/7708580/files/PLOS_1000.zip -O PLOS_1000.zip \
43
- && unzip PLOS_1000.zip -d PLOS_1000 \
44
- && wget https://zenodo.org/records/7708580/files/PMC_sample_1943.zip -O PMC_sample_1943.zip \
45
- && unzip PMC_sample_1943.zip -d PMC_sample_1943 \
46
- && rm *.zip
47
-
48
- #RUN wget -q https://zenodo.org/records/7708580/files/PMC_sample_1943.zip -O PMC_sample_1943.zip \
49
- # && unzip PMC_sample_1943.zip -d PMC_sample_1943 \
50
- # && rm *.zip
44
+ #RUN git lfs install && git clone --depth 1 https://huggingface.co/datasets/sciencialab/grobid-evaluation evaluation
45
+ #RUN chmod -R uog+rw /opt/grobid/evaluation
51
46
52
47
VOLUME ["/opt/grobid/grobid-home/tmp"]
53
48
54
49
WORKDIR /opt/grobid
55
50
56
51
CMD ["/bin/bash", "-c", "./gradlew jatsEval -Pp2t=/opt/grobid/evaluation/PMC_sample_1943 -Prun=1 -PfileRatio=1; ./gradlew jatsEval -Pp2t=/opt/grobid/evaluation/biorxiv-10k-test-2000 -Prun=1 -PfileRatio=1; ./gradlew jatsEval -Pp2t=/opt/grobid/evaluation/eLife_984 -Prun=1 -PfileRatio=1; ./gradlew jatsEval -Pp2t=/opt/grobid/evaluation/PLOS_1000 -Prun=1 -PfileRatio=1;"]
52
+ #CMD ["/bin/bash", "-c", "./gradlew jatsEval -Pp2t=/opt/grobid/evaluation/PMC_sample_1943 -Prun=1 -PfileRatio=1;"]
57
53
58
54
LABEL \
59
55
authors="The contributors" \
0 commit comments