From b154d46830a66c059dc8835d0f8405c0eb79841d Mon Sep 17 00:00:00 2001
From: Christoph Auer
Date: Thu, 19 Sep 2024 18:41:08 +0200
Subject: [PATCH] Add test unit for table options

Signed-off-by: Christoph Auer
---
 tests/test_options.py | 56 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 tests/test_options.py

diff --git a/tests/test_options.py b/tests/test_options.py
new file mode 100644
index 000000000..3adefc9ca
--- /dev/null
+++ b/tests/test_options.py
@@ -0,0 +1,56 @@
+from pathlib import Path
+
+import pytest
+
+from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
+from docling.datamodel.base_models import ConversionStatus
+from docling.datamodel.document import ConversionResult
+from docling.datamodel.pipeline_options import PipelineOptions, TableFormerMode
+from docling.document_converter import DocumentConverter
+
+from .verify_utils import verify_conversion_result
+
+GENERATE = False
+
+
+@pytest.fixture
+def test_doc_path():
+    """Return the path of the sample PDF converted by the tests in this module."""
+    return Path("./tests/data/2206.01062.pdf")
+
+
+def get_converters_with_table_options():
+    """Yield one DocumentConverter per table-structure option combination.
+
+    Every combination of ``do_cell_matching`` (True/False) and TableFormer
+    mode (FAST/ACCURATE) is covered; OCR is switched off and table structure
+    recognition is switched on for all of them.
+    """
+    for cell_matching in [True, False]:
+        for mode in [TableFormerMode.FAST, TableFormerMode.ACCURATE]:
+            pipeline_options = PipelineOptions()
+            pipeline_options.do_ocr = False
+            pipeline_options.do_table_structure = True
+            pipeline_options.table_structure_options.do_cell_matching = cell_matching
+            pipeline_options.table_structure_options.mode = mode
+
+            converter = DocumentConverter(
+                pipeline_options=pipeline_options,
+                pdf_backend=DoclingParseDocumentBackend,
+            )
+
+            yield converter
+
+
+def test_e2e_conversions(test_doc_path):
+    """Check that the sample PDF converts successfully with every converter."""
+    for index, converter in enumerate(get_converters_with_table_options()):
+        print(f"converting {test_doc_path}")
+
+        doc_result: ConversionResult = converter.convert_single(test_doc_path)
+
+        # Name the failing combination: the loop stops at the first failure.
+        assert doc_result.status == ConversionStatus.SUCCESS, (
+            f"table-options combination #{index} finished with "
+            f"status {doc_result.status}"
+        )