diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py index e9c51d69d..d828beee8 100644 --- a/docling/datamodel/base_models.py +++ b/docling/datamodel/base_models.py @@ -24,6 +24,11 @@ class DocInputType(str, Enum): STREAM = auto() +class TableFormerMode(str, Enum): + FAST = auto() + ACCURATE = auto() + + class CoordOrigin(str, Enum): TOPLEFT = auto() BOTTOMLEFT = auto() @@ -305,6 +310,7 @@ class TableStructureOptions(BaseModel): # are merged across table columns. # False: Let table structure model define the text cells, ignore PDF cells. ) + mode: TableFormerMode = TableFormerMode.FAST class PipelineOptions(BaseModel): diff --git a/docling/models/table_structure_model.py b/docling/models/table_structure_model.py index 388a0f9ed..2eb16191d 100644 --- a/docling/models/table_structure_model.py +++ b/docling/models/table_structure_model.py @@ -1,4 +1,6 @@ import copy +import os.path +from pathlib import Path from typing import Iterable, List import numpy @@ -10,6 +12,7 @@ Page, TableCell, TableElement, + TableFormerMode, TableStructurePrediction, ) @@ -18,10 +21,15 @@ class TableStructureModel: def __init__(self, config): self.config = config self.do_cell_matching = config["do_cell_matching"] + self.mode = config["mode"] self.enabled = config["enabled"] if self.enabled: - artifacts_path = config["artifacts_path"] + artifacts_path: Path = config["artifacts_path"] + + if self.mode == TableFormerMode.ACCURATE: + artifacts_path = artifacts_path / "fat" + # Third Party import docling_ibm_models.tableformer.common as c diff --git a/docling/pipeline/standard_model_pipeline.py b/docling/pipeline/standard_model_pipeline.py index 1abf59a3a..b43dc4210 100644 --- a/docling/pipeline/standard_model_pipeline.py +++ b/docling/pipeline/standard_model_pipeline.py @@ -32,6 +32,7 @@ def __init__(self, artifacts_path: Path, pipeline_options: PipelineOptions): "artifacts_path": artifacts_path / StandardModelPipeline._table_model_path, "enabled": pipeline_options.do_table_structure, + "mode": pipeline_options.table_structure_options.mode, "do_cell_matching": pipeline_options.table_structure_options.do_cell_matching, } ),