From 61be78a875121b2583b0b62510187bb25e0a1c48 Mon Sep 17 00:00:00 2001
From: Christoph Auer <60343111+cau-git@users.noreply.github.com>
Date: Wed, 14 Aug 2024 11:32:30 +0200
Subject: [PATCH] Fix class re-mapping for table of contents (#33)

Signed-off-by: Christoph Auer
Co-authored-by: Christoph Auer
---
 docling/models/layout_model.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/docling/models/layout_model.py b/docling/models/layout_model.py
index 1b1f2246..93f80d54 100644
--- a/docling/models/layout_model.py
+++ b/docling/models/layout_model.py
@@ -69,6 +69,10 @@ def postprocess(self, clusters: List[Cluster], cells: List[Cell], page_height):
             "Key-Value Region": 0.45,
         }
 
+        CLASS_REMAPPINGS = {
+            "Document Index": "Table",
+        }
+
         _log.debug("================= Start postprocess function ====================")
         start_time = time.time()
         # Apply Confidence Threshold to cluster predictions
@@ -79,6 +83,10 @@ def postprocess(self, clusters: List[Cluster], cells: List[Cell], page_height):
             confidence = CLASS_THRESHOLDS[cluster.label]
             if cluster.confidence >= confidence:
                 # annotation["created_by"] = "high_conf_pred"
+
+                # Remap class labels where needed.
+                if cluster.label in CLASS_REMAPPINGS.keys():
+                    cluster.label = CLASS_REMAPPINGS[cluster.label]
                 clusters_out.append(cluster)
 
         # map to dictionary clusters and cells, with bottom left origin