Skip to content

Commit

Permalink
fix: Correct scaling of debug visualizations, tune OCR (#700)
Browse files Browse the repository at this point in the history
* fix: Correct scaling of debug visualizations, tune OCR

Signed-off-by: Christoph Auer <[email protected]>

* chore: remove unused imports

Signed-off-by: Christoph Auer <[email protected]>

* chore: Update docling-core

Signed-off-by: Christoph Auer <[email protected]>

---------

Signed-off-by: Christoph Auer <[email protected]>
  • Loading branch information
cau-git authored Jan 8, 2025
1 parent ead396a commit 5cb4cf6
Show file tree
Hide file tree
Showing 5 changed files with 769 additions and 842 deletions.
15 changes: 14 additions & 1 deletion docling/models/base_ocr_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,18 +138,31 @@ def post_process_cells(self, ocr_cells, programmatic_cells):

def draw_ocr_rects_and_cells(self, conv_res, page, ocr_rects, show: bool = False):
    """Overlay OCR regions and text cells on a copy of the page image.

    Bounding boxes are multiplied by the ratio between the image size and
    ``page.size`` before drawing, so the overlays stay aligned when the
    image resolution differs from the page dimensions.

    Args:
        conv_res: Not referenced in this part of the function.
        page: Object providing ``image``, ``size`` (with ``width`` and
            ``height``) and ``cells``.
        ocr_rects: Iterable of rectangles exposing ``as_tuple()`` that
            yields ``(x0, y0, x1, y1)``.
        show: Not referenced in this part of the function.
    """
    # Work on a deep copy so the image stored on the page is left untouched.
    image = copy.deepcopy(page.image)
    scale_x = image.width / page.size.width
    scale_y = image.height / page.size.height

    def _to_pixels(bbox):
        # Horizontal coordinates use scale_x, vertical ones scale_y. The
        # previous inline code multiplied y0 by scale_x, which displaced the
        # top edge whenever the two factors differed.
        x0, y0, x1, y1 = bbox.as_tuple()
        return x0 * scale_x, y0 * scale_y, x1 * scale_x, y1 * scale_y

    draw = ImageDraw.Draw(image, "RGBA")

    # Draw OCR rectangles as yellow filled rect
    shade_color = (255, 255, 0, 40)  # transparent yellow
    for rect in ocr_rects:
        x0, y0, x1, y1 = _to_pixels(rect)
        draw.rectangle([(x0, y0), (x1, y1)], fill=shade_color, outline=None)

    # Draw OCR and programmatic cells
    for tc in page.cells:
        x0, y0, x1, y1 = _to_pixels(tc.bbox)
        # OCR cells are outlined in magenta; every other cell in gray.
        color = "magenta" if isinstance(tc, OcrCell) else "gray"
        draw.rectangle([(x0, y0), (x1, y1)], outline=color)
Expand Down
43 changes: 18 additions & 25 deletions docling/models/layout_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,29 +67,9 @@ def draw_clusters_and_cells_side_by_side(
- Right: Clusters including FORM, KEY_VALUE_REGION, and PICTURE.
Includes label names and confidence scores for each cluster.
"""
label_to_color = {
DocItemLabel.TEXT: (255, 255, 153), # Light Yellow
DocItemLabel.CAPTION: (255, 204, 153), # Light Orange
DocItemLabel.LIST_ITEM: (153, 153, 255), # Light Purple
DocItemLabel.FORMULA: (192, 192, 192), # Gray
DocItemLabel.TABLE: (255, 204, 204), # Light Pink
DocItemLabel.PICTURE: (255, 204, 164), # Light Beige
DocItemLabel.SECTION_HEADER: (255, 153, 153), # Light Red
DocItemLabel.PAGE_HEADER: (204, 255, 204), # Light Green
DocItemLabel.PAGE_FOOTER: (
204,
255,
204,
), # Light Green (same as Page-Header)
DocItemLabel.TITLE: (255, 153, 153), # Light Red (same as Section-Header)
DocItemLabel.FOOTNOTE: (200, 200, 255), # Light Blue
DocItemLabel.DOCUMENT_INDEX: (220, 220, 220), # Light Gray
DocItemLabel.CODE: (125, 125, 125), # Gray
DocItemLabel.CHECKBOX_SELECTED: (255, 182, 193), # Pale Green
DocItemLabel.CHECKBOX_UNSELECTED: (255, 182, 193), # Light Pink
DocItemLabel.FORM: (200, 255, 255), # Light Cyan
DocItemLabel.KEY_VALUE_REGION: (183, 65, 14), # Rusty orange
}
scale_x = page.image.width / page.size.width
scale_y = page.image.height / page.size.height

# Filter clusters for left and right images
exclude_labels = {
DocItemLabel.FORM,
Expand Down Expand Up @@ -118,15 +98,28 @@ def draw_clusters(image, clusters):
cell_color = (0, 0, 0, 40) # Transparent black for cells
for tc in c.cells:
cx0, cy0, cx1, cy1 = tc.bbox.as_tuple()
cx0 *= scale_x
cx1 *= scale_x
cy0 *= scale_x
cy1 *= scale_y

draw.rectangle(
[(cx0, cy0), (cx1, cy1)],
outline=None,
fill=cell_color,
)
# Draw cluster rectangle
x0, y0, x1, y1 = c.bbox.as_tuple()
cluster_fill_color = (*list(label_to_color.get(c.label)), 70)
cluster_outline_color = (*list(label_to_color.get(c.label)), 255)
x0 *= scale_x
x1 *= scale_x
y0 *= scale_x
y1 *= scale_y

cluster_fill_color = (*list(DocItemLabel.get_color(c.label)), 70)
cluster_outline_color = (
*list(DocItemLabel.get_color(c.label)),
255,
)
draw.rectangle(
[(x0, y0), (x1, y1)],
outline=cluster_outline_color,
Expand Down
20 changes: 20 additions & 0 deletions docling/models/table_structure_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,23 +66,43 @@ def draw_table_and_cells(
show: bool = False,
):
assert page._backend is not None
assert page.size is not None

image = (
page._backend.get_page_image()
) # make new image to avoid drawing on the saved ones

scale_x = image.width / page.size.width
scale_y = image.height / page.size.height

draw = ImageDraw.Draw(image)

for table_element in tbl_list:
x0, y0, x1, y1 = table_element.cluster.bbox.as_tuple()
y0 *= scale_x
y1 *= scale_y
x0 *= scale_x
x1 *= scale_x

draw.rectangle([(x0, y0), (x1, y1)], outline="red")

for cell in table_element.cluster.cells:
x0, y0, x1, y1 = cell.bbox.as_tuple()
x0 *= scale_x
x1 *= scale_x
y0 *= scale_x
y1 *= scale_y

draw.rectangle([(x0, y0), (x1, y1)], outline="green")

for tc in table_element.table_cells:
if tc.bbox is not None:
x0, y0, x1, y1 = tc.bbox.as_tuple()
x0 *= scale_x
x1 *= scale_x
y0 *= scale_x
y1 *= scale_y

if tc.column_header:
width = 3
else:
Expand Down
Loading

0 comments on commit 5cb4cf6

Please sign in to comment.