Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: added the geometric operations to BoundingBox #136

Merged
merged 4 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 136 additions & 12 deletions docling_core/types/doc/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,19 +59,21 @@ def scaled(self, scale: float) -> "BoundingBox":
:param scale: float:

"""
out_bbox = copy.deepcopy(self)
out_bbox.l *= scale
out_bbox.r *= scale
out_bbox.t *= scale
out_bbox.b *= scale

return out_bbox
return self.scale_to_size(page_size=Size(width=scale, height=scale))

def normalized(self, page_size: Size) -> "BoundingBox":
    """Return the result of ``normalize_to_size`` for ``page_size``.

    This method performs no arithmetic of its own; it forwards the call
    to ``normalize_to_size`` and hands back whatever that returns.

    :param page_size: Size: forwarded unchanged as ``page_size``.

    """
    normalized_bbox = self.normalize_to_size(page_size=page_size)
    return normalized_bbox

def normalize_to_size(self, page_size: Size) -> "BoundingBox":
"""normalize_to_size.

:param page_size: Size:

"""
out_bbox = copy.deepcopy(self)
out_bbox.l /= page_size.width
Expand All @@ -81,6 +83,20 @@ def normalized(self, page_size: Size) -> "BoundingBox":

return out_bbox

def scale_to_size(self, page_size: Size) -> "BoundingBox":
    """Return a copy of this box scaled by the given size.

    ``l`` and ``r`` are multiplied by ``page_size.width``; ``t`` and ``b``
    are multiplied by ``page_size.height``. The arithmetic is applied to a
    deep copy, so the receiver itself is not modified.

    :param page_size: Size: supplies the ``width`` and ``height`` factors.

    """
    out_bbox = copy.deepcopy(self)

    # Horizontal edges scale with the width factor.
    out_bbox.l *= page_size.width
    out_bbox.r *= page_size.width

    # Vertical edges scale with the height factor.
    out_bbox.t *= page_size.height
    out_bbox.b *= page_size.height

    return out_bbox

def as_tuple(self) -> Tuple[float, float, float, float]:
"""as_tuple."""
if self.coord_origin == CoordOrigin.TOPLEFT:
Expand Down Expand Up @@ -121,17 +137,20 @@ def area(self) -> float:
area = -area
return area

def intersection_area_with(self, other: "BoundingBox") -> float:
def intersection_area_with(self, other: "BoundingBox", page_height: float) -> float:
"""intersection_area_with.

:param other: "BoundingBox":

"""
self_bl = self.to_bottom_left_origin(page_height=page_height)
other_bl = other.to_bottom_left_origin(page_height=page_height)

# Calculate intersection coordinates
left = max(self.l, other.l)
top = max(self.t, other.t)
right = min(self.r, other.r)
bottom = min(self.b, other.b)
left = max(self_bl.l, other_bl.l)
top = max(self_bl.t, other_bl.t)
right = min(self_bl.r, other_bl.r)
bottom = min(self_bl.b, other_bl.b)

# Calculate intersection dimensions
width = right - left
Expand All @@ -143,6 +162,20 @@ def intersection_area_with(self, other: "BoundingBox") -> float:

return width * height

def intersection_over_union(
    self, other: "BoundingBox", page_height: float, eps: float = 1.0e-6
) -> float:
    """Return the intersection-over-union ratio of this box and ``other``.

    The intersection area comes from ``intersection_area_with``; each box's
    own area is computed from the absolute differences of its edges.

    :param other: "BoundingBox": box to compare against.
    :param page_height: float: forwarded to ``intersection_area_with``.
    :param eps: float: added to the union area in the denominator.

    """
    shared_area = self.intersection_area_with(other, page_height=page_height)

    own_area = abs(self.l - self.r) * abs(self.t - self.b)
    other_area = abs(other.l - other.r) * abs(other.t - other.b)
    union_area = own_area + other_area - shared_area

    return shared_area / (union_area + eps)

def to_bottom_left_origin(self, page_height: float) -> "BoundingBox":
"""to_bottom_left_origin.

Expand Down Expand Up @@ -176,3 +209,94 @@ def to_top_left_origin(self, page_height: float) -> "BoundingBox":
b=page_height - self.b, # self.t
coord_origin=CoordOrigin.TOPLEFT,
)

def overlaps(self, other: "BoundingBox", page_height: float) -> bool:
    """Return True when the boxes overlap both horizontally and vertically.

    The horizontal check runs first; the vertical check is only evaluated
    when the horizontal one succeeds.

    :param other: "BoundingBox": box to compare against.
    :param page_height: float: forwarded to ``overlaps_vertically``.

    """
    if not self.overlaps_horizontally(other=other):
        return False

    return self.overlaps_vertically(other=other, page_height=page_height)

def overlaps_horizontally(self, other: "BoundingBox") -> bool:
    """Return True when the ``[l, r)`` ranges of the two boxes intersect.

    The result is True if either horizontal edge of one box lies in the
    half-open range ``[l, r)`` of the other box.

    :param other: "BoundingBox": box to compare against.

    """
    return (
        self.l <= other.l < self.r
        or self.l <= other.r < self.r
        or other.l <= self.l < other.r
        or other.l <= self.r < other.r
    )

def overlaps_vertically(self, other: "BoundingBox", page_height: float) -> bool:
    """Return True when the ``[b, t)`` ranges of the two boxes intersect.

    Both boxes are first passed through ``to_bottom_left_origin`` so the
    comparison is made on the converted ``b`` and ``t`` values.

    :param other: "BoundingBox": box to compare against.
    :param page_height: float: forwarded to ``to_bottom_left_origin``.

    """
    mine = self.to_bottom_left_origin(page_height=page_height)
    theirs = other.to_bottom_left_origin(page_height=page_height)

    return (
        mine.b <= theirs.b < mine.t
        or mine.b <= theirs.t < mine.t
        or theirs.b <= mine.b < theirs.t
        or theirs.b <= mine.t < theirs.t
    )

def overlaps_vertically_with_iou(
    self, other: "BoundingBox", iou: float, page_height: float
) -> bool:
    """Return True when the vertical IoU of the two boxes exceeds ``iou``.

    Both boxes are converted with ``to_bottom_left_origin``. If they do not
    overlap vertically the result is False; otherwise the length of the
    shared vertical interval is divided by the length of the combined
    interval and compared (strictly greater) against ``iou``.

    :param other: "BoundingBox": box to compare against.
    :param iou: float: threshold the computed ratio must exceed.
    :param page_height: float: forwarded to ``to_bottom_left_origin`` and
        ``overlaps_vertically``.

    """
    mine = self.to_bottom_left_origin(page_height=page_height)
    theirs = other.to_bottom_left_origin(page_height=page_height)

    if not mine.overlaps_vertically(other=theirs, page_height=page_height):
        return False

    # Combined vertical span covered by either box.
    union_low = min(mine.b, theirs.b)
    union_high = max(mine.t, theirs.t)

    # Vertical span shared by both boxes.
    shared_low = max(mine.b, theirs.b)
    shared_high = min(mine.t, theirs.t)

    ratio = float(shared_high - shared_low) / float(union_high - union_low)
    return ratio > iou

def is_left_of(self, other: "BoundingBox") -> bool:
    """Return True when this box's right edge is left of ``other``'s.

    Only the ``r`` values of the two boxes are compared.

    :param other: "BoundingBox": box to compare against.

    """
    own_right, other_right = self.r, other.r
    return own_right < other_right

def is_strictly_left_of(self, other: "BoundingBox", eps: float = 0.001) -> bool:
    """Return True when this box ends before ``other`` begins.

    This box's right edge, shifted right by ``eps``, must still be smaller
    than ``other``'s left edge.

    :param other: "BoundingBox": box to compare against.
    :param eps: float: margin added to this box's ``r`` before comparing.

    """
    padded_right = self.r + eps
    return padded_right < other.l

def is_above_of(self, other: "BoundingBox", page_height: float) -> bool:
    """Return True when this box's top edge is higher than ``other``'s.

    Both boxes are converted with ``to_bottom_left_origin`` and only their
    ``t`` values are compared.

    :param other: "BoundingBox": box to compare against.
    :param page_height: float: forwarded to ``to_bottom_left_origin``.

    """
    own_top = self.to_bottom_left_origin(page_height=page_height).t
    other_top = other.to_bottom_left_origin(page_height=page_height).t

    return own_top > other_top

def is_strictly_above_of(
    self, other: "BoundingBox", page_height: float, eps: float = 0.001
) -> bool:
    """Return True when this box's bottom lies above ``other``'s top.

    Both boxes are converted with ``to_bottom_left_origin``. ``eps`` is
    added to this box's ``b`` before the comparison, so a bottom edge that
    equals ``other``'s top edge still yields True for a positive ``eps``.

    :param other: "BoundingBox": box to compare against.
    :param page_height: float: forwarded to ``to_bottom_left_origin``.
    :param eps: float: margin added to this box's ``b`` before comparing.

    """
    mine = self.to_bottom_left_origin(page_height=page_height)
    theirs = other.to_bottom_left_origin(page_height=page_height)

    raised_bottom = mine.b + eps
    return raised_bottom > theirs.t

def is_horizontally_connected(
    self, elem_i: "BoundingBox", elem_j: "BoundingBox", page_height: float
) -> bool:
    """Return True when this box horizontally bridges ``elem_i`` and ``elem_j``.

    All three boxes are converted with ``to_bottom_left_origin``. The result
    is False when this box's vertical extent overlaps the combined vertical
    span of the two elements. Otherwise it is True exactly when this box's
    left edge is left of ``elem_i``'s right edge and ``elem_j``'s left edge
    is left of this box's right edge.

    :param elem_i: "BoundingBox": first element.
    :param elem_j: "BoundingBox": second element.
    :param page_height: float: forwarded to ``to_bottom_left_origin``.

    """
    mine = self.to_bottom_left_origin(page_height=page_height)
    first = elem_i.to_bottom_left_origin(page_height=page_height)
    second = elem_j.to_bottom_left_origin(page_height=page_height)

    # Vertical span covered by the two elements together.
    lowest_bottom = min(first.b, second.b)
    highest_top = max(first.t, second.t)

    overlaps_in_y = mine.b < highest_top and lowest_bottom < mine.t
    if overlaps_in_y:
        return False

    return mine.l < first.r and second.l < mine.r
46 changes: 43 additions & 3 deletions test/test_docling_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,9 @@
from PIL import ImageDraw
from pydantic import AnyUrl, ValidationError

from docling_core.types.doc.base import ImageRefMode
from docling_core.types.doc.document import (
from docling_core.types.doc.base import BoundingBox, CoordOrigin, ImageRefMode, Size
from docling_core.types.doc.document import ( # BoundingBox,
CURRENT_VERSION,
BoundingBox,
CodeItem,
DocItem,
DoclingDocument,
Expand Down Expand Up @@ -44,6 +43,47 @@ def test_doc_origin():
)


def test_boundingbox():
    """Exercise BoundingBox geometry helpers across both coordinate origins."""
    page_height = 300.0

    def bottom_left_box(**edges):
        # Every fixture box starts out in bottom-left coordinates.
        return BoundingBox(coord_origin=CoordOrigin.BOTTOMLEFT, **edges)

    box_i = bottom_left_box(l=10, r=30, b=20, t=40)
    box_j = bottom_left_box(l=20, r=40, b=30, t=50)
    box_k = bottom_left_box(l=40, r=70, b=50, t=70)
    box_p = bottom_left_box(l=50, r=70, b=20, t=40)
    box_q = bottom_left_box(l=20, r=60, b=50, t=70)
    box_r = bottom_left_box(l=20, r=60, b=40, t=70)  # built but not asserted on

    # Top-left counterparts of the same boxes.
    box_i_tl = box_i.to_top_left_origin(page_height=page_height)
    box_j_tl = box_j.to_top_left_origin(page_height=page_height)
    box_k_tl = box_k.to_top_left_origin(page_height=page_height)

    # The intersection area must be the same for either origin.
    area_bl = box_i.intersection_area_with(box_j, page_height=page_height)
    area_tl = box_i_tl.intersection_area_with(box_j_tl, page_height=page_height)
    assert abs(area_tl - area_bl) < 1.0e-6, "abs(i_area_tl-i_area_bl)<1.0e-6"

    # k sits above i, whichever origins the two operands use.
    assert box_k.is_strictly_above_of(box_i, page_height=page_height)
    assert box_k_tl.is_strictly_above_of(box_i, page_height=page_height)
    assert box_k_tl.is_strictly_above_of(box_i_tl, page_height=page_height)

    # i sits to the left of k, whichever origins the two operands use.
    assert box_i.is_strictly_left_of(box_k)
    assert box_i_tl.is_strictly_left_of(box_k)
    assert box_i_tl.is_strictly_left_of(box_k_tl)

    # q connects i and p in either argument order.
    assert box_q.is_horizontally_connected(box_i, box_p, page_height=page_height)
    assert box_q.is_horizontally_connected(box_p, box_i, page_height=page_height)


def test_docitems():

# Iterative function to find all subclasses
Expand Down
Loading