Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: added the geometric operations to BoundingBox #136

Merged
merged 4 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
267 changes: 228 additions & 39 deletions docling_core/types/doc/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Models for the base data types."""

import copy
from enum import Enum
from typing import Tuple

Expand Down Expand Up @@ -53,33 +52,53 @@ def height(self):
"""height."""
return abs(self.t - self.b)

def scaled(self, scale: float) -> "BoundingBox":
    """Return a deep copy of this box with every edge multiplied by ``scale``.

    The receiver itself is left untouched.

    :param scale: float: factor applied to ``l``, ``r``, ``t`` and ``b``.

    """
    result = copy.deepcopy(self)
    # Same edge order as a hand-written l/r/t/b sequence.
    for edge in ("l", "r", "t", "b"):
        setattr(result, edge, getattr(result, edge) * scale)
    return result

def normalized(self, page_size: Size) -> "BoundingBox":
    """Return a deep copy of this box expressed as fractions of the page size.

    Horizontal edges (``l``, ``r``) are divided by ``page_size.width`` and
    vertical edges (``t``, ``b``) by ``page_size.height``.

    :param page_size: Size: page dimensions used as divisors.

    """
    result = copy.deepcopy(self)
    for edge, axis in (("l", "width"), ("r", "width"), ("t", "height"), ("b", "height")):
        setattr(result, edge, getattr(result, edge) / getattr(page_size, axis))
    return result
def resize_by_scale(self, x_scale: float, y_scale: float):
    """Build a new box whose coordinates are scaled independently per axis.

    :param x_scale: float: factor applied to the ``l`` and ``r`` coordinates.
    :param y_scale: float: factor applied to the ``t`` and ``b`` coordinates.
    :returns: a new ``BoundingBox`` with the same ``coord_origin`` as this one.
    """
    left, right = self.l * x_scale, self.r * x_scale
    top, bottom = self.t * y_scale, self.b * y_scale
    return BoundingBox(
        l=left,
        r=right,
        t=top,
        b=bottom,
        coord_origin=self.coord_origin,
    )

def scale_to_size(self, old_size: Size, new_size: Size):
    """Rescale the box from one reference size to another.

    The per-axis factors are the ratios ``new / old`` of the widths and of the
    heights; the actual work is delegated to ``resize_by_scale``.

    :param old_size: Size: dimensions the current coordinates refer to.
    :param new_size: Size: dimensions the returned coordinates refer to.
    """
    horizontal_ratio = new_size.width / old_size.width
    vertical_ratio = new_size.height / old_size.height
    return self.resize_by_scale(x_scale=horizontal_ratio, y_scale=vertical_ratio)

# Uniform scaling: the special case of resize_by_scale with equal factors.
def scaled(self, scale: float):
    """Return a new box with both axes multiplied by the same ``scale``.

    :param scale: float: factor passed as both ``x_scale`` and ``y_scale``.
    """
    factors = {"x_scale": scale, "y_scale": scale}
    return self.resize_by_scale(**factors)

# Normalizing is rescaling from the page size onto a 1 x 1 unit page.
def normalized(self, page_size: Size):
    """Return a new box with coordinates expressed relative to ``page_size``.

    :param page_size: Size: dimensions the current coordinates refer to.
    """
    unit_page = Size(height=1.0, width=1.0)
    return self.scale_to_size(old_size=page_size, new_size=unit_page)

def expand_by_scale(self, x_scale: float, y_scale: float) -> "BoundingBox":
    """Return a new box grown outwards on every side, relative to its own size.

    Each vertical edge moves outwards by ``self.width * x_scale`` and each
    horizontal edge by ``self.height * y_scale``. Which direction counts as
    "outwards" for ``t`` and ``b`` depends on the coordinate origin.

    :param x_scale: float: fraction of the width added on the left and on the
        right.
    :param y_scale: float: fraction of the height added on the top and on the
        bottom.
    :returns: a new ``BoundingBox`` with the same ``coord_origin``.
    :raises ValueError: if ``coord_origin`` is neither TOPLEFT nor BOTTOMLEFT
        (previously this case silently returned ``None``).
    """
    dx = self.width * x_scale
    dy = self.height * y_scale

    if self.coord_origin == CoordOrigin.TOPLEFT:
        # Top-left origin: moving t outwards decreases it, b increases.
        top, bottom = self.t - dy, self.b + dy
    elif self.coord_origin == CoordOrigin.BOTTOMLEFT:
        # Bottom-left origin: the vertical directions are mirrored.
        top, bottom = self.t + dy, self.b - dy
    else:
        raise ValueError(f"Unsupported CoordOrigin: {self.coord_origin}")

    return BoundingBox(
        l=self.l - dx,
        r=self.r + dx,
        t=top,
        b=bottom,
        coord_origin=self.coord_origin,
    )

def as_tuple(self) -> Tuple[float, float, float, float]:
"""as_tuple."""
Expand Down Expand Up @@ -116,33 +135,55 @@ def from_tuple(cls, coord: Tuple[float, ...], origin: CoordOrigin):

def area(self) -> float:
    """Return the non-negative area of the box.

    Absolute edge differences are used, so the result does not depend on the
    coordinate origin or on the ordering of ``t``/``b`` and ``l``/``r``.
    """
    return abs(self.r - self.l) * abs(self.b - self.t)

def intersection_area_with(self, other: "BoundingBox") -> float:
    """Calculate the intersection area with another bounding box.

    :param other: "BoundingBox": box to intersect with; it must use the same
        coordinate origin as this box.
    :returns: area of the overlapping rectangle, or ``0.0`` when the overlap
        has no positive width or height (edges that merely touch count as no
        overlap).
    :raises ValueError: if the boxes use different coordinate origins, or an
        origin other than TOPLEFT / BOTTOMLEFT.
    """
    if self.coord_origin != other.coord_origin:
        raise ValueError("BoundingBoxes have different CoordOrigin")

    # The horizontal overlap is independent of the coordinate origin.
    width = min(self.r, other.r) - max(self.l, other.l)
    if width <= 0:
        return 0.0

    if self.coord_origin == CoordOrigin.TOPLEFT:
        # Top-left origin: overlap runs from the larger t to the smaller b.
        height = min(self.b, other.b) - max(self.t, other.t)
    elif self.coord_origin == CoordOrigin.BOTTOMLEFT:
        # Bottom-left origin: overlap runs from the larger b to the smaller t.
        height = min(self.t, other.t) - max(self.b, other.b)
    else:
        raise ValueError(f"Unsupported CoordOrigin: {self.coord_origin}")

    if height <= 0:
        return 0.0

    return width * height

def intersection_over_union(
    self, other: "BoundingBox", eps: float = 1.0e-6
) -> float:
    """Return the intersection-over-union ratio of this box and ``other``.

    :param other: "BoundingBox": box to compare against.
    :param eps: float: small constant added to the union area in the
        denominator.
    :returns: ``intersection / (union + eps)``.
    """
    overlap = self.intersection_area_with(other=other)

    own_area = abs(self.l - self.r) * abs(self.t - self.b)
    other_area = abs(other.l - other.r) * abs(other.t - other.b)
    # The shared region is counted in both areas, so subtract it once.
    union = own_area + other_area - overlap

    return overlap / (union + eps)

def intersection_over_self(
    self, other: "BoundingBox", eps: float = 1.0e-6
) -> float:
    """Return the fraction of this box's area that is covered by ``other``.

    :param other: "BoundingBox": box to intersect with.
    :param eps: float: small constant added to the denominator, mirroring
        ``intersection_over_union``, so that a zero-area box yields ``0.0``
        instead of raising ``ZeroDivisionError``.
    :returns: ``intersection / (self.area() + eps)``.
    """
    intersection_area = self.intersection_area_with(other=other)
    return intersection_area / (self.area() + eps)

def to_bottom_left_origin(self, page_height: float) -> "BoundingBox":
"""to_bottom_left_origin.

Expand Down Expand Up @@ -176,3 +217,151 @@ def to_top_left_origin(self, page_height: float) -> "BoundingBox":
b=page_height - self.b, # self.t
coord_origin=CoordOrigin.TOPLEFT,
)

def overlaps(self, other: "BoundingBox") -> bool:
    """Check whether the two boxes overlap on both axes.

    The vertical check is only evaluated when the horizontal one succeeds.

    :param other: "BoundingBox": box to test against.
    """
    horizontal = self.overlaps_horizontally(other=other)
    return self.overlaps_vertically(other=other) if horizontal else horizontal

def overlaps_horizontally(self, other: "BoundingBox") -> bool:
    """Check whether the horizontal extents of the two boxes overlap.

    Boxes whose edges merely touch (``self.r == other.l`` or the reverse) are
    reported as not overlapping.

    :param other: "BoundingBox": box to test against.
    """
    if self.r <= other.l:
        # self ends before (or exactly where) other begins.
        return False
    if other.r <= self.l:
        # other ends before (or exactly where) self begins.
        return False
    return True

def overlaps_vertically(self, other: "BoundingBox") -> bool:
    """Check whether the vertical extents of the two boxes overlap.

    Boxes whose edges merely touch are reported as not overlapping.

    :param other: "BoundingBox": box to test against; must share this box's
        coordinate origin.
    :raises ValueError: if the boxes use different coordinate origins.
    """
    if self.coord_origin != other.coord_origin:
        raise ValueError("BoundingBoxes have different CoordOrigin")

    if self.coord_origin == CoordOrigin.BOTTOMLEFT:
        # Bottom-left origin: disjoint when one box's t is at or below the other's b.
        disjoint = self.t <= other.b or other.t <= self.b
        return not disjoint

    if self.coord_origin == CoordOrigin.TOPLEFT:
        # Top-left origin: disjoint when one box's b is at or above the other's t.
        disjoint = self.b <= other.t or other.b <= self.t
        return not disjoint
    # Any other origin falls through and yields None, as before.

def overlaps_vertically_with_iou(self, other: "BoundingBox", iou: float) -> bool:
    """Check whether the vertical (1-D) IoU of the two boxes exceeds ``iou``.

    The vertical IoU is the length of the shared vertical interval divided by
    the length of the interval spanned by both boxes together.

    :param other: "BoundingBox": box to compare against.
    :param iou: float: threshold; the result is True only when the computed
        ratio is strictly greater than this value.
    :returns: False when the boxes do not overlap vertically at all.
    :raises ValueError: unless both boxes are BOTTOMLEFT or both are TOPLEFT.
    """
    origins = (self.coord_origin, other.coord_origin)

    # Pick each box's lower and upper vertical bound according to the shared
    # origin; everything after this is origin-independent.
    if origins == (CoordOrigin.BOTTOMLEFT, CoordOrigin.BOTTOMLEFT):
        self_lo, self_hi = self.b, self.t
        other_lo, other_hi = other.b, other.t
    elif origins == (CoordOrigin.TOPLEFT, CoordOrigin.TOPLEFT):
        self_lo, self_hi = self.t, self.b
        other_lo, other_hi = other.t, other.b
    else:
        raise ValueError("BoundingBoxes have different CoordOrigin")

    if not self.overlaps_vertically(other=other):
        return False

    union_len = max(self_hi, other_hi) - min(self_lo, other_lo)
    inter_len = min(self_hi, other_hi) - max(self_lo, other_lo)

    return float(inter_len) / float(union_len) > iou

def is_left_of(self, other: "BoundingBox") -> bool:
    """Check whether this box's left edge lies left of ``other``'s left edge.

    Only the ``l`` coordinates are compared; the boxes may still overlap.

    :param other: "BoundingBox": box to compare against.
    """
    own_left, other_left = self.l, other.l
    return own_left < other_left

def is_strictly_left_of(self, other: "BoundingBox", eps: float = 0.001) -> bool:
    """Check whether this box ends more than ``eps`` before ``other`` starts.

    :param other: "BoundingBox": box to compare against.
    :param eps: float: margin added to this box's right edge before comparing
        it with ``other``'s left edge.
    """
    right_edge_with_margin = self.r + eps
    return right_edge_with_margin < other.l

def is_above(self, other: "BoundingBox") -> bool:
    """Check whether this box's top edge lies above ``other``'s top edge.

    Only the ``t`` coordinates are compared; the direction of the comparison
    depends on the shared coordinate origin.

    :param other: "BoundingBox": box to compare against.
    :raises ValueError: unless both boxes are BOTTOMLEFT or both are TOPLEFT.
    """
    origins = (self.coord_origin, other.coord_origin)

    if origins == (CoordOrigin.BOTTOMLEFT, CoordOrigin.BOTTOMLEFT):
        # Bottom-left origin: the larger t is compared as "above".
        return self.t > other.t

    if origins == (CoordOrigin.TOPLEFT, CoordOrigin.TOPLEFT):
        # Top-left origin: the smaller t is compared as "above".
        return self.t < other.t

    raise ValueError("BoundingBoxes have different CoordOrigin")

def is_strictly_above(self, other: "BoundingBox", eps: float = 1.0e-3) -> bool:
    """Check whether this box ends more than ``eps`` above where ``other`` starts.

    This box's ``b`` edge is compared against ``other``'s ``t`` edge, and a gap
    larger than ``eps`` is required for both coordinate origins. The
    bottom-left branch previously used ``self.b + eps > other.t``, which
    accepted boxes overlapping by up to ``eps`` and so disagreed with the
    top-left branch for the same geometry.

    :param other: "BoundingBox": box to compare against.
    :param eps: float: minimum vertical gap required between the boxes.
    :raises ValueError: unless both boxes are BOTTOMLEFT or both are TOPLEFT.
    """
    origins = (self.coord_origin, other.coord_origin)

    if origins == (CoordOrigin.BOTTOMLEFT, CoordOrigin.BOTTOMLEFT):
        # Mirror image of the top-left test below.
        return (self.b - eps) > other.t

    if origins == (CoordOrigin.TOPLEFT, CoordOrigin.TOPLEFT):
        return (self.b + eps) < other.t

    raise ValueError("BoundingBoxes have different CoordOrigin")

def is_horizontally_connected(
    self, elem_i: "BoundingBox", elem_j: "BoundingBox"
) -> bool:
    """Evaluate the horizontal-connection test between two elements.

    Returns False when this box's vertical extent overlaps the combined
    vertical span of ``elem_i`` and ``elem_j``. Otherwise returns True only if
    ``self.l < elem_i.r`` and ``elem_j.l < self.r``.

    :param elem_i: "BoundingBox": first element; its ``r`` edge is compared
        with this box's ``l`` edge.
    :param elem_j: "BoundingBox": second element; its ``l`` edge is compared
        with this box's ``r`` edge.
    :raises ValueError: unless all three boxes are BOTTOMLEFT or all three are
        TOPLEFT.
    """
    origins = (self.coord_origin, elem_i.coord_origin, elem_j.coord_origin)

    # Select the lower and upper vertical bounds for the shared origin.
    if origins == (CoordOrigin.BOTTOMLEFT,) * 3:
        own_lo, own_hi = self.b, self.t
        span_lo = min(elem_i.b, elem_j.b)
        span_hi = max(elem_i.t, elem_j.t)
    elif origins == (CoordOrigin.TOPLEFT,) * 3:
        own_lo, own_hi = self.t, self.b
        span_lo = min(elem_i.t, elem_j.t)
        span_hi = max(elem_i.b, elem_j.b)
    else:
        raise ValueError("BoundingBoxes have different CoordOrigin")

    if own_lo < span_hi and span_lo < own_hi:  # overlap_y
        return False

    return self.l < elem_i.r and elem_j.l < self.r
3 changes: 2 additions & 1 deletion docling_core/types/doc/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -585,7 +585,8 @@ def get_image(self, doc: "DoclingDocument") -> Optional[PILImage.Image]:
crop_bbox = (
self.prov[0]
.bbox.to_top_left_origin(page_height=page.size.height)
.scaled(scale=page_image.height / page.size.height)
.scale_to_size(old_size=page.size, new_size=page.image.size)
# .scaled(scale=page_image.height / page.size.height)
)
return page_image.crop(crop_bbox.as_tuple())

Expand Down
Loading