diff --git a/docling_core/types/doc/base.py b/docling_core/types/doc/base.py
index 74daacc..5ad50d8 100644
--- a/docling_core/types/doc/base.py
+++ b/docling_core/types/doc/base.py
@@ -1,6 +1,5 @@
 """Models for the base data types."""
 
-import copy
 from enum import Enum
 from typing import Tuple
 
@@ -53,33 +52,54 @@ def height(self):
         """height."""
         return abs(self.t - self.b)
 
-    def scaled(self, scale: float) -> "BoundingBox":
-        """scaled.
-
-        :param scale: float:
-
-        """
-        out_bbox = copy.deepcopy(self)
-        out_bbox.l *= scale
-        out_bbox.r *= scale
-        out_bbox.t *= scale
-        out_bbox.b *= scale
-
-        return out_bbox
-
-    def normalized(self, page_size: Size) -> "BoundingBox":
-        """normalized.
-
-        :param page_size: Size:
-
-        """
-        out_bbox = copy.deepcopy(self)
-        out_bbox.l /= page_size.width
-        out_bbox.r /= page_size.width
-        out_bbox.t /= page_size.height
-        out_bbox.b /= page_size.height
-
-        return out_bbox
+    def resize_by_scale(self, x_scale: float, y_scale: float) -> "BoundingBox":
+        """Return a copy scaled by independent x and y factors."""
+        return BoundingBox(
+            l=self.l * x_scale,
+            r=self.r * x_scale,
+            t=self.t * y_scale,
+            b=self.b * y_scale,
+            coord_origin=self.coord_origin,
+        )
+
+    def scale_to_size(self, old_size: Size, new_size: Size) -> "BoundingBox":
+        """Return a copy rescaled from a page of old_size to one of new_size."""
+        return self.resize_by_scale(
+            x_scale=new_size.width / old_size.width,
+            y_scale=new_size.height / old_size.height,
+        )
+
+    # Kept for backward compatibility; delegates to resize_by_scale
+    def scaled(self, scale: float) -> "BoundingBox":
+        """Return a copy scaled uniformly by scale."""
+        return self.resize_by_scale(x_scale=scale, y_scale=scale)
+
+    # Kept for backward compatibility; delegates to scale_to_size
+    def normalized(self, page_size: Size) -> "BoundingBox":
+        """Return a copy with coordinates divided by the page width/height."""
+        return self.scale_to_size(
+            old_size=page_size, new_size=Size(height=1.0, width=1.0)
+        )
+
+    def expand_by_scale(self, x_scale: float, y_scale: float) -> "BoundingBox":
+        """Return a copy grown on every side by a fraction of its width/height."""
+        if self.coord_origin == CoordOrigin.TOPLEFT:
+            return BoundingBox(
+                l=self.l - self.width * x_scale,
+                r=self.r + self.width * x_scale,
+                t=self.t - self.height * y_scale,
+                b=self.b + self.height * y_scale,
+                coord_origin=self.coord_origin,
+            )
+        elif self.coord_origin == CoordOrigin.BOTTOMLEFT:
+            return BoundingBox(
+                l=self.l - self.width * x_scale,
+                r=self.r + self.width * x_scale,
+                t=self.t + self.height * y_scale,
+                b=self.b - self.height * y_scale,
+                coord_origin=self.coord_origin,
+            )
+        raise ValueError(f"Unsupported CoordOrigin: {self.coord_origin}")
 
     def as_tuple(self) -> Tuple[float, float, float, float]:
         """as_tuple."""
@@ -116,26 +136,28 @@ def from_tuple(cls, coord: Tuple[float, ...], origin: CoordOrigin):
 
     def area(self) -> float:
         """area."""
-        area = (self.r - self.l) * (self.b - self.t)
-        if self.coord_origin == CoordOrigin.BOTTOMLEFT:
-            area = -area
-        return area
+        return abs(self.r - self.l) * abs(self.b - self.t)
 
     def intersection_area_with(self, other: "BoundingBox") -> float:
-        """intersection_area_with.
-
-        :param other: "BoundingBox":
+        """Calculate the intersection area with another bounding box."""
+        if self.coord_origin != other.coord_origin:
+            raise ValueError("BoundingBoxes have different CoordOrigin")
 
-        """
         # Calculate intersection coordinates
         left = max(self.l, other.l)
-        top = max(self.t, other.t)
         right = min(self.r, other.r)
-        bottom = min(self.b, other.b)
+
+        # "bottom"/"top" are the lower/upper y-values of the overlap
+        if self.coord_origin == CoordOrigin.TOPLEFT:
+            bottom = max(self.t, other.t)
+            top = min(self.b, other.b)
+        else:
+            top = min(self.t, other.t)
+            bottom = max(self.b, other.b)
 
         # Calculate intersection dimensions
         width = right - left
-        height = bottom - top
+        height = top - bottom
 
         # If the bounding boxes do not overlap, width or height will be negative
         if width <= 0 or height <= 0:
@@ -143,6 +165,27 @@ def intersection_area_with(self, other: "BoundingBox") -> float:
 
         return width * height
 
+    def intersection_over_union(
+        self, other: "BoundingBox", eps: float = 1.0e-6
+    ) -> float:
+        """Return intersection area divided by union area (eps avoids 0/0)."""
+        intersection_area = self.intersection_area_with(other=other)
+        union_area = self.area() + other.area() - intersection_area
+
+        return intersection_area / (union_area + eps)
+
+    def intersection_over_self(
+        self, other: "BoundingBox", eps: float = 1.0e-6
+    ) -> float:
+        """Return intersection area divided by own area (0.0 for an empty box)."""
+        intersection_area = self.intersection_area_with(other=other)
+        own_area = self.area()
+        # Guard instead of adding eps so that full containment is exactly 1.0;
+        # eps is only kept for signature parity with intersection_over_union
+        if own_area <= 0.0:
+            return 0.0
+        return intersection_area / own_area
+
     def to_bottom_left_origin(self, page_height: float) -> "BoundingBox":
         """to_bottom_left_origin.
 
@@ -176,3 +219,92 @@ def to_top_left_origin(self, page_height: float) -> "BoundingBox":
             b=page_height - self.b,  # self.t
             coord_origin=CoordOrigin.TOPLEFT,
         )
+
+    def overlaps(self, other: "BoundingBox") -> bool:
+        """Check if two bounding boxes overlap both horizontally and vertically."""
+        return self.overlaps_horizontally(other=other) and self.overlaps_vertically(
+            other=other
+        )
+
+    def overlaps_horizontally(self, other: "BoundingBox") -> bool:
+        """Check if two bounding boxes overlap horizontally."""
+        return not (self.r <= other.l or other.r <= self.l)
+
+    def overlaps_vertically(self, other: "BoundingBox") -> bool:
+        """Check if two bounding boxes overlap vertically."""
+        if self.coord_origin != other.coord_origin:
+            raise ValueError("BoundingBoxes have different CoordOrigin")
+
+        # The y-axis direction decides which edge carries the larger value
+        if self.coord_origin == CoordOrigin.BOTTOMLEFT:
+            return not (self.t <= other.b or other.t <= self.b)
+        return not (self.b <= other.t or other.b <= self.t)
+
+    def overlaps_vertically_with_iou(self, other: "BoundingBox", iou: float) -> bool:
+        """Check if the vertical extents overlap with a 1D IoU greater than iou."""
+        # overlaps_vertically raises ValueError on mismatching CoordOrigins
+        if not self.overlaps_vertically(other=other):
+            return False
+
+        if self.coord_origin == CoordOrigin.BOTTOMLEFT:
+            union = max(self.t, other.t) - min(self.b, other.b)
+            intersection = min(self.t, other.t) - max(self.b, other.b)
+        else:
+            union = max(self.b, other.b) - min(self.t, other.t)
+            intersection = min(self.b, other.b) - max(self.t, other.t)
+
+        # union > 0 is guaranteed here because the extents strictly overlap
+        return (intersection / union) > iou
+
+    def is_left_of(self, other: "BoundingBox") -> bool:
+        """Check if the left edge of this box lies left of the other's left edge."""
+        return self.l < other.l
+
+    def is_strictly_left_of(self, other: "BoundingBox", eps: float = 0.001) -> bool:
+        """Check if this box ends more than eps before the other box starts."""
+        return (self.r + eps) < other.l
+
+    def is_above(self, other: "BoundingBox") -> bool:
+        """Check if the top edge of this box lies above the other's top edge."""
+        if self.coord_origin != other.coord_origin:
+            raise ValueError("BoundingBoxes have different CoordOrigin")
+
+        if self.coord_origin == CoordOrigin.BOTTOMLEFT:
+            return self.t > other.t
+        return self.t < other.t
+
+    def is_strictly_above(self, other: "BoundingBox", eps: float = 1.0e-3) -> bool:
+        """Check if this box ends more than eps above where the other box starts."""
+        if self.coord_origin != other.coord_origin:
+            raise ValueError("BoundingBoxes have different CoordOrigin")
+
+        # eps always widens the required gap, whatever the y-axis direction
+        if self.coord_origin == CoordOrigin.BOTTOMLEFT:
+            return (self.b - eps) > other.t
+        return (self.b + eps) < other.t
+
+    def is_horizontally_connected(
+        self, elem_i: "BoundingBox", elem_j: "BoundingBox"
+    ) -> bool:
+        """Check the horizontal connection of this box with elem_i and elem_j.
+
+        Returns False when this box vertically overlaps the joint y-extent of
+        elem_i and elem_j; otherwise True iff self.l < elem_i.r and
+        elem_j.l < self.r.
+        """
+        origins = {self.coord_origin, elem_i.coord_origin, elem_j.coord_origin}
+        if len(origins) != 1:
+            raise ValueError("BoundingBoxes have different CoordOrigin")
+
+        if self.coord_origin == CoordOrigin.BOTTOMLEFT:
+            min_ij = min(elem_i.b, elem_j.b)
+            max_ij = max(elem_i.t, elem_j.t)
+            overlap_y = self.b < max_ij and min_ij < self.t
+        else:
+            min_ij = min(elem_i.t, elem_j.t)
+            max_ij = max(elem_i.b, elem_j.b)
+            overlap_y = self.t < max_ij and min_ij < self.b
+
+        if overlap_y:
+            return False
+        return self.l < elem_i.r and elem_j.l < self.r
diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py
index d168915..71e49db 100644
--- a/docling_core/types/doc/document.py
+++ b/docling_core/types/doc/document.py
@@ -585,7 +585,7 @@ def get_image(self, doc: "DoclingDocument") -> Optional[PILImage.Image]:
         crop_bbox = (
             self.prov[0]
             .bbox.to_top_left_origin(page_height=page.size.height)
-            .scaled(scale=page_image.height / page.size.height)
+            .scale_to_size(old_size=page.size, new_size=page.image.size)
         )
         return page_image.crop(crop_bbox.as_tuple())
 
diff --git a/test/test_docling_doc.py b/test/test_docling_doc.py
index 1e1652a..c29ae59 100644
--- a/test/test_docling_doc.py
+++ b/test/test_docling_doc.py
@@ -10,10 +10,9 @@
 from PIL import ImageDraw
 from pydantic import AnyUrl, ValidationError
 
-from docling_core.types.doc.base import ImageRefMode
+from docling_core.types.doc.base import BoundingBox, CoordOrigin, ImageRefMode, Size
 from docling_core.types.doc.document import (
     CURRENT_VERSION,
-    BoundingBox,
     CodeItem,
     DocItem,
     DoclingDocument,
@@ -44,6 +43,143 @@ def test_doc_origin():
     )
 
 
+def test_overlaps_horizontally():
+    # Overlapping horizontally
+    bbox1 = BoundingBox(l=0, t=0, r=10, b=10, coord_origin=CoordOrigin.TOPLEFT)
+    bbox2 = BoundingBox(l=5, t=5, r=15, b=15, coord_origin=CoordOrigin.TOPLEFT)
+    assert bbox1.overlaps_horizontally(bbox2) is True
+
+    # No overlap horizontally (disjoint on the right)
+    bbox3 = BoundingBox(l=11, t=0, r=20, b=10, coord_origin=CoordOrigin.TOPLEFT)
+    assert bbox1.overlaps_horizontally(bbox3) is False
+
+    # No overlap horizontally (disjoint on the left)
+    bbox4 = BoundingBox(l=-10, t=0, r=-1, b=10, coord_origin=CoordOrigin.TOPLEFT)
+    assert bbox1.overlaps_horizontally(bbox4) is False
+
+    # Full containment
+    bbox5 = BoundingBox(l=2, t=2, r=8, b=8, coord_origin=CoordOrigin.TOPLEFT)
+    assert bbox1.overlaps_horizontally(bbox5) is True
+
+    # Edge touching (no overlap)
+    bbox6 = BoundingBox(l=10, t=0, r=20, b=10, coord_origin=CoordOrigin.TOPLEFT)
+    assert bbox1.overlaps_horizontally(bbox6) is False
+
+
+def test_overlaps_vertically():
+
+    page_height = 300
+
+    # Same CoordOrigin (TOPLEFT)
+    bbox1 = BoundingBox(l=0, t=0, r=10, b=10, coord_origin=CoordOrigin.TOPLEFT)
+    bbox2 = BoundingBox(l=5, t=5, r=15, b=15, coord_origin=CoordOrigin.TOPLEFT)
+    assert bbox1.overlaps_vertically(bbox2) is True
+
+    bbox1_ = bbox1.to_bottom_left_origin(page_height=page_height)
+    bbox2_ = bbox2.to_bottom_left_origin(page_height=page_height)
+    assert bbox1_.overlaps_vertically(bbox2_) is True
+
+    bbox3 = BoundingBox(l=0, t=11, r=10, b=20, coord_origin=CoordOrigin.TOPLEFT)
+    assert bbox1.overlaps_vertically(bbox3) is False
+
+    bbox3_ = bbox3.to_bottom_left_origin(page_height=page_height)
+    assert bbox1_.overlaps_vertically(bbox3_) is False
+
+    # Same CoordOrigin (BOTTOMLEFT)
+    bbox4 = BoundingBox(l=0, b=20, r=10, t=30, coord_origin=CoordOrigin.BOTTOMLEFT)
+    bbox5 = BoundingBox(l=5, b=15, r=15, t=25, coord_origin=CoordOrigin.BOTTOMLEFT)
+    assert bbox4.overlaps_vertically(bbox5) is True
+
+    bbox4_ = bbox4.to_top_left_origin(page_height=page_height)
+    bbox5_ = bbox5.to_top_left_origin(page_height=page_height)
+    assert bbox4_.overlaps_vertically(bbox5_) is True
+
+    bbox6 = BoundingBox(l=0, b=31, r=10, t=40, coord_origin=CoordOrigin.BOTTOMLEFT)
+    assert bbox4.overlaps_vertically(bbox6) is False
+
+    bbox6_ = bbox6.to_top_left_origin(page_height=page_height)
+    assert bbox4_.overlaps_vertically(bbox6_) is False
+
+    # Different CoordOrigin
+    with pytest.raises(ValueError):
+        bbox1.overlaps_vertically(bbox4)
+
+
+def test_intersection_area_with():
+    page_height = 300
+
+    # Overlapping bounding boxes (TOPLEFT)
+    bbox1 = BoundingBox(l=0, t=0, r=10, b=10, coord_origin=CoordOrigin.TOPLEFT)
+    bbox2 = BoundingBox(l=5, t=5, r=15, b=15, coord_origin=CoordOrigin.TOPLEFT)
+    assert abs(bbox1.intersection_area_with(bbox2) - 25.0) < 1.0e-3
+
+    bbox1_ = bbox1.to_bottom_left_origin(page_height=page_height)
+    bbox2_ = bbox2.to_bottom_left_origin(page_height=page_height)
+    assert abs(bbox1_.intersection_area_with(bbox2_) - 25.0) < 1.0e-3
+
+    # Non-overlapping bounding boxes (TOPLEFT)
+    bbox3 = BoundingBox(l=11, t=0, r=20, b=10, coord_origin=CoordOrigin.TOPLEFT)
+    assert abs(bbox1.intersection_area_with(bbox3) - 0.0) < 1.0e-3
+
+    # Touching edges (no intersection, TOPLEFT)
+    bbox4 = BoundingBox(l=10, t=0, r=20, b=10, coord_origin=CoordOrigin.TOPLEFT)
+    assert abs(bbox1.intersection_area_with(bbox4) - 0.0) < 1.0e-3
+
+    # Fully contained (TOPLEFT)
+    bbox5 = BoundingBox(l=2, t=2, r=8, b=8, coord_origin=CoordOrigin.TOPLEFT)
+    assert abs(bbox1.intersection_area_with(bbox5) - 36.0) < 1.0e-3
+
+    # Overlapping bounding boxes (BOTTOMLEFT)
+    bbox6 = BoundingBox(l=0, t=10, r=10, b=0, coord_origin=CoordOrigin.BOTTOMLEFT)
+    bbox7 = BoundingBox(l=5, t=15, r=15, b=5, coord_origin=CoordOrigin.BOTTOMLEFT)
+    assert abs(bbox6.intersection_area_with(bbox7) - 25.0) < 1.0e-3
+
+    # Different CoordOrigins (raises ValueError)
+    with pytest.raises(ValueError):
+        bbox1.intersection_area_with(bbox6)
+
+
+def test_intersection_over_self():
+    bbox1 = BoundingBox(l=0, t=0, r=10, b=10, coord_origin=CoordOrigin.TOPLEFT)
+    bbox2 = BoundingBox(l=2, t=2, r=8, b=8, coord_origin=CoordOrigin.TOPLEFT)
+    assert bbox2.intersection_over_self(bbox1) == 1.0
+
+    # A zero-area box must not raise ZeroDivisionError
+    empty = BoundingBox(l=5, t=5, r=5, b=5, coord_origin=CoordOrigin.TOPLEFT)
+    assert empty.intersection_over_self(bbox1) == 0.0
+
+
+def test_orientation():
+
+    page_height = 300
+
+    # Same CoordOrigin (TOPLEFT)
+    bbox1 = BoundingBox(l=0, t=0, r=10, b=10, coord_origin=CoordOrigin.TOPLEFT)
+    bbox2 = BoundingBox(l=5, t=5, r=15, b=15, coord_origin=CoordOrigin.TOPLEFT)
+    bbox3 = BoundingBox(l=11, t=5, r=15, b=15, coord_origin=CoordOrigin.TOPLEFT)
+    bbox4 = BoundingBox(l=0, t=11, r=10, b=15, coord_origin=CoordOrigin.TOPLEFT)
+
+    assert bbox1.is_left_of(bbox2) is True
+    assert bbox1.is_strictly_left_of(bbox2) is False
+    assert bbox1.is_strictly_left_of(bbox3) is True
+
+    bbox1_ = bbox1.to_bottom_left_origin(page_height=page_height)
+    bbox2_ = bbox2.to_bottom_left_origin(page_height=page_height)
+    bbox3_ = bbox3.to_bottom_left_origin(page_height=page_height)
+    bbox4_ = bbox4.to_bottom_left_origin(page_height=page_height)
+
+    assert bbox1.is_above(bbox2) is True
+    assert bbox1_.is_above(bbox2_) is True
+    assert bbox1.is_strictly_above(bbox4) is True
+    assert bbox1_.is_strictly_above(bbox4_) is True
+
+    # Touching boxes are not strictly above, whatever the origin
+    bbox5 = BoundingBox(l=0, t=10, r=10, b=15, coord_origin=CoordOrigin.TOPLEFT)
+    bbox5_ = bbox5.to_bottom_left_origin(page_height=page_height)
+    assert bbox1.is_strictly_above(bbox5) is False
+    assert bbox1_.is_strictly_above(bbox5_) is False
+
+
 def test_docitems():
 
     # Iterative function to find all subclasses