Skip to content

Commit

Permalink
fix: Handle no result from RapidOcr reader (#558)
Browse files: browse the repository at this point in the history.
Signed-off-by: Christoph Auer <[email protected]>
  • Loading branch information
cau-git authored Dec 10, 2024
1 parent d0c9e8e commit f45499c
Showing 1 changed file with 18 additions and 17 deletions.
35 changes: 18 additions & 17 deletions docling/models/rapid_ocr_model.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -118,24 +118,25 @@ def __call__(
del high_res_image
del im

cells = [
OcrCell(
id=ix,
text=line[1],
confidence=line[2],
bbox=BoundingBox.from_tuple(
coord=(
(line[0][0][0] / self.scale) + ocr_rect.l,
(line[0][0][1] / self.scale) + ocr_rect.t,
(line[0][2][0] / self.scale) + ocr_rect.l,
(line[0][2][1] / self.scale) + ocr_rect.t,
if result is not None:
cells = [
OcrCell(
id=ix,
text=line[1],
confidence=line[2],
bbox=BoundingBox.from_tuple(
coord=(
(line[0][0][0] / self.scale) + ocr_rect.l,
(line[0][0][1] / self.scale) + ocr_rect.t,
(line[0][2][0] / self.scale) + ocr_rect.l,
(line[0][2][1] / self.scale) + ocr_rect.t,
),
origin=CoordOrigin.TOPLEFT,
),
origin=CoordOrigin.TOPLEFT,
),
)
for ix, line in enumerate(result)
]
all_ocr_cells.extend(cells)
)
for ix, line in enumerate(result)
]
all_ocr_cells.extend(cells)

# Post-process the cells
page.cells = self.post_process_cells(all_ocr_cells, page.cells)
Expand Down

0 comments on commit f45499c

Please sign in to comment.