From f45499ce9349fe55538dfb36d74c395e9193d9b1 Mon Sep 17 00:00:00 2001 From: Christoph Auer <60343111+cau-git@users.noreply.github.com> Date: Tue, 10 Dec 2024 16:25:05 +0100 Subject: [PATCH] fix: Handle no result from RapidOcr reader (#558) Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com> --- docling/models/rapid_ocr_model.py | 35 ++++++++++++++++--------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/docling/models/rapid_ocr_model.py b/docling/models/rapid_ocr_model.py index 7fd5a3d4..b40dbf6a 100644 --- a/docling/models/rapid_ocr_model.py +++ b/docling/models/rapid_ocr_model.py @@ -118,24 +118,25 @@ def __call__( del high_res_image del im - cells = [ - OcrCell( - id=ix, - text=line[1], - confidence=line[2], - bbox=BoundingBox.from_tuple( - coord=( - (line[0][0][0] / self.scale) + ocr_rect.l, - (line[0][0][1] / self.scale) + ocr_rect.t, - (line[0][2][0] / self.scale) + ocr_rect.l, - (line[0][2][1] / self.scale) + ocr_rect.t, + if result is not None: + cells = [ + OcrCell( + id=ix, + text=line[1], + confidence=line[2], + bbox=BoundingBox.from_tuple( + coord=( + (line[0][0][0] / self.scale) + ocr_rect.l, + (line[0][0][1] / self.scale) + ocr_rect.t, + (line[0][2][0] / self.scale) + ocr_rect.l, + (line[0][2][1] / self.scale) + ocr_rect.t, + ), + origin=CoordOrigin.TOPLEFT, ), - origin=CoordOrigin.TOPLEFT, - ), - ) - for ix, line in enumerate(result) - ] - all_ocr_cells.extend(cells) + ) + for ix, line in enumerate(result) + ] + all_ocr_cells.extend(cells) # Post-process the cells page.cells = self.post_process_cells(all_ocr_cells, page.cells)