diff --git a/docling/models/tesseract_ocr_cli_model.py b/docling/models/tesseract_ocr_cli_model.py index 9a50eee0..ababe670 100644 --- a/docling/models/tesseract_ocr_cli_model.py +++ b/docling/models/tesseract_ocr_cli_model.py @@ -1,5 +1,6 @@ import io import logging +import os import tempfile from subprocess import DEVNULL, PIPE, Popen from typing import Iterable, Optional, Tuple @@ -130,14 +131,17 @@ def __call__( high_res_image = page._backend.get_page_image( scale=self.scale, cropbox=ocr_rect ) - - with tempfile.NamedTemporaryFile( - suffix=".png", mode="w" - ) as image_file: - fname = image_file.name - high_res_image.save(fname) + try: + with tempfile.NamedTemporaryFile( + suffix=".png", mode="w+b", delete=False + ) as image_file: + fname = image_file.name + high_res_image.save(image_file) df = self._run_tesseract(fname) + finally: + if os.path.exists(fname): + os.remove(fname) # _log.info(df)