From d3f84b2457125feacd0c21d6513e7ae69a308ea5 Mon Sep 17 00:00:00 2001 From: Gaspard Petit Date: Tue, 3 Dec 2024 04:22:03 -0500 Subject: [PATCH] fix: PermissionError when using tesseract_ocr_cli_model (#496) Signed-off-by: Gaspard Petit --- docling/models/tesseract_ocr_cli_model.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/docling/models/tesseract_ocr_cli_model.py b/docling/models/tesseract_ocr_cli_model.py index 9a50eee0..ababe670 100644 --- a/docling/models/tesseract_ocr_cli_model.py +++ b/docling/models/tesseract_ocr_cli_model.py @@ -1,5 +1,6 @@ import io import logging +import os import tempfile from subprocess import DEVNULL, PIPE, Popen from typing import Iterable, Optional, Tuple @@ -130,14 +131,17 @@ def __call__( high_res_image = page._backend.get_page_image( scale=self.scale, cropbox=ocr_rect ) - - with tempfile.NamedTemporaryFile( - suffix=".png", mode="w" - ) as image_file: - fname = image_file.name - high_res_image.save(fname) + try: + with tempfile.NamedTemporaryFile( + suffix=".png", mode="w+b", delete=False + ) as image_file: + fname = image_file.name + high_res_image.save(image_file) df = self._run_tesseract(fname) + finally: + if os.path.exists(fname): + os.remove(fname) # _log.info(df)