Skip to content

Commit

Permalink
fix: PermissionError when using tesseract_ocr_cli_model (#496)
Browse files: browse the repository at this point in the history
Signed-off-by: Gaspard Petit <[email protected]>
  • Loading branch information
gaspardpetit authored Dec 3, 2024
1 parent 33cff98 commit d3f84b2
Showing 1 changed file with 10 additions and 6 deletions.
16 changes: 10 additions & 6 deletions docling/models/tesseract_ocr_cli_model.py
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
import io
import logging
import os
import tempfile
from subprocess import DEVNULL, PIPE, Popen
from typing import Iterable, Optional, Tuple
Expand Down Expand Up @@ -130,14 +131,17 @@ def __call__(
high_res_image = page._backend.get_page_image(
scale=self.scale, cropbox=ocr_rect
)

with tempfile.NamedTemporaryFile(
suffix=".png", mode="w"
) as image_file:
fname = image_file.name
high_res_image.save(fname)
try:
with tempfile.NamedTemporaryFile(
suffix=".png", mode="w+b", delete=False
) as image_file:
fname = image_file.name
high_res_image.save(image_file)

df = self._run_tesseract(fname)
finally:
if os.path.exists(fname):
os.remove(fname)

# _log.info(df)

Expand Down

0 comments on commit d3f84b2

Please sign in to comment.