File tree Expand file tree Collapse file tree 2 files changed +19
-0
lines changed Expand file tree Collapse file tree 2 files changed +19
-0
lines changed Original file line number Diff line number Diff line change 1
1
import importlib
2
2
import json
3
3
import logging
4
+ import re
4
5
import time
5
6
import warnings
6
7
from enum import Enum
@@ -129,6 +130,12 @@ def export_documents(
129
130
)
130
131
131
132
133
+ def _split_list (raw : Optional [str ]) -> Optional [List [str ]]:
134
+ if raw is None :
135
+ return None
136
+ return re .split (r"[;,]" , raw )
137
+
138
+
132
139
@app .command (no_args_is_help = True )
133
140
def convert (
134
141
input_sources : Annotated [
@@ -163,6 +170,13 @@ def convert(
163
170
ocr_engine : Annotated [
164
171
OcrEngine , typer .Option (..., help = "The OCR engine to use." )
165
172
] = OcrEngine .EASYOCR ,
173
+ ocr_lang : Annotated [
174
+ Optional [str ],
175
+ typer .Option (
176
+ ...,
177
+ help = "Provide a comma-separated list of languages used by the OCR engine. Note that each OCR engine has different values for the language names." ,
178
+ ),
179
+ ] = None ,
166
180
pdf_backend : Annotated [
167
181
PdfBackend , typer .Option (..., help = "The PDF backend to use." )
168
182
] = PdfBackend .DLPARSE_V1 ,
@@ -248,6 +262,10 @@ def convert(
248
262
case _:
249
263
raise RuntimeError (f"Unexpected OCR engine type { ocr_engine } " )
250
264
265
+ ocr_lang_list = _split_list (ocr_lang )
266
+ if ocr_lang_list is not None :
267
+ ocr_options .lang = ocr_lang_list
268
+
251
269
pipeline_options = PdfPipelineOptions (
252
270
do_ocr = ocr ,
253
271
ocr_options = ocr_options ,
Original file line number Diff line number Diff line change @@ -22,6 +22,7 @@ class TableStructureOptions(BaseModel):
22
22
23
23
class OcrOptions (BaseModel ):
24
24
kind : str
25
+ lang : List [str ]
25
26
force_full_page_ocr : bool = False # If enabled a full page OCR is always applied
26
27
bitmap_area_threshold : float = (
27
28
0.05 # percentage of the area for a bitmap to be processed with OCR
You can’t perform that action at this time.
0 commit comments