From d2f9f050ce623693926214445ede00704e9927b8 Mon Sep 17 00:00:00 2001 From: Yorick Terweijden Date: Wed, 22 Jan 2025 15:38:28 +0200 Subject: [PATCH] Expose `rec_keys_path` in RapidOcrOptions to support custom dictionaries - Added `rec_keys_path` to `RapidOcrOptions` to align with RapidOCR's capability to use custom character dictionaries. - Passed `rec_keys_path` to `RapidOcrModel` initialization, ensuring the recognition model can load the correct dictionary (e.g., for Latin characters). Signed-off-by: Yorick Terweijden --- docling/datamodel/pipeline_options.py | 1 + docling/models/rapid_ocr_model.py | 1 + 2 files changed, 2 insertions(+) diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index eeec6bab..d23fa631 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -125,6 +125,7 @@ class RapidOcrOptions(OcrOptions): det_model_path: Optional[str] = None # same default as rapidocr cls_model_path: Optional[str] = None # same default as rapidocr rec_model_path: Optional[str] = None # same default as rapidocr + rec_keys_path: Optional[str] = None # same default as rapidocr model_config = ConfigDict( extra="forbid", diff --git a/docling/models/rapid_ocr_model.py b/docling/models/rapid_ocr_model.py index 5882ffc7..fa3fbedf 100644 --- a/docling/models/rapid_ocr_model.py +++ b/docling/models/rapid_ocr_model.py @@ -59,6 +59,7 @@ def __init__( det_model_path=self.options.det_model_path, cls_model_path=self.options.cls_model_path, rec_model_path=self.options.rec_model_path, + rec_keys_path=self.options.rec_keys_path, ) def __call__(