From 9b5e482d1ec102813ecb8602744325e998ef3920 Mon Sep 17 00:00:00 2001 From: Rafael Teixeira de Lima Date: Mon, 27 Jan 2025 10:02:21 +0100 Subject: [PATCH] pre commit fixes, issue with pylatexenc --- docling/backend/docx_latex/omml.py | 48 ++++++++++++++++++------------ docling/backend/msword_backend.py | 3 +- 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/docling/backend/docx_latex/omml.py b/docling/backend/docx_latex/omml.py index 4483c31d..471b241a 100644 --- a/docling/backend/docx_latex/omml.py +++ b/docling/backend/docx_latex/omml.py @@ -5,35 +5,36 @@ On 23/01/2025 """ -from pylatexenc.latexencode import UnicodeToLatexEncoder import lxml.etree as ET +import pylatexenc # type: ignore + from docling.backend.docx_latex.latex_dict import ( + ALN, + ARR, + BACKSLASH, + BLANK, + BRK, CHARS, CHR, CHR_BO, CHR_DEFAULT, - POS, - POS_DEFAULT, - SUB, - SUP, - F, + D_DEFAULT, F_DEFAULT, - T, FUNC, - D, - D_DEFAULT, - RAD, - RAD_DEFAULT, - ARR, + FUNC_PLACE, LIM_FUNC, LIM_TO, LIM_UPP, + POS, + POS_DEFAULT, + RAD, + RAD_DEFAULT, + SUB, + SUP, + D, + F, M, - BRK, - BLANK, - BACKSLASH, - ALN, - FUNC_PLACE, + T, ) OMML_NS = "{http://schemas.openxmlformats.org/officeDocument/2006/math}" @@ -176,7 +177,7 @@ class oMath2Latex(Tag2Method): _t_dict = T __direct_tags = ("box", "sSub", "sSup", "sSubSup", "num", "den", "deg", "e") - u = UnicodeToLatexEncoder( + u = pylatexenc.latexencode.UnicodeToLatexEncoder( replacement_latex_protection="braces-all", unknown_char_policy="keep", unknown_char_warning=False, @@ -229,13 +230,22 @@ def do_d(self, elm): c_dict = self.process_children_dict(elm) pr = c_dict["dPr"] null = D_DEFAULT.get("null") + + print(pr.text) s_val = get_val(pr.begChr, default=D_DEFAULT.get("left"), store=T) + print(pr.begChr, D_DEFAULT.get("left"), s_val) + e_val = get_val(pr.endChr, default=D_DEFAULT.get("right"), store=T) - return pr.text + D.format( + print(pr.endChr, D_DEFAULT.get("right"), s_val) + + delim = pr.text + D.format( left=null if not s_val else escape_latex(s_val), text=c_dict["e"], right=null if not e_val else escape_latex(e_val), ) + print(delim) + print() + return delim def do_spre(self, elm): """ diff --git a/docling/backend/msword_backend.py b/docling/backend/msword_backend.py index a8776732..69c00b4b 100644 --- a/docling/backend/msword_backend.py +++ b/docling/backend/msword_backend.py @@ -19,11 +19,10 @@ from PIL import Image, UnidentifiedImageError from docling.backend.abstract_backend import DeclarativeDocumentBackend +from docling.backend.docx_latex.omml import oMath2Latex from docling.datamodel.base_models import InputFormat from docling.datamodel.document import InputDocument -from docling.backend.docx_latex.omml import oMath2Latex - _log = logging.getLogger(__name__)