From 6e6267bafd04bd50e00b4f0cbe5af67f793a2d14 Mon Sep 17 00:00:00 2001 From: Rafael Teixeira de Lima Date: Tue, 28 Jan 2025 13:39:54 +0100 Subject: [PATCH] Remove prints and backend flag --- docling/backend/docx_latex/omml.py | 11 ++--------- docling/backend/msword_backend.py | 13 ++----------- 2 files changed, 4 insertions(+), 20 deletions(-) diff --git a/docling/backend/docx_latex/omml.py b/docling/backend/docx_latex/omml.py index 5a5e24c1..68f2519e 100644 --- a/docling/backend/docx_latex/omml.py +++ b/docling/backend/docx_latex/omml.py @@ -187,7 +187,7 @@ def __init__(self, element): self._latex = self.process_children(element) def __str__(self): - return self.latex + return self.latex.replace(" ", " ") def __unicode__(self): return self.__str__(self) @@ -231,20 +231,13 @@ def do_d(self, elm): pr = c_dict["dPr"] null = D_DEFAULT.get("null") - print(pr.text) s_val = get_val(pr.begChr, default=D_DEFAULT.get("left"), store=T) - print(pr.begChr, D_DEFAULT.get("left"), s_val) - e_val = get_val(pr.endChr, default=D_DEFAULT.get("right"), store=T) - print(pr.endChr, D_DEFAULT.get("right"), s_val) - delim = pr.text + D.format( left=null if not s_val else escape_latex(s_val), text=c_dict["e"], right=null if not e_val else escape_latex(e_val), ) - print(delim) - print() return delim def do_spre(self, elm): @@ -355,7 +348,7 @@ def do_m(self, elm): """ rows = [] for stag, t, e in self.process_children_list(elm): - if stag is "mPr": + if stag == "mPr": pass elif stag == "mr": rows.append(t) diff --git a/docling/backend/msword_backend.py b/docling/backend/msword_backend.py index 4ad59f1c..15625583 100644 --- a/docling/backend/msword_backend.py +++ b/docling/backend/msword_backend.py @@ -27,12 +27,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend): - def __init__( - self, - in_doc: "InputDocument", - path_or_stream: Union[BytesIO, Path], - get_latex=False, - ): + def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]): super().__init__(in_doc, path_or_stream) self.XML_KEY = ( "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val" @@ -54,9 +49,6 @@ def __init__( self.level = 0 self.listIter = 0 - # Transform MSWord equations to latex - self.get_latex = get_latex - self.history = { "names": [None], "levels": [None], @@ -248,8 +240,7 @@ def handle_text_elements(self, element, docx_obj, doc): paragraph = docx.text.paragraph.Paragraph(element, docx_obj) text = paragraph.text - if self.get_latex: - text = self.handle_equations_in_text(element=element, text=text) + text = self.handle_equations_in_text(element=element, text=text) if text is None: return