From 25c681328268af038e6246b533c5fba16fdd4c3e Mon Sep 17 00:00:00 2001 From: Michele Dolfi Date: Fri, 31 Jan 2025 15:24:27 +0100 Subject: [PATCH] make mathml the default Signed-off-by: Michele Dolfi --- docling_core/types/doc/document.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py index eb56e7cc..a3616a4a 100644 --- a/docling_core/types/doc/document.py +++ b/docling_core/types/doc/document.py @@ -2288,7 +2288,7 @@ def save_as_html( to_element: int = sys.maxsize, labels: set[DocItemLabel] = DEFAULT_EXPORT_LABELS, image_mode: ImageRefMode = ImageRefMode.PLACEHOLDER, - formula_to_mathml: bool = False, + formula_to_mathml: bool = True, page_no: Optional[int] = None, html_lang: str = "en", html_head: str = _HTML_DEFAULT_HEAD, @@ -2355,7 +2355,7 @@ def export_to_html( # noqa: C901 to_element: int = sys.maxsize, labels: set[DocItemLabel] = DEFAULT_EXPORT_LABELS, image_mode: ImageRefMode = ImageRefMode.PLACEHOLDER, - formula_to_mathml: bool = False, + formula_to_mathml: bool = True, page_no: Optional[int] = None, html_lang: str = "en", html_head: str = _HTML_DEFAULT_HEAD, @@ -2479,7 +2479,13 @@ def _prepare_tag_content( annotation.text = math_formula mathml = unescape(tostring(mathml_element, encoding="unicode")) text = f"
{mathml}
" - else: + + elif ( + item.text == "" + and item.orig != "" + and image_mode == ImageRefMode.EMBEDDED + ): + text = f"
{math_formula}
" html_texts.append(text)