diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py
index b0e1007f..da72252b 100644
--- a/docling_core/types/doc/document.py
+++ b/docling_core/types/doc/document.py
@@ -5,6 +5,7 @@
import hashlib
import html
import json
+import logging
import mimetypes
import os
import re
@@ -20,6 +21,7 @@
from xml.sax.saxutils import unescape
import latex2mathml.converter
+import latex2mathml.exceptions
import pandas as pd
import yaml
from PIL import Image as PILImage
@@ -44,6 +46,8 @@
from docling_core.types.doc.tokens import DocumentToken, TableToken
from docling_core.types.doc.utils import relative_path
+_logger = logging.getLogger(__name__)
+
# Integer type annotated with inclusive bounds 0 .. 2**64 - 1, i.e. the value
# range of an unsigned 64-bit integer.
Uint64 = typing.Annotated[int, Field(ge=0, le=(2**64 - 1))]
# Integer type annotated with inclusive bounds 1 .. 100.
# NOTE(review): presumably a heading/nesting level — confirm against usage.
LevelNumber = typing.Annotated[int, Field(ge=1, le=100)]
# Version string constant, marked Final so type checkers flag reassignment.
# NOTE(review): presumably the document schema version — confirm against usage.
CURRENT_VERSION: Final = "1.0.0"
@@ -2487,34 +2491,47 @@ def _prepare_tag_content(
)
text = ""
- # If the formula is not processed correcty, use its image
- if (
- item.text == ""
- and item.orig != ""
- and image_mode == ImageRefMode.EMBEDDED
- and len(item.prov) > 0
- ):
+ def _image_fallback(item: TextItem):
item_image = item.get_image(doc=self)
if item_image is not None:
img_ref = ImageRef.from_pil(item_image, dpi=72)
- text = (
+ return (
"'
"
{math_formula}" elif math_formula != "": text = f"
{math_formula}"