Skip to content

Commit c9739b2

Browse files
authored
feat: add escaping_underscores option to markdown export (#135)
feat: add escaping_underscores option to markdown export. BREAKING CHANGE: export to text no longer escapes underscores. Add `escaping_underscores` option to `export_to_markdown()`. Set default value of `escaping_underscores` to True. Signed-off-by: Vdaleke <[email protected]>
1 parent f02bbae commit c9739b2

File tree

1 file changed

+9
-1
lines changed

1 file changed

+9
-1
lines changed

docling_core/types/doc/document.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1995,6 +1995,7 @@ def save_as_markdown(
19951995
to_element: int = sys.maxsize,
19961996
labels: set[DocItemLabel] = DEFAULT_EXPORT_LABELS,
19971997
strict_text: bool = False,
1998+
escaping_underscores: bool = True,
19981999
image_placeholder: str = "<!-- image -->",
19992000
image_mode: ImageRefMode = ImageRefMode.PLACEHOLDER,
20002001
indent: int = 4,
@@ -2017,6 +2018,7 @@ def save_as_markdown(
20172018
to_element=to_element,
20182019
labels=labels,
20192020
strict_text=strict_text,
2021+
escaping_underscores=escaping_underscores,
20202022
image_placeholder=image_placeholder,
20212023
image_mode=image_mode,
20222024
indent=indent,
@@ -2034,6 +2036,7 @@ def export_to_markdown( # noqa: C901
20342036
to_element: int = sys.maxsize,
20352037
labels: set[DocItemLabel] = DEFAULT_EXPORT_LABELS,
20362038
strict_text: bool = False,
2039+
escaping_underscores: bool = True,
20372040
image_placeholder: str = "<!-- image -->",
20382041
image_mode: ImageRefMode = ImageRefMode.PLACEHOLDER,
20392042
indent: int = 4,
@@ -2059,6 +2062,9 @@ def export_to_markdown( # noqa: C901
20592062
:param strict_text: bool: Whether to only include the text content
20602063
of the document. (Default value = False).
20612064
:type strict_text: bool = False
2065+
:param escaping_underscores: bool: Whether to escape underscores in the
2066+
text content of the document. (Default value = True).
2067+
:type escaping_underscores: bool = True
20622068
:param image_placeholder: The placeholder to include to position
20632069
images in the markdown. (Default value = "\<!-- image --\>").
20642070
:type image_placeholder: str = "<!-- image -->"
@@ -2227,7 +2233,8 @@ def escape_underscores(text):
22272233

22282234
return "".join(parts)
22292235

2230-
mdtext = escape_underscores(mdtext)
2236+
if escaping_underscores:
2237+
mdtext = escape_underscores(mdtext)
22312238

22322239
return mdtext
22332240

@@ -2245,6 +2252,7 @@ def export_to_text( # noqa: C901
22452252
to_element,
22462253
labels,
22472254
strict_text=True,
2255+
escaping_underscores=False,
22482256
image_placeholder="",
22492257
)
22502258

0 commit comments

Comments
 (0)