Skip to content

feat: add escaping_underscores option to markdown export #135

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion docling_core/types/doc/document.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1994,6 +1994,7 @@ def save_as_markdown(
to_element: int = sys.maxsize,
labels: set[DocItemLabel] = DEFAULT_EXPORT_LABELS,
strict_text: bool = False,
escaping_underscores: bool = True,
image_placeholder: str = "<!-- image -->",
image_mode: ImageRefMode = ImageRefMode.PLACEHOLDER,
indent: int = 4,
Expand All @@ -2016,6 +2017,7 @@ def save_as_markdown(
to_element=to_element,
labels=labels,
strict_text=strict_text,
escaping_underscores=escaping_underscores,
image_placeholder=image_placeholder,
image_mode=image_mode,
indent=indent,
Expand All @@ -2033,6 +2035,7 @@ def export_to_markdown( # noqa: C901
to_element: int = sys.maxsize,
labels: set[DocItemLabel] = DEFAULT_EXPORT_LABELS,
strict_text: bool = False,
escaping_underscores: bool = True,
image_placeholder: str = "<!-- image -->",
image_mode: ImageRefMode = ImageRefMode.PLACEHOLDER,
indent: int = 4,
Expand All @@ -2058,6 +2061,9 @@ def export_to_markdown( # noqa: C901
:param strict_text: bool: Whether to only include the text content
of the document. (Default value = False).
:type strict_text: bool = False
:param escaping_underscores: bool: Whether to escape underscores in the
text content of the document. (Default value = True).
:type escaping_underscores: bool = True
:param image_placeholder: The placeholder to include to position
images in the markdown. (Default value = "\<!-- image --\>").
:type image_placeholder: str = "<!-- image -->"
Expand Down Expand Up @@ -2226,7 +2232,8 @@ def escape_underscores(text):

return "".join(parts)

mdtext = escape_underscores(mdtext)
if escaping_underscores:
mdtext = escape_underscores(mdtext)

return mdtext

Expand All @@ -2244,6 +2251,7 @@ def export_to_text( # noqa: C901
to_element,
labels,
strict_text=True,
escaping_underscores=False,
image_placeholder="",
)

Expand Down