From f03ff8e566c4aad38fd03e6b0efc983de52ee3fc Mon Sep 17 00:00:00 2001 From: "v.fateev" Date: Mon, 27 Jan 2025 13:48:02 +0300 Subject: [PATCH 1/4] Added functionality to transliterate any language to valid stable html identifiers. --- myst_parser/config/main.py | 14 ++++++++++++++ myst_parser/mdit_to_docutils/base.py | 6 +++++- myst_parser/parsers/docutils_.py | 2 +- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/myst_parser/config/main.py b/myst_parser/config/main.py index 2b088b56..fe8eab47 100644 --- a/myst_parser/config/main.py +++ b/myst_parser/config/main.py @@ -307,6 +307,20 @@ def __repr__(self) -> str: }, ) + fully_normalize_name_slug_func: Callable[[str], str] | None = dc.field( + default=None, + metadata={ + "validator": check_heading_slug_func, + "help": ( + "Return a case- and whitespace-normalized name." + "or a python import path e.g. `my_package.my_module.my_function`" + "It can be used to transliterate any language to valid stable html identifiers" + ), + "global_only": True, + "doc_type": "None | Callable[[str], str] | str", + }, + ) + html_meta: dict[str, str] = dc.field( default_factory=dict, metadata={ diff --git a/myst_parser/mdit_to_docutils/base.py b/myst_parser/mdit_to_docutils/base.py index 3778dd31..d82c068b 100644 --- a/myst_parser/mdit_to_docutils/base.py +++ b/myst_parser/mdit_to_docutils/base.py @@ -772,7 +772,11 @@ def generate_heading_target( # during ref resolution, and is not stored in the document. # TODO this is purely to mimic docutils, but maybe we don't need it? # (since we have the slugify logic below) - name = nodes.fully_normalize_name(implicit_text) + + if self.md_config.fully_normalize_name_slug_func is not None: + name = self.md_config.fully_normalize_name_slug_func(implicit_text) + else: + name = nodes.fully_normalize_name(implicit_text) node["names"].append(name) self.document.note_implicit_target(node, node) diff --git a/myst_parser/parsers/docutils_.py b/myst_parser/parsers/docutils_.py index e17de44b..db8a270c 100644 --- a/myst_parser/parsers/docutils_.py +++ b/myst_parser/parsers/docutils_.py @@ -139,7 +139,7 @@ def _attr_to_optparse_option(at: Field, default: Any) -> tuple[dict[str, Any], s "metavar": "", "validator": frontend.validate_boolean, }, str(default) - if at.type is str or at.name == "heading_slug_func": + if at.type is str or at.name in ("heading_slug_func", "fully_normalize_name_slug_func"): return { "metavar": "", }, f"(default: '{default}')" From 741e29dbfc8996a4ee07cee2278c277ab5c0799b Mon Sep 17 00:00:00 2001 From: "v.fateev" Date: Tue, 28 Jan 2025 09:53:21 +0300 Subject: [PATCH 2/4] The code has been refactored. --- .gitignore | 2 ++ .pre-commit-config.yaml | 4 ++-- myst_parser/config/main.py | 5 ++--- myst_parser/parsers/docutils_.py | 11 +++++++---- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 48cf54de..de216731 100644 --- a/.gitignore +++ b/.gitignore @@ -133,5 +133,7 @@ _archive/ .vscode/ .DS_Store +.idea/ + docs/apidocs diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0504ee84..9e1acd20 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,14 +21,14 @@ repos: - id: trailing-whitespace - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.8.4 + rev: v0.9.3 hooks: - id: ruff args: [--fix] - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.14.0 + rev: v1.14.1 hooks: - id: mypy args: [--config-file=pyproject.toml] diff --git a/myst_parser/config/main.py b/myst_parser/config/main.py index fe8eab47..3ebcc1d9 100644 --- a/myst_parser/config/main.py +++ b/myst_parser/config/main.py @@ -312,7 +312,7 @@ def __repr__(self) -> str: metadata={ "validator": check_heading_slug_func, "help": ( - "Return a case- and whitespace-normalized name." + "Function for normalizing text to valid html id value" "or a python import path e.g. `my_package.my_module.my_function`" "It can be used to transliterate any language to valid stable html identifiers" ), @@ -536,8 +536,7 @@ def merge_file_level( if "html_meta" in topmatter: warning( MystWarnings.MD_TOPMATTER, - "top-level 'html_meta' key is deprecated, " - "place under 'myst' key instead", + "top-level 'html_meta' key is deprecated, place under 'myst' key instead", ) updates["html_meta"] = topmatter["html_meta"] if "substitutions" in topmatter: diff --git a/myst_parser/parsers/docutils_.py b/myst_parser/parsers/docutils_.py index db8a270c..ca427ae0 100644 --- a/myst_parser/parsers/docutils_.py +++ b/myst_parser/parsers/docutils_.py @@ -139,7 +139,10 @@ def _attr_to_optparse_option(at: Field, default: Any) -> tuple[dict[str, Any], s "metavar": "", "validator": frontend.validate_boolean, }, str(default) - if at.type is str or at.name in ("heading_slug_func", "fully_normalize_name_slug_func"): + if at.type is str or at.name in ( + "heading_slug_func", + "fully_normalize_name_slug_func", + ): return { "metavar": "", }, f"(default: '{default}')" @@ -278,7 +281,7 @@ def parse(self, inputstring: str, document: nodes.document) -> None: for i, line in enumerate(inputstring.split("\n")): if len(line) > document.settings.line_length_limit: error = document.reporter.error( - f"Line {i+1} exceeds the line-length-limit:" + f"Line {i + 1} exceeds the line-length-limit:" f" {document.settings.line_length_limit}." ) document.append(error) @@ -479,7 +482,7 @@ def visit_rubric_html(self, node): So here we override the visit/depart methods to output the correct element """ if "level" in node: - self.body.append(self.starttag(node, f'h{node["level"]}', "", CLASS="rubric")) + self.body.append(self.starttag(node, f"h{node['level']}", "", CLASS="rubric")) else: self.body.append(self.starttag(node, "p", "", CLASS="rubric")) @@ -490,7 +493,7 @@ def depart_rubric_html(self, node): See explanation in `visit_rubric_html` """ if "level" in node: - self.body.append(f'\n') + self.body.append(f"\n") else: self.body.append("

\n") From e6b4c06d6aaf95854c81ff0cb03240732c8f3f02 Mon Sep 17 00:00:00 2001 From: "v.fateev" Date: Tue, 28 Jan 2025 11:34:51 +0300 Subject: [PATCH 3/4] Revert "The code has been refactored." This reverts commit 741e29dbfc8996a4ee07cee2278c277ab5c0799b. --- .gitignore | 2 -- .pre-commit-config.yaml | 4 ++-- myst_parser/config/main.py | 5 +++-- myst_parser/parsers/docutils_.py | 11 ++++------- 4 files changed, 9 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index de216731..48cf54de 100644 --- a/.gitignore +++ b/.gitignore @@ -133,7 +133,5 @@ _archive/ .vscode/ .DS_Store -.idea/ - docs/apidocs diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9e1acd20..0504ee84 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,14 +21,14 @@ repos: - id: trailing-whitespace - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.3 + rev: v0.8.4 hooks: - id: ruff args: [--fix] - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.14.1 + rev: v1.14.0 hooks: - id: mypy args: [--config-file=pyproject.toml] diff --git a/myst_parser/config/main.py b/myst_parser/config/main.py index 3ebcc1d9..fe8eab47 100644 --- a/myst_parser/config/main.py +++ b/myst_parser/config/main.py @@ -312,7 +312,7 @@ def __repr__(self) -> str: metadata={ "validator": check_heading_slug_func, "help": ( - "Function for normalizing text to valid html id value" + "Return a case- and whitespace-normalized name." "or a python import path e.g. `my_package.my_module.my_function`" "It can be used to transliterate any language to valid stable html identifiers" ), @@ -536,7 +536,8 @@ def merge_file_level( if "html_meta" in topmatter: warning( MystWarnings.MD_TOPMATTER, - "top-level 'html_meta' key is deprecated, place under 'myst' key instead", + "top-level 'html_meta' key is deprecated, " + "place under 'myst' key instead", ) updates["html_meta"] = topmatter["html_meta"] if "substitutions" in topmatter: diff --git a/myst_parser/parsers/docutils_.py b/myst_parser/parsers/docutils_.py index ca427ae0..db8a270c 100644 --- a/myst_parser/parsers/docutils_.py +++ b/myst_parser/parsers/docutils_.py @@ -139,10 +139,7 @@ def _attr_to_optparse_option(at: Field, default: Any) -> tuple[dict[str, Any], s "metavar": "", "validator": frontend.validate_boolean, }, str(default) - if at.type is str or at.name in ( - "heading_slug_func", - "fully_normalize_name_slug_func", - ): + if at.type is str or at.name in ("heading_slug_func", "fully_normalize_name_slug_func"): return { "metavar": "", }, f"(default: '{default}')" @@ -281,7 +278,7 @@ def parse(self, inputstring: str, document: nodes.document) -> None: for i, line in enumerate(inputstring.split("\n")): if len(line) > document.settings.line_length_limit: error = document.reporter.error( - f"Line {i + 1} exceeds the line-length-limit:" + f"Line {i+1} exceeds the line-length-limit:" f" {document.settings.line_length_limit}." ) document.append(error) @@ -482,7 +479,7 @@ def visit_rubric_html(self, node): So here we override the visit/depart methods to output the correct element """ if "level" in node: - self.body.append(self.starttag(node, f"h{node['level']}", "", CLASS="rubric")) + self.body.append(self.starttag(node, f'h{node["level"]}', "", CLASS="rubric")) else: self.body.append(self.starttag(node, "p", "", CLASS="rubric")) @@ -493,7 +490,7 @@ def depart_rubric_html(self, node): See explanation in `visit_rubric_html` """ if "level" in node: - self.body.append(f"\n") + self.body.append(f'\n') else: self.body.append("

\n") From 19fb92c33e62d205f03467068c3281494beb2ee3 Mon Sep 17 00:00:00 2001 From: "v.fateev" Date: Tue, 28 Jan 2025 11:38:41 +0300 Subject: [PATCH 4/4] Changed the description of the argument fully_normalize_name_slug_func. --- myst_parser/config/main.py | 4 ++-- myst_parser/parsers/docutils_.py | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/myst_parser/config/main.py b/myst_parser/config/main.py index fe8eab47..c1e65588 100644 --- a/myst_parser/config/main.py +++ b/myst_parser/config/main.py @@ -312,8 +312,8 @@ def __repr__(self) -> str: metadata={ "validator": check_heading_slug_func, "help": ( - "Return a case- and whitespace-normalized name." - "or a python import path e.g. `my_package.my_module.my_function`" + "Function for normalizing text to valid html id value, " + "or a python import path e.g. `my_package.my_module.my_function` " "It can be used to transliterate any language to valid stable html identifiers" ), "global_only": True, diff --git a/myst_parser/parsers/docutils_.py b/myst_parser/parsers/docutils_.py index db8a270c..73acf08c 100644 --- a/myst_parser/parsers/docutils_.py +++ b/myst_parser/parsers/docutils_.py @@ -139,7 +139,10 @@ def _attr_to_optparse_option(at: Field, default: Any) -> tuple[dict[str, Any], s "metavar": "", "validator": frontend.validate_boolean, }, str(default) - if at.type is str or at.name in ("heading_slug_func", "fully_normalize_name_slug_func"): + if at.type is str or at.name in ( + "heading_slug_func", + "fully_normalize_name_slug_func", + ): return { "metavar": "", }, f"(default: '{default}')"