From b4e2f4409e9c6d949f0f347f6257fba6e346ec25 Mon Sep 17 00:00:00 2001 From: jarvis Date: Tue, 27 Feb 2024 22:15:53 +0530 Subject: [PATCH 01/13] Add .gitignore file --- .gitignore | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5429971 --- /dev/null +++ b/.gitignore @@ -0,0 +1,154 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dirhtml/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# Editor vomit +.idea/ +.vscode/ + +# User defined +_cache +_templates \ No newline at end of file From 0014f0086392107dc15dadf06eff607b81e4364c Mon Sep 17 00:00:00 2001 From: jarvis Date: Tue, 27 Feb 2024 22:44:35 +0530 Subject: [PATCH 02/13] Add the main lexer logic --- jsonschema_lexer/__init__.py | 4 +- jsonschema_lexer/lexer.py | 183 +++++++++++++++++++++++++++++++++++ 2 files changed, 184 insertions(+), 3 deletions(-) create mode 100644 jsonschema_lexer/lexer.py diff --git a/jsonschema_lexer/__init__.py b/jsonschema_lexer/__init__.py index a1ce084..ac79155 100644 --- a/jsonschema_lexer/__init__.py +++ b/jsonschema_lexer/__init__.py @@ -1,3 +1 @@ -""" -Fill me in! -""" +from .lexer import JSONSchemaLexer # type: ignore diff --git a/jsonschema_lexer/lexer.py b/jsonschema_lexer/lexer.py new file mode 100644 index 0000000..5c2ef1b --- /dev/null +++ b/jsonschema_lexer/lexer.py @@ -0,0 +1,183 @@ +from pygments.lexer import include, RegexLexer +from pygments.token import Token + +def _get_regex_from_options(options: list[str]) -> str: + """ + Constructs a regular expression pattern allowing any string from the options list. + + Args: + options (list[str]): List of options to be included in the regex pattern. + + Returns: + str: Regular expression pattern constructed from the options. + """ + options = ['"' + option + '"' for option in options] + regex_str = "(" + "|".join(options) + ")" + return regex_str + +class JSONSchemaLexer(RegexLexer): + """ + Lexer for JSON Schema syntax highlighting. + """ + name = "JSON Schema Lexer" + + data_types = ["object", "integer", "string", "number", "array", "boolean", "null"] + core_keywords = [ + r"\$schema", + r"\$id", + r"\$ref", + r"\$defs", + r"\$comment", + r"\$dynamicAnchor", + r"\$dynamicRef", + r"\$anchor", + r"\$vocabulary", + ] + applicator_keywords = [ + "oneOf", + "allOf", + "anyOf", + "if", + "then", + "else", + "not", + "properties", + "patternProperties", + "additionalProperties", + "dependentSchemas", + "propertyNames", + "prefixNames", + "contains", + "items", + ] + meta_data_keywords = [ + "title", + "description", + "default", + "deprecated", + "examples", + "readOnly", + "writeOnly", + ] + validation_keywords = [ + "type", + "enum", + "const", + "minLength", + "maxLength", + "pattern", + "maximum", + "exclusiveMinimum", + "multipleOf", + "exclusiveMaximum", + "minimum", + "dependentRequired", + "minProperties", + "maxProperties", + "required", + "minItems", + "maxItems", + "minContains", + "maxContains", + "uniqueItems", + ] + other_keywords = ["format", "unevaluated", "content", "format_assertion"] + + tokens = { + "whitespace": [ + (r"\s+", Token.Whitespace), + ], + "data_types": [ + # Used Literal type here to differentiate the highlighted color of data types from other keywords + (_get_regex_from_options(data_types), Token.Literal), + ], + "core_keywords": [ + ( + _get_regex_from_options(core_keywords), + Token.Keyword.Reserved, + "objectattribute", + ), + ], + "applicator_keywords": [ + ( + _get_regex_from_options(applicator_keywords), + Token.Keyword.Reserved, + "objectattribute", + ), + ], + "validation_keywords": [ + ( + _get_regex_from_options(validation_keywords), + Token.Keyword.Reserved, + "objectattribute", + ), + ], + "meta_data_keywords": [ + ( + _get_regex_from_options(meta_data_keywords), + Token.Keyword.Reserved, + "objectattribute", + ), + ], + "other_keywords": [ + ( + _get_regex_from_options(other_keywords), + Token.Keyword.Reserved, + "objectattribute", + ), + ], + "keywords": [ + include("core_keywords"), + include("applicator_keywords"), + include("validation_keywords"), + include("meta_data_keywords"), + include("other_keywords"), + ], + + # represents a simple terminal value + "simplevalue": [ + include("data_types"), + (r"(true|false)", Token.Number), + (r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?", Token.Number.Integer), + ('"(\\|"|[^"])*"', Token.String.Double), + ], + + # the right hand side of an object, after the attribute name + "objectattribute": [ + include("value"), + (r":", Token.Punctuation), + # comma terminates the attribute but expects more + (r",", Token.Punctuation, "#pop"), + # a closing bracket terminates the entire object, so pop twice + (r"}", Token.Punctuation, ("#pop", "#pop")), + ], + + # a json object - { attr, attr, ... } + "objectvalue": [ + include("whitespace"), + include("keywords"), + (r'"(\\\\|\\"|[^"])*"', Token.Name.Tag, "objectattribute"), + (r"}", Token.Punctuation, "#pop"), + ], + + # json array - [ value, value, ... } + "arrayvalue": [ + include("whitespace"), + include("value"), + (r",", Token.Punctuation), + (r"]", Token.Punctuation, "#pop"), + ], + + # a json value - either a simple value or a complex value (object or array) + "value": [ + include("whitespace"), + include("simplevalue"), + (r"{", Token.Punctuation, "objectvalue"), + (r"\[", Token.Punctuation, "arrayvalue"), + ], + + # the root of a json document whould be a value + "root": [ + include("value"), + ], + } From daafe6f83ec6c0e95ffbc507b7758bffe477104a Mon Sep 17 00:00:00 2001 From: jarvis Date: Tue, 27 Feb 2024 22:48:17 +0530 Subject: [PATCH 03/13] Add dependencies --- pyproject.toml | 4 +++- requirements.txt | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 requirements.txt diff --git a/pyproject.toml b/pyproject.toml index cd4b17e..8e23c78 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,9 @@ classifiers = [ ] dynamic = ["version"] - +dependencies = [ + "Pygments==2.17.2" +] [project.urls] Issues = "https://github.com/python-jsonschema/jsonschema-lexer/issues/" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..8dd3e04 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --strip-extras pyproject.toml +# +pygments==2.17.2 + # via jsonschema_lexer (pyproject.toml) From e9c4a4f55f81d52d7ed440c47e7b7adf2fec77bb Mon Sep 17 00:00:00 2001 From: jarvis Date: Tue, 27 Feb 2024 23:07:20 +0530 Subject: [PATCH 04/13] Add basic readme to use the lexer --- README.rst | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/README.rst b/README.rst index 16d40b9..74e1c47 100644 --- a/README.rst +++ b/README.rst @@ -15,3 +15,46 @@ .. |CI| image:: https://github.com/python-jsonschema/jsonschema-lexer/workflows/CI/badge.svg :alt: Build status :target: https://github.com/python-jsonschema/jsonschema-lexer/actions?query=workflow%3ACI + + +Introduction +------------ + +`jsonschema-lexer` is a Python package that provides a JSON Schema lexer for syntax highlighting JSON Schema documents. It utilizes Pygments, a syntax highlighting library, to tokenize JSON Schema documents according to the JSON Schema specification. + +Usage +----- + +Once installed, you can use it in your Python code to highlight JSON Schema documents. + +Here's a simple example: + +.. code-block:: python + + from jsonschema_lexer.lexer import JSONSchemaLexer + + from rich.syntax import Syntax + console = Console() + + code = """ + { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/product.schema.json", + "title": "Product", + "description": "A product from Acme's catalog", + "type": "object", + "properties": { + "productId": { + "description": "The unique identifier for a product", + "type": "integer" + }, + "productName": { + "description": "Name of the product", + "type": "string" + } + } + } + """ + + syntax = Syntax(code, lexer=JSONSchemaLexer(), background_color="default", word_wrap=True) + console.print(syntax) From 6392b0ed641433534a270c15048e04b3096851d6 Mon Sep 17 00:00:00 2001 From: jarvis Date: Tue, 27 Feb 2024 23:10:54 +0530 Subject: [PATCH 05/13] Add missing import in readme example --- README.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 74e1c47..0266f05 100644 --- a/README.rst +++ b/README.rst @@ -31,9 +31,12 @@ Here's a simple example: .. code-block:: python - from jsonschema_lexer.lexer import JSONSchemaLexer - + # Import the JSONSchemaLexer class from the package + from jsonschema_lexer import JSONSchemaLexer + + from rich.console import Console from rich.syntax import Syntax + console = Console() code = """ From eff20a15194f863cb3180e9d093801834dfd6919 Mon Sep 17 00:00:00 2001 From: jarvis Date: Tue, 27 Feb 2024 23:14:31 +0530 Subject: [PATCH 06/13] Readme sentences must be in separate lines --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 0266f05..d6db72f 100644 --- a/README.rst +++ b/README.rst @@ -1,4 +1,4 @@ -==================== +======================= ``jsonschema-lexer`` ==================== @@ -16,11 +16,11 @@ :alt: Build status :target: https://github.com/python-jsonschema/jsonschema-lexer/actions?query=workflow%3ACI - Introduction ------------ -`jsonschema-lexer` is a Python package that provides a JSON Schema lexer for syntax highlighting JSON Schema documents. It utilizes Pygments, a syntax highlighting library, to tokenize JSON Schema documents according to the JSON Schema specification. +``jsonschema-lexer`` is a Python package that provides a JSON Schema lexer for syntax highlighting JSON Schema documents. +It utilizes Pygments, a syntax highlighting library, to tokenize JSON Schema documents according to the JSON Schema specification. Usage ----- From eb58d63cc3f06a14107bafd6a85a4b10e0a5ae67 Mon Sep 17 00:00:00 2001 From: jarvis Date: Tue, 27 Feb 2024 23:22:32 +0530 Subject: [PATCH 07/13] Verified and fixed any typos in keywords --- jsonschema_lexer/lexer.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/jsonschema_lexer/lexer.py b/jsonschema_lexer/lexer.py index 5c2ef1b..b038179 100644 --- a/jsonschema_lexer/lexer.py +++ b/jsonschema_lexer/lexer.py @@ -1,6 +1,7 @@ from pygments.lexer import include, RegexLexer from pygments.token import Token + def _get_regex_from_options(options: list[str]) -> str: """ Constructs a regular expression pattern allowing any string from the options list. @@ -15,12 +16,14 @@ def _get_regex_from_options(options: list[str]) -> str: regex_str = "(" + "|".join(options) + ")" return regex_str + class JSONSchemaLexer(RegexLexer): """ Lexer for JSON Schema syntax highlighting. """ + name = "JSON Schema Lexer" - + data_types = ["object", "integer", "string", "number", "array", "boolean", "null"] core_keywords = [ r"\$schema", @@ -46,7 +49,7 @@ class JSONSchemaLexer(RegexLexer): "additionalProperties", "dependentSchemas", "propertyNames", - "prefixNames", + "prefixItems", "contains", "items", ] @@ -81,7 +84,15 @@ class JSONSchemaLexer(RegexLexer): "maxContains", "uniqueItems", ] - other_keywords = ["format", "unevaluated", "content", "format_assertion"] + other_keywords = [ + "format", + "unevaluatedItems", + "unevaluatedProperties", + "contentEncoding", + "contentMediaType", + "contentSchema", + "format_assertion", + ] tokens = { "whitespace": [ @@ -133,7 +144,6 @@ class JSONSchemaLexer(RegexLexer): include("meta_data_keywords"), include("other_keywords"), ], - # represents a simple terminal value "simplevalue": [ include("data_types"), @@ -141,7 +151,6 @@ class JSONSchemaLexer(RegexLexer): (r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?", Token.Number.Integer), ('"(\\|"|[^"])*"', Token.String.Double), ], - # the right hand side of an object, after the attribute name "objectattribute": [ include("value"), @@ -151,7 +160,6 @@ class JSONSchemaLexer(RegexLexer): # a closing bracket terminates the entire object, so pop twice (r"}", Token.Punctuation, ("#pop", "#pop")), ], - # a json object - { attr, attr, ... } "objectvalue": [ include("whitespace"), @@ -159,7 +167,6 @@ class JSONSchemaLexer(RegexLexer): (r'"(\\\\|\\"|[^"])*"', Token.Name.Tag, "objectattribute"), (r"}", Token.Punctuation, "#pop"), ], - # json array - [ value, value, ... } "arrayvalue": [ include("whitespace"), @@ -167,7 +174,6 @@ class JSONSchemaLexer(RegexLexer): (r",", Token.Punctuation), (r"]", Token.Punctuation, "#pop"), ], - # a json value - either a simple value or a complex value (object or array) "value": [ include("whitespace"), @@ -175,7 +181,6 @@ class JSONSchemaLexer(RegexLexer): (r"{", Token.Punctuation, "objectvalue"), (r"\[", Token.Punctuation, "arrayvalue"), ], - # the root of a json document whould be a value "root": [ include("value"), From e1711f2eeb5f1ac83dbb60cbc498aecf7ab538c7 Mon Sep 17 00:00:00 2001 From: jarvis Date: Tue, 27 Feb 2024 23:23:56 +0530 Subject: [PATCH 08/13] Add latest dialect name in readme --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index d6db72f..a0f81de 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,6 @@ ======================= ``jsonschema-lexer`` -==================== +======================= |PyPI| |Pythons| |CI| @@ -19,7 +19,7 @@ Introduction ------------ -``jsonschema-lexer`` is a Python package that provides a JSON Schema lexer for syntax highlighting JSON Schema documents. +`jsonschema-lexer` is a Python package that provides a JSON Schema lexer for syntax highlighting JSON Schema documents based on the `2020-12 dialect`. It utilizes Pygments, a syntax highlighting library, to tokenize JSON Schema documents according to the JSON Schema specification. Usage From dd117afb14c81ca74c05a2cbf9fcf3e43612c402 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 27 Feb 2024 17:58:47 +0000 Subject: [PATCH 09/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .gitignore | 2 +- README.rst | 6 +++--- jsonschema_lexer/__init__.py | 1 - jsonschema_lexer/lexer.py | 18 +++++++++++++++--- 4 files changed, 19 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 5429971..3b30a0b 100644 --- a/.gitignore +++ b/.gitignore @@ -151,4 +151,4 @@ cython_debug/ # User defined _cache -_templates \ No newline at end of file +_templates diff --git a/README.rst b/README.rst index a0f81de..5b85ede 100644 --- a/README.rst +++ b/README.rst @@ -25,7 +25,7 @@ It utilizes Pygments, a syntax highlighting library, to tokenize JSON Schema doc Usage ----- -Once installed, you can use it in your Python code to highlight JSON Schema documents. +Once installed, you can use it in your Python code to highlight JSON Schema documents. Here's a simple example: @@ -33,10 +33,10 @@ Here's a simple example: # Import the JSONSchemaLexer class from the package from jsonschema_lexer import JSONSchemaLexer - + from rich.console import Console from rich.syntax import Syntax - + console = Console() code = """ diff --git a/jsonschema_lexer/__init__.py b/jsonschema_lexer/__init__.py index ac79155..e69de29 100644 --- a/jsonschema_lexer/__init__.py +++ b/jsonschema_lexer/__init__.py @@ -1 +0,0 @@ -from .lexer import JSONSchemaLexer # type: ignore diff --git a/jsonschema_lexer/lexer.py b/jsonschema_lexer/lexer.py index b038179..15c45c1 100644 --- a/jsonschema_lexer/lexer.py +++ b/jsonschema_lexer/lexer.py @@ -1,4 +1,4 @@ -from pygments.lexer import include, RegexLexer +from pygments.lexer import RegexLexer, include from pygments.token import Token @@ -11,6 +11,7 @@ def _get_regex_from_options(options: list[str]) -> str: Returns: str: Regular expression pattern constructed from the options. + """ options = ['"' + option + '"' for option in options] regex_str = "(" + "|".join(options) + ")" @@ -24,7 +25,15 @@ class JSONSchemaLexer(RegexLexer): name = "JSON Schema Lexer" - data_types = ["object", "integer", "string", "number", "array", "boolean", "null"] + data_types = [ + "object", + "integer", + "string", + "number", + "array", + "boolean", + "null", + ] core_keywords = [ r"\$schema", r"\$id", @@ -148,7 +157,10 @@ class JSONSchemaLexer(RegexLexer): "simplevalue": [ include("data_types"), (r"(true|false)", Token.Number), - (r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?", Token.Number.Integer), + ( + r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?", + Token.Number.Integer, + ), ('"(\\|"|[^"])*"', Token.String.Double), ], # the right hand side of an object, after the attribute name From 33d3834ad2c831af53c1e8c47afdd4ae4ebc181c Mon Sep 17 00:00:00 2001 From: jarvis Date: Tue, 27 Feb 2024 23:46:24 +0530 Subject: [PATCH 10/13] Fix failing errors --- README.rst | 2 +- jsonschema_lexer/__init__.py | 3 +++ jsonschema_lexer/lexer.py | 34 +++++++++++++++++++++------------- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/README.rst b/README.rst index 5b85ede..bb909ec 100644 --- a/README.rst +++ b/README.rst @@ -32,7 +32,7 @@ Here's a simple example: .. code-block:: python # Import the JSONSchemaLexer class from the package - from jsonschema_lexer import JSONSchemaLexer + from jsonschema_lexer.lexer import JSONSchemaLexer from rich.console import Console from rich.syntax import Syntax diff --git a/jsonschema_lexer/__init__.py b/jsonschema_lexer/__init__.py index e69de29..70b3836 100644 --- a/jsonschema_lexer/__init__.py +++ b/jsonschema_lexer/__init__.py @@ -0,0 +1,3 @@ +""" +Provides the JSONSchema Lexer. +""" diff --git a/jsonschema_lexer/lexer.py b/jsonschema_lexer/lexer.py index 15c45c1..1064925 100644 --- a/jsonschema_lexer/lexer.py +++ b/jsonschema_lexer/lexer.py @@ -1,21 +1,27 @@ +""" +Contains the main functionality of the JSONSchemaLexer. +""" + +from typing import Any, ClassVar + from pygments.lexer import RegexLexer, include from pygments.token import Token def _get_regex_from_options(options: list[str]) -> str: """ - Constructs a regular expression pattern allowing any string from the options list. + Constructs regex allowing any string from the options list. Args: - options (list[str]): List of options to be included in the regex pattern. + options (list[str]): List of options to be included + in the regex pattern. Returns: str: Regular expression pattern constructed from the options. """ options = ['"' + option + '"' for option in options] - regex_str = "(" + "|".join(options) + ")" - return regex_str + return "(" + "|".join(options) + ")" class JSONSchemaLexer(RegexLexer): @@ -25,7 +31,7 @@ class JSONSchemaLexer(RegexLexer): name = "JSON Schema Lexer" - data_types = [ + data_types: ClassVar[list[str]] = [ "object", "integer", "string", @@ -34,7 +40,7 @@ class JSONSchemaLexer(RegexLexer): "boolean", "null", ] - core_keywords = [ + core_keywords: ClassVar[list[str]] = [ r"\$schema", r"\$id", r"\$ref", @@ -45,7 +51,7 @@ class JSONSchemaLexer(RegexLexer): r"\$anchor", r"\$vocabulary", ] - applicator_keywords = [ + applicator_keywords: ClassVar[list[str]] = [ "oneOf", "allOf", "anyOf", @@ -62,7 +68,7 @@ class JSONSchemaLexer(RegexLexer): "contains", "items", ] - meta_data_keywords = [ + meta_data_keywords: ClassVar[list[str]] = [ "title", "description", "default", @@ -71,7 +77,7 @@ class JSONSchemaLexer(RegexLexer): "readOnly", "writeOnly", ] - validation_keywords = [ + validation_keywords: ClassVar[list[str]] = [ "type", "enum", "const", @@ -93,7 +99,7 @@ class JSONSchemaLexer(RegexLexer): "maxContains", "uniqueItems", ] - other_keywords = [ + other_keywords: ClassVar[list[str]] = [ "format", "unevaluatedItems", "unevaluatedProperties", @@ -103,12 +109,13 @@ class JSONSchemaLexer(RegexLexer): "format_assertion", ] - tokens = { + tokens: ClassVar[dict[str, list[Any]]] = { "whitespace": [ (r"\s+", Token.Whitespace), ], "data_types": [ - # Used Literal type here to differentiate the highlighted color of data types from other keywords + # Used Literal type here to differentiate the highlighted + # color of data types from other keywords (_get_regex_from_options(data_types), Token.Literal), ], "core_keywords": [ @@ -186,7 +193,8 @@ class JSONSchemaLexer(RegexLexer): (r",", Token.Punctuation), (r"]", Token.Punctuation, "#pop"), ], - # a json value - either a simple value or a complex value (object or array) + # a json value - either a simple value or a + # complex value (object or array) "value": [ include("whitespace"), include("simplevalue"), From 51d150332c895e8d3e00d1bf60b1ae5621f0d845 Mon Sep 17 00:00:00 2001 From: jarvis Date: Wed, 28 Feb 2024 09:35:35 +0530 Subject: [PATCH 11/13] Use lexer on top of preexisting JSON lexer --- jsonschema_lexer/lexer.py | 170 ++++++++++---------------------------- 1 file changed, 44 insertions(+), 126 deletions(-) diff --git a/jsonschema_lexer/lexer.py b/jsonschema_lexer/lexer.py index 1064925..fa8bb0e 100644 --- a/jsonschema_lexer/lexer.py +++ b/jsonschema_lexer/lexer.py @@ -2,31 +2,17 @@ Contains the main functionality of the JSONSchemaLexer. """ -from typing import Any, ClassVar +from typing import ClassVar -from pygments.lexer import RegexLexer, include +from pygments.lexers.data import ( + JsonLexer, # type: ignore[reportMissingTypeStubs] +) from pygments.token import Token -def _get_regex_from_options(options: list[str]) -> str: +class JSONSchemaLexer(JsonLexer): """ - Constructs regex allowing any string from the options list. - - Args: - options (list[str]): List of options to be included - in the regex pattern. - - Returns: - str: Regular expression pattern constructed from the options. - - """ - options = ['"' + option + '"' for option in options] - return "(" + "|".join(options) + ")" - - -class JSONSchemaLexer(RegexLexer): - """ - Lexer for JSON Schema syntax highlighting. + For JSONSchema. """ name = "JSON Schema Lexer" @@ -41,15 +27,15 @@ class JSONSchemaLexer(RegexLexer): "null", ] core_keywords: ClassVar[list[str]] = [ - r"\$schema", - r"\$id", - r"\$ref", - r"\$defs", - r"\$comment", - r"\$dynamicAnchor", - r"\$dynamicRef", - r"\$anchor", - r"\$vocabulary", + "$schema", + "$id", + "$ref", + "$defs", + "$comment", + "$dynamicAnchor", + "$dynamicRef", + "$anchor", + "$vocabulary", ] applicator_keywords: ClassVar[list[str]] = [ "oneOf", @@ -109,100 +95,32 @@ class JSONSchemaLexer(RegexLexer): "format_assertion", ] - tokens: ClassVar[dict[str, list[Any]]] = { - "whitespace": [ - (r"\s+", Token.Whitespace), - ], - "data_types": [ - # Used Literal type here to differentiate the highlighted - # color of data types from other keywords - (_get_regex_from_options(data_types), Token.Literal), - ], - "core_keywords": [ - ( - _get_regex_from_options(core_keywords), - Token.Keyword.Reserved, - "objectattribute", - ), - ], - "applicator_keywords": [ - ( - _get_regex_from_options(applicator_keywords), - Token.Keyword.Reserved, - "objectattribute", - ), - ], - "validation_keywords": [ - ( - _get_regex_from_options(validation_keywords), - Token.Keyword.Reserved, - "objectattribute", - ), - ], - "meta_data_keywords": [ - ( - _get_regex_from_options(meta_data_keywords), - Token.Keyword.Reserved, - "objectattribute", - ), - ], - "other_keywords": [ - ( - _get_regex_from_options(other_keywords), - Token.Keyword.Reserved, - "objectattribute", - ), - ], - "keywords": [ - include("core_keywords"), - include("applicator_keywords"), - include("validation_keywords"), - include("meta_data_keywords"), - include("other_keywords"), - ], - # represents a simple terminal value - "simplevalue": [ - include("data_types"), - (r"(true|false)", Token.Number), - ( - r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?", - Token.Number.Integer, - ), - ('"(\\|"|[^"])*"', Token.String.Double), - ], - # the right hand side of an object, after the attribute name - "objectattribute": [ - include("value"), - (r":", Token.Punctuation), - # comma terminates the attribute but expects more - (r",", Token.Punctuation, "#pop"), - # a closing bracket terminates the entire object, so pop twice - (r"}", Token.Punctuation, ("#pop", "#pop")), - ], - # a json object - { attr, attr, ... } - "objectvalue": [ - include("whitespace"), - include("keywords"), - (r'"(\\\\|\\"|[^"])*"', Token.Name.Tag, "objectattribute"), - (r"}", Token.Punctuation, "#pop"), - ], - # json array - [ value, value, ... } - "arrayvalue": [ - include("whitespace"), - include("value"), - (r",", Token.Punctuation), - (r"]", Token.Punctuation, "#pop"), - ], - # a json value - either a simple value or a - # complex value (object or array) - "value": [ - include("whitespace"), - include("simplevalue"), - (r"{", Token.Punctuation, "objectvalue"), - (r"\[", Token.Punctuation, "arrayvalue"), - ], - # the root of a json document whould be a value - "root": [ - include("value"), - ], - } + parsed_keywords: ClassVar[list[str]] = [ + '"%s"' % keyword + for keyword in ( + core_keywords + + applicator_keywords + + meta_data_keywords + + validation_keywords + + other_keywords + ) + ] + + parsed_data_types: ClassVar[list[str]] = [ + '"%s"' % data_type for data_type in data_types + ] + + def get_tokens_unprocessed(self, text: str): # type: ignore[reportUnknownParameterType] + """ + Add token classes to it according to JSON Schema. + """ + for start, token, value in super().get_tokens_unprocessed(text): # type: ignore[reportUnknownVariableType] + if token is Token.Name.Tag and value in self.parsed_keywords: + yield start, Token.Keyword, value + elif ( + token is Token.String.Double + and value in self.parsed_data_types + ): + yield start, Token.Name.Decorator, value + else: + yield start, token, value From 32132b85249b967d16129f864717cd74da504b0a Mon Sep 17 00:00:00 2001 From: jarvis Date: Wed, 28 Feb 2024 09:54:56 +0530 Subject: [PATCH 12/13] Fix placement of #ignore stubs line --- jsonschema_lexer/lexer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jsonschema_lexer/lexer.py b/jsonschema_lexer/lexer.py index fa8bb0e..ccd62ca 100644 --- a/jsonschema_lexer/lexer.py +++ b/jsonschema_lexer/lexer.py @@ -4,8 +4,8 @@ from typing import ClassVar -from pygments.lexers.data import ( - JsonLexer, # type: ignore[reportMissingTypeStubs] +from pygments.lexers.data import ( # type: ignore[reportMissingTypeStubs] + JsonLexer, ) from pygments.token import Token From 1c300bebe92f04f8e2e490043d1596b39a84eb0d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 28 Feb 2024 04:25:10 +0000 Subject: [PATCH 13/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- jsonschema_lexer/lexer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jsonschema_lexer/lexer.py b/jsonschema_lexer/lexer.py index ccd62ca..f8adf5b 100644 --- a/jsonschema_lexer/lexer.py +++ b/jsonschema_lexer/lexer.py @@ -4,7 +4,7 @@ from typing import ClassVar -from pygments.lexers.data import ( # type: ignore[reportMissingTypeStubs] +from pygments.lexers.data import ( # type: ignore[reportMissingTypeStubs] JsonLexer, ) from pygments.token import Token