Skip to content

Commit 51d1503

Browse files
committed
Build the lexer on top of the preexisting JSON lexer
1 parent 33d3834 commit 51d1503

File tree

1 file changed

+44
-126
lines changed

1 file changed

+44
-126
lines changed

jsonschema_lexer/lexer.py

+44-126
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,17 @@
22
Contains the main functionality of the JSONSchemaLexer.
33
"""
44

5-
from typing import Any, ClassVar
5+
from typing import ClassVar
66

7-
from pygments.lexer import RegexLexer, include
7+
from pygments.lexers.data import (
8+
JsonLexer, # type: ignore[reportMissingTypeStubs]
9+
)
810
from pygments.token import Token
911

1012

11-
def _get_regex_from_options(options: list[str]) -> str:
13+
class JSONSchemaLexer(JsonLexer):
1214
"""
13-
Constructs regex allowing any string from the options list.
14-
15-
Args:
16-
options (list[str]): List of options to be included
17-
in the regex pattern.
18-
19-
Returns:
20-
str: Regular expression pattern constructed from the options.
21-
22-
"""
23-
options = ['"' + option + '"' for option in options]
24-
return "(" + "|".join(options) + ")"
25-
26-
27-
class JSONSchemaLexer(RegexLexer):
28-
"""
29-
Lexer for JSON Schema syntax highlighting.
15+
For JSONSchema.
3016
"""
3117

3218
name = "JSON Schema Lexer"
@@ -41,15 +27,15 @@ class JSONSchemaLexer(RegexLexer):
4127
"null",
4228
]
4329
core_keywords: ClassVar[list[str]] = [
44-
r"\$schema",
45-
r"\$id",
46-
r"\$ref",
47-
r"\$defs",
48-
r"\$comment",
49-
r"\$dynamicAnchor",
50-
r"\$dynamicRef",
51-
r"\$anchor",
52-
r"\$vocabulary",
30+
"$schema",
31+
"$id",
32+
"$ref",
33+
"$defs",
34+
"$comment",
35+
"$dynamicAnchor",
36+
"$dynamicRef",
37+
"$anchor",
38+
"$vocabulary",
5339
]
5440
applicator_keywords: ClassVar[list[str]] = [
5541
"oneOf",
@@ -109,100 +95,32 @@ class JSONSchemaLexer(RegexLexer):
10995
"format_assertion",
11096
]
11197

112-
tokens: ClassVar[dict[str, list[Any]]] = {
113-
"whitespace": [
114-
(r"\s+", Token.Whitespace),
115-
],
116-
"data_types": [
117-
# Used Literal type here to differentiate the highlighted
118-
# color of data types from other keywords
119-
(_get_regex_from_options(data_types), Token.Literal),
120-
],
121-
"core_keywords": [
122-
(
123-
_get_regex_from_options(core_keywords),
124-
Token.Keyword.Reserved,
125-
"objectattribute",
126-
),
127-
],
128-
"applicator_keywords": [
129-
(
130-
_get_regex_from_options(applicator_keywords),
131-
Token.Keyword.Reserved,
132-
"objectattribute",
133-
),
134-
],
135-
"validation_keywords": [
136-
(
137-
_get_regex_from_options(validation_keywords),
138-
Token.Keyword.Reserved,
139-
"objectattribute",
140-
),
141-
],
142-
"meta_data_keywords": [
143-
(
144-
_get_regex_from_options(meta_data_keywords),
145-
Token.Keyword.Reserved,
146-
"objectattribute",
147-
),
148-
],
149-
"other_keywords": [
150-
(
151-
_get_regex_from_options(other_keywords),
152-
Token.Keyword.Reserved,
153-
"objectattribute",
154-
),
155-
],
156-
"keywords": [
157-
include("core_keywords"),
158-
include("applicator_keywords"),
159-
include("validation_keywords"),
160-
include("meta_data_keywords"),
161-
include("other_keywords"),
162-
],
163-
# represents a simple terminal value
164-
"simplevalue": [
165-
include("data_types"),
166-
(r"(true|false)", Token.Number),
167-
(
168-
r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?",
169-
Token.Number.Integer,
170-
),
171-
('"(\\|"|[^"])*"', Token.String.Double),
172-
],
173-
# the right hand side of an object, after the attribute name
174-
"objectattribute": [
175-
include("value"),
176-
(r":", Token.Punctuation),
177-
# comma terminates the attribute but expects more
178-
(r",", Token.Punctuation, "#pop"),
179-
# a closing bracket terminates the entire object, so pop twice
180-
(r"}", Token.Punctuation, ("#pop", "#pop")),
181-
],
182-
# a json object - { attr, attr, ... }
183-
"objectvalue": [
184-
include("whitespace"),
185-
include("keywords"),
186-
(r'"(\\\\|\\"|[^"])*"', Token.Name.Tag, "objectattribute"),
187-
(r"}", Token.Punctuation, "#pop"),
188-
],
189-
# json array - [ value, value, ... ]
190-
"arrayvalue": [
191-
include("whitespace"),
192-
include("value"),
193-
(r",", Token.Punctuation),
194-
(r"]", Token.Punctuation, "#pop"),
195-
],
196-
# a json value - either a simple value or a
197-
# complex value (object or array)
198-
"value": [
199-
include("whitespace"),
200-
include("simplevalue"),
201-
(r"{", Token.Punctuation, "objectvalue"),
202-
(r"\[", Token.Punctuation, "arrayvalue"),
203-
],
204-
# the root of a json document should be a value
205-
"root": [
206-
include("value"),
207-
],
208-
}
98+
parsed_keywords: ClassVar[list[str]] = [
99+
'"%s"' % keyword
100+
for keyword in (
101+
core_keywords
102+
+ applicator_keywords
103+
+ meta_data_keywords
104+
+ validation_keywords
105+
+ other_keywords
106+
)
107+
]
108+
109+
parsed_data_types: ClassVar[list[str]] = [
110+
'"%s"' % data_type for data_type in data_types
111+
]
112+
113+
def get_tokens_unprocessed(self, text: str):  # type: ignore[reportUnknownParameterType]
    """
    Yield ``(index, token, value)`` triples for *text*, re-tagging
    the base ``JsonLexer`` stream with JSON Schema token classes.

    Tokens the base lexer emits as ``Name.Tag`` whose value is one of
    the quoted schema keywords in ``parsed_keywords`` become
    ``Token.Keyword``; ``String.Double`` tokens whose value is a quoted
    primitive type from ``parsed_data_types`` become
    ``Token.Name.Decorator``.  Every other token passes through
    unchanged.
    """
    keyword_names = self.parsed_keywords
    type_names = self.parsed_data_types
    for index, tok, val in super().get_tokens_unprocessed(text):  # type: ignore[reportUnknownVariableType]
        if tok is Token.Name.Tag and val in keyword_names:
            tok = Token.Keyword
        elif tok is Token.String.Double and val in type_names:
            tok = Token.Name.Decorator
        yield index, tok, val

0 commit comments

Comments
 (0)