-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathgenerate_entities.py
86 lines (71 loc) · 2.25 KB
/
generate_entities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import json
import sys
# Generator script: reads a JSON object from stdin that maps "&"-prefixed
# entity names to objects carrying a "codepoints" list, and writes a Rust
# lookup function for those names to src/entities.rs.
#
# Usage (as recorded in the generated file's header):
#   curl https://html.spec.whatwg.org/entities.json | python generate_entities.py

# --- Rust source templates ---------------------------------------------------
# The templates are filled in with %-style named substitution. They are kept at
# column 0 so that no extra whitespace ends up inside the emitted text.

# File header: the CharRef struct plus the opening of the dispatch function.
_PRELUDE = """
// @generated
// this file is autogenerated by
// curl https://html.spec.whatwg.org/entities.json | python generate_entities.py
pub(crate) struct CharRef {
/// Name as it appears escaped in HTML
pub(crate) name: &'static str,
/// Unescaped character codepoints
pub(crate) characters: &'static str,
}
pub(crate) fn try_read_character_reference<E>(first_char: char, try_read: impl FnMut(&str) -> Result<bool, E>) -> Result<Option<CharRef>, E> {
match first_char {
"""

# Opening of one match arm. Each arm's table lives in its own nested function;
# the original author's note says this is done so compilation is faster.
_ARM_OPEN = """
'%(first_char)s' => {
#[allow(non_snake_case)]
fn branch_%(first_char)s<E>(mut try_read: impl FnMut(&str) -> Result<bool, E>) -> Result<Option<CharRef>, E> {
for (other_chars, characters) in &[
"""

# One (remaining name, replacement characters) row of an arm's table.
_TABLE_ROW = """
("%(other_chars)s", "%(characters)s"),
"""

# Closing of one match arm: the probing loop, then the call of the nested fn.
_ARM_CLOSE = """
] {
if try_read(other_chars)? {
return Ok(Some(CharRef { name: other_chars, characters }));
}
}
Ok(None)
}
branch_%(first_char)s(try_read)
}
"""

# Fallback arm plus the closing braces of the match and the function.
_EPILOGUE = """
_ => Ok(None)
}
}
"""

# --- Load and order the entity table -----------------------------------------
# Longest names come first (ties broken alphabetically) so that the generated
# code tries the largest prefix before any shorter one.
entries = sorted(
    json.load(sys.stdin).items(),
    key=lambda item: (-len(item[0]), item[0]),
)

# Group by the first character after the leading "&". Every group keeps the
# longest-first ordering established above.
arms = {}
for name, payload in entries:
    assert name[0] == "&"
    lead, tail = name[1], name[2:]
    if lead not in arms:
        arms[lead] = []
    arms[lead].append((tail, payload))

# --- Emit the Rust file ------------------------------------------------------
with open("src/entities.rs", "w") as out:
    out.write(_PRELUDE)
    for lead in sorted(arms):
        out.write(_ARM_OPEN % {"first_char": lead})
        for tail, payload in arms[lead]:
            # Spell each codepoint as a Rust `\u{...}` escape (lowercase hex,
            # without the "0x" prefix that hex() produces).
            escaped = "".join(
                r"\u{" + hex(cp)[2:] + r"}" for cp in payload["codepoints"]
            )
            out.write(_TABLE_ROW % {"other_chars": tail, "characters": escaped})
        out.write(_ARM_CLOSE % {"first_char": lead})
    out.write(_EPILOGUE)