-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathLexer.py
More file actions
73 lines (65 loc) · 1.68 KB
/
Lexer.py
File metadata and controls
73 lines (65 loc) · 1.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import re
# Token tags attached to each lexeme produced by ``lex``.
RESERVED = 'RESERVED'
TEMPORAL = 'TEMPORAL'
INT = 'INT'
VAR = 'VAR'
VARLIST = 'VARLIST'
INTERVAL = 'INTERVAL'
AGGREGATION = 'AGGREGATION'
GROUPBY = 'GROUPBY'

# Default rule table: ``(regex pattern, tag)`` pairs.
#
# * ``lex`` tries the rules top to bottom and takes the FIRST one that matches
#   at the current position, so order is significant: longer operators must
#   precede their prefixes ("<=" and "<-" before "<", ">=" before ">"), and
#   the keyword rules must precede the generic VAR rule.
# * A tag of ``None`` means the matched text is consumed but no token is
#   emitted (whitespace and ``#`` comments).
#
# NOTE(review): the keyword patterns carry no word boundary, so an identifier
# that merely starts with a keyword (e.g. "ORDER") is split into a keyword
# token plus a VAR token ("OR" + "DER"). Confirm whether that is intended
# before tightening the patterns with ``\b``.
basic_rules = [
    # Ignored input.
    (r'[ \n\t]+', None),
    (r'#[^\n]*', None),
    # Grouping and logical connectives.
    (r"\(", RESERVED),
    (r"\)", RESERVED),
    (r"IMPLIES", RESERVED),
    # Closed integer interval such as "[0,10]".
    (r"\[[0-9]+,[0-9]+\]", INTERVAL),
    # Temporal operators.
    (r"SINCE", TEMPORAL),
    (r"PREVIOUS", TEMPORAL),
    (r"ONCE", TEMPORAL),
    (r"AND", RESERVED),
    (r"OR", RESERVED),
    (r"NOT", RESERVED),
    # "<-" must be tried before "<" (see ordering note above).
    (r"<-", RESERVED),
    # Aggregation operators.
    (r"SUM", AGGREGATION),
    (r"CNT", AGGREGATION),
    (r"MIN", AGGREGATION),
    (r"MAX", AGGREGATION),
    (r"AVG", AGGREGATION),
    (r";", GROUPBY),
    # Comparison operators: two-character forms first.
    (r"<=", RESERVED),
    (r">=", RESERVED),
    (r"<", RESERVED),
    (r">", RESERVED),
    (r"=", RESERVED),
    # Quantifiers and boolean constants.
    (r"EXISTS", RESERVED),
    (r"FORALL", RESERVED),
    (r"TRUE", RESERVED),
    (r"FALSE", RESERVED),
    # A dot followed by a comma-separated list of lowercase names, e.g. ".x,y".
    # Raw string: a plain "\." is an invalid escape sequence and triggers a
    # SyntaxWarning on recent Python versions.
    (r"\.[a-z]+(,[a-z]+)*", VARLIST),
    (r"[0-9]+", INT),
    (r"[A-Za-z][A-Za-z0-9_]*", VAR),
]
def lex(characters, lexer_rules) -> "list[tuple[str, str]]":
    """Split *characters* into ``(text, tag)`` tokens using *lexer_rules*.

    Args:
        characters: The input string to tokenize.
        lexer_rules: Sequence of ``(pattern, tag)`` pairs. At each position
            the rules are tried in order and the first one that matches a
            non-empty prefix wins. A falsy tag (e.g. ``None``) consumes the
            matched text without emitting a token.

    Returns:
        The list of ``(matched_text, tag)`` tuples, in input order.

    Raises:
        RuntimeError: If no rule matches at some position; the message names
            the offending character and its index.
    """
    # Compile every pattern once per call rather than once per rule per
    # input position.
    compiled_rules = [(re.compile(pattern), tag) for pattern, tag in lexer_rules]

    pos = 0
    end = len(characters)
    tokens = []
    while pos < end:
        for regex, tag in compiled_rules:
            match = regex.match(characters, pos)
            # A zero-width match would leave ``pos`` unchanged and make the
            # loop spin forever, so treat it as "no match" and keep looking.
            if match and match.end(0) > pos:
                if tag:
                    tokens.append((match.group(0), tag))
                pos = match.end(0)
                break
        else:
            # No rule consumed any input at this position.
            raise RuntimeError(
                "Illegal character \"" + characters[pos] + "\" at position " + str(pos)
            )
    # NOTE(review): prints the token list on every call; kept for backward
    # compatibility, but it looks like leftover debug output - consider
    # removing it or routing it through ``logging``.
    print(tokens)
    return tokens
def get_rules():
    """Return the default lexer rule table.

    The module-level ``basic_rules`` list itself is returned, not a copy,
    so any mutation by the caller is visible to later callers.
    """
    default_rules = basic_rules
    return default_rules