-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #74 from certik/parser
Initial parser implementation
- Loading branch information
Showing
15 changed files
with
3,784 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,3 +12,5 @@ dependencies: | |
- python=3.12.0 | ||
- toml=0.10.2 | ||
- xtensor=0.24.7 | ||
- bison=3.4 | ||
- re2c=3.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,8 @@ | ||
set(SRC | ||
parser/tokenizer.cpp | ||
parser/parser.cpp | ||
parser/parser.tab.cc | ||
|
||
utils.cpp | ||
) | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
-- ASDL's 4 builtin types are: | ||
-- identifier, int, string, constant | ||
|
||
module LC | ||
{ | ||
mod = Module(stmt* body, type_ignore* type_ignores) | ||
| Interactive(stmt* body) | ||
| Expression(expr body) | ||
| FunctionType(expr* argtypes, expr returns) | ||
|
||
stmt = FunctionDef(identifier name, arguments args, | ||
stmt* body, expr* decorator_list, expr? returns, | ||
string? type_comment) | ||
| AsyncFunctionDef(identifier name, arguments args, | ||
stmt* body, expr* decorator_list, expr? returns, | ||
string? type_comment) | ||
|
||
| ClassDef(identifier name, | ||
expr* bases, | ||
keyword* keywords, | ||
stmt* body, | ||
expr* decorator_list) | ||
| Return(expr? value) | ||
|
||
| Delete(expr* targets) | ||
| Assign(expr* targets, expr value, string? type_comment) | ||
| AugAssign(expr target, operator op, expr value) | ||
-- 'simple' indicates that we annotate simple name without parens | ||
| AnnAssign(expr target, expr annotation, expr? value, int simple) | ||
|
||
-- use 'orelse' because else is a keyword in target languages | ||
| For(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment) | ||
| AsyncFor(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment) | ||
| While(expr test, stmt* body, stmt* orelse) | ||
| If(expr test, stmt* body, stmt* orelse) | ||
| With(withitem* items, stmt* body, string? type_comment) | ||
| AsyncWith(withitem* items, stmt* body, string? type_comment) | ||
|
||
| Match(expr subject, match_case* cases) | ||
|
||
| Raise(expr? exc, expr? cause) | ||
| Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody) | ||
| Assert(expr test, expr? msg) | ||
|
||
| Import(alias* names) | ||
| ImportFrom(identifier? module, alias* names, int level) | ||
|
||
| Global(identifier* names) | ||
| Nonlocal(identifier* names) | ||
| Expr(expr value) | ||
| Pass | Break | Continue | ||
|
||
-- col_offset is the byte offset in the utf8 string the parser uses | ||
attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset) | ||
|
||
-- BoolOp() can use left & right? | ||
expr = BoolOp(boolop op, expr* values) | ||
| NamedExpr(expr target, expr value) | ||
| BinOp(expr left, operator op, expr right) | ||
| UnaryOp(unaryop op, expr operand) | ||
| Lambda(arguments args, expr body) | ||
| IfExp(expr test, expr body, expr orelse) | ||
| Dict(expr* keys, expr* values) | ||
| Set(expr* elts) | ||
| ListComp(expr elt, comprehension* generators) | ||
| SetComp(expr elt, comprehension* generators) | ||
| DictComp(expr key, expr value, comprehension* generators) | ||
| GeneratorExp(expr elt, comprehension* generators) | ||
-- the grammar constrains where yield expressions can occur | ||
| Await(expr value) | ||
| Yield(expr? value) | ||
| YieldFrom(expr value) | ||
-- need sequences for compare to distinguish between | ||
-- x < 4 < 3 and (x < 4) < 3 | ||
| Compare(expr left, cmpop ops, expr* comparators) | ||
| Call(expr func, expr* args, keyword* keywords) | ||
| FormattedValue(expr value, int conversion, expr? format_spec) | ||
| JoinedStr(expr* values) | ||
-- | Constant(constant value, string? kind) | ||
-- Our specific nodes that are used instead of Constant: | ||
| ConstantStr(string value, string? kind) | ||
| ConstantInt(int value, string? kind) | ||
| ConstantBool(bool value, string? kind) | ||
| ConstantFloat(float value, string? kind) | ||
| ConstantComplex(float re, float im, string? kind) | ||
| ConstantEllipsis(string? kind) | ||
| ConstantNone(string? kind) | ||
| ConstantBytes(string value, string? kind) | ||
|
||
-- the following expression can appear in assignment context | ||
| Attribute(expr value, identifier attr, expr_context ctx) | ||
| Subscript(expr value, expr slice, expr_context ctx) | ||
| Starred(expr value, expr_context ctx) | ||
| Name(identifier id, expr_context ctx) | ||
| List(expr* elts, expr_context ctx) | ||
| Tuple(expr* elts, expr_context ctx) | ||
|
||
-- can appear only in Subscript | ||
| Slice(expr? lower, expr? upper, expr? step) | ||
|
||
-- col_offset is the byte offset in the utf8 string the parser uses | ||
attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset) | ||
|
||
expr_context = Load | Store | Del | ||
|
||
boolop = And | Or | ||
|
||
operator = Add | Sub | Mult | MatMult | Div | Mod | Pow | LShift | ||
| RShift | BitOr | BitXor | BitAnd | FloorDiv | ||
|
||
unaryop = Invert | Not | UAdd | USub | ||
|
||
cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn | ||
|
||
comprehension = (expr target, expr iter, expr* ifs, int is_async) | ||
|
||
excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body) | ||
attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset) | ||
|
||
arguments = (arg* posonlyargs, arg* args, arg* vararg, arg* kwonlyargs, | ||
expr* kw_defaults, arg* kwarg, expr* defaults) | ||
|
||
arg = (identifier arg, expr? annotation, string? type_comment) | ||
attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset) | ||
|
||
-- keyword arguments supplied to call (NULL identifier for **kwargs) | ||
keyword = (identifier? arg, expr value) | ||
attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset) | ||
|
||
-- import name with optional 'as' alias. | ||
alias = (identifier name, identifier? asname) | ||
attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset) | ||
|
||
withitem = (expr context_expr, expr? optional_vars) | ||
|
||
match_case = (pattern pattern, expr? guard, stmt* body) | ||
|
||
pattern = MatchValue(expr value) | ||
-- | MatchSingleton(constant value) | ||
| MatchSingleton(expr value) | ||
| MatchSequence(pattern* patterns) | ||
| MatchMapping(expr* keys, pattern* patterns, identifier? rest) | ||
| MatchClass(expr cls, pattern* patterns, identifier* kwd_attrs, pattern* kwd_patterns) | ||
|
||
| MatchStar(identifier? name) | ||
-- The optional "rest" MatchMapping parameter handles capturing extra mapping keys | ||
|
||
| MatchAs(pattern? pattern, identifier? name) | ||
| MatchOr(pattern* patterns) | ||
|
||
attributes (int lineno, int col_offset, int end_lineno, int end_col_offset) | ||
|
||
type_ignore = TypeIgnore(int lineno, string tag) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
#include <iostream>
#include <string>
#include <sstream>
#include <fstream>
#include <chrono>
#include <random>

#include <lc/parser/parser.h>
#include <lc/parser/parser.tab.hh>
#include <libasr/diagnostics.h>
#include <libasr/string_utils.h>
#include <libasr/utils.h>
#include <lc/parser/parser_exception.h>
|
||
namespace LCompilers::LC { | ||
|
||
// Parses the source text `s` and wraps the outcome in a Module node.
//
// A `Parser` is run over `s`, with `prev_loc` forwarded to `Parser::parse`.
// When the tokenizer or the parser throws, the diagnostic carried by the
// exception is appended to `diagnostics` and an error result is returned.
// On success the module's location runs from the start of the first parsed
// statement to the end of the last one, or is 0..0 when nothing was parsed.
Result<LC::AST::Module_t*> parse(Allocator &al, const std::string &s,
        uint32_t prev_loc, diag::Diagnostics &diagnostics)
{
    Parser parser(al, diagnostics);
    try {
        parser.parse(s, prev_loc);
    } catch (const parser_local::TokenizerError &e) {
        diagnostics.diagnostics.push_back(e.d);
        return Error();
    } catch (const parser_local::ParserError &e) {
        diagnostics.diagnostics.push_back(e.d);
        return Error();
    }

    // Start from an empty span and widen it only if statements were produced.
    Location span;
    span.first = 0;
    span.last = 0;
    const auto stmt_count = parser.result.size();
    if (stmt_count != 0) {
        span.first = parser.result[0]->base.loc.first;
        span.last = parser.result[stmt_count - 1]->base.loc.last;
    }

    return (LC::AST::Module_t*)LC::AST::make_Module_t(al, span,
        parser.result.p, stmt_count,
        parser.type_ignore.p, parser.type_ignore.size());
}
|
||
void Parser::parse(const std::string &input, uint32_t prev_loc) | ||
{ | ||
inp = input; | ||
if (inp.size() > 0) { | ||
if (inp[inp.size()-1] != '\n') inp.append("\n"); | ||
} else { | ||
inp.append("\n"); | ||
} | ||
m_tokenizer.set_string(inp, prev_loc); | ||
if (yyparse(*this) == 0) { | ||
return; | ||
} | ||
throw parser_local::ParserError("Parsing unsuccessful (internal compiler error)"); | ||
} | ||
|
||
void Parser::handle_yyerror(const Location &loc, const std::string &msg) | ||
{ | ||
std::string message; | ||
if (msg == "syntax is ambiguous") { | ||
message = "Internal Compiler Error: syntax is ambiguous in the parser"; | ||
} else if (msg == "syntax error") { | ||
YYSTYPE yylval_; | ||
YYLTYPE yyloc_; | ||
this->m_tokenizer.cur = this->m_tokenizer.tok; | ||
int token = this->m_tokenizer.lex(this->m_a, yylval_, yyloc_, diag); | ||
if (token == yytokentype::END_OF_FILE) { | ||
message = "End of file is unexpected here"; | ||
} else if (token == yytokentype::TK_NEWLINE) { | ||
message = "Newline is unexpected here"; | ||
} else { | ||
std::string token_str = this->m_tokenizer.token(); | ||
std::string token_type = token2text(token); | ||
if (token_str == token_type) { | ||
message = "Token '" + token_str + "' is unexpected here"; | ||
} else { | ||
message = "Token '" + token_str + "' (of type '" + token2text(token) + "') is unexpected here"; | ||
} | ||
} | ||
} else { | ||
message = "Internal Compiler Error: parser returned unknown error"; | ||
} | ||
throw parser_local::ParserError(message, loc); | ||
} | ||
|
||
// Returns true when the file `name` can be opened for reading, false
// otherwise. The stream is closed again before returning.
bool file_exists(const std::string &name) {
    return std::ifstream(name).is_open();
}
|
||
std::string unique_filename(const std::string &prefix) { | ||
uint64_t ms = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now().time_since_epoch()).count(); | ||
ms = ms % 1000000000; | ||
srand((unsigned) ms); | ||
std::string hex = "0123456789ABCDEF"; | ||
std::string random_hash; | ||
for (int i=0; i < 6; i++) { | ||
random_hash += hex[rand() % 16]; | ||
} | ||
int counter = 1; | ||
std::string filename = prefix + random_hash + std::to_string(counter); | ||
while (file_exists(filename)) { | ||
counter++; | ||
filename = prefix + random_hash + std::to_string(counter); | ||
} | ||
return filename; | ||
} | ||
|
||
// Reads the file `infile` and parses its contents with parse().
//
// Returns the parsed module as a generic AST pointer, or an error result when
// parsing fails; in that case `diagnostics` is expected to hold the error.
// The first string parameter (runtime library dir) is unused. `new_parser`
// is overwritten with true, so the value passed by the caller has no effect.
Result<LC::AST::ast_t*> parse_python_file(Allocator &al,
        const std::string &/*runtime_library_dir*/,
        const std::string &infile,
        diag::Diagnostics &diagnostics,
        uint32_t prev_loc,
        [[maybe_unused]] bool new_parser) {
    // We will be using the new parser from now on
    new_parser = true;
    LCOMPILERS_ASSERT(new_parser)

    const std::string source = read_file(infile);
    Result<LC::AST::Module_t*> parsed = parse(al, source, prev_loc, diagnostics);
    if (!parsed.ok) {
        // A failed parse must have recorded at least one error diagnostic.
        LCOMPILERS_ASSERT(diagnostics.has_error())
        return Error();
    }
    return (LC::AST::ast_t*)parsed.result;
}
|
||
|
||
} // namespace LCompilers::LC |
Oops, something went wrong.