diff --git a/src/main/kotlin/parser/Expression.kt b/src/main/kotlin/parser/Expression.kt index 6709de6..c9f1b8e 100644 --- a/src/main/kotlin/parser/Expression.kt +++ b/src/main/kotlin/parser/Expression.kt @@ -11,3 +11,21 @@ data class IntExpression( return "${indent(indent)}Int(${value.lexeme})" } } + +// support arithmetic operation +data class BinaryExpression( + val left_operand: Expression, + val operator: Token, + val right_operand: Expression +) : Expression() { + override fun prettyPrint(indent: Int): String { + return buildString { + appendLine("${indent(indent)}BinaryExpression(operator=${operator.lexeme})") + appendLine("${indent(indent + 1)}left =") + append(left_operand.prettyPrint(indent + 2)) + appendLine("${indent(indent + 1)}right =") + append(right_operand.prettyPrint(indent + 2)) + append("${indent(indent)})") + } + } +} diff --git a/src/main/kotlin/parser/Parser.kt b/src/main/kotlin/parser/Parser.kt index c478469..e3c60b8 100644 --- a/src/main/kotlin/parser/Parser.kt +++ b/src/main/kotlin/parser/Parser.kt @@ -5,8 +5,8 @@ import lexer.TokenType import org.example.Exceptions.SyntaxError class Parser { - fun parseTokens(tokens: List): ASTNode { - val tokens = tokens.toMutableList() + fun parseTokens(incomingTokens: List): ASTNode { + val tokens = incomingTokens.toMutableList() val ast = parseProgram(tokens) val lastToken = tokens.removeFirst() @@ -78,14 +78,53 @@ class Parser { } private fun parseExpression(tokens: MutableList): Expression { - val token = tokens.removeFirst() - if (token.type != TokenType.INT_LITERAL) { - throw SyntaxError( - line = token.line, - column = token.column, - message = "Expected token: ${TokenType.INT_LITERAL}, got ${token.type}" - ) + return parseAddition(tokens) + } + + private fun parseAddition(tokens: MutableList): Expression { + var left_operand = parseTerm(tokens) + + while (check(TokenType.PLUS, tokens) || check(TokenType.MINUS, tokens)) { + var operator = tokens.removeFirst() + var right_operand = parseTerm(tokens) + left_operand = BinaryExpression(left_operand, operator, right_operand) + } + return left_operand + } + + private fun parseTerm(tokens: MutableList): Expression { + var left_operand = parsePrimary(tokens) + + while (check(TokenType.MULTIPLY, tokens) || check(TokenType.DIVIDE, tokens)) { + var operator = tokens.removeFirst() + var right_operand = parsePrimary(tokens) + left_operand = BinaryExpression(left_operand, operator, right_operand) + } + return left_operand + } + + private fun parsePrimary(tokens: MutableList): Expression { + if (check(TokenType.INT_LITERAL, tokens)) { + return IntExpression(tokens.removeFirst()) } - return IntExpression(value = token) + + if (check(TokenType.LEFT_PAREN, tokens)) { + expect(TokenType.LEFT_PAREN, tokens) + val expression = parseExpression(tokens) + expect(TokenType.RIGHT_PAREN, tokens) + return expression + } + + val unexpectedToken = tokens.first() + throw SyntaxError( + line = unexpectedToken.line, + column = unexpectedToken.column, + message = "Unexpected token: ${unexpectedToken.type}" + ) + } + + private fun check(type: TokenType, tokens: MutableList): Boolean { + if (tokens.isEmpty()) return false + return tokens.first().type == type } } diff --git a/src/test/kotlin/LexerTest.kt b/src/test/kotlin/lexer/LexerTest.kt similarity index 97% rename from src/test/kotlin/LexerTest.kt rename to src/test/kotlin/lexer/LexerTest.kt index 944d557..fe011d5 100644 --- a/src/test/kotlin/LexerTest.kt +++ b/src/test/kotlin/lexer/LexerTest.kt @@ -1,6 +1,4 @@ - -import lexer.Lexer -import lexer.TokenType +package lexer import org.junit.jupiter.api.Test import kotlin.test.assertEquals diff --git a/src/test/kotlin/parser/ParserTest.kt b/src/test/kotlin/parser/ParserTest.kt index 9287407..4cf8b9d 100644 --- a/src/test/kotlin/parser/ParserTest.kt +++ b/src/test/kotlin/parser/ParserTest.kt @@ -3,165 +3,327 @@ package parser import lexer.Token import lexer.TokenType import org.example.Exceptions.SyntaxError +import org.example.parser.BinaryExpression +import org.example.parser.IntExpression import org.example.parser.Parser +import org.example.parser.ReturnStatement +import org.example.parser.SimpleFunction import org.example.parser.SimpleProgram +import org.junit.jupiter.api.Nested import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertThrows import kotlin.test.assertEquals import kotlin.test.assertIs class ParserTest { + @Nested + inner class RegressionTests { + @Test + fun `test basic program parsing`() { + // Create a list of tokens for a simple program: int main(void) { return 42; } + val tokens = + listOf( + Token(TokenType.KEYWORD_INT, "int", 1, 1), + Token(TokenType.IDENTIFIER, "main", 1, 5), + Token(TokenType.LEFT_PAREN, "(", 1, 9), + Token(TokenType.KEYWORD_VOID, "void", 1, 10), + Token(TokenType.RIGHT_PAREN, ")", 1, 14), + Token(TokenType.LEFT_BRACK, "{", 1, 16), + Token(TokenType.KEYWORD_RETURN, "return", 1, 18), + Token(TokenType.INT_LITERAL, "42", 1, 25), + Token(TokenType.SEMICOLON, ";", 1, 27), + Token(TokenType.RIGHT_BRACK, "}", 1, 29), + Token(TokenType.EOF, "", 1, 30), + ) - @Test - fun `test basic program parsing`() { - // Create a list of tokens for a simple program: int main(void) { return 42; } - val tokens = listOf( - Token(TokenType.KEYWORD_INT, "int", 1, 1), - Token(TokenType.IDENTIFIER, "main", 1, 5), - Token(TokenType.LEFT_PAREN, "(", 1, 9), - Token(TokenType.KEYWORD_VOID, "void", 1, 10), - Token(TokenType.RIGHT_PAREN, ")", 1, 14), - Token(TokenType.LEFT_BRACK, "{", 1, 16), - Token(TokenType.KEYWORD_RETURN, "return", 1, 18), - Token(TokenType.INT_LITERAL, "42", 1, 25), - Token(TokenType.SEMICOLON, ";", 1, 27), - Token(TokenType.RIGHT_BRACK, "}", 1, 29), - Token(TokenType.EOF, "", 1, 30) - ) - - val parser = Parser() - val ast = parser.parseTokens(tokens) - - // Verify the AST structure - assertIs(ast) - } + val parser = Parser() + val ast = parser.parseTokens(tokens) + + // Verify the AST structure + assertIs(ast) + } + + @Test + fun `test detailed AST structure for single integer`() { + // Create a list of tokens for a simple program: int main(void) { return 42; } + val tokens = + listOf( + Token(TokenType.KEYWORD_INT, "int", 1, 1), + Token(TokenType.IDENTIFIER, "main", 1, 5), + Token(TokenType.LEFT_PAREN, "(", 1, 9), + Token(TokenType.KEYWORD_VOID, "void", 1, 10), + Token(TokenType.RIGHT_PAREN, ")", 1, 14), + Token(TokenType.LEFT_BRACK, "{", 1, 16), + Token(TokenType.KEYWORD_RETURN, "return", 1, 18), + Token(TokenType.INT_LITERAL, "42", 1, 25), + Token(TokenType.SEMICOLON, ";", 1, 27), + Token(TokenType.RIGHT_BRACK, "}", 1, 29), + Token(TokenType.EOF, "", 1, 30), + ) + + val parser = Parser() + val ast = parser.parseTokens(tokens) - @Test - fun `test detailed AST structure`() { - // Create a list of tokens for a simple program: int main(void) { return 42; } - val tokens = listOf( - Token(TokenType.KEYWORD_INT, "int", 1, 1), - Token(TokenType.IDENTIFIER, "main", 1, 5), - Token(TokenType.LEFT_PAREN, "(", 1, 9), - Token(TokenType.KEYWORD_VOID, "void", 1, 10), - Token(TokenType.RIGHT_PAREN, ")", 1, 14), - Token(TokenType.LEFT_BRACK, "{", 1, 16), - Token(TokenType.KEYWORD_RETURN, "return", 1, 18), - Token(TokenType.INT_LITERAL, "42", 1, 25), - Token(TokenType.SEMICOLON, ";", 1, 27), - Token(TokenType.RIGHT_BRACK, "}", 1, 29), - Token(TokenType.EOF, "", 1, 30) - ) - - val parser = Parser() - val ast = parser.parseTokens(tokens) - - // Verify the AST structure in detail - assertIs(ast) - val program = ast - - // Check function definition - val function = program.functionDefinition - assertIs(function) - val simpleFunction = function - - // Check function name - assertEquals("main", simpleFunction.name.token.lexeme) - - // Check return statement - val returnStatement = simpleFunction.body - assertIs(returnStatement) - val returnStmt = returnStatement - - // Check return expression - val expression = returnStmt.expression - assertIs(expression) - val intExpr = expression - assertEquals("42", intExpr.value.lexeme) + // Verify the AST structure in detail + assertIs(ast) + val program = ast + + val function = program.functionDefinition + assertIs(function) + assertEquals("main", function.name.token.lexeme) + + val returnStatement = function.body + assertIs(returnStatement) + + val expression = returnStatement.expression + assertIs(expression) + assertEquals("42", expression.value.lexeme) + } } - @Test - fun `test syntax error - missing int keyword`() { - // Missing the int keyword at the beginning - val tokens = listOf( - Token(TokenType.IDENTIFIER, "main", 1, 1), - Token(TokenType.LEFT_PAREN, "(", 1, 5), - Token(TokenType.KEYWORD_VOID, "void", 1, 6), - Token(TokenType.RIGHT_PAREN, ")", 1, 10), - Token(TokenType.LEFT_BRACK, "{", 1, 12), - Token(TokenType.KEYWORD_RETURN, "return", 1, 14), - Token(TokenType.INT_LITERAL, "42", 1, 21), - Token(TokenType.SEMICOLON, ";", 1, 23), - Token(TokenType.RIGHT_BRACK, "}", 1, 25), - Token(TokenType.EOF, "", 1, 26) - ) - - val parser = Parser() - val exception = assertThrows { - parser.parseTokens(tokens) + // verify the arithmetic expression functionality + @Nested + inner class ArithmeticExpressionTests { + private fun buildTokensForExpression(expressionTokens: List): List = + listOf( + Token(TokenType.KEYWORD_INT, "int", 1, 1), + Token(TokenType.IDENTIFIER, "main", 1, 5), + Token(TokenType.LEFT_PAREN, "(", 1, 9), + Token(TokenType.KEYWORD_VOID, "void", 1, 10), + Token(TokenType.RIGHT_PAREN, ")", 1, 14), + Token(TokenType.LEFT_BRACK, "{", 1, 16), + Token(TokenType.KEYWORD_RETURN, "return", 1, 18), + ) + expressionTokens + + listOf( + Token(TokenType.SEMICOLON, ";", 1, 27), + Token(TokenType.RIGHT_BRACK, "}", 1, 29), + Token(TokenType.EOF, "", 1, 30), + ) + + @Test + fun `test simple addition`() { + // return 5 + 3; + val tokens = + buildTokensForExpression( + listOf( + Token(TokenType.INT_LITERAL, "5", 1, 25), + Token(TokenType.PLUS, "+", 1, 26), + Token(TokenType.INT_LITERAL, "3", 1, 27), + ), + ) + val parser = Parser() + val ast = parser.parseTokens(tokens) + + val returnStmt = ((ast as SimpleProgram).functionDefinition as SimpleFunction).body as ReturnStatement + val expr = returnStmt.expression + assertIs(expr) + assertEquals(TokenType.PLUS, expr.operator.type) + assertIs(expr.left_operand) + assertEquals("5", (expr.left_operand as IntExpression).value.lexeme) + assertIs(expr.right_operand) + assertEquals("3", (expr.right_operand as IntExpression).value.lexeme) } - // Check that the error message contains the expected text - assert(exception.message!!.contains("Expected token: KEYWORD_INT, got IDENTIFIER")) - assertEquals(1, exception.line) - assertEquals(1, exception.column) - } + @Test + fun `test operator precedence`() { + // return 2 + 3 * 4; + val tokens = + buildTokensForExpression( + listOf( + Token(TokenType.INT_LITERAL, "2", 1, 25), + Token(TokenType.PLUS, "+", 1, 27), + Token(TokenType.INT_LITERAL, "3", 1, 29), + Token(TokenType.MULTIPLY, "*", 1, 31), + Token(TokenType.INT_LITERAL, "4", 1, 33), + ), + ) + val parser = Parser() + val ast = parser.parseTokens(tokens) + + val returnStmt = ((ast as SimpleProgram).functionDefinition as SimpleFunction).body as ReturnStatement + val expr = returnStmt.expression + + // Top level should be the PLUS operation + assertIs(expr) + assertEquals(TokenType.PLUS, expr.operator.type) - @Test - fun `test syntax error - missing return keyword`() { - // Missing the return keyword - val tokens = listOf( - Token(TokenType.KEYWORD_INT, "int", 1, 1), - Token(TokenType.IDENTIFIER, "main", 1, 5), - Token(TokenType.LEFT_PAREN, "(", 1, 9), - Token(TokenType.KEYWORD_VOID, "void", 1, 10), - Token(TokenType.RIGHT_PAREN, ")", 1, 14), - Token(TokenType.LEFT_BRACK, "{", 1, 16), - // Missing KEYWORD_RETURN here - Token(TokenType.INT_LITERAL, "42", 1, 18), - Token(TokenType.SEMICOLON, ";", 1, 20), - Token(TokenType.RIGHT_BRACK, "}", 1, 22), - Token(TokenType.EOF, "", 1, 23) - ) - - val parser = Parser() - val exception = assertThrows { - parser.parseTokens(tokens) + // Left side of PLUS is 2 + assertIs(expr.left_operand) + assertEquals("2", (expr.left_operand as IntExpression).value.lexeme) + + // Right side of PLUS is another binary expression for the MULTIPLY + val rightExpr = expr.right_operand + assertIs(rightExpr) + assertEquals(TokenType.MULTIPLY, rightExpr.operator.type) + assertEquals("3", (rightExpr.left_operand as IntExpression).value.lexeme) + assertEquals("4", (rightExpr.right_operand as IntExpression).value.lexeme) } - // Check that the error message contains the expected text - assert(exception.message!!.contains("Expected token: KEYWORD_RETURN, got INT_LITERAL")) - assertEquals(1, exception.line) - assertEquals(18, exception.column) + @Test + fun `test parentheses overriding precedence`() { + // return (2 + 3) * 4; + val tokens = + buildTokensForExpression( + listOf( + Token(TokenType.LEFT_PAREN, "(", 1, 25), + Token(TokenType.INT_LITERAL, "2", 1, 26), + Token(TokenType.PLUS, "+", 1, 28), + Token(TokenType.INT_LITERAL, "3", 1, 30), + Token(TokenType.RIGHT_PAREN, ")", 1, 31), + Token(TokenType.MULTIPLY, "*", 1, 33), + Token(TokenType.INT_LITERAL, "4", 1, 35), + ), + ) + val parser = Parser() + val ast = parser.parseTokens(tokens) + + val returnStmt = ((ast as SimpleProgram).functionDefinition as SimpleFunction).body as ReturnStatement + val expr = returnStmt.expression + + // Top level should be the MULTIPLY operation + assertIs(expr) + assertEquals(TokenType.MULTIPLY, expr.operator.type) + + // Right side of MULTIPLY is 4 + assertIs(expr.right_operand) + assertEquals("4", (expr.right_operand as IntExpression).value.lexeme) + + // Left side of MULTIPLY is another binary expression for the PLUS + val leftExpr = expr.left_operand + assertIs(leftExpr) + assertEquals(TokenType.PLUS, leftExpr.operator.type) + assertEquals("2", (leftExpr.left_operand as IntExpression).value.lexeme) + assertEquals("3", (leftExpr.right_operand as IntExpression).value.lexeme) + } + + @Test + fun `test left associativity`() { + // return 10 - 4 - 2; + val tokens = + buildTokensForExpression( + listOf( + Token(TokenType.INT_LITERAL, "10", 1, 25), + Token(TokenType.MINUS, "-", 1, 28), + Token(TokenType.INT_LITERAL, "4", 1, 30), + Token(TokenType.MINUS, "-", 1, 32), + Token(TokenType.INT_LITERAL, "2", 1, 34), + ), + ) + val parser = Parser() + val ast = parser.parseTokens(tokens) + + val returnStmt = ((ast as SimpleProgram).functionDefinition as SimpleFunction).body as ReturnStatement + val expr = returnStmt.expression + + // The top-level operation is the second minus + assertIs(expr) + assertEquals(TokenType.MINUS, expr.operator.type) + + // Its right-hand side is 2 + assertIs(expr.right_operand) + assertEquals("2", (expr.right_operand as IntExpression).value.lexeme) + + // Its left-hand side is the first minus operation + val leftExpr = expr.left_operand + assertIs(leftExpr) + assertEquals(TokenType.MINUS, leftExpr.operator.type) + assertEquals("10", (leftExpr.left_operand as IntExpression).value.lexeme) + assertEquals("4", (leftExpr.right_operand as IntExpression).value.lexeme) + } } - @Test - fun `test syntax error - extra tokens after program`() { - // Extra tokens after the program - val tokens = listOf( - Token(TokenType.KEYWORD_INT, "int", 1, 1), - Token(TokenType.IDENTIFIER, "main", 1, 5), - Token(TokenType.LEFT_PAREN, "(", 1, 9), - Token(TokenType.KEYWORD_VOID, "void", 1, 10), - Token(TokenType.RIGHT_PAREN, ")", 1, 14), - Token(TokenType.LEFT_BRACK, "{", 1, 16), - Token(TokenType.KEYWORD_RETURN, "return", 1, 18), - Token(TokenType.INT_LITERAL, "42", 1, 25), - Token(TokenType.SEMICOLON, ";", 1, 27), - Token(TokenType.RIGHT_BRACK, "}", 1, 29), - // Extra tokens here - Token(TokenType.IDENTIFIER, "extra", 1, 31), - Token(TokenType.EOF, "", 1, 36) - ) - - val parser = Parser() - val exception = assertThrows { - parser.parseTokens(tokens) + // These tests verify that the parser correctly throws errors on bad syntax + @Nested + inner class SyntaxErrorTests { + @Test + fun `test syntax error - missing int keyword`() { + // Missing the int keyword at the beginning + val tokens = + listOf( + Token(TokenType.IDENTIFIER, "main", 1, 1), + Token(TokenType.LEFT_PAREN, "(", 1, 5), + Token(TokenType.KEYWORD_VOID, "void", 1, 6), + Token(TokenType.RIGHT_PAREN, ")", 1, 10), + Token(TokenType.LEFT_BRACK, "{", 1, 12), + Token(TokenType.KEYWORD_RETURN, "return", 1, 14), + Token(TokenType.INT_LITERAL, "42", 1, 21), + Token(TokenType.SEMICOLON, ";", 1, 23), + Token(TokenType.RIGHT_BRACK, "}", 1, 25), + Token(TokenType.EOF, "", 1, 26), + ) + + val parser = Parser() + val exception = + assertThrows { + parser.parseTokens(tokens) + } + + // Check that the error message contains the expected text + assert(exception.message!!.contains("Expected token: KEYWORD_INT, got IDENTIFIER")) + assertEquals(1, exception.line) + assertEquals(1, exception.column) + } + + @Test + fun `test syntax error - missing return keyword`() { + // Missing the return keyword + val tokens = + listOf( + Token(TokenType.KEYWORD_INT, "int", 1, 1), + Token(TokenType.IDENTIFIER, "main", 1, 5), + Token(TokenType.LEFT_PAREN, "(", 1, 9), + Token(TokenType.KEYWORD_VOID, "void", 1, 10), + Token(TokenType.RIGHT_PAREN, ")", 1, 14), + Token(TokenType.LEFT_BRACK, "{", 1, 16), + // Missing KEYWORD_RETURN here + Token(TokenType.INT_LITERAL, "42", 1, 18), + Token(TokenType.SEMICOLON, ";", 1, 20), + Token(TokenType.RIGHT_BRACK, "}", 1, 22), + Token(TokenType.EOF, "", 1, 23), + ) + + val parser = Parser() + val exception = + assertThrows { + parser.parseTokens(tokens) + } + + // Check that the error message contains the expected text + assert(exception.message!!.contains("Expected token: KEYWORD_RETURN, got INT_LITERAL")) + assertEquals(1, exception.line) + assertEquals(18, exception.column) } - // Check that the error message contains the expected text - assert(exception.message!!.contains("Expected end of file")) - assertEquals(1, exception.line) - assertEquals(31, exception.column) + @Test + fun `test syntax error - extra tokens after program`() { + // Extra tokens after the program + val tokens = + listOf( + Token(TokenType.KEYWORD_INT, "int", 1, 1), + Token(TokenType.IDENTIFIER, "main", 1, 5), + Token(TokenType.LEFT_PAREN, "(", 1, 9), + Token(TokenType.KEYWORD_VOID, "void", 1, 10), + Token(TokenType.RIGHT_PAREN, ")", 1, 14), + Token(TokenType.LEFT_BRACK, "{", 1, 16), + Token(TokenType.KEYWORD_RETURN, "return", 1, 18), + Token(TokenType.INT_LITERAL, "42", 1, 25), + Token(TokenType.SEMICOLON, ";", 1, 27), + Token(TokenType.RIGHT_BRACK, "}", 1, 29), + // Extra tokens here + Token(TokenType.IDENTIFIER, "extra", 1, 31), + Token(TokenType.EOF, "", 1, 36), + ) + + val parser = Parser() + val exception = + assertThrows { + parser.parseTokens(tokens) + } + + // Check that the error message contains the expected text + assert(exception.message!!.contains("Expected end of file")) + assertEquals(1, exception.line) + assertEquals(31, exception.column) + } } }