diff --git a/meta/src/meta/grammar.y b/meta/src/meta/grammar.y index f4ad9640..1cff34ae 100644 --- a/meta/src/meta/grammar.y +++ b/meta/src/meta/grammar.y @@ -43,7 +43,7 @@ %token UINT32 UInt32 r'\d+u32' %token INT128 logic.Int128Value r'[-]?\d+i128' %token STRING String r'"(?:[^"\\]|\\.)*"' -%token SYMBOL String r'[a-zA-Z_][a-zA-Z0-9_./#-]*' +%token SYMBOL String r'[a-zA-Z_][a-zA-Z0-9_.#/-]*' %token UINT128 logic.UInt128Value r'0x[0-9a-fA-F]+' # Token aliases for formula constants (use hookable formatting in pretty printer) diff --git a/sdks/go/src/parser.go b/sdks/go/src/parser.go index 74665638..6452a36d 100644 --- a/sdks/go/src/parser.go +++ b/sdks/go/src/parser.go @@ -166,7 +166,7 @@ var ( {"UINT32", regexp.MustCompile(`^\d+u32`), func(s string) TokenValue { return TokenValue{kind: kindUint32, u32: scanUint32(s)} }}, {"INT128", regexp.MustCompile(`^[-]?\d+i128`), func(s string) TokenValue { return TokenValue{kind: kindInt128, int128: scanInt128(s)} }}, {"STRING", regexp.MustCompile(`^"(?:[^"\\]|\\.)*"`), func(s string) TokenValue { return TokenValue{kind: kindString, str: scanString(s)} }}, - {"SYMBOL", regexp.MustCompile(`^[a-zA-Z_][a-zA-Z0-9_./#-]*`), func(s string) TokenValue { return TokenValue{kind: kindString, str: scanSymbol(s)} }}, + {"SYMBOL", regexp.MustCompile(`^[a-zA-Z_][a-zA-Z0-9_.#/-]*`), func(s string) TokenValue { return TokenValue{kind: kindString, str: scanSymbol(s)} }}, {"UINT128", regexp.MustCompile(`^0x[0-9a-fA-F]+`), func(s string) TokenValue { return TokenValue{kind: kindUint128, uint128: scanUint128(s)} }}, } ) diff --git a/sdks/go/test/lexer_test.go b/sdks/go/test/lexer_test.go new file mode 100644 index 00000000..a0068886 --- /dev/null +++ b/sdks/go/test/lexer_test.go @@ -0,0 +1,43 @@ +package test + +import ( + "testing" + + lqp "github.com/RelationalAI/logical-query-protocol/sdks/go/src" +) + +// TestSymbolLexerRegex verifies that the SYMBOL regex treats hyphen as a literal +// character and does not accidentally include characters like $, %, etc. +func TestSymbolLexerRegex(t *testing.T) { + t.Run("hyphenated symbol", func(t *testing.T) { + // A hyphenated relation name should parse without error. + input := `(fragment :test (def :my-rel ([x::INT] (relatom :my-rel x))))` + result, _, err := lqp.ParseFragment(input) + if err != nil { + t.Fatalf("Failed to parse hyphenated symbol: %v", err) + } + if result == nil { + t.Fatal("ParseFragment returned nil") + } + }) + + t.Run("symbol with hash and slash", func(t *testing.T) { + input := `(fragment :test (def :base/#output ([x::INT] (relatom :base/#output x))))` + result, _, err := lqp.ParseFragment(input) + if err != nil { + t.Fatalf("Failed to parse symbol with hash and slash: %v", err) + } + if result == nil { + t.Fatal("ParseFragment returned nil") + } + }) + + t.Run("dollar terminates symbol", func(t *testing.T) { + // '$' is not a valid SYMBOL character, so this should fail to parse. + input := `(fragment :test (def :foo$bar ([x::INT] (relatom :foo$bar x))))` + _, _, err := lqp.ParseFragment(input) + if err == nil { + t.Error("Expected parse error for symbol containing '$'") + } + }) +} diff --git a/sdks/julia/LogicalQueryProtocol.jl/src/parser.jl b/sdks/julia/LogicalQueryProtocol.jl/src/parser.jl index 38a4fa7b..43b5b775 100644 --- a/sdks/julia/LogicalQueryProtocol.jl/src/parser.jl +++ b/sdks/julia/LogicalQueryProtocol.jl/src/parser.jl @@ -169,7 +169,7 @@ const _TOKEN_SPECS = [ ("UINT32", r"\d+u32", scan_uint32), ("INT128", r"[-]?\d+i128", scan_int128), ("STRING", r"\"(?:[^\"\\]|\\.)*\"", scan_string), - ("SYMBOL", r"[a-zA-Z_][a-zA-Z0-9_./#-]*", scan_symbol), + ("SYMBOL", r"[a-zA-Z_][a-zA-Z0-9_.#/-]*", scan_symbol), ("UINT128", r"0x[0-9a-fA-F]+", scan_uint128), ] diff --git a/sdks/julia/LogicalQueryProtocol.jl/test/parser_tests.jl b/sdks/julia/LogicalQueryProtocol.jl/test/parser_tests.jl index 27ec785c..b53b18d2 100644 --- a/sdks/julia/LogicalQueryProtocol.jl/test/parser_tests.jl +++ b/sdks/julia/LogicalQueryProtocol.jl/test/parser_tests.jl @@ -161,6 +161,20 @@ end ) end +@testitem "Parser - SYMBOL lexer regex" setup=[ParserSetup] begin + # Hyphen must be a literal character, not part of a range + lexer = Lexer("my-relation") + @test lexer.tokens[1].type == "SYMBOL" + @test lexer.tokens[1].value == "my-relation" + + lexer = Lexer("base/#output") + @test lexer.tokens[1].type == "SYMBOL" + @test lexer.tokens[1].value == "base/#output" + + # '$' is not a valid SYMBOL character — the lexer should fail on it + @test_throws ParseError Lexer("foo\$bar") +end + @testitem "Parser - Lexer tokenization" setup=[ParserSetup] begin lexer = Lexer("(transaction (epoch (writes) (reads)))") # Tokens: ( transaction ( epoch ( writes ) ( reads ) ) ) $ diff --git a/sdks/python/src/lqp/gen/parser.py b/sdks/python/src/lqp/gen/parser.py index 9d349668..206cd90d 100644 --- a/sdks/python/src/lqp/gen/parser.py +++ b/sdks/python/src/lqp/gen/parser.py @@ -122,7 +122,7 @@ def __repr__(self) -> str: ("STRING", re.compile(r'"(?:[^"\\]|\\.)*"'), lambda x: Lexer.scan_string(x)), ( "SYMBOL", - re.compile(r"[a-zA-Z_][a-zA-Z0-9_./#-]*"), + re.compile(r"[a-zA-Z_][a-zA-Z0-9_.#/-]*"), lambda x: Lexer.scan_symbol(x), ), ("UINT128", re.compile(r"0x[0-9a-fA-F]+"), lambda x: Lexer.scan_uint128(x)), diff --git a/sdks/python/tests/test_parser.py b/sdks/python/tests/test_parser.py index fc93be6d..ff2b4bcc 100644 --- a/sdks/python/tests/test_parser.py +++ b/sdks/python/tests/test_parser.py @@ -68,6 +68,25 @@ def test_parse_transaction_rejects_fragment(): parse_transaction(_SIMPLE_FRAGMENT) +class TestSymbolLexing: + """Tests for SYMBOL token regex — hyphen must be literal, not a range.""" + + def test_hyphenated_symbol(self): + tokens = Lexer("my-relation").tokens + assert tokens[0].type == "SYMBOL" + assert tokens[0].value == "my-relation" + + def test_symbol_with_hash_and_slash(self): + tokens = Lexer("base/#output").tokens + assert tokens[0].type == "SYMBOL" + assert tokens[0].value == "base/#output" + + def test_dollar_is_not_part_of_symbol(self): + # '$' is not a valid SYMBOL character — the lexer should fail on it + with pytest.raises(ParseError): + Lexer("foo$bar") + + class TestScanFloat32: """Tests for parsing float32 literals including inf32 and nan32.""" diff --git a/tests/bin/special_chars_in_ids.bin b/tests/bin/special_chars_in_ids.bin index 8c2bfd19..bd7c1d98 100644 Binary files a/tests/bin/special_chars_in_ids.bin and b/tests/bin/special_chars_in_ids.bin differ diff --git a/tests/lqp/special_chars_in_ids.lqp b/tests/lqp/special_chars_in_ids.lqp index 43aa225a..0f2fcd46 100644 --- a/tests/lqp/special_chars_in_ids.lqp +++ b/tests/lqp/special_chars_in_ids.lqp @@ -1,4 +1,4 @@ -;; Test that identifiers can contain / # characters +;; Test that identifiers can contain / # - characters (transaction (epoch (writes @@ -10,10 +10,16 @@ (= x 1) (atom :other/rel x x)))) (def :my#relation + ([x::INT y::INT] + (and + (atom :my/relation x) + (= y x)))) + (def :my-relation ([x::INT y::INT] (and (atom :my/relation x) (= y x))))))) (reads (output :my/relation :my/relation) - (output :my#relation :my#relation)))) + (output :my#relation :my#relation) + (output :my-relation :my-relation)))) diff --git a/tests/pretty/special_chars_in_ids.lqp b/tests/pretty/special_chars_in_ids.lqp index 2ad2067b..61d3423b 100644 --- a/tests/pretty/special_chars_in_ids.lqp +++ b/tests/pretty/special_chars_in_ids.lqp @@ -6,5 +6,9 @@ (fragment :f1 (def :my/relation ([x::INT] (and (= x 1) (atom :other/rel x x)))) - (def :my#relation ([x::INT y::INT] (and (atom :my/relation x) (= y x))))))) - (reads (output :my/relation :my/relation) (output :my#relation :my#relation)))) + (def :my#relation ([x::INT y::INT] (and (atom :my/relation x) (= y x)))) + (def :my-relation ([x::INT y::INT] (and (atom :my/relation x) (= y x))))))) + (reads + (output :my/relation :my/relation) + (output :my#relation :my#relation) + (output :my-relation :my-relation)))) diff --git a/tests/pretty_debug/special_chars_in_ids.lqp b/tests/pretty_debug/special_chars_in_ids.lqp index 21313bff..02e802af 100644 --- a/tests/pretty_debug/special_chars_in_ids.lqp +++ b/tests/pretty_debug/special_chars_in_ids.lqp @@ -10,14 +10,19 @@ ([x::INT] (and (= x 1) (atom 0x91b51012f825fc7c589f890a3b18cfd4 x x)))) (def 0x1e9de369d8c3c11ff417a9c0c86e7ebe + ([x::INT y::INT] (and (atom 0x15c3d261e9b87ddca0f3e13e054457bd x) (= y x)))) + (def + 0x56ad023a6ca39615583773d0a0fa7515 ([x::INT y::INT] (and (atom 0x15c3d261e9b87ddca0f3e13e054457bd x) (= y x))))))) (reads (output :my/relation 0x15c3d261e9b87ddca0f3e13e054457bd) - (output :my#relation 0x1e9de369d8c3c11ff417a9c0c86e7ebe)))) + (output :my#relation 0x1e9de369d8c3c11ff417a9c0c86e7ebe) + (output :my-relation 0x56ad023a6ca39615583773d0a0fa7515)))) ;; Debug information ;; ----------------------- ;; Original names ;; ID `0x1e9de369d8c3c11ff417a9c0c86e7ebe` -> `my#relation` +;; ID `0x56ad023a6ca39615583773d0a0fa7515` -> `my-relation` ;; ID `0x15c3d261e9b87ddca0f3e13e054457bd` -> `my/relation` ;; ID `0x91b51012f825fc7c589f890a3b18cfd4` -> `other/rel`