From 7feedb387f41edf37c882de07257289116b58ed2 Mon Sep 17 00:00:00 2001 From: geeknik <466878+geeknik@users.noreply.github.com> Date: Sun, 26 Apr 2026 22:06:55 -0500 Subject: [PATCH] =?UTF-8?q?fix(parser):=20repair=20Zig=20support=20?= =?UTF-8?q?=E2=80=94=20grammar=20nodes=20were=20never=20matched?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zig was registered in EXTENSION_TO_LANGUAGE and the per-language node-type tables, but the mapped types (fn_proto, fn_decl, container_declaration, call_expression, builtin_call_expr) don't exist in the tree-sitter-zig grammar shipped by tree_sitter_language_pack — that grammar emits PascalCase nodes (FnProto, VarDecl, SuffixExpr, FnCallArguments, FieldOrFnCall, BUILTINIDENTIFIER, ContainerDecl, TestDecl). Result: every Zig file produced only its File node, with zero functions, structs, imports, or calls. Rather than rename and lose information, route Zig through a constructs handler (mirroring the Lua/Elixir pattern) so we can bridge FnProto's sibling Block, distinguish struct/union/enum/opaque ContainerDecl kinds, and split @import out as IMPORTS_FROM rather than CALLS. - Empty _CLASS_TYPES/_FUNCTION_TYPES/_IMPORT_TYPES/_CALL_TYPES for "zig" with comments explaining why generic dispatch can't be used. - Add _extract_zig_constructs dispatch in _extract_from_tree. - Implement _handle_zig_fn_decl, _handle_zig_var_decl, _handle_zig_test_decl, _zig_extract_import_target, and _extract_zig_calls_in_subtree. The call walker emits CALLS for any SuffixExpr/FieldOrFnCall whose direct children include FnCallArguments, resolving the callee to the immediately preceding IDENTIFIER or BUILTINIDENTIFIER. @import is filtered out so it isn't double-counted. - Add a Zig branch to _resolve_module_to_file: relative @import("./x.zig") resolves to an absolute file path; std and build-graph module names stay unresolved (resolving the latter would require parsing build.zig). 
- Add tests/fixtures/sample_zig.zig + sample_zig_util.zig covering top-level fns, struct with methods, enum, tagged union, std + relative @import, @intCast builtin call, and a labelled test block. - Add TestZigParsing to tests/test_multilang.py (11 tests). Verified on a 41-file Zig project (Tributary): before the fix, build produced 0 nodes / 0 edges; after, 393 unique nodes (214 Function, 124 Test, 55 Class) and 2839 edges (2353 CALLS, 393 CONTAINS, 93 IMPORTS_FROM), with hub detection correctly surfacing Reader.next, sanitize helpers, and Response.deinit as the most-called sites. --- code_review_graph/parser.py | 421 ++++++++++++++++++++++++++++- tests/fixtures/sample_zig.zig | 38 +++ tests/fixtures/sample_zig_util.zig | 1 + tests/test_multilang.py | 85 ++++++ 4 files changed, 540 insertions(+), 5 deletions(-) create mode 100644 tests/fixtures/sample_zig.zig create mode 100644 tests/fixtures/sample_zig_util.zig diff --git a/code_review_graph/parser.py b/code_review_graph/parser.py index f681263a..2278171b 100644 --- a/code_review_graph/parser.py +++ b/code_review_graph/parser.py @@ -169,7 +169,10 @@ class EdgeInfo: # identifier is literally "defmodule". Dispatched via # _extract_elixir_constructs to avoid matching every ``call`` here. "elixir": [], - "zig": ["container_declaration"], + # Zig has no single class node; struct/union/enum/opaque are VarDecl + # whose RHS is a SuffixExpr > ContainerDecl. Dispatched via + # _extract_zig_constructs. + "zig": [], "powershell": ["class_statement"], "julia": ["struct_definition", "abstract_definition"], } @@ -216,7 +219,10 @@ class EdgeInfo: # Elixir: def/defp/defmacro are all ``call`` nodes whose first # identifier matches. Dispatched via _extract_elixir_constructs. "elixir": [], - "zig": ["fn_proto", "fn_decl"], + # Zig: FnProto+Block pairs sit inside a Decl node; the standard generic + # walker can't bridge the FnProto signature to its sibling Block body, + # so the whole thing is dispatched via _extract_zig_constructs. 
+ "zig": [], "powershell": ["function_statement"], "julia": [ "function_definition", @@ -256,8 +262,9 @@ class EdgeInfo: # Elixir: alias/import/require/use are all ``call`` nodes — # handled in _extract_elixir_constructs. "elixir": [], - # Zig: @import("...") is a builtin_call_expr — handled - # generically via call types below. + # Zig: @import("path") is a SuffixExpr containing a BUILTINIDENTIFIER + # "@import" + FnCallArguments holding a STRINGLITERALSINGLE. Handled in + # _extract_zig_constructs as part of VarDecl processing. "zig": [], "powershell": [], # Julia: import/using are import_statement nodes. @@ -297,7 +304,11 @@ class EdgeInfo: # _extract_elixir_constructs which filters out def/defmodule/alias/etc. # before treating what's left as a real call. "elixir": [], - "zig": ["call_expression", "builtin_call_expr"], + # Zig calls are SuffixExpr/FieldOrFnCall nodes containing FnCallArguments. + # Mapping SuffixExpr here would over-match (every expression is a + # SuffixExpr); calls are walked explicitly in + # _extract_zig_calls_in_subtree from inside function bodies. + "zig": [], "powershell": ["command_expression"], "julia": ["call_expression"], } @@ -1772,6 +1783,19 @@ def _extract_from_tree( ): continue + # --- Zig-specific constructs --- + # Zig's grammar emits PascalCase Decl/VarDecl/FnProto/SuffixExpr + # nodes that don't fit the generic class/function/import/call + # dispatch. _extract_zig_constructs handles top-level Decl and + # TestDecl nodes (functions, structs/unions/enums, @import, + # test blocks) and walks call sites itself. + if language == "zig" and self._extract_zig_constructs( + child, node_type, source, language, file_path, + nodes, edges, enclosing_class, enclosing_func, + import_map, defined_names, _depth, + ): + continue + # --- Bash-specific constructs --- # ``source ./foo.sh`` and ``. 
./foo.sh`` are commands in # tree-sitter-bash; re-interpret them as IMPORTS_FROM edges so @@ -2561,6 +2585,379 @@ def _lua_get_require_target(call_node) -> Optional[str]: return raw.strip("'\"") return None + # ------------------------------------------------------------------ + # Zig-specific helpers + # ------------------------------------------------------------------ + + _ZIG_CONTAINER_KINDS = frozenset({"struct", "union", "enum", "opaque"}) + + def _extract_zig_constructs( + self, + child, + node_type: str, + source: bytes, + language: str, + file_path: str, + nodes: list[NodeInfo], + edges: list[EdgeInfo], + enclosing_class: Optional[str], + enclosing_func: Optional[str], + import_map: Optional[dict[str, str]], + defined_names: Optional[set[str]], + _depth: int, + ) -> bool: + """Handle Zig's PascalCase AST shapes. + + Top-level forms recognised: + - ``Decl > FnProto + Block`` -> Function/Test node + - ``Decl > VarDecl`` with ``@import`` -> IMPORTS_FROM edge + - ``Decl > VarDecl`` with ``ContainerDecl`` (struct/union/enum/ + opaque) -> Class node, recurse for nested methods + - ``TestDecl`` -> Test node + + Returns True if the construct was fully handled and the main loop + should skip generic recursion. Returns False to let generic + recursion continue (e.g. unknown / line_comment children). 
+ """ + if node_type == "TestDecl": + return self._handle_zig_test_decl( + child, source, language, file_path, nodes, edges, + enclosing_class, enclosing_func, + import_map, defined_names, _depth, + ) + + if node_type != "Decl": + return False + + fn_proto = None + body_block = None + var_decl = None + for sub in child.children: + t = sub.type + if t == "FnProto" and fn_proto is None: + fn_proto = sub + elif t == "Block" and fn_proto is not None and body_block is None: + body_block = sub + elif t == "VarDecl" and var_decl is None: + var_decl = sub + + if fn_proto is not None: + return self._handle_zig_fn_decl( + child, fn_proto, body_block, source, language, file_path, + nodes, edges, enclosing_class, enclosing_func, + import_map, defined_names, _depth, + ) + + if var_decl is not None: + return self._handle_zig_var_decl( + child, var_decl, source, language, file_path, + nodes, edges, enclosing_class, enclosing_func, + import_map, defined_names, _depth, + ) + + return False + + def _handle_zig_fn_decl( + self, + decl, + fn_proto, + body_block, + source: bytes, + language: str, + file_path: str, + nodes: list[NodeInfo], + edges: list[EdgeInfo], + enclosing_class: Optional[str], + enclosing_func: Optional[str], + import_map: Optional[dict[str, str]], + defined_names: Optional[set[str]], + _depth: int, + ) -> bool: + """Emit a Function/Test node for ``fn name(...) 
ReturnType { body }``.""" + name: Optional[str] = None + for sub in fn_proto.children: + if sub.type == "IDENTIFIER": + name = sub.text.decode("utf-8", errors="replace") + break + if not name: + return False + + is_test = _is_test_function(name, file_path) + kind = "Test" if is_test else "Function" + qualified = self._qualify(name, file_path, enclosing_class) + + nodes.append(NodeInfo( + kind=kind, + name=name, + file_path=file_path, + line_start=decl.start_point[0] + 1, + line_end=decl.end_point[0] + 1, + language=language, + parent_name=enclosing_class, + is_test=is_test, + )) + container = ( + self._qualify(enclosing_class, file_path, None) + if enclosing_class + else file_path + ) + edges.append(EdgeInfo( + kind="CONTAINS", + source=container, + target=qualified, + file_path=file_path, + line=decl.start_point[0] + 1, + )) + + if body_block is not None: + self._extract_zig_calls_in_subtree( + body_block, file_path, edges, enclosing_class, name, + ) + return True + + def _handle_zig_var_decl( + self, + decl, + var_decl, + source: bytes, + language: str, + file_path: str, + nodes: list[NodeInfo], + edges: list[EdgeInfo], + enclosing_class: Optional[str], + enclosing_func: Optional[str], + import_map: Optional[dict[str, str]], + defined_names: Optional[set[str]], + _depth: int, + ) -> bool: + """Handle ``const Name = ;`` decls. + + Recognises @import (-> IMPORTS_FROM) and struct/union/enum/opaque + ContainerDecl (-> Class node + recurse). For other expressions, + scans the RHS for nested call sites so call edges aren't lost. 
+ """ + var_name: Optional[str] = None + rhs_suffix = None + for sub in var_decl.children: + t = sub.type + if t == "IDENTIFIER" and var_name is None: + var_name = sub.text.decode("utf-8", errors="replace") + elif t == "ErrorUnionExpr" and rhs_suffix is None: + for inner in sub.children: + if inner.type == "SuffixExpr": + rhs_suffix = inner + break + + if not var_name or rhs_suffix is None: + return False + + suffix_children = list(rhs_suffix.children) + + # @import("path") -> IMPORTS_FROM edge + if ( + len(suffix_children) >= 2 + and suffix_children[0].type == "BUILTINIDENTIFIER" + and suffix_children[0].text == b"@import" + and suffix_children[1].type == "FnCallArguments" + ): + target = self._zig_extract_import_target(suffix_children[1]) + if target is not None: + resolved = self._resolve_module_to_file( + target, file_path, language, + ) + edges.append(EdgeInfo( + kind="IMPORTS_FROM", + source=file_path, + target=resolved if resolved else target, + file_path=file_path, + line=decl.start_point[0] + 1, + )) + return True + + # struct / union / enum / opaque -> Class node + container_decl = None + for inner in suffix_children: + if inner.type == "ContainerDecl": + container_decl = inner + break + + if container_decl is not None: + kind_label = "struct" + for cd in container_decl.children: + if cd.type == "ContainerDeclType": + for kw in cd.children: + txt = kw.text.decode("utf-8", errors="replace") + if txt in self._ZIG_CONTAINER_KINDS: + kind_label = txt + break + break + + nodes.append(NodeInfo( + kind="Class", + name=var_name, + file_path=file_path, + line_start=decl.start_point[0] + 1, + line_end=decl.end_point[0] + 1, + language=language, + parent_name=enclosing_class, + extra={"zig_kind": kind_label}, + )) + edges.append(EdgeInfo( + kind="CONTAINS", + source=( + self._qualify(enclosing_class, file_path, None) + if enclosing_class + else file_path + ), + target=self._qualify(var_name, file_path, enclosing_class), + file_path=file_path, + 
line=decl.start_point[0] + 1, + )) + self._extract_from_tree( + container_decl, source, language, file_path, nodes, edges, + enclosing_class=var_name, + enclosing_func=enclosing_func, + import_map=import_map, + defined_names=defined_names, + _depth=_depth + 1, + ) + return True + + # Plain ``const x = expr;`` — still scan RHS for call sites so + # call edges aren't lost when calls appear at module scope. + self._extract_zig_calls_in_subtree( + rhs_suffix, file_path, edges, + enclosing_class, enclosing_func, + ) + return True + + def _handle_zig_test_decl( + self, + child, + source: bytes, + language: str, + file_path: str, + nodes: list[NodeInfo], + edges: list[EdgeInfo], + enclosing_class: Optional[str], + enclosing_func: Optional[str], + import_map: Optional[dict[str, str]], + defined_names: Optional[set[str]], + _depth: int, + ) -> bool: + """Handle ``test "label" { ... }`` blocks.""" + label: Optional[str] = None + body_block = None + for sub in child.children: + if sub.type == "STRINGLITERALSINGLE": + raw = sub.text.decode("utf-8", errors="replace") + stripped = raw.strip().strip('"').strip("'") + if stripped: + label = stripped + elif sub.type == "Block": + body_block = sub + + line_no = child.start_point[0] + 1 + base = f"test:{label}" if label else "test" + synthetic = f"{base}@L{line_no}" + qualified = self._qualify(synthetic, file_path, enclosing_class) + + nodes.append(NodeInfo( + kind="Test", + name=synthetic, + file_path=file_path, + line_start=child.start_point[0] + 1, + line_end=child.end_point[0] + 1, + language=language, + parent_name=enclosing_class, + is_test=True, + )) + edges.append(EdgeInfo( + kind="CONTAINS", + source=( + self._qualify(enclosing_class, file_path, None) + if enclosing_class + else file_path + ), + target=qualified, + file_path=file_path, + line=line_no, + )) + + if body_block is not None: + self._extract_zig_calls_in_subtree( + body_block, file_path, edges, enclosing_class, synthetic, + ) + return True + + @staticmethod + def 
_zig_extract_import_target(args_node) -> Optional[str]: + """Pull the string argument out of ``@import("path")``. + + Walks FnCallArguments > ErrorUnionExpr > SuffixExpr > + STRINGLITERALSINGLE. Returns the unquoted contents or None. + """ + for arg in args_node.children: + if arg.type != "ErrorUnionExpr": + continue + for sub in arg.children: + if sub.type != "SuffixExpr": + continue + for s in sub.children: + if s.type == "STRINGLITERALSINGLE": + raw = s.text.decode("utf-8", errors="replace") + return raw.strip().strip('"').strip("'") + return None + + def _extract_zig_calls_in_subtree( + self, + root, + file_path: str, + edges: list[EdgeInfo], + enclosing_class: Optional[str], + enclosing_func: Optional[str], + ) -> None: + """Walk a subtree and emit a CALLS edge for each call site. + + A call site is a ``SuffixExpr`` or ``FieldOrFnCall`` node whose + direct children include a ``FnCallArguments``; the callee is the + IDENTIFIER (or BUILTINIDENTIFIER) immediately preceding it. The + builtin ``@import`` is skipped here because it's already modelled + as IMPORTS_FROM by _handle_zig_var_decl. 
+ """ + src_qn = ( + self._qualify(enclosing_func, file_path, enclosing_class) + if enclosing_func + else file_path + ) + stack = [root] + while stack: + node = stack.pop() + if node.type in ("SuffixExpr", "FieldOrFnCall"): + children = node.children + for i, ch in enumerate(children): + if ch.type != "FnCallArguments" or i == 0: + continue + prev = children[i - 1] + callee: Optional[str] = None + if prev.type == "IDENTIFIER": + callee = prev.text.decode("utf-8", errors="replace") + elif prev.type == "BUILTINIDENTIFIER": + txt = prev.text.decode("utf-8", errors="replace") + if txt != "@import": + callee = txt + if callee: + edges.append(EdgeInfo( + kind="CALLS", + source=src_qn, + target=callee, + file_path=file_path, + line=node.start_point[0] + 1, + )) + break + for ch in node.children: + stack.append(ch) + # ------------------------------------------------------------------ # JS/TS: variable-assigned functions (const foo = () => {}) # ------------------------------------------------------------------ @@ -3683,6 +4080,20 @@ def _do_resolve_module( pass return None + if language == "zig": + # Zig: only relative ``@import("./foo.zig")`` paths are + # resolvable here. ``@import("std")`` and other package-style + # imports stay unresolved (the caller falls back to the raw + # module string as the edge target). 
+ if module.endswith(".zig"): + try: + target = (caller_dir / module).resolve() + if target.is_file(): + return str(target) + except (OSError, ValueError): + pass + return None + if language == "python": rel_path = module.replace(".", "/") candidates = [rel_path + ".py", rel_path + "/__init__.py"] diff --git a/tests/fixtures/sample_zig.zig b/tests/fixtures/sample_zig.zig new file mode 100644 index 00000000..e95da2ff --- /dev/null +++ b/tests/fixtures/sample_zig.zig @@ -0,0 +1,38 @@ +const std = @import("std"); +const util = @import("./sample_zig_util.zig"); + +pub fn main() !void { + std.debug.print("hello\n", .{}); + const x = helper(2); + _ = x; + util.noop(); +} + +fn helper(x: i32) i32 { + return x + 1; +} + +pub const Point = struct { + x: i32, + y: i32, + + pub fn init(x: i32, y: i32) Point { + return .{ .x = x, .y = y }; + } + + pub fn distance(self: Point, other: Point) f32 { + _ = other; + return @intCast(helper(self.x)); + } +}; + +const Color = enum { red, green, blue }; + +pub const Shape = union(enum) { + circle: f32, + square: f32, +}; + +test "helper increments" { + try expect(helper(1) == 2); +} diff --git a/tests/fixtures/sample_zig_util.zig b/tests/fixtures/sample_zig_util.zig new file mode 100644 index 00000000..c17b90f1 --- /dev/null +++ b/tests/fixtures/sample_zig_util.zig @@ -0,0 +1 @@ +pub fn noop() void {} diff --git a/tests/test_multilang.py b/tests/test_multilang.py index 9d45f434..17b92fe2 100644 --- a/tests/test_multilang.py +++ b/tests/test_multilang.py @@ -1916,3 +1916,88 @@ def test_resolver_is_idempotent(self, tmp_path): # Second run should find nothing new — all already resolved. 
class TestZigParsing:
    """Parse ``sample_zig.zig`` and check the nodes and edges it yields."""

    def setup_method(self):
        # A fresh parser and a fresh parse for every test method.
        self.parser = CodeParser()
        self.fixture = FIXTURES / "sample_zig.zig"
        parsed = self.parser.parse_file(self.fixture)
        self.nodes, self.edges = parsed

    def _edges_of(self, kind):
        """Return the parsed edges whose ``kind`` equals *kind*."""
        return [edge for edge in self.edges if edge.kind == kind]

    def test_detects_language(self):
        detected = self.parser.detect_language(Path("main.zig"))
        assert detected == "zig"

    def test_finds_top_level_functions(self):
        top_level = set()
        for node in self.nodes:
            if node.kind == "Function" and node.parent_name is None:
                top_level.add(node.name)
        assert {"main", "helper"} <= top_level

    def test_finds_struct_methods(self):
        point_methods = set()
        for node in self.nodes:
            if node.kind == "Function" and node.parent_name == "Point":
                point_methods.add(node.name)
        assert {"init", "distance"} <= point_methods

    def test_finds_struct_enum_union_classes(self):
        zig_kinds = {}
        for node in self.nodes:
            if node.kind == "Class":
                zig_kinds[node.name] = node.extra.get("zig_kind")
        assert zig_kinds.get("Point") == "struct"
        assert zig_kinds.get("Color") == "enum"
        assert zig_kinds.get("Shape") == "union"

    def test_finds_imports(self):
        targets = {edge.target for edge in self._edges_of("IMPORTS_FROM")}
        # "std" is not a relative .zig path, so it is kept as written.
        assert "std" in targets
        # The util import must show up as something other than the literal
        # relative string, i.e. as a resolved path to the fixture.
        resolved = [
            t for t in targets
            if t.endswith("sample_zig_util.zig")
            and t != "./sample_zig_util.zig"
        ]
        assert resolved

    def test_finds_calls(self):
        # Compare on the last ``::`` segment so both bare callees and
        # qualified same-file targets are matched by name.
        bare_targets = set()
        for edge in self._edges_of("CALLS"):
            bare_targets.add(edge.target.split("::")[-1])
        assert "print" in bare_targets
        assert "expect" in bare_targets
        assert "helper" in bare_targets

    def test_builtin_calls_emitted(self):
        # @intCast in Point.distance is reported with its leading "@".
        call_targets = {edge.target for edge in self._edges_of("CALLS")}
        assert "@intCast" in call_targets

    def test_at_import_is_not_a_call(self):
        # @import must appear as IMPORTS_FROM only, never as CALLS.
        call_targets = {edge.target for edge in self._edges_of("CALLS")}
        assert "@import" not in call_targets

    def test_test_block_creates_test_node(self):
        test_nodes = [node for node in self.nodes if node.kind == "Test"]
        assert len(test_nodes) == 1
        only = test_nodes[0]
        assert only.name.startswith("test:helper increments@L")
        assert only.is_test is True

    def test_calls_inside_methods_have_qualified_source(self):
        # The call to helper(...) inside Point.distance must be attributed
        # to the qualified method name, not to the file.
        call_sources = set()
        for edge in self._edges_of("CALLS"):
            call_sources.add(edge.source.split("::")[-1])
        assert "Point.distance" in call_sources

    def test_nodes_have_zig_language(self):
        assert all(node.language == "zig" for node in self.nodes)