Skip to content

Commit 6bc117a

Browse files
Mike Ricos and claude committed
feat(ast): Add bash CST parser with heredoc support
Complete bash 5.2 tokenizer and recursive descent parser:

Tokenizer:
- All bash token types (words, strings, variables, operators)
- Heredoc support (<<EOF, <<-EOF, <<'EOF')
- Position tracking (line, column)

Parser:
- Simple commands, pipelines, lists (&&, ||)
- Function definitions (POSIX and bash style)
- Control flow (if/elif/else, for, while, case)
- Groups { }, subshells ( ), conditionals [[ ]], arithmetic (( ))
- Heredocs as redirections

CLI:
- bash_cst.sh tokenize FILE - tokenize to lines
- bash_cst.sh parse FILE - parse to JSON CST
- bash_cst.sh functions FILE - extract function names/lines

Tests: 233 passing (tokenizer + parser + heredoc)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent eab496a commit 6bc117a

11 files changed

+3143
-0
lines changed

tetra/bash/ast/bash_cst.sh

Lines changed: 1875 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
#!/usr/bin/env bash
2+
# test_parser_basic.sh - Basic parser tests
3+
4+
#==============================================================================
# SIMPLE COMMAND PARSING
#
# Parses `echo hello` once and inspects the resulting JSON tree with jq.
# _parse_load_tokens, _parse_script and assert_equals are not defined in this
# file; presumably the sourcing test harness provides them and _parse_script
# leaves its JSON output in _PARSE_RESULT (inferred from the usage below).
# JSON is handed to jq via a here-string instead of `echo ... | jq`, which
# avoids echo's option parsing and one extra process per query.
#==============================================================================

CURRENT_TEST="parse_simple_command"
input="echo hello"
_parse_load_tokens "$input"
_parse_script
type=$(jq -r '.type' <<<"$_PARSE_RESULT")
assert_equals "script" "$type" "Root node type"

# The remaining tests in this section reuse the parse result from above.
CURRENT_TEST="parse_simple_command_count"
cmd_count=$(jq '.body.commands | length' <<<"$_PARSE_RESULT")
assert_equals "1" "$cmd_count" "Command count"

CURRENT_TEST="parse_simple_command_type"
cmd_type=$(jq -r '.body.commands[0].type' <<<"$_PARSE_RESULT")
assert_equals "simple_command" "$cmd_type" "Command type"

CURRENT_TEST="parse_simple_command_words"
word0=$(jq -r '.body.commands[0].words[0].value' <<<"$_PARSE_RESULT")
word1=$(jq -r '.body.commands[0].words[1].value' <<<"$_PARSE_RESULT")
assert_equals "echo" "$word0" "First word"
assert_equals "hello" "$word1" "Second word"
28+
29+
#==============================================================================
# MULTIPLE COMMANDS
#
# Checks that newline-separated and semicolon-separated commands each become
# their own entry under .body.commands.
# Relies on _parse_load_tokens, _parse_script, assert_equals and _PARSE_RESULT
# from outside this file (presumably the sourcing test harness).
# JSON is handed to jq via a here-string instead of `echo ... | jq`.
#==============================================================================

CURRENT_TEST="parse_multiple_commands"
input=$'echo one\necho two'
_parse_load_tokens "$input"
_parse_script
cmd_count=$(jq '.body.commands | length' <<<"$_PARSE_RESULT")
assert_equals "2" "$cmd_count" "Two commands"

# Reuses the two-line parse result from the previous test.
CURRENT_TEST="parse_multiple_commands_values"
cmd1=$(jq -r '.body.commands[0].words[1].value' <<<"$_PARSE_RESULT")
cmd2=$(jq -r '.body.commands[1].words[1].value' <<<"$_PARSE_RESULT")
assert_equals "one" "$cmd1" "First command arg"
assert_equals "two" "$cmd2" "Second command arg"

CURRENT_TEST="parse_semicolon_separated"
input="echo a; echo b; echo c"
_parse_load_tokens "$input"
_parse_script
cmd_count=$(jq '.body.commands | length' <<<"$_PARSE_RESULT")
assert_equals "3" "$cmd_count" "Three semicolon-separated commands"
52+
53+
#==============================================================================
# STRINGS AND VARIABLES
#
# Checks the .type recorded for the second word of a command when that word is
# a single-quoted string, a plain $variable, or a ${braced:-expansion}.
# Relies on _parse_load_tokens, _parse_script, assert_equals and _PARSE_RESULT
# from outside this file (presumably the sourcing test harness).
# JSON is handed to jq via a here-string instead of `echo ... | jq`.
#==============================================================================

CURRENT_TEST="parse_with_string"
input="echo 'hello world'"
_parse_load_tokens "$input"
_parse_script
str_type=$(jq -r '.body.commands[0].words[1].type' <<<"$_PARSE_RESULT")
assert_equals "string" "$str_type" "String type"

CURRENT_TEST="parse_with_variable"
input='echo $foo'
_parse_load_tokens "$input"
_parse_script
var_type=$(jq -r '.body.commands[0].words[1].type' <<<"$_PARSE_RESULT")
assert_equals "variable" "$var_type" "Variable type"

# A braced expansion with a default is expected to report the same node type
# as a plain $name.
CURRENT_TEST="parse_with_braced_variable"
input='echo ${foo:-default}'
_parse_load_tokens "$input"
_parse_script
var_type=$(jq -r '.body.commands[0].words[1].type' <<<"$_PARSE_RESULT")
assert_equals "variable" "$var_type" "Braced variable type"
77+
78+
#==============================================================================
# POSITION TRACKING
#
# Checks the .pos.line / .pos.col values attached to a command and its words.
# The expected values (line 1, col 1 for `echo`; col 6 for `hello`) imply
# 1-based line and column numbering.
# Relies on _parse_load_tokens, _parse_script, assert_equals and _PARSE_RESULT
# from outside this file (presumably the sourcing test harness).
# JSON is handed to jq via a here-string instead of `echo ... | jq`.
#==============================================================================

CURRENT_TEST="parse_position_line"
input="echo hello"
_parse_load_tokens "$input"
_parse_script
line=$(jq '.body.commands[0].pos.line' <<<"$_PARSE_RESULT")
assert_equals "1" "$line" "Command line"

# The remaining tests in this section reuse the parse result from above.
CURRENT_TEST="parse_position_col"
col=$(jq '.body.commands[0].pos.col' <<<"$_PARSE_RESULT")
assert_equals "1" "$col" "Command column"

CURRENT_TEST="parse_position_word_col"
word_col=$(jq '.body.commands[0].words[1].pos.col' <<<"$_PARSE_RESULT")
assert_equals "6" "$word_col" "Second word column"
96+
97+
#==============================================================================
# EDGE CASES
#
# Empty input and comment-only input are both expected to yield a "script"
# root whose .body.commands list is empty.
# Relies on _parse_load_tokens, _parse_script, assert_equals and _PARSE_RESULT
# from outside this file (presumably the sourcing test harness).
# JSON is handed to jq via a here-string instead of `echo ... | jq`.
#==============================================================================

CURRENT_TEST="parse_empty_input"
input=""
_parse_load_tokens "$input"
_parse_script
type=$(jq -r '.type' <<<"$_PARSE_RESULT")
assert_equals "script" "$type" "Empty input produces script"

# Reuses the empty-input parse result from the previous test.
CURRENT_TEST="parse_empty_commands"
cmd_count=$(jq '.body.commands | length' <<<"$_PARSE_RESULT")
assert_equals "0" "$cmd_count" "No commands in empty input"

CURRENT_TEST="parse_comment_only"
input="# just a comment"
_parse_load_tokens "$input"
_parse_script
cmd_count=$(jq '.body.commands | length' <<<"$_PARSE_RESULT")
assert_equals "0" "$cmd_count" "Comment-only produces no commands"
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
#!/usr/bin/env bash
2+
# test_parser_control.sh - Control flow parser tests
3+
4+
#==============================================================================
# IF STATEMENTS
#
# Checks the node type of an if statement and the fields queried below:
# .condition, .then_body, .else_body and .elif_clauses.
# Relies on _parse_load_tokens, _parse_script, assert_equals and _PARSE_RESULT
# from outside this file (presumably the sourcing test harness).
# JSON is handed to jq via a here-string instead of `echo ... | jq`.
#==============================================================================

CURRENT_TEST="parse_if_simple"
input='if true; then echo yes; fi'
_parse_load_tokens "$input"
_parse_script
cmd_type=$(jq -r '.body.commands[0].type' <<<"$_PARSE_RESULT")
assert_equals "if_clause" "$cmd_type" "If clause type"

CURRENT_TEST="parse_if_condition"
input='if test -f foo; then echo exists; fi'
_parse_load_tokens "$input"
_parse_script
cond_cmd=$(jq -r '.body.commands[0].condition.commands[0].words[0].value' <<<"$_PARSE_RESULT")
assert_equals "test" "$cond_cmd" "If condition command"

CURRENT_TEST="parse_if_then_body"
input='if true; then echo one; echo two; fi'
_parse_load_tokens "$input"
_parse_script
then_count=$(jq '.body.commands[0].then_body.commands | length' <<<"$_PARSE_RESULT")
assert_equals "2" "$then_count" "Two commands in then body"

CURRENT_TEST="parse_if_else"
input='if false; then echo yes; else echo no; fi'
_parse_load_tokens "$input"
_parse_script
else_body=$(jq -r '.body.commands[0].else_body.commands[0].words[1].value' <<<"$_PARSE_RESULT")
assert_equals "no" "$else_body" "Else body content"

CURRENT_TEST="parse_if_elif"
input='if test1; then echo 1; elif test2; then echo 2; fi'
_parse_load_tokens "$input"
_parse_script
elif_count=$(jq '.body.commands[0].elif_clauses | length' <<<"$_PARSE_RESULT")
assert_equals "1" "$elif_count" "One elif clause"
42+
43+
#==============================================================================
# FOR LOOPS
#
# Checks the node type of a for loop and the fields queried below:
# .variable, .items and .body.
# Relies on _parse_load_tokens, _parse_script, assert_equals and _PARSE_RESULT
# from outside this file (presumably the sourcing test harness).
# JSON is handed to jq via a here-string instead of `echo ... | jq`.
#==============================================================================

CURRENT_TEST="parse_for_simple"
input='for i in a b c; do echo $i; done'
_parse_load_tokens "$input"
_parse_script
cmd_type=$(jq -r '.body.commands[0].type' <<<"$_PARSE_RESULT")
assert_equals "for_clause" "$cmd_type" "For clause type"

# Reuses the parse result from parse_for_simple.
CURRENT_TEST="parse_for_variable"
var=$(jq -r '.body.commands[0].variable' <<<"$_PARSE_RESULT")
assert_equals "i" "$var" "For loop variable"

CURRENT_TEST="parse_for_items"
input='for x in one two three; do echo $x; done'
_parse_load_tokens "$input"
_parse_script
item_count=$(jq '.body.commands[0].items | length' <<<"$_PARSE_RESULT")
assert_equals "3" "$item_count" "Three items in for loop"

# Reuses the parse result from parse_for_items.
CURRENT_TEST="parse_for_first_item"
item1=$(jq -r '.body.commands[0].items[0].value' <<<"$_PARSE_RESULT")
assert_equals "one" "$item1" "First item"

CURRENT_TEST="parse_for_body"
input='for i in 1 2; do echo start; echo $i; echo end; done'
_parse_load_tokens "$input"
_parse_script
body_count=$(jq '.body.commands[0].body.commands | length' <<<"$_PARSE_RESULT")
assert_equals "3" "$body_count" "Three commands in for body"
75+
76+
#==============================================================================
# WHILE/UNTIL LOOPS
#
# Checks the node types reported for while and until loops, and that the
# while condition is reachable under .condition.commands.
# Relies on _parse_load_tokens, _parse_script, assert_equals and _PARSE_RESULT
# from outside this file (presumably the sourcing test harness).
# JSON is handed to jq via a here-string instead of `echo ... | jq`.
#==============================================================================

CURRENT_TEST="parse_while_simple"
input='while true; do echo loop; done'
_parse_load_tokens "$input"
_parse_script
cmd_type=$(jq -r '.body.commands[0].type' <<<"$_PARSE_RESULT")
assert_equals "while_clause" "$cmd_type" "While clause type"

CURRENT_TEST="parse_while_condition"
input='while test -f lock; do sleep 1; done'
_parse_load_tokens "$input"
_parse_script
cond_cmd=$(jq -r '.body.commands[0].condition.commands[0].words[0].value' <<<"$_PARSE_RESULT")
assert_equals "test" "$cond_cmd" "While condition command"

CURRENT_TEST="parse_until_simple"
input='until false; do echo waiting; done'
_parse_load_tokens "$input"
_parse_script
cmd_type=$(jq -r '.body.commands[0].type' <<<"$_PARSE_RESULT")
assert_equals "until_clause" "$cmd_type" "Until clause type"
100+
101+
#==============================================================================
# CASE STATEMENTS
#
# Checks only the node type of a single-arm case statement; the arms
# themselves are not inspected here.
# Relies on _parse_load_tokens, _parse_script, assert_equals and _PARSE_RESULT
# from outside this file (presumably the sourcing test harness).
# JSON is handed to jq via a here-string instead of `echo ... | jq`.
#==============================================================================

CURRENT_TEST="parse_case_simple"
input='case $x in foo) echo foo;; esac'
_parse_load_tokens "$input"
_parse_script
cmd_type=$(jq -r '.body.commands[0].type' <<<"$_PARSE_RESULT")
assert_equals "case_clause" "$cmd_type" "Case clause type"
111+
112+
#==============================================================================
# GROUPS AND SUBSHELLS
#
# Checks the node types for `{ ...; }` and `( ... )` and that each exposes its
# inner commands under .body.commands.
# Relies on _parse_load_tokens, _parse_script, assert_equals and _PARSE_RESULT
# from outside this file (presumably the sourcing test harness).
# JSON is handed to jq via a here-string instead of `echo ... | jq`.
#==============================================================================

CURRENT_TEST="parse_group"
input='{ echo one; echo two; }'
_parse_load_tokens "$input"
_parse_script
cmd_type=$(jq -r '.body.commands[0].type' <<<"$_PARSE_RESULT")
assert_equals "group" "$cmd_type" "Group type"

# Reuses the parse result from parse_group.
CURRENT_TEST="parse_group_body"
body_count=$(jq '.body.commands[0].body.commands | length' <<<"$_PARSE_RESULT")
assert_equals "2" "$body_count" "Two commands in group"

CURRENT_TEST="parse_subshell"
input='(echo one; echo two)'
_parse_load_tokens "$input"
_parse_script
cmd_type=$(jq -r '.body.commands[0].type' <<<"$_PARSE_RESULT")
assert_equals "subshell" "$cmd_type" "Subshell type"

# Reuses the parse result from parse_subshell.
CURRENT_TEST="parse_subshell_body"
body_count=$(jq '.body.commands[0].body.commands | length' <<<"$_PARSE_RESULT")
assert_equals "2" "$body_count" "Two commands in subshell"
137+
138+
#==============================================================================
# NESTED CONTROL FLOW
#
# Parses a for loop nested inside an if's then-branch and checks the node type
# at both levels.
# Relies on _parse_load_tokens, _parse_script, assert_equals and _PARSE_RESULT
# from outside this file (presumably the sourcing test harness).
# JSON is handed to jq via a here-string instead of `echo ... | jq`.
#==============================================================================

CURRENT_TEST="parse_nested_if_for"
input='if true; then for i in 1 2; do echo $i; done; fi'
_parse_load_tokens "$input"
_parse_script
outer_type=$(jq -r '.body.commands[0].type' <<<"$_PARSE_RESULT")
assert_equals "if_clause" "$outer_type" "Outer if"

# Reuses the parse result from above; looks inside the then-branch.
CURRENT_TEST="parse_nested_inner_for"
inner_type=$(jq -r '.body.commands[0].then_body.commands[0].type' <<<"$_PARSE_RESULT")
assert_equals "for_clause" "$inner_type" "Inner for loop"
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
#!/usr/bin/env bash
2+
# test_parser_functions.sh - Function definition parser tests
3+
4+
#==============================================================================
# POSIX STYLE FUNCTIONS
#
# Checks that `name() { ...; }` is reported as a function_def node carrying
# the function's name in .name.
# Relies on _parse_load_tokens, _parse_script, assert_equals and _PARSE_RESULT
# from outside this file (presumably the sourcing test harness).
# JSON is handed to jq via a here-string instead of `echo ... | jq`.
#==============================================================================

CURRENT_TEST="parse_posix_function"
input='foo() { echo hello; }'
_parse_load_tokens "$input"
_parse_script
cmd_type=$(jq -r '.body.commands[0].type' <<<"$_PARSE_RESULT")
assert_equals "function_def" "$cmd_type" "POSIX function type"

# Reuses the parse result from parse_posix_function.
CURRENT_TEST="parse_posix_function_name"
name=$(jq -r '.body.commands[0].name' <<<"$_PARSE_RESULT")
assert_equals "foo" "$name" "Function name"
18+
19+
#==============================================================================
# BASH STYLE FUNCTIONS
#
# Checks that both `function name { ...; }` and `function name() { ...; }`
# are reported as function_def nodes with the expected .name.
# Relies on _parse_load_tokens, _parse_script, assert_equals and _PARSE_RESULT
# from outside this file (presumably the sourcing test harness).
# JSON is handed to jq via a here-string instead of `echo ... | jq`.
#==============================================================================

CURRENT_TEST="parse_bash_function"
input='function bar { echo world; }'
_parse_load_tokens "$input"
_parse_script
cmd_type=$(jq -r '.body.commands[0].type' <<<"$_PARSE_RESULT")
assert_equals "function_def" "$cmd_type" "Bash function type"

# Reuses the parse result from parse_bash_function.
CURRENT_TEST="parse_bash_function_name"
name=$(jq -r '.body.commands[0].name' <<<"$_PARSE_RESULT")
assert_equals "bar" "$name" "Function name"

CURRENT_TEST="parse_bash_function_with_parens"
input='function baz() { echo test; }'
_parse_load_tokens "$input"
_parse_script
cmd_type=$(jq -r '.body.commands[0].type' <<<"$_PARSE_RESULT")
assert_equals "function_def" "$cmd_type" "Bash function with parens"

# Reuses the parse result from parse_bash_function_with_parens.
CURRENT_TEST="parse_bash_function_with_parens_name"
name=$(jq -r '.body.commands[0].name' <<<"$_PARSE_RESULT")
assert_equals "baz" "$name" "Function name"
44+
45+
#==============================================================================
# FUNCTION BODY
#
# Checks what a function_def node holds under .body: its type, the number of
# commands, and the first command for bodies starting with `local` or a
# pipeline.
# Relies on _parse_load_tokens, _parse_script, assert_equals and _PARSE_RESULT
# from outside this file (presumably the sourcing test harness).
# JSON is handed to jq via a here-string instead of `echo ... | jq`.
#==============================================================================

CURRENT_TEST="parse_function_body_type"
input='foo() { echo one; echo two; }'
_parse_load_tokens "$input"
_parse_script
body_type=$(jq -r '.body.commands[0].body.type' <<<"$_PARSE_RESULT")
assert_equals "compound_list" "$body_type" "Function body type"

# Reuses the parse result from parse_function_body_type.
CURRENT_TEST="parse_function_body_count"
body_count=$(jq '.body.commands[0].body.commands | length' <<<"$_PARSE_RESULT")
assert_equals "2" "$body_count" "Two commands in function body"

CURRENT_TEST="parse_function_with_local"
input='foo() { local x=1; echo $x; }'
_parse_load_tokens "$input"
_parse_script
first_cmd=$(jq -r '.body.commands[0].body.commands[0].words[0].value' <<<"$_PARSE_RESULT")
assert_equals "local" "$first_cmd" "Local declaration"

CURRENT_TEST="parse_function_with_pipeline"
input='foo() { cat file | grep pattern; }'
_parse_load_tokens "$input"
_parse_script
body_cmd_type=$(jq -r '.body.commands[0].body.commands[0].type' <<<"$_PARSE_RESULT")
assert_equals "pipeline" "$body_cmd_type" "Pipeline in function body"
73+
74+
#==============================================================================
# MULTIPLE FUNCTIONS
#
# Parses two newline-separated function definitions and checks that both
# appear, in order, under .body.commands.
# Relies on _parse_load_tokens, _parse_script, assert_equals and _PARSE_RESULT
# from outside this file (presumably the sourcing test harness).
# JSON is handed to jq via a here-string instead of `echo ... | jq`.
#==============================================================================

CURRENT_TEST="parse_multiple_functions"
input=$'foo() { echo foo; }\nbar() { echo bar; }'
_parse_load_tokens "$input"
_parse_script
cmd_count=$(jq '.body.commands | length' <<<"$_PARSE_RESULT")
assert_equals "2" "$cmd_count" "Two functions"

# Reuses the parse result from parse_multiple_functions.
CURRENT_TEST="parse_multiple_functions_names"
name1=$(jq -r '.body.commands[0].name' <<<"$_PARSE_RESULT")
name2=$(jq -r '.body.commands[1].name' <<<"$_PARSE_RESULT")
assert_equals "foo" "$name1" "First function name"
assert_equals "bar" "$name2" "Second function name"
90+
91+
#==============================================================================
# SPECIAL FUNCTION NAMES
#
# Checks that function names with a leading underscore or trailing digits are
# reported unchanged in .name.
# Relies on _parse_load_tokens, _parse_script, assert_equals and _PARSE_RESULT
# from outside this file (presumably the sourcing test harness).
# JSON is handed to jq via a here-string instead of `echo ... | jq`.
#==============================================================================

CURRENT_TEST="parse_function_underscore_name"
input='_private_func() { echo private; }'
_parse_load_tokens "$input"
_parse_script
name=$(jq -r '.body.commands[0].name' <<<"$_PARSE_RESULT")
assert_equals "_private_func" "$name" "Underscore function name"

CURRENT_TEST="parse_function_with_numbers"
input='func123() { echo numbered; }'
_parse_load_tokens "$input"
_parse_script
name=$(jq -r '.body.commands[0].name' <<<"$_PARSE_RESULT")
assert_equals "func123" "$name" "Function name with numbers"
108+
109+
#==============================================================================
# NESTED FUNCTIONS
#
# Parses a function defined inside another function's body and checks that
# both levels are reported as function_def nodes.
# Relies on _parse_load_tokens, _parse_script, assert_equals and _PARSE_RESULT
# from outside this file (presumably the sourcing test harness).
# JSON is handed to jq via a here-string instead of `echo ... | jq`.
#==============================================================================

CURRENT_TEST="parse_nested_function_outer"
input='outer() { inner() { echo nested; }; inner; }'
_parse_load_tokens "$input"
_parse_script
outer_type=$(jq -r '.body.commands[0].type' <<<"$_PARSE_RESULT")
assert_equals "function_def" "$outer_type" "Outer function"

# Reuses the parse result from above; looks at the first command of the outer
# function's body.
CURRENT_TEST="parse_nested_function_inner"
inner_type=$(jq -r '.body.commands[0].body.commands[0].type' <<<"$_PARSE_RESULT")
assert_equals "function_def" "$inner_type" "Inner function"

0 commit comments

Comments
 (0)