WIP: Provide mechanism for Julia syntax evolution

Keno · Keno · commit 0109cc1b4519 · 2025-11-01T22:03:39.000Z
There are several corner cases in the Julia syntax that are essentially bugs or mistakes that we'd like to possibly remove, but can't due to backwards compatibility concerns. Similarly, when adding new syntax features, there are often cases that overlap with valid (but often nonsensical) existing syntax. In the past, we've mostly made judgement calls that these are "minor changes", but as the package ecosystem grows, so does the chance of someone accidentally using these anyway and our "minor changes" have (subjectively) resulted in more breakages recently. Fortunately, all the recent work on making the parser replaceable, combined with the fact that JuliaSyntax already supports parsing multiple revisions of Julia syntax, provides a solution here: Just let packages declare what version of the Julia syntax they are using. That way, packages would not break if we make changes to the syntax and they can be upgraded at their own pace the next time the author of that particular package upgrades to a new Julia version. The way this works is simple. Right now, the parser function is always looked up in `Core._parse`. With this PR, it is instead looked up as `rootmodule(mod)._internal_julia_parse` (slightly longer name to avoid conflicting with existing bindings of the name in downstream packages), or `Core._parse` if no such binding exists. Similarly for `_lower`. At the moment, the supported way to make this election is to write `@Base.Experimental.set_syntax_version v"1.14"` (or whatever the version is that you're writing your syntax against). However, to make this truly smooth, I think this should happen automatically through a Project.toml opt-in specifying the expected syntax version. My preference would be to use #59995 if that is merged, but this is a separate feature (with similar motivations around API evolution of course) and there could be a different opt-in mechanism. I should emphasize that I'm not proposing using this for any big syntax revolutions or anything.
I would just like to start cleaning up a few corners of the syntax that I think are universally agreed to be bad but that we've kept for backwards compatibility. This way, by the time we get around to making a breaking revision, our entire ecosystem will have already upgraded to the new syntax.
diff --git a/base/client.jl b/base/client.jl
@@ -173,8 +173,8 @@ function eval_user_input(errio, @nospecialize(ast), show_value::Bool)
     nothing
 end
 
-function _parse_input_line_core(s::String, filename::String)
-    ex = Meta.parseall(s, filename=filename)
+function _parse_input_line_core(s::String, filename::String, mod::Union{Module, Nothing})
+    ex = Meta.parseall(s; filename, mod)
     if ex isa Expr && ex.head === :toplevel
         if isempty(ex.args)
             return nothing
@@ -189,18 +189,18 @@ function _parse_input_line_core(s::String, filename::String)
     return ex
 end
 
-function parse_input_line(s::String; filename::String="none", depwarn=true)
+function parse_input_line(s::String; filename::String="none", depwarn=true, mod::Union{Module, Nothing}=nothing)
     # For now, assume all parser warnings are depwarns
     ex = if depwarn
-        _parse_input_line_core(s, filename)
+        _parse_input_line_core(s, filename, mod)
     else
         with_logger(NullLogger()) do
-            _parse_input_line_core(s, filename)
+            _parse_input_line_core(s, filename, mod)
         end
     end
     return ex
 end
-parse_input_line(s::AbstractString) = parse_input_line(String(s))
+parse_input_line(s::AbstractString; kwargs...) = parse_input_line(String(s); kwargs...)
 
 # detect the reason which caused an :incomplete expression
 # from the error message
@@ -443,7 +443,7 @@ function run_fallback_repl(interactive::Bool)
     let input = stdin
         if isa(input, File) || isa(input, IOStream)
             # for files, we can slurp in the whole thing at once
-            ex = parse_input_line(read(input, String))
+            ex = parse_input_line(read(input, String); mod=Main)
             if Meta.isexpr(ex, :toplevel)
                 # if we get back a list of statements, eval them sequentially
                 # as if we had parsed them sequentially
@@ -466,7 +466,7 @@ function run_fallback_repl(interactive::Bool)
                     ex = nothing
                     while !eof(input)
                         line *= readline(input, keep=true)
-                        ex = parse_input_line(line)
+                        ex = parse_input_line(line; mod=Main)
                         if !(isa(ex, Expr) && ex.head === :incomplete)
                             break
                         end
diff --git a/base/experimental.jl b/base/experimental.jl
@@ -746,4 +746,48 @@ macro reexport(ex)
     return esc(calls)
 end
 
+"""
+    VersionedParse(ver::VersionNumber)
+
+Callable parser hook bound to the Julia syntax version `ver`.
+
+Calling an instance as `vp(code, filename, lineno, offset, options)` forwards the
+arguments to `Base.JuliaSyntax.core_parser_hook` with `syntax_version=vp.ver`.
+If `Base.JuliaSyntax` is not defined, the call falls back to `Core._parse` when
+`vp.ver === VERSION` and throws an error otherwise.
+"""
+struct VersionedParse
+    ver::VersionNumber
+end
+
+function (vp::VersionedParse)(code, filename::String, lineno::Int, offset::Int, options::Symbol)
+    if !isdefined(Base, :JuliaSyntax)
+        if vp.ver === VERSION
+            # Delegate to the default parser hook. The hook has to be *called*
+            # here: callers expect a parse result, not the parser function.
+            return Core._parse(code, filename, lineno, offset, options)
+        end
+        error("JuliaSyntax module is required for syntax version $(vp.ver), but it is not loaded.")
+    end
+    return Base.JuliaSyntax.core_parser_hook(code, filename, lineno, offset, options; syntax_version=vp.ver)
+end
+
+"""
+    VersionedLower(ver::VersionNumber)
+
+Callable lowering hook bound to the Julia syntax version `ver`.
+
+Calling an instance as `vl(code, mod, file, line, world, warn)` forwards the
+arguments to `Base.JuliaLowering.core_lowering_hook` with `syntax_version=vl.ver`.
+If `Base.JuliaLowering` is not defined, the call falls back to `Core._lower` when
+`vl.ver === VERSION` and throws an error otherwise.
+"""
+struct VersionedLower
+    ver::VersionNumber
+end
+
+function (vp::VersionedLower)(@nospecialize(code), mod::Module,
+                              file="none", line=0, world=typemax(Csize_t), warn=false)
+    if !isdefined(Base, :JuliaLowering)
+        if vp.ver === VERSION
+            # Delegate to the default *lowering* hook (not the parser), and call it
+            # with this method's own arguments.
+            # NOTE(review): assumes `Core._lower` is bound to a callable here;
+            # `jl_lower` in src/ast.c treats a `nothing` value as unset - confirm.
+            return Core._lower(code, mod, file, line, world, warn)
+        end
+        error("JuliaLowering module is required for syntax version $(vp.ver), but it is not loaded.")
+    end
+    # Forward exactly the arguments this hook received; they are the same six
+    # values `jl_lower` passes after the hook itself.
+    return Base.JuliaLowering.core_lowering_hook(code, mod, file, line, world, warn; syntax_version=vp.ver)
+end
+
+"""
+    set_syntax_version(m::Module, ver::VersionNumber)
+
+Select syntax version `ver` for the root module `m` by declaring the constants
+`_internal_julia_parse` and `_internal_julia_lower` in `m`, bound to a
+`VersionedParse(ver)` and a `VersionedLower(ver)` respectively.
+
+Throws an error if `m` is not a root module. Returns `nothing`.
+"""
+function set_syntax_version(m::Module, ver::VersionNumber)
+    Base.is_root_module(m) ||
+        error("set_syntax_version can only be called on root modules")
+    # Install the parser hook first, then the lowering hook.
+    Core.declare_const(m, :_internal_julia_parse, VersionedParse(ver))
+    Core.declare_const(m, :_internal_julia_lower, VersionedLower(ver))
+    return nothing
+end
+
+"""
+    @set_syntax_version ver
+
+Expand to a call of `set_syntax_version` on the module in which the macro is
+used. `ver` is escaped, so it is evaluated in the caller's scope.
+"""
+macro set_syntax_version(ver)
+    return :($set_syntax_version($__module__, $(esc(ver))))
+end
+
 end # module
diff --git a/base/meta.jl b/base/meta.jl
@@ -304,12 +304,22 @@ end
 
 ParseError(msg::AbstractString) = ParseError(msg, nothing)
 
+# N.B.: Should match definition in src/ast.c:jl_parse
+#
+# Return the parser hook to use for code belonging to `mod`.
+#
+# With `mod === nothing` this is `Core._parse`. Otherwise the root module of
+# `mod` is consulted: if it has an `_internal_julia_parse` binding whose value is
+# not `nothing`, that value is returned; in every other case `Core._parse` is.
+function parser_for_module(mod::Union{Module, Nothing})
+    mod === nothing && return Core._parse
+    root = Base.moduleroot(mod)
+    if isdefined(root, :_internal_julia_parse)
+        parser = getglobal(root, :_internal_julia_parse)
+        # `jl_parse` treats a hook bound to `nothing` as unset and falls back to
+        # `Core._parse`; do the same here instead of returning an uncallable value.
+        parser === nothing || return parser
+    end
+    return Core._parse
+end
+
 function _parse_string(text::AbstractString, filename::AbstractString,
-                       lineno::Integer, index::Integer, options)
+                       lineno::Integer, index::Integer, options,
+                       _parse=parser_for_module(nothing))
     if index < 1 || index > ncodeunits(text) + 1
         throw(BoundsError(text, index))
     end
-    ex, offset::Int = Core._parse(text, filename, lineno, index-1, options)
+    ex, offset::Int = _parse(text, filename, lineno, index-1, options)
     ex, offset+1
 end
 
@@ -346,8 +356,8 @@ julia> Meta.parse("(α, β) = 3, 5", 11, greedy=false)
 ```
 """
 function parse(str::AbstractString, pos::Integer;
-               filename="none", greedy::Bool=true, raise::Bool=true, depwarn::Bool=true)
-    ex, pos = _parse_string(str, String(filename), 1, pos, greedy ? :statement : :atom)
+               filename="none", greedy::Bool=true, raise::Bool=true, depwarn::Bool=true, mod = nothing)
+    ex, pos = _parse_string(str, String(filename), 1, pos, greedy ? :statement : :atom, parser_for_module(mod))
     if raise && isexpr(ex, :error)
         err = ex.args[1]
         if err isa String
@@ -386,8 +396,8 @@ julia> Meta.parse("x = ")
 ```
 """
 function parse(str::AbstractString;
-               filename="none", raise::Bool=true, depwarn::Bool=true)
-    ex, pos = parse(str, 1; filename, greedy=true, raise, depwarn)
+               filename="none", raise::Bool=true, depwarn::Bool=true, mod = nothing)
+    ex, pos = parse(str, 1; filename, greedy=true, raise, depwarn, mod = mod)
     if isexpr(ex, :error)
         return ex
     end
@@ -398,12 +408,12 @@ function parse(str::AbstractString;
     return ex
 end
 
-function parseatom(text::AbstractString, pos::Integer; filename="none", lineno=1)
-    return _parse_string(text, String(filename), lineno, pos, :atom)
+function parseatom(text::AbstractString, pos::Integer; filename="none", lineno=1, mod = nothing)
+    return _parse_string(text, String(filename), lineno, pos, :atom, parser_for_module(mod))
 end
 
-function parseall(text::AbstractString; filename="none", lineno=1)
-    ex,_ = _parse_string(text, String(filename), lineno, 1, :all)
+function parseall(text::AbstractString; filename="none", lineno=1, mod = nothing)
+    ex,_ = _parse_string(text, String(filename), lineno, 1, :all, parser_for_module(mod))
     return ex
 end
 
diff --git a/src/ast.c b/src/ast.c
@@ -1224,15 +1224,19 @@ JL_DLLEXPORT jl_value_t *jl_fl_lower(jl_value_t *expr, jl_module_t *inmodule,
 JL_DLLEXPORT jl_value_t *jl_lower(jl_value_t *expr, jl_module_t *inmodule,
                                   const char *filename, int line, size_t world, bool_t warn)
 {
-    jl_value_t *core_lower = NULL;
-    if (jl_core_module)
-        core_lower = jl_get_global_value(jl_core_module, jl_symbol("_lower"), jl_current_task->world_age);
-    if (!core_lower || core_lower == jl_nothing) {
+    jl_value_t *julia_lower = NULL;
+    if (inmodule) {
+        jl_module_t *this_root_module = jl_module_root(inmodule);
+        julia_lower = jl_get_global(this_root_module, jl_symbol("_internal_julia_lower"));
+    }
+    if ((!julia_lower || julia_lower == jl_nothing) && jl_core_module)
+        julia_lower = jl_get_global_value(jl_core_module, jl_symbol("_lower"), jl_current_task->world_age);
+    if (!julia_lower || julia_lower == jl_nothing) {
         return jl_fl_lower(expr, inmodule, filename, line, world, warn);
     }
     jl_value_t **args;
     JL_GC_PUSHARGS(args, 7);
-    args[0] = core_lower;
+    args[0] = julia_lower;
     args[1] = expr;
     args[2] = (jl_value_t*)inmodule;
     args[3] = jl_cstr_to_string(filename);
@@ -1288,20 +1292,24 @@ jl_code_info_t *jl_inner_ctor_body(jl_array_t *fieldkinds, jl_module_t *inmodule
 // `text` is passed as a pointer to allow raw non-String buffers to be used
 // without copying.
 jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t *filename,
-                     size_t lineno, size_t offset, jl_value_t *options)
+                     size_t lineno, size_t offset, jl_value_t *options, jl_module_t *inmodule)
 {
-    jl_value_t *core_parse = NULL;
-    if (jl_core_module) {
-        core_parse = jl_get_global(jl_core_module, jl_symbol("_parse"));
+    jl_value_t *parser = NULL;
+    if (inmodule) {
+        inmodule = jl_module_root(inmodule);
+        parser = jl_get_global(inmodule, jl_symbol("_internal_julia_parse"));
+    }
+    if ((!parser || parser == jl_nothing) && jl_core_module) {
+        parser = jl_get_global(jl_core_module, jl_symbol("_parse"));
     }
-    if (!core_parse || core_parse == jl_nothing) {
+    if (!parser || parser == jl_nothing) {
         // In bootstrap, directly call the builtin parser.
         jl_value_t *result = jl_fl_parse(text, text_len, filename, lineno, offset, options);
         return result;
     }
     jl_value_t **args;
     JL_GC_PUSHARGS(args, 6);
-    args[0] = core_parse;
+    args[0] = parser;
     args[1] = (jl_value_t*)jl_alloc_svec(2);
     jl_svecset(args[1], 0, jl_box_uint8pointer((uint8_t*)text));
     jl_svecset(args[1], 1, jl_box_long(text_len));
@@ -1330,7 +1338,7 @@ JL_DLLEXPORT jl_value_t *jl_parse_all(const char *text, size_t text_len,
 {
     jl_value_t *fname = jl_pchar_to_string(filename, filename_len);
     JL_GC_PUSH1(&fname);
-    jl_value_t *p = jl_parse(text, text_len, fname, lineno, 0, (jl_value_t*)jl_all_sym);
+    jl_value_t *p = jl_parse(text, text_len, fname, lineno, 0, (jl_value_t*)jl_all_sym, NULL);
     JL_GC_POP();
     return jl_svecref(p, 0);
 }
@@ -1343,7 +1351,7 @@ JL_DLLEXPORT jl_value_t *jl_parse_string(const char *text, size_t text_len,
     jl_value_t *fname = jl_cstr_to_string("none");
     JL_GC_PUSH1(&fname);
     jl_value_t *result = jl_parse(text, text_len, fname, 1, offset,
-                                  (jl_value_t*)(greedy ? jl_statement_sym : jl_atom_sym));
+                                  (jl_value_t*)(greedy ? jl_statement_sym : jl_atom_sym), NULL);
     JL_GC_POP();
     return result;
 }
diff --git a/src/julia_internal.h b/src/julia_internal.h
@@ -932,6 +932,7 @@ STATIC_INLINE size_t module_usings_max(jl_module_t *m) JL_NOTSAFEPOINT {
 }
 
 JL_DLLEXPORT jl_sym_t *jl_module_name(jl_module_t *m) JL_NOTSAFEPOINT;
+jl_module_t *jl_module_root(jl_module_t *m);
 void jl_add_scanned_method(jl_module_t *m, jl_method_t *meth);
 jl_value_t *jl_eval_global_var(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *e, size_t world);
 JL_DLLEXPORT jl_value_t *jl_eval_globalref(jl_globalref_t *g, size_t world);
@@ -1366,7 +1367,7 @@ jl_tupletype_t *arg_type_tuple(jl_value_t *arg1, jl_value_t **args, size_t nargs
 JL_DLLEXPORT int jl_has_meta(jl_array_t *body, jl_sym_t *sym) JL_NOTSAFEPOINT;
 
 JL_DLLEXPORT jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t *filename,
-                                  size_t lineno, size_t offset, jl_value_t *options);
+                                  size_t lineno, size_t offset, jl_value_t *options, jl_module_t *inmodule);
 jl_code_info_t *jl_inner_ctor_body(jl_array_t *fieldkinds, jl_module_t *inmodule, const char *file, int line);
 jl_code_info_t *jl_outer_ctor_body(jl_value_t *thistype, size_t nfields, size_t nsparams, jl_module_t *inmodule, const char *file, int line);
 void jl_ctor_def(jl_value_t *ty, jl_value_t *functionloc);
diff --git a/src/timing.c b/src/timing.c
@@ -10,8 +10,6 @@
 #define DISABLE_FREQUENT_EVENTS
 #endif
 
-jl_module_t *jl_module_root(jl_module_t *m);
-
 #ifdef __cplusplus
 extern "C" {
 #endif
diff --git a/src/toplevel.c b/src/toplevel.c
@@ -830,7 +830,7 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text,
     JL_GC_PUSH3(&ast, &result, &expression);
 
     ast = jl_svecref(jl_parse(jl_string_data(text), jl_string_len(text),
-                              filename, 1, 0, (jl_value_t*)jl_all_sym), 0);
+                              filename, 1, 0, (jl_value_t*)jl_all_sym, module), 0);
     if (!jl_is_expr(ast) || ((jl_expr_t*)ast)->head != jl_toplevel_sym) {
         jl_errorf("jl_parse_all() must generate a top level expression");
     }
diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl
@@ -740,7 +740,7 @@ function run_frontend(repl::BasicREPL, backend::REPLBackendRef)
                     rethrow()
                 end
             end
-            ast = Base.parse_input_line(line)
+            ast = Base.parse_input_line(line; mod=Base.active_module(repl))
             (isa(ast,Expr) && ast.head === :incomplete) || break
         end
         if !isempty(line)
@@ -814,7 +814,8 @@ REPLCompletionProvider() = REPLCompletionProvider(LineEdit.Modifiers())
 mutable struct ShellCompletionProvider <: CompletionProvider end
 struct LatexCompletions <: CompletionProvider end
 
-Base.active_module((; mistate)::LineEditREPL) = mistate === nothing ? Main : mistate.active_module
+Base.active_module(mistate::MIState) = mistate.active_module
+Base.active_module((; mistate)::LineEditREPL) = mistate === nothing ? Main : Base.active_module(mistate)
 Base.active_module(::AbstractREPL) = Main
 Base.active_module(d::REPLDisplay) = Base.active_module(d.repl)
 
@@ -1117,7 +1118,7 @@ end
 LineEdit.reset_state(hist::REPLHistoryProvider) = history_reset_state(hist)
 
 function return_callback(s)
-    ast = Base.parse_input_line(takestring!(copy(LineEdit.buffer(s))), depwarn=false)
+    ast = Base.parse_input_line(takestring!(copy(LineEdit.buffer(s))); mod=Base.active_module(s), depwarn=false)
     return !(isa(ast, Expr) && ast.head === :incomplete)
 end
 
@@ -1286,7 +1287,7 @@ function setup_interface(
         repl = repl,
         complete = replc,
         # When we're done transform the entered line into a call to helpmode function
-        on_done = respond(line::String->helpmode(outstream(repl), line, repl.mistate.active_module),
+        on_done = respond(line::String->helpmode(outstream(repl), line, Base.active_module(repl)),
                           repl, julia_prompt, pass_empty=true, suppress_on_semicolon=false))
 
 
@@ -1367,7 +1368,7 @@ function setup_interface(
     help_mode.hist = hp
     dummy_pkg_mode.hist = hp
 
-    julia_prompt.on_done = respond(x->Base.parse_input_line(x,filename=repl_filename(repl,hp)), repl, julia_prompt)
+    julia_prompt.on_done = respond(x->Base.parse_input_line(x; filename=repl_filename(repl,hp), mod=Base.active_module(repl)), repl, julia_prompt)
 
     shell_prompt_len = length(SHELL_PROMPT)
     help_prompt_len = length(HELP_PROMPT)
@@ -1531,7 +1532,7 @@ function setup_interface(
                 dump_tail = false
                 nl_pos = findfirst('\n', input[oldpos:end])
                 if s.current_mode == julia_prompt
-                    ast, pos = Meta.parse(input, oldpos, raise=false, depwarn=false)
+                    ast, pos = Meta.parse(input, oldpos, raise=false, depwarn=false, mod=Base.active_module(s))
                     if (isa(ast, Expr) && (ast.head === :error || ast.head === :incomplete)) ||
                             (pos > ncodeunits(input) && !endswith(input, '\n'))
                         # remaining text is incomplete (an error, or parser ran to the end but didn't stop with a newline):
@@ -1787,7 +1788,7 @@ function run_frontend(repl::StreamREPL, backend::REPLBackendRef)
         end
         line = readline(repl.stream, keep=true)
         if !isempty(line)
-            ast = Base.parse_input_line(line)
+            ast = Base.parse_input_line(line; mod=Base.active_module(repl))
             if have_color
                 print(repl.stream, Base.color_normal)
             end
diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl
@@ -993,6 +993,7 @@ end
 
 function completions(string::String, pos::Int, context_module::Module=Main, shift::Bool=true, hint::Bool=false)
     # filename needs to be string so macro can be evaluated
+    # TODO: JuliaSyntax version API here
     node = parseall(CursorNode, string, ignore_errors=true, keep_parens=true, filename="none")
     cur = @something seek_pos(node, pos) node
 

Original file line number	Diff line number	Diff line change
`@@ -830,7 +830,7 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text,`
`830`	`830`	`JL_GC_PUSH3(&ast, &result, &expression);`
`831`	`831`
`832`	`832`	`ast = jl_svecref(jl_parse(jl_string_data(text), jl_string_len(text),`
`833`		`- filename, 1, 0, (jl_value_t*)jl_all_sym), 0);`
	`833`	`+ filename, 1, 0, (jl_value_t*)jl_all_sym, module), 0);`
`834`	`834`	`if (!jl_is_expr(ast) \|\| ((jl_expr_t*)ast)->head != jl_toplevel_sym) {`
`835`	`835`	`jl_errorf("jl_parse_all() must generate a top level expression");`
`836`	`836`	`}`