diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ef97350..5a0d91c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -70,59 +70,14 @@ jobs: "OpenModelica"; spec=Pkg.RegistrySpec(url="https://github.com/JKRT/OpenModelicaRegistry.git"), ) - - name: Bootstrap inherited WendaoArrow direct sources - shell: julia --project=. {0} + - name: Resolve and build package + env: + JULIA_PKG_PRECOMPILE_AUTO: "0" run: | - ENV["JULIA_PKG_PRECOMPILE_AUTO"] = "0" - - using Downloads - using Pkg - using TOML - - function raw_project_url(url::String, rev::String) - repo_url = replace(url, r"\.git$" => "") - raw_base = replace(repo_url, "https://github.com/" => "https://raw.githubusercontent.com/") - return string(raw_base, "/", rev, "/Project.toml") - end - - function package_spec_from_source(name::String, source::Dict{String,Any}) - if haskey(source, "subdir") - return PackageSpec( - name = name, - url = source["url"], - rev = source["rev"], - subdir = source["subdir"], - ) - end - return PackageSpec(name = name, url = source["url"], rev = source["rev"]) - end - - project = TOML.parsefile(Base.active_project()) - wendaoarrow_spec = project["sources"]["WendaoArrow"] - wendaoarrow_project = TOML.parsefile( - Downloads.download( - raw_project_url(wendaoarrow_spec["url"], wendaoarrow_spec["rev"]), - ), - ) - - inherited_source_specs = PackageSpec[] - for dependency_name in sort!(collect(keys(get(wendaoarrow_project, "deps", Dict())))) - sources = get(wendaoarrow_project, "sources", Dict()) - haskey(sources, dependency_name) || continue - push!( - inherited_source_specs, - package_spec_from_source(dependency_name, sources[dependency_name]), - ) - end - - isempty(inherited_source_specs) || Pkg.add(inherited_source_specs) - - Pkg.resolve() - - uses: julia-actions/julia-buildpkg@v1.6 - with: - project: . - - uses: julia-actions/julia-runtest@v1 + julia --project=. -e 'using Pkg; Pkg.resolve(); Pkg.instantiate(); Pkg.build()' + - name: Run package tests env: JULIA_NUM_THREADS: ${{ matrix.nthreads }} - with: - project: . + JULIA_PKG_PRECOMPILE_AUTO: "0" + run: | + julia --project=. -e 'using Pkg; Pkg.test("WendaoCodeParser"; coverage=false)' diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 7b6f690..155665b 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -62,59 +62,14 @@ jobs: "OpenModelica"; spec=Pkg.RegistrySpec(url="https://github.com/JKRT/OpenModelicaRegistry.git"), ) - - name: Bootstrap inherited WendaoArrow direct sources - shell: julia --project=. {0} + - name: Resolve and build package + env: + JULIA_PKG_PRECOMPILE_AUTO: "0" run: | - ENV["JULIA_PKG_PRECOMPILE_AUTO"] = "0" - - using Downloads - using Pkg - using TOML - - function raw_project_url(url::String, rev::String) - repo_url = replace(url, r"\.git$" => "") - raw_base = replace(repo_url, "https://github.com/" => "https://raw.githubusercontent.com/") - return string(raw_base, "/", rev, "/Project.toml") - end - - function package_spec_from_source(name::String, source::Dict{String,Any}) - if haskey(source, "subdir") - return PackageSpec( - name = name, - url = source["url"], - rev = source["rev"], - subdir = source["subdir"], - ) - end - return PackageSpec(name = name, url = source["url"], rev = source["rev"]) - end - - project = TOML.parsefile(Base.active_project()) - wendaoarrow_spec = project["sources"]["WendaoArrow"] - wendaoarrow_project = TOML.parsefile( - Downloads.download( - raw_project_url(wendaoarrow_spec["url"], wendaoarrow_spec["rev"]), - ), - ) - - inherited_source_specs = PackageSpec[] - for dependency_name in sort!(collect(keys(get(wendaoarrow_project, "deps", Dict())))) - sources = get(wendaoarrow_project, "sources", Dict()) - haskey(sources, dependency_name) || continue - push!( - inherited_source_specs, - package_spec_from_source(dependency_name, sources[dependency_name]), - ) - end - - isempty(inherited_source_specs) || Pkg.add(inherited_source_specs) - - Pkg.resolve() - - uses: julia-actions/julia-buildpkg@v1.6 - with: - project: . - - uses: julia-actions/julia-runtest@v1 + julia --project=. -e 'using Pkg; Pkg.resolve(); Pkg.instantiate(); Pkg.build()' + - name: Run package tests env: JULIA_NUM_THREADS: ${{ matrix.nthreads }} - with: - project: . + JULIA_PKG_PRECOMPILE_AUTO: "0" + run: | + julia --project=. -e 'using Pkg; Pkg.test("WendaoCodeParser"; coverage=false)' diff --git a/Project.toml b/Project.toml index 24407d4..393eb86 100644 --- a/Project.toml +++ b/Project.toml @@ -5,29 +5,38 @@ authors = ["CyberXiuXian Workshop"] [deps] Absyn = "ce2f92e2-a952-11e9-0543-8b443f216f1d" +Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" +ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd" ImmutableList = "4a558cac-c1ed-11e9-20da-3584bcd8709a" JuliaSyntax = "70703baa-626e-46a2-a12c-08ffd08c73b4" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" MetaModelica = "9d7f2a79-07b5-5542-8b19-c0100dda6b06" OMParser = "11f87224-cae7-4e99-a924-e50d12f62c59" +PureHTTP2 = "7d1e1b98-28e7-4969-8df9-5a308937986a" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" WendaoArrow = "561c8d8d-4bcf-4807-873b-a6b7d1e55843" [sources] Absyn = {rev = "master", url = "https://github.com/OpenModelica/Absyn.jl"} +Arrow = {rev = "91944a6d968bbe5acd2f864346269ef0c350bd2a", url = "https://github.com/JuliaCN/arrow-julia.git"} +ArrowTypes = {rev = "91944a6d968bbe5acd2f864346269ef0c350bd2a", subdir = "src/ArrowTypes", url = "https://github.com/JuliaCN/arrow-julia.git"} ImmutableList = {rev = "master", url = "https://github.com/OpenModelica/ImmutableList.jl"} JuliaSyntax = {rev = "main", url = "https://github.com/JuliaLang/JuliaSyntax.jl"} MetaModelica = {rev = "master", url = "https://github.com/OpenModelica/MetaModelica.jl"} OMParser = {rev = "d59051069e43fb2624aa13fe8935532ca15aecec", url = "https://github.com/tao3k/OMParser.jl"} -WendaoArrow = {rev = "3ca26f93e7a0dd7342ce011114b72f74242e74de", url = "https://github.com/tao3k/WendaoArrow.jl.git"} +PureHTTP2 = {rev = "03d8853e9556a4355d7b650853cda62b1906d88e", url = "https://github.com/s-celles/PureHTTP2.jl"} +WendaoArrow = {rev = "3325a646785e022a3286d08f28b19dafb4e7c8dd", url = "https://github.com/tao3k/WendaoArrow.jl.git"} [compat] Absyn = "1.3" +Arrow = "2.8.1" +ArrowTypes = "2.3.0" ImmutableList = "0.1" JuliaSyntax = "2" MetaModelica = "0.0.5" OMParser = "0.0.3" +PureHTTP2 = "0.5.0" Tables = "1" WendaoArrow = "0.1" julia = "1.12" diff --git a/README.md b/README.md index f5718ff..2a9034d 100644 --- a/README.md +++ b/README.md @@ -45,9 +45,11 @@ Current backend status: consumed upstream 5. The current workspace lock pins `WendaoArrow.jl` to `https://github.com/tao3k/WendaoArrow.jl.git` at - `334615136a8b68f18eedc614e0cc5ad33494ecc8` instead of a local sibling path, - so package resolution and GitHub Actions use the same Arrow transport - revision + `3325a646785e022a3286d08f28b19dafb4e7c8dd` +6. The package also pins the inherited `Arrow.jl`, `ArrowTypes`, and + `PureHTTP2.jl` transport sources directly in `Project.toml`, so clean + package resolution and GitHub Actions do not rely on a workflow-local + inherited-source bootstrap Native bridge note: @@ -304,5 +306,7 @@ GitHub Actions note: 1. package-local CI now runs `Pkg.build()` plus `Pkg.test()` on `ubuntu-latest` and `macos-latest` for Julia `1.12` and `pre` 2. a separate nightly workflow runs weekly on `ubuntu-latest` -3. both workflows bootstrap `General` plus `OpenModelicaRegistry` before build - and test, so remote runners do not depend on preinstalled registries +3. both workflows bootstrap `General` plus `OpenModelicaRegistry` before + running `Pkg.resolve()`, `Pkg.instantiate()`, `Pkg.build()`, and package + tests, so remote runners resolve the same source-locked transport stack as + local runs diff --git a/src/contracts/rows/response.jl b/src/contracts/rows/response.jl index 4f039aa..d2c9904 100644 --- a/src/contracts/rows/response.jl +++ b/src/contracts/rows/response.jl @@ -1,3 +1,5 @@ +const PARSER_RESPONSE_PARTITION_ROW_LIMIT = 512 + function parser_response_arrow_table( responses::AbstractVector{ParserResponse}; metadata = nothing, @@ -24,8 +26,73 @@ function parser_response_arrow_table( merged_metadata["x-wendao-code-parser-contract-shape"] = _is_ast_query_route(route_name) ? "ast_match_rows" : "summary_item_rows" return WendaoArrow.schema_table( - Tables.rowtable(rows); + _parser_response_row_source( + rows; + schema_version = WENDAOCODEPARSER_SCHEMA_VERSION, + metadata = merged_metadata, + ); schema_version = WENDAOCODEPARSER_SCHEMA_VERSION, metadata = merged_metadata, ) end + +function _parser_response_row_source( + rows::AbstractVector{<:NamedTuple}; + schema_version::AbstractString, + metadata, +) + length(rows) <= PARSER_RESPONSE_PARTITION_ROW_LIMIT && return Tables.rowtable(rows) + partition_schema = _parser_response_partition_schema(rows) + partitions = [ + WendaoArrow.schema_table( + _parser_response_partition_table(collect(chunk), partition_schema); + schema_version = schema_version, + metadata = metadata, + ) for chunk in Iterators.partition(rows, PARSER_RESPONSE_PARTITION_ROW_LIMIT) + ] + return Tables.partitioner(partitions) +end + +function _parser_response_partition_schema(rows::AbstractVector{<:NamedTuple}) + names = fieldnames(typeof(first(rows))) + types = map(name -> _parser_response_partition_column_type(rows, name), names) + return NamedTuple{names}(Tuple(types)) +end + +function _parser_response_partition_table( + rows::AbstractVector{<:NamedTuple}, + partition_schema::NamedTuple, +) + names = keys(partition_schema) + columns = map( + name -> + _parser_response_partition_column(rows, name, getproperty(partition_schema, name)), + names, + ) + return NamedTuple{names}(Tuple(columns)) +end + +function _parser_response_partition_column_type( + rows::AbstractVector{<:NamedTuple}, + name::Symbol, +) + column_type = Union{} + for row in rows + column_type = Base.promote_typejoin(column_type, typeof(getproperty(row, name))) + end + return column_type +end + +function _parser_response_partition_column( + rows::AbstractVector{<:NamedTuple}, + name::Symbol, + column_type, +) + column = Vector{column_type}(undef, length(rows)) + for (index, row) in pairs(rows) + column[index] = getproperty(row, name) + end + + column_type === Missing && return fill(missing, length(column)) + return column +end diff --git a/src/parsers/julia/dependencies.jl b/src/parsers/julia/dependencies.jl index c7d465a..b0829e7 100644 --- a/src/parsers/julia/dependencies.jl +++ b/src/parsers/julia/dependencies.jl @@ -185,15 +185,15 @@ function _push_import!( existing_index = findfirst( entry -> String(entry["module"]) == import_name && - String(get(entry, "dependency_kind", "import")) == dependency_kind && - String(get(entry, "dependency_form", "path")) == dependency_form && - String(get(entry, "dependency_parent", "")) == - String(something(dependency_parent, "")) && - String(get(entry, "dependency_member", "")) == - String(something(dependency_member, "")) && - String(get(entry, "dependency_alias", "")) == - String(something(dependency_alias, "")) && - String(get(entry, "owner_path", get(entry, "module_path", ""))) == scope_key, + String(get(entry, "dependency_kind", "import")) == dependency_kind && + String(get(entry, "dependency_form", "path")) == dependency_form && + String(get(entry, "dependency_parent", "")) == + String(something(dependency_parent, "")) && + String(get(entry, "dependency_member", "")) == + String(something(dependency_member, "")) && + String(get(entry, "dependency_alias", "")) == + String(something(dependency_alias, "")) && + String(get(entry, "owner_path", get(entry, "module_path", ""))) == scope_key, state.imports, ) if !isnothing(existing_index) diff --git a/src/parsers/julia/syntax.jl b/src/parsers/julia/syntax.jl index e891885..a708103 100644 --- a/src/parsers/julia/syntax.jl +++ b/src/parsers/julia/syntax.jl @@ -3,7 +3,7 @@ function _julia_kind_name(node) end function _julia_child_nodes(node) - return JuliaSyntax.haschildren(node) ? JuliaSyntax.children(node) : () + return JuliaSyntax.is_leaf(node) ? () : JuliaSyntax.children(node) end function _julia_first_child_of_kind(node, expected_kind::AbstractString) @@ -41,7 +41,8 @@ end function _julia_function_signature(node, source::String) signature = _julia_node_signature(node, source) - startswith(signature, "function ") && return strip(signature[length("function ")+1:end]) + startswith(signature, "function ") && + return strip(signature[(length("function ")+1):end]) return signature end diff --git a/test/cases/flight_julia_parameter_owner_signatures.jl b/test/cases/flight_julia_parameter_owner_signatures.jl index 74a9f88..4665543 100644 --- a/test/cases/flight_julia_parameter_owner_signatures.jl +++ b/test/cases/flight_julia_parameter_owner_signatures.jl @@ -27,7 +27,7 @@ x_rows = findall( index -> summary_columns.item_group[index] == "parameter" && - summary_columns.item_name[index] == "x", + summary_columns.item_name[index] == "x", eachindex(summary_columns.item_group), ) @test length(x_rows) == 2 diff --git a/test/cases/flight_native_columns.jl b/test/cases/flight_native_columns.jl index 0f9b87a..ada88eb 100644 --- a/test/cases/flight_native_columns.jl +++ b/test/cases/flight_native_columns.jl @@ -34,7 +34,7 @@ foo_indices = findall( index -> julia_summary_columns.item_group[index] == "symbol" && - julia_summary_columns.item_name[index] == "foo", + julia_summary_columns.item_name[index] == "foo", eachindex(julia_summary_columns.item_group), ) @test sort( @@ -144,7 +144,7 @@ modelica_symbol_indices = findall( index -> modelica_summary_columns.item_group[index] == "symbol" && - modelica_summary_columns.item_name[index] == "n", + modelica_summary_columns.item_name[index] == "n", eachindex(modelica_summary_columns.item_group), ) @test sort( @@ -179,19 +179,15 @@ end ) @test response.success - documentation_item = first( - item for item in response.summary_items if item["group"] == "documentation" - ) + documentation_item = + first(item for item in response.summary_items if item["group"] == "documentation") @test length(String(documentation_item["content"])) > WendaoCodeParser.PARSER_SUMMARY_HEAVY_TEXT_MAX_CHARS summary_table = parser_response_arrow_table(MODELICA_FILE_SUMMARY_ROUTE, [response]) summary_columns = Tables.columntable(summary_table) - documentation_index = findfirst( - ==("documentation"), - summary_columns.item_group, - ) + documentation_index = findfirst(==("documentation"), summary_columns.item_group) @test !isnothing(documentation_index) documentation_payload = String(summary_columns.item_content[documentation_index]) @@ -219,7 +215,7 @@ end "content" => repeat("Modelica summary payload. ", 80), "module" => "Partitioned", "path" => "Partitioned", - ) for _ in 1:80 + ) for _ = 1:80 ], ) @@ -238,6 +234,133 @@ end @test count(==("documentation"), roundtrip_columns.item_group) == 80 end +@testset "Flight summary rows partition dense parser-summary responses" begin + response = ParserResponse( + "req-flight-modelica-dense-summary", + "Dense.mo", + "modelica_file_summary", + "omparser"; + success = true, + summary_items = [ + Dict( + "group" => "documentation", + "name" => "Dense$(index)", + "kind" => "package", + "content" => "dense summary row $(index)", + "module" => "Dense", + "path" => "Dense.$(index)", + ) for index = 1:1025 + ], + ) + + summary_table = parser_response_arrow_table(MODELICA_FILE_SUMMARY_ROUTE, [response]) + partitions = collect(Tables.partitions(summary_table)) + summary_columns = Tables.columntable(summary_table) + summary_schema = Tables.schema(summary_table) + roundtrip_table = WendaoCodeParser.WendaoArrow.Arrow.Table( + WendaoCodeParser.WendaoArrow.Arrow.tobuffer(summary_table), + ) + roundtrip_columns = Tables.columntable(roundtrip_table) + roundtrip_schema = Tables.schema(roundtrip_table) + + @test !isempty(partitions) + @test !isnothing(Tables.schema(first(partitions))) + @test summary_schema.types[findfirst(==(:item_reexported), summary_schema.names)] == + Missing + @test summary_schema.types[findfirst(==(:item_top_level), summary_schema.names)] == + Missing + @test summary_schema.types[findfirst(==(:item_is_partial), summary_schema.names)] == + Missing + @test summary_schema.types[findfirst(==(:item_is_final), summary_schema.names)] == + Missing + @test summary_schema.types[findfirst( + ==(:item_is_encapsulated), + summary_schema.names, + )] == Missing + @test roundtrip_schema.types[findfirst(==(:item_reexported), roundtrip_schema.names)] == + Missing + @test roundtrip_schema.types[findfirst(==(:item_top_level), roundtrip_schema.names)] == + Missing + @test roundtrip_schema.types[findfirst(==(:item_is_partial), roundtrip_schema.names)] == + Missing + @test roundtrip_schema.types[findfirst(==(:item_is_final), roundtrip_schema.names)] == + Missing + @test roundtrip_schema.types[findfirst( + ==(:item_is_encapsulated), + roundtrip_schema.names, + )] == Missing + @test length(summary_columns.item_group) == 1025 + @test count(==("documentation"), summary_columns.item_group) == 1025 + @test all(ismissing, summary_columns.item_reexported) + @test all(ismissing, summary_columns.item_top_level) + @test all(ismissing, summary_columns.item_is_partial) + @test all(ismissing, summary_columns.item_is_final) + @test all(ismissing, summary_columns.item_is_encapsulated) + @test length(roundtrip_columns.item_group) == 1025 + @test count(==("documentation"), roundtrip_columns.item_group) == 1025 +end + +@testset "Flight summary rows preserve nullable column schema across partitions" begin + response = ParserResponse( + "req-flight-modelica-mixed-nullable-summary", + "MixedNullable.mo", + "modelica_file_summary", + "omparser"; + success = true, + summary_items = [ + Dict( + "group" => "documentation", + "name" => "Mixed$(index)", + "kind" => "package", + "content" => "mixed nullable row $(index)", + "module" => "MixedNullable", + "path" => "MixedNullable.$(index)", + ) for index = 1:512 + ], + ) + append!( + response.summary_items, + [ + Dict( + "group" => "symbol", + "name" => "Mixed$(index)", + "kind" => "constant", + "content" => "mixed nullable row $(index)", + "module" => "MixedNullable", + "path" => "MixedNullable.$(index)", + "reexported" => isodd(index), + ) for index = 513:1025 + ], + ) + + summary_table = parser_response_arrow_table(MODELICA_FILE_SUMMARY_ROUTE, [response]) + partitions = collect(Tables.partitions(summary_table)) + summary_columns = Tables.columntable(summary_table) + summary_schema = Tables.schema(summary_table) + partition_schema_types = map(partitions) do partition + partition_schema = Tables.schema(partition) + partition_schema.types[findfirst(==(:item_reexported), partition_schema.names)] + end + roundtrip_table = WendaoCodeParser.WendaoArrow.Arrow.Table( + WendaoCodeParser.WendaoArrow.Arrow.tobuffer(summary_table), + ) + roundtrip_columns = Tables.columntable(roundtrip_table) + roundtrip_schema = Tables.schema(roundtrip_table) + + @test length(partitions) > 1 + @test all(==(first(partition_schema_types)), partition_schema_types) + @test first(partition_schema_types) == + summary_schema.types[findfirst(==(:item_reexported), summary_schema.names)] + @test roundtrip_schema.types[findfirst(==(:item_reexported), roundtrip_schema.names)] == + first(partition_schema_types) + @test count(ismissing, summary_columns.item_reexported) == 512 + @test count(==(true), skipmissing(summary_columns.item_reexported)) > 0 + @test count(==(false), skipmissing(summary_columns.item_reexported)) > 0 + @test count(ismissing, roundtrip_columns.item_reexported) == 512 + @test count(==(true), skipmissing(roundtrip_columns.item_reexported)) > 0 + @test count(==(false), skipmissing(roundtrip_columns.item_reexported)) > 0 +end + @testset "Flight summary rows parse committed Modelica demo fixtures" begin fixture_cases = ( ( @@ -272,13 +395,20 @@ end summary_table = parser_response_arrow_table(MODELICA_FILE_SUMMARY_ROUTE, [response]) summary_columns = Tables.columntable(summary_table) partitions = collect(Tables.partitions(summary_table)) + partition_row_counts = + [length(Tables.columntable(partition).item_group) for partition in partitions] roundtrip_table = WendaoCodeParser.WendaoArrow.Arrow.Table( WendaoCodeParser.WendaoArrow.Arrow.tobuffer(summary_table), ) roundtrip_columns = Tables.columntable(roundtrip_table) @test length(summary_columns.item_group) == length(response.summary_items) - @test length(partitions) == 1 + @test !isempty(partitions) + @test all( + <=(WendaoCodeParser.PARSER_RESPONSE_PARTITION_ROW_LIMIT), + partition_row_counts, + ) + @test sum(partition_row_counts) == length(response.summary_items) @test length(roundtrip_columns.item_group) == length(response.summary_items) end end