From 471aa8f1076f3c4f10831c49dcbeb6eed48816ed Mon Sep 17 00:00:00 2001 From: Luke Kiernan Date: Fri, 7 Nov 2025 14:09:29 -0700 Subject: [PATCH 1/2] TypeSortedCollections: first pass at Base.iterate --- src/TypeSortedCollections.jl | 91 ++++++++++++++++++++++++------------ 1 file changed, 62 insertions(+), 29 deletions(-) diff --git a/src/TypeSortedCollections.jl b/src/TypeSortedCollections.jl index fc7bc4e0f..e433844a1 100644 --- a/src/TypeSortedCollections.jl +++ b/src/TypeSortedCollections.jl @@ -92,13 +92,17 @@ end @inline Base.eltype(A::TypeSortedCollection) = Union{map(eltype, A.data)...} +# lots of lisp-y type recursion in definitions below + +# eltypes just looks at the types of the vectors stored in the TSC. eltypes(::Type{TypeSortedCollection{D, N}}) where {D, N} = eltypes(D) + function eltypes(::Type{T}) where {T <: TupleOfVectors} Base.tuple_type_cons(eltype(Base.tuple_type_head(T)), eltypes(Base.tuple_type_tail(T))) end eltypes(::Type{Tuple{}}) = Tuple{} -function vectortypes(::Type{T}) where {T <: Tuple} +function vectortypes(::Type{T}) where {T <: Tuple} # Tuple, not TupleOfVectors. needed why? Base.tuple_type_cons( Vector{Base.tuple_type_head(T)}, vectortypes(Base.tuple_type_tail(T)), @@ -141,14 +145,15 @@ Base.empty!(x::TypeSortedCollection) = foreach(empty!, x.data) @inline Base.length(x::TypeSortedCollection) = mapreduce(length, +, x.data; init = 0) indices(x::TypeSortedCollection) = x.indices -# Trick from StaticArrays: +# Trick from StaticArrays: [find the first TypeSortedCollection in a vararg list] @inline first_tsc(a1::TypeSortedCollection, as...) = a1 @inline first_tsc(a1, as...) = first_tsc(as...) @inline first_tsc_type(a1::Type{<:TypeSortedCollection}, as::Type...) = a1 @inline first_tsc_type(a1::Type, as::Type...) = first_tsc_type(as...) -# inspired by Base.ith_all +# inspired by Base.ith_all. 
+# example: _getindex_all(Val(i), j, vecindex, tsc1, tsc2, tsc3) returns (tsc1.data[i][j], tsc2.data[i][j], tsc3.data[i][j]) @inline _getindex_all(::Val, j, vecindex) = () Base.@propagate_inbounds @inline _getindex_all( vali::Val{i}, @@ -158,6 +163,7 @@ Base.@propagate_inbounds @inline _getindex_all( as..., ) where {i} = (_getindex(vali, j, vecindex, a1), _getindex_all(vali, j, vecindex, as...)...) @inline _getindex(::Val, j, vecindex, a) = a # for anything that's not an AbstractVector or TypeSortedCollection, don't index (for use in broadcast!) +# concretely: in an expression like tsc .+ 5, 5 should be broadcasted, not indexed. @inline _getindex(::Val, j, vecindex, a::AbstractVector) = a[vecindex] @inline _getindex(::Val, j, vecindex, a::Ref) = a[] @inline _getindex(::Val{i}, j, vecindex, a::TypeSortedCollection) where {i} = a.data[i][j] @@ -165,6 +171,7 @@ Base.@propagate_inbounds @inline _getindex_all( @inline _setindex!(::Val{i}, j, vecindex, a::TypeSortedCollection, val) where {i} = a.data[i][j] = val +# defined so that we can check if it's okay to do a .+ b on two TSCs a, b. @inline lengths_match(a1) = true @inline lengths_match(a1::TSCOrAbstractVector, a2::TSCOrAbstractVector, as...) = length(a1) == length(a2) && lengths_match(a2, as...) @@ -313,35 +320,61 @@ end end end -#= -@generated function Base.iterate(tsc::TSCOrAbstractVector{N}) where {N} - expr = Expr(:block) - for i in 1:N - vali = Val(i) - push!( - expr.args, - quote - let inds = leading_tsc.indices[$i] - @boundscheck indices_match($vali, inds, A1, As...) || - indices_match_fail() - @inbounds for j in LinearIndices(inds) - vecindex = inds[j] - f(_getindex_all($vali, j, vecindex, A1, As...)...) - end - end - end, - ) +# TODO seems like this should really be written using axes. +# TODO are structs Val-friendly? I don't see a simple way to make do with just a single Int state. 
+struct TSCIterState
+    type_index::Int # index into tsc.data of the type group being visited (1 to N)
+    elem_index::Int # position within that group of the next element to return
+end
+
+# Entry points: start at group 1, element 1, or resume from `state`. TODO: is @inline/@generated needed here?
+Base.iterate(tsc::TypeSortedCollection{D, N}) where {D, N} = _iterate_tsc(tsc, Val(1), 1)
+Base.iterate(
+    tsc::TypeSortedCollection{D, N},
+    state::TSCIterState, # NOTE(review): type_index is lifted to a Val at run time below; check inference
+) where {D, N} = _iterate_tsc(tsc, Val(state.type_index), state.elem_index)
+
+# Return (element, next_state) for position (group, elem_idx), skipping exhausted groups; `nothing` once group > N.
+# TODO what about TSCOrAbstractVector? TODO bounds errors and @inbounds?
+# PERF: for "large" N (how large?), the recursive tail might be unwieldy, very long.
+# if we could assume the collections are all non-empty, this would be simpler.
+@generated function _iterate_tsc(
+    tsc::TypeSortedCollection{D, N},
+    ::Val{group},
+    elem_idx::Int,
+) where {D, N, group}
+    if group > N # decided in the generator: past the last group the method body is just `nothing`
+        return :(nothing)
     end
+
     quote
-        Base.@_inline_meta
-        leading_tsc = first_tsc(A1, As...)
-        @boundscheck lengths_match(A1, As...) || lengths_match_fail()
-        $expr
-        nothing
+        vec = tsc.data[$group] # $group is spliced into the generated body as a literal index
+
+        if elem_idx <= length(vec)
+            element = vec[elem_idx]
+            return (element, TSCIterState($group, elem_idx + 1))
+        end
+
+        # Group exhausted (or empty): restart at element 1 of the next group (compile-time recursion)
+        return _iterate_tsc(tsc, Val($(group + 1)), 1)
     end
 end
 
-@generated function Base.iterate(tsc::TypeSortedCollection, state::Int)
-    return
+# slow: only intended for testing purposes.
+function Base.collect(tsc::TypeSortedCollection)
+    if isempty(tsc)
+        return Vector{eltype(tsc)}()
+    end
+    nonempty_data_array = 1 # tsc is non-empty here, so some group should be non-empty
+    while isempty(tsc.data[nonempty_data_array])
+        nonempty_data_array += 1
+    end
+    sample_element = first(tsc.data[nonempty_data_array]) # placeholder used to pre-fill result
+    result = Vector{eltype(tsc)}(fill(sample_element, length(tsc)))
+    for (indices, items) in zip(tsc.indices, tsc.data)
+        for (i, item) in zip(indices, items)
+            result[i] = item # slots listed in tsc.indices are overwritten with the stored item
+        end
+    end
+    return result
 end
-=#

From 5172dcde812025da0f33dc691dc7ae33feb93589 Mon Sep 17 00:00:00 2001
From: Luke Kiernan
Date: Fri, 7 Nov 2025 14:13:38 -0700
Subject: [PATCH 2/2] TypeSortedCollection tests

---
 test/test_TypeSortedCollections.jl | 216 +++++++++++++++++++++++++++++
 1 file changed, 216 insertions(+)
 create mode 100644 test/test_TypeSortedCollections.jl

diff --git a/test/test_TypeSortedCollections.jl b/test/test_TypeSortedCollections.jl
new file mode 100644
index 000000000..3e4b7e0aa
--- /dev/null
+++ b/test/test_TypeSortedCollections.jl
@@ -0,0 +1,216 @@
+@testset "IS.TypeSortedCollection Tests" begin
+    @testset "Constructor Tests" begin
+        # Test empty constructor
+        D = Tuple{Vector{Int}, Vector{String}}
+        tsc = IS.TypeSortedCollection{D, 2}()
+        @test isempty(tsc)
+        @test length(tsc) == 0
+        @test IS.num_types(tsc) == 2
+
+        # Test constructor with data and indices
+        data = ([1, 2], ["a", "b"])
+        indices = ([1, 3], [2, 4])
+        tsc = IS.TypeSortedCollection(data, indices)
+        @test length(tsc) == 4
+        @test !isempty(tsc)
+
+        # Test constructor from array
+        A = [1, "hello", 2.5, "world", 3]
+        tsc = IS.TypeSortedCollection(A)
+        @test length(tsc) == 5
+
+        # Test constructor with preserve_order
+        tsc_ordered = IS.TypeSortedCollection(A, true)
+        @test length(tsc_ordered) == 5
+
+        # Test constructor with explicit indices
+        A = [1, "a", 2, "b"]
+        indices = ([1, 3], [2, 4])
+        tsc = IS.TypeSortedCollection(A, indices)
+        @test collect(tsc) == A
+    end
+
+    @testset "Basic Operations" begin
+        tsc = IS.TypeSortedCollection{Tuple{Vector{Int}, Vector{String}}, 2}()
+
+        # Test push!
+        push!(tsc, 42)
+        @test length(tsc) == 1
+        @test !isempty(tsc)
+
+        push!(tsc, "hello")
+        @test length(tsc) == 2
+
+        # Test error on incompatible type
+        @test_throws ArgumentError push!(tsc, 2.5)
+
+        # Test append!
+        append!(tsc, [1, 2, "world", "test"])
+        @test length(tsc) == 6
+
+        # Test empty! and isempty
+        empty!(tsc)
+        @test isempty(tsc)
+        @test length(tsc) == 0
+    end
+
+    @testset "Type System Tests" begin
+        tsc = IS.TypeSortedCollection([1, "hello", 2.5])
+
+        # Test eltype
+        @test eltype(tsc) == Union{Int, String, Float64}
+
+        # Test num_types
+        @test IS.num_types(tsc) == 3
+        @test IS.num_types(typeof(tsc)) == 3
+
+        # Test indices function
+        idxs = IS.indices(tsc)
+        @test length(idxs) == 3
+    end
+
+    @testset "Iteration Tests" begin
+        A = [1, "hello", 2, "world", 3.0]
+        tsc = IS.TypeSortedCollection(A)
+
+        # Test basic iteration
+        collected = collect(tsc)
+        @test length(collected) == 5
+        @test Set(collected) == Set(A) # Same elements, possibly different order
+
+        # Test iteration state
+        iter_result = iterate(tsc)
+        @test iter_result !== nothing
+        element, state = iter_result
+        @test element isa Union{Int, String, Float64}
+        @test state isa IS.TSCIterState
+
+        # Test complete iteration: every yielded item comes from A, and all 5 are visited
+        count = 0
+        for item in tsc
+            @test item in A
+            count += 1
+        end
+        @test count == 5
+    end
+
+    @testset "map! Tests" begin
+        A = [1, "a", 2, "b"]
+        B = [3, "x", 4, "y"]
+        indices = ([1, 3], [2, 4])
+        src1 = IS.TypeSortedCollection(A, indices)
+        src2 = IS.TypeSortedCollection(B, indices)
+        dest2 = IS.TypeSortedCollection([0, "", 0, ""], indices)
+        map!(*, dest2, src1, src2)
+
+        @test collect(dest2) == A .* B
+    end
+
+    @testset "foreach Tests" begin
+        tsc = IS.TypeSortedCollection([1, 2, "a", "b"])
+        results = Any[]
+
+        foreach(x -> push!(results, x), tsc)
+        @test length(results) == 4
+        @test Set(results) == Set([1, 2, "a", "b"])
+    end
+
+    @testset "mapreduce Tests" begin
+        tsc = IS.TypeSortedCollection([1, 2, 3, 4])
+
+        # Test sum
+        result = mapreduce(identity, +, tsc; init = 0)
+        @test result == 10
+
+        # Test with function
+        result = mapreduce(x -> x^2, +, tsc; init = 0)
+        @test result == 30 # 1 + 4 + 9 + 16
+
+        # Test with a constant function (counts elements); note the input here is homogeneous
+        tsc_mixed = IS.TypeSortedCollection([1, 2])
+        result = mapreduce(x -> 1, +, tsc_mixed; init = 0)
+        @test result == 2
+    end
+
+    @testset "any/all Tests" begin
+        tsc_nums = IS.TypeSortedCollection([1, 2, 3, 4])
+
+        # Test any
+        @test any(x -> x > 3, tsc_nums) == true
+        @test any(x -> x > 10, tsc_nums) == false
+
+        # Test all
+        @test all(x -> x > 0, tsc_nums) == true
+        @test all(x -> x > 2, tsc_nums) == false
+
+        # Test with empty collection
+        empty_tsc = IS.TypeSortedCollection{Tuple{Vector{Int}}, 1}()
+        @test any(x -> true, empty_tsc) == false
+        @test all(x -> false, empty_tsc) == true
+
+        # Test with mixed types
+        mixed_tsc = IS.TypeSortedCollection([1, "hello"])
+        @test any(x -> isa(x, String), mixed_tsc) == true
+        @test all(x -> isa(x, String), mixed_tsc) == false
+    end
+
+    @testset "Helper Function Tests" begin
+        tsc1 = IS.TypeSortedCollection([1, 2])
+        tsc2 = IS.TypeSortedCollection([3, 4])
+        vec = [5, 6]
+
+        # Test first_tsc
+        @test IS.first_tsc(tsc1, tsc2) === tsc1
+        @test IS.first_tsc(vec, tsc1, tsc2) === tsc1
+
+        # Test lengths_match
+        @test IS.lengths_match(tsc1, tsc2) == true
+        @test IS.lengths_match(tsc1, [1, 2]) == true
+        @test IS.lengths_match(tsc1, [1, 2, 3]) == false
+    end
+
+    @testset "Error Handling Tests" begin
+        # Test constructor errors
+        @test_throws Exception IS.TypeSortedCollection{Tuple{Vector{Int}}, 2}(
+            ([1],),
+            ([1],),
+        ) # Wrong N
+
+        # Test incompatible indices
+        #=
+        data = ([1, 2], ["a"])
+        indices = ([1, 3], [2]) # indices don't match total length
+        # really?
+        @test_throws Exception IS.TypeSortedCollection(data, indices)
+        =#
+
+        # Test duplicate indices
+        data = ([1], [2])
+        indices = ([1], [1]) # duplicate index
+        @test_throws Exception IS.TypeSortedCollection(data, indices)
+    end
+
+    @testset "Edge Cases" begin
+        # Test with single type
+        # collect here seems to be a problem.
+        single_type = IS.TypeSortedCollection([1, 2, 3])
+        @test length(single_type) == 3
+        @test collect(single_type) == [1, 2, 3]
+
+        # Test with empty vectors in data
+        D = Tuple{Vector{Int}, Vector{String}}
+        empty_tsc = IS.TypeSortedCollection{D, 2}()
+        @test isempty(empty_tsc)
+        @test collect(empty_tsc) == []
+
+        # Test preserve_order with repeated types
+        A = [1, "a", 2, "b", 3]
+        tsc_ordered = IS.TypeSortedCollection(A, true)
+        collected = collect(tsc_ordered)
+        # Only the per-type counts are asserted below; relative order is not checked yet
+        int_positions = [i for (i, x) in enumerate(collected) if isa(x, Int)]
+        string_positions = [i for (i, x) in enumerate(collected) if isa(x, String)]
+        @test length(int_positions) == 3
+        @test length(string_positions) == 2
+    end
+end