Skip to content

Commit 41c2a6e

Browse files
committed
Add _groupby
1 parent bb4d387 commit 41c2a6e

File tree

2 files changed

+54
-5
lines changed

2 files changed

+54
-5
lines changed

src/SoleBase.jl

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,14 @@ function slicedataset(
5656
return deepcopy(dataset)
5757
else
5858
dataset_slice = vec(collect(dataset_slice))
59-
@assert eltype(dataset_slice) <: Integer
60-
@assert (allow_no_instances ||
59+
if !(eltype(dataset_slice) <: Integer)
60+
error("Cannot slice dataset with slice of type $(eltype(dataset_slice))")
61+
end
62+
if !(allow_no_instances ||
6163
(!(dataset_slice isa Union{AbstractVector{<:Integer},Tuple{<:Integer}}) ||
62-
length(dataset_slice) > 0)) "Cannot apply empty slice to dataset."
64+
length(dataset_slice) > 0))
65+
error("Cannot apply empty slice to dataset.")
66+
end
6367
return instances(dataset, dataset_slice, Val(return_view); kwargs...)
6468
end
6569
end

src/utils.jl

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,15 @@ Log detailed debug info
1414
"""
1515
const LogDetail = LogLevel(-1500)
1616

17-
17+
"""
18+
Returns the human-readable size in Bytes/KBs/MBs/GBs/TBs of a Julia object.
19+
"""
1820
function humansize(X; digits = 2, minshowndigits = digits)
1921
s = Base.summarysize(X)
2022
d = repeat('0', digits-minshowndigits)
21-
if !startswith(string(round(s/1024/1024/1024, digits=digits)), "0.$(d)")
23+
if !startswith(string(round(s/1024/1024/1024/1024, digits=digits)), "0.$(d)")
24+
"$(s/1024/1024/1024/1024 |> x->round(x, digits=digits)) TBs"
25+
elseif !startswith(string(round(s/1024/1024/1024, digits=digits)), "0.$(d)")
2226
"$(s/1024/1024/1024 |> x->round(x, digits=digits)) GBs"
2327
elseif !startswith(string(round(s/1024/1024, digits=digits)), "0.$(d)")
2428
"$(s/1024/1024 |> x->round(x, digits=digits)) MBs"
@@ -68,6 +72,47 @@ function nat_sort(x, y)
6872
return length(xarr) < length(yarr)
6973
end
7074

75+
# https://discourse.julialang.org/t/groupby-function/9896
76+
"""
    _groupby(v::AbstractVector, l::AbstractVector)

Group the items of `l` according to the corresponding values in `v`.

Return a `Dict{eltype(v),Vector{eltype(l)}}` that maps each distinct value of `v` to the
items of `l` found at the same positions, in their original order.

# Throws
- `DimensionMismatch`: if `v` and `l` do not have the same length.

# Examples
```julia
julia> days = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];

julia> months = [:Jan, :Feb, :Mar, :Apr, :May, :Jun, :Jul, :Aug, :Sep, :Oct, :Nov, :Dec];

julia> _groupby(days, months) == Dict(
           31 => [:Jan, :Mar, :May, :Jul, :Aug, :Oct, :Dec],
           28 => [:Feb],
           30 => [:Apr, :Jun, :Sep, :Nov],
       )
true
```

Adapted from https://discourse.julialang.org/t/groupby-function/9896
"""
function _groupby(v::AbstractVector, l::AbstractVector)
    # Explicit check instead of `@assert`: assertions may be disabled, and this is
    # validation of caller input rather than an internal invariant.
    if length(v) != length(l)
        throw(DimensionMismatch(
            "Cannot group: got $(length(v)) keys for $(length(l)) items."
        ))
    end
    res = Dict{eltype(v),Vector{eltype(l)}}()
    for (k, val) in zip(v, l)
        # The function form of `get!` only builds a new group when `k` is unseen,
        # avoiding one throwaway allocation per item.
        push!(get!(() -> eltype(l)[], res, k), val)
    end
    return res
end
96+
"""
    _groupby(f, l::AbstractVector)

Group the items of `l` according to the value that `f` returns for each of them.

Return a `Dict` that maps each distinct `f(x)` to the vector of items `x` of `l` that
produced it, in their original order. `f` is called exactly once per item. The key type
is the element type of `map(f, l)` and the values are `Vector{eltype(l)}`, so an empty
`l` yields an empty dictionary and keys of differing types share a wider key type.

# Examples
```julia
julia> _groupby(iseven, 1:10) == Dict(false => [1, 3, 5, 7, 9], true => [2, 4, 6, 8, 10])
true

julia> isempty(_groupby(iseven, Int[]))
true
```
"""
function _groupby(f, l::AbstractVector)
    # Computing all keys up front lets `map` pick a key type that fits every result
    # (and a sensible one for empty input), instead of fixing it from `f(l[1])`.
    ks = map(f, l)
    res = Dict{eltype(ks),Vector{eltype(l)}}()
    for (k, val) in zip(ks, l)
        # Groups are typed by `eltype(l)` so that any item of `l` can be pushed.
        push!(get!(() -> eltype(l)[], res, k), val)
    end
    return res
end
115+
71116
############################################################################################
72117

73118
# Single-argument method: the argument is returned unchanged.
function _typejoin(S::_S) where {_S}
    return S
end

0 commit comments

Comments
 (0)