@@ -88,6 +88,11 @@ the last interval, which is closed on both ends, i.e. `[lower, upper]`.
8888If `x` accepts missing values (i.e. `eltype(x) >: Missing`) the returned array will
8989also accept them.
9090
91+ !!! note
92+ For floating-point data, breaks may be rounded to `sigdigits` significant digits
93+ when generating interval labels, meaning that the labels may not exactly reflect
94+ the cutpoints actually used.
95+
9196# Keyword arguments
9297* `extend::Union{Bool, Missing}=false`: when `false`, an error is raised if some values
9398 in `x` fall outside of the breaks; when `true`, breaks are automatically added to include
@@ -312,24 +317,20 @@ in (sorted) `qs`.
312317function find_breaks (v:: AbstractVector , qs:: AbstractVector )
# Scans `v` once, in order. Each time an element `x` is not less than the current
# quantile `q`, `x` is stored as the next break and the scan moves on to the next
# quantile (and the next element of `v`).
# NOTE(review): the only caller visible here passes an already sorted vector —
# confirm that `v` is always sorted.
# In this diff view, the removed (`-`) lines also filled `breaks_prev` with the element
# preceding each break; the added (`+`) lines drop that second output and return only `breaks`.
313318 n = length (qs)
314319 breaks = similar (v, n)
315- breaks_prev = similar (v, n)
316- n == 0 && return (breaks, breaks_prev)
# Nothing to look up when `qs` is empty (this also guards the `qs[1]` access below)
320+ n == 0 && return breaks
317321
318322 i = 1
319323 q = qs[1 ]
320- @inbounds for j in eachindex (v)
321- x = v[j]
324+ @inbounds for x in v
322325 # Use isless and isequal to differentiate -0.0 from 0.0
323326 if isless (q, x) || isequal (q, x)
324327 breaks[i] = x
325- # FIXME : handle duplicated breaks
326- breaks_prev[i] = v[clamp (j- 1 , firstindex (v), lastindex (v))]
327328 i += 1
# Every requested break has been filled: stop scanning
328329 i > n && break
329330 q = qs[i]
330331 end
331332 end
# NOTE(review): if `v` is exhausted before every quantile is matched, the remaining
# entries of `breaks` keep whatever `similar` left in them — confirm that callers
# guarantee no entry of `qs` exceeds the largest element of `v`.
332- return ( breaks, breaks_prev)
333+ return breaks
333334end
334335
335336"""
@@ -346,6 +347,11 @@ but breaks are taken from actual data values instead of estimated quantiles.
346347If `x` contains `missing` values, they are automatically skipped when computing
347348quantiles.
348349
350+ !!! note
351+ For floating-point data, breaks may be rounded to `sigdigits` significant digits
352+ when generating interval labels, meaning that the labels may not exactly reflect
353+ the cutpoints actually used.
354+
349355# Keyword arguments
350356* `labels::Union{AbstractVector, Function}`: a vector of strings, characters
351357 or numbers giving the names to use for the intervals; or a function
@@ -376,8 +382,7 @@ function cut(x::AbstractArray, ngroups::Integer;
376382 throw (ArgumentError (" NaN values are not allowed in input vector" ))
377383 end
378384 qs = quantile! (sorted_x, (1 : (ngroups- 1 ))/ ngroups, sorted= true )
379- breaks, breaks_prev = find_breaks (sorted_x, qs)
380- breaks = [min_x; breaks; max_x]
385+ breaks = [min_x; find_breaks (sorted_x, qs); max_x]
381386 if ! allowempty && ! allunique (@view breaks[1 : end - 1 ])
382387 throw (ArgumentError (" cannot compute $ngroups quantiles due to " *
383388 " too many duplicated values in `x`. " *
@@ -386,38 +391,6 @@ function cut(x::AbstractArray, ngroups::Integer;
386391 end
387392 if labels === nothing
388393 labels = allowempty ? numbered_formatter : default_formatter
389-
390- if eltype (breaks) <: AbstractFloat
391- while true
392- local i
393- for outer i in 2 : lastindex (breaks)
394- b1 = breaks[i- 1 ]
395- b2 = breaks[i]
396- isequal (b1, b2) && continue
397-
398- # Find minimal number of digits so that `floor` does not
399- # return a value that is lower than value immediately below break
400- # We skip the first break, which is the minimum and has no equivalent
401- # in `breaks_prev`
402- b1_rounded = round (b1, sigdigits= sigdigits)
403- b2_rounded = round (b2, sigdigits= sigdigits)
404- if i < lastindex (breaks) &&
405- (isequal (b2_rounded, breaks_prev[i- 1 ]) || isless (b2_rounded, breaks_prev[i- 1 ]))
406- sigdigits += 1
407- break
408- end
409-
410- # Find minimal number of digits so that breaks are unique
411- b1_str = Printf. format (CUT_FMT, sigdigits, b1)
412- b2_str = Printf. format (CUT_FMT, sigdigits, b2)
413- if b1_str == b2_str
414- sigdigits += 1
415- break
416- end
417- end
418- i == lastindex (breaks) && break
419- end
420- end
421394 end
422395 return cut (x, breaks; labels= labels, sigdigits= sigdigits, allowempty= allowempty)
423396end
0 commit comments