Skip to content

Commit 0d6e7eb

Browse files
author
KristofferC
committed
- do not filter out same string from different modes
- use a consistent type for `seen` and pass in if we should deduplicate - reuse a set accross searches
1 parent 62a4e31 commit 0d6e7eb

File tree

3 files changed

+33
-15
lines changed

3 files changed

+33
-15
lines changed

stdlib/REPL/src/History/resumablefiltering.jl

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -253,24 +253,25 @@ end
253253

254254

255255
"""
256-
filterchunkrev!(out, candidates, spec; idx, maxtime, maxresults, seen) -> Int
256+
filterchunkrev!(out, candidates, spec; idx, maxtime, maxresults, seen, deduplicate) -> Int
257257
258258
Incrementally filter `candidates[1:idx]` in reverse order.
259259
260260
Pushes matches onto `out` until either `maxtime` is exceeded or `maxresults`
261-
collected, then returns the new resume index. When `seen` is provided (a Set{String}),
262-
only unique entries (by content) are added to avoid showing duplicate history items.
261+
collected, then returns the new resume index. When `deduplicate` is true,
262+
only unique entries (by mode and content) are added to avoid showing duplicate history items.
263263
"""
264264
function filterchunkrev!(out::Vector{HistEntry}, candidates::DenseVector{HistEntry},
265265
spec::FilterSpec, idx::Int = length(candidates);
266266
maxtime::Float64 = Inf, maxresults::Int = length(candidates),
267-
seen::Union{Nothing,Set{String}} = nothing)
267+
seen::Set{Tuple{Symbol,String}} = Set{Tuple{Symbol,String}}(),
268+
deduplicate::Bool = false)
268269
batchsize = clamp(length(candidates) ÷ 512, 10, 1000)
269270
for batch in Iterators.partition(idx:-1:1, batchsize)
270271
time() > maxtime && break
271272
for outer idx in batch
272273
entry = candidates[idx]
273-
if !isnothing(seen) && entry.content seen
274+
if deduplicate && (entry.mode, entry.content) seen
274275
continue
275276
end
276277
if !isempty(spec.modes)
@@ -298,7 +299,7 @@ function filterchunkrev!(out::Vector{HistEntry}, candidates::DenseVector{HistEnt
298299
end
299300
end
300301
matchfail && continue
301-
!isnothing(seen) && push!(seen, entry.content)
302+
deduplicate && push!(seen, (entry.mode, entry.content))
302303
pushfirst!(out, entry)
303304
length(out) == maxresults && break
304305
end

stdlib/REPL/src/History/search.jl

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ function run_display!((; term, pstate), events::Channel{Symbol}, hist::Vector{Hi
6262
cands_temp = HistEntry[]
6363
# Filter state
6464
filter_idx = 0
65-
filter_seen = nothing
65+
filter_seen = Set{Tuple{Symbol,String}}()
66+
isfiltering = false
6667
# Event loop
6768
while true
6869
event = @lock events if !isempty(events) take!(events) end
@@ -158,12 +159,12 @@ function run_display!((; term, pstate), events::Channel{Symbol}, hist::Vector{Hi
158159
# with no filter (empty query), show all history including duplicates.
159160
isfiltering = !isempty(filter_spec.exacts) || !isempty(filter_spec.negatives) ||
160161
!isempty(filter_spec.regexps) || !isempty(filter_spec.modes)
161-
filter_seen = isfiltering ? Set{String}() : nothing
162+
empty!(filter_seen)
162163
filter_idx = filterchunkrev!(
163164
state, cands_current;
164165
maxtime = time() + 0.01,
165166
maxresults = outsize[1],
166-
seen = filter_seen)
167+
seen = filter_seen, deduplicate = isfiltering)
167168
if filter_idx == 0
168169
cands_cachestate = addcache!(
169170
cands_cache, cands_cachestate, cands_cond => state.candidates)
@@ -195,7 +196,7 @@ function run_display!((; term, pstate), events::Channel{Symbol}, hist::Vector{Hi
195196
filter_idx = filterchunkrev!(
196197
state, cands_current, filter_idx;
197198
maxtime = time() + 0.01,
198-
seen = filter_seen)
199+
seen = filter_seen, deduplicate = isfiltering)
199200
if filter_idx == 0
200201
cands_cachestate = addcache!(
201202
cands_cache, cands_cachestate, cands_cond => state.candidates)
@@ -213,10 +214,11 @@ end
213214

214215
function filterchunkrev!(state::SelectorState, candidates::DenseVector{HistEntry}, idx::Int = length(candidates);
215216
maxtime::Float64 = Inf, maxresults::Int = length(candidates),
216-
seen::Union{Nothing,Set{String}} = nothing)
217+
seen::Set{Tuple{Symbol,String}} = Set{Tuple{Symbol,String}}(),
218+
deduplicate::Bool = false)
217219
oldlen = length(state.candidates)
218220
idx = filterchunkrev!(state.candidates, candidates, state.filter, idx;
219-
maxtime = maxtime, maxresults = maxresults, seen = seen)
221+
maxtime = maxtime, maxresults = maxresults, seen = seen, deduplicate = deduplicate)
220222
newlen = length(state.candidates)
221223
newcands = view(state.candidates, (oldlen + 1):newlen)
222224
gfound = Int[]

stdlib/REPL/test/history.jl

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ end
346346
end
347347
@testset "Uniqueness" begin
348348
empty!(results)
349-
# Create entries with duplicate content
349+
# Create entries with duplicate content in the same mode
350350
dup_entries = [
351351
HistEntry(:julia, now(UTC), "println(\"hello\")", 1),
352352
HistEntry(:julia, now(UTC), "cos(2π)", 2),
@@ -359,8 +359,8 @@ end
359359
# When filtering with seen Set, duplicates are removed
360360
cset = ConditionSet("cos")
361361
spec = FilterSpec(cset)
362-
seen = Set{String}()
363-
@test filterchunkrev!(results, dup_entries, spec; seen=seen) == 0
362+
seen = Set{Tuple{Symbol,String}}()
363+
@test filterchunkrev!(results, dup_entries, spec; seen=seen, deduplicate=true) == 0
364364
# Should only get unique entries matching the filter
365365
# Since we iterate in reverse (7->1), we keep the most recent occurrence of each unique content
366366
@test length(results) == 1
@@ -372,6 +372,21 @@ end
372372
@test filterchunkrev!(results, dup_entries, spec2) == 0
373373
@test length(results) == 7 # All entries, including duplicates
374374
@test results == dup_entries
375+
# Test that same content in different modes is NOT deduplicated
376+
empty!(results)
377+
mode_entries = [
378+
HistEntry(:julia, now(UTC), "ls", 1),
379+
HistEntry(:shell, now(UTC), "ls", 2),
380+
HistEntry(:julia, now(UTC), "ls", 3), # duplicate in :julia mode
381+
HistEntry(:shell, now(UTC), "pwd", 4),
382+
]
383+
seen = Set{Tuple{Symbol,String}}()
384+
cset3 = ConditionSet("ls")
385+
spec3 = FilterSpec(cset3)
386+
@test filterchunkrev!(results, mode_entries, spec3; seen=seen, deduplicate=true) == 0
387+
@test length(results) == 2 # "ls" from :julia and "ls" from :shell
388+
@test results[1] == mode_entries[2] # :shell ls
389+
@test results[2] == mode_entries[3] # :julia ls (most recent)
375390
end
376391
end
377392
@testset "Strictness comparison" begin

0 commit comments

Comments
 (0)