Skip to content

Commit faa01eb

Browse files
authored
Merge pull request #66 from GnomedDev/extract-if
Add ThinVec::extract_if
2 parents 3b4d5c8 + 971a22e commit faa01eb

File tree

2 files changed

+212
-7
lines changed

2 files changed

+212
-7
lines changed

.github/workflows/rust.yml

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,13 @@ jobs:
2020
rustup toolchain install nightly --component miri
2121
rustup override set nightly
2222
cargo miri setup
23-
- name: Test (default) with Miri
24-
run: MIRIFLAGS=-Zmiri-strict-provenance cargo miri test
23+
# We do not use Stacked Borrows anymore, since ExtractIf (which is lifted from std) does not pass SB,
24+
# and if std can do it in non-magical data structure code, we can do it too.
2525
- name: Test (default) with Miri + Tree Borrows
2626
run: MIRIFLAGS="-Zmiri-strict-provenance -Zmiri-tree-borrows" cargo miri test
2727
# AutoThinVec needs tree borrows.
2828
- name: Test (gecko-ffi) with Miri
2929
run: MIRIFLAGS="-Zmiri-strict-provenance -Zmiri-tree-borrows" cargo miri test --features=gecko-ffi
30-
- name: Test (unstable features) with Miri
31-
run: MIRIFLAGS=-Zmiri-strict-provenance cargo miri test --features=unstable
3230

3331
build:
3432
runs-on: ubuntu-latest
@@ -40,14 +38,14 @@ jobs:
4038
run: cargo build --features=malloc_size_of --verbose
4139
- name: Run tests
4240
run: cargo test --verbose
43-
- name: Run tests
44-
run: cargo test --verbose
4541
- name: Run tests (serde)
4642
run: cargo test --features=serde --verbose
4743
- name: Run tests (gecko-ffi)
4844
run: cargo test --tests --features=gecko-ffi --verbose
4945
- name: Run tests (no_std)
5046
run: cargo test --tests --no-default-features --verbose
47+
- name: Run tests (unstable)
48+
run: cargo +nightly test --features=unstable --verbose
5149

5250
msrv:
5351
runs-on: ubuntu-latest

src/lib.rs

Lines changed: 208 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ use core::ops::Bound;
160160
use core::ops::{Deref, DerefMut, RangeBounds};
161161
use core::ptr::NonNull;
162162
use core::slice::Iter;
163-
use core::{fmt, mem, ptr, slice};
163+
use core::{fmt, mem, ops, ptr, slice};
164164

165165
use impl_details::*;
166166

@@ -1541,6 +1541,140 @@ impl<T> ThinVec<T> {
15411541
}
15421542
}
15431543

1544+
/// Creates an iterator which uses a closure to determine if an element should be removed.
1545+
///
1546+
/// If the closure returns `true`, then the element is removed and yielded.
1547+
/// If the closure returns `false`, the element will remain in the vector and will not be yielded
1548+
/// by the iterator.
///
/// Only elements whose index lies within `range` are passed to the closure; elements outside
/// of `range` are always retained.
1549+
///
1550+
/// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating
1551+
/// or the iteration short-circuits, then the remaining elements will be retained.
1552+
/// Use [`ThinVec::retain`] with a negated predicate if you do not need the returned iterator.
1553+
///
1554+
/// When `range` is `..`, using this method is equivalent to the following code:
1555+
///
1556+
/// ```
1557+
/// # use thin_vec::{ThinVec, thin_vec};
1558+
/// # let some_predicate = |x: &mut i32| { *x == 2 || *x == 3 || *x == 6 };
1559+
/// # let mut vec = thin_vec![1, 2, 3, 4, 5, 6];
1560+
/// let mut i = 0;
1561+
/// while i < vec.len() {
1562+
/// if some_predicate(&mut vec[i]) {
1563+
/// let val = vec.remove(i);
1564+
/// // your code here
1565+
/// } else {
1566+
/// i += 1;
1567+
/// }
1568+
/// }
1569+
///
1570+
/// # assert_eq!(vec, thin_vec![1, 4, 5]);
1571+
/// ```
1572+
///
1573+
/// But `extract_if` is easier to use. `extract_if` is also more efficient,
1574+
/// because it can backshift the elements of the array in bulk.
1575+
///
1576+
/// Note that `extract_if` also lets you mutate every element in the filter closure,
1577+
/// regardless of whether you choose to keep or remove it.
1578+
///
1579+
/// # Panics
///
/// Panics if the end of `range` lies past the length of the vector, or if the start of
/// `range` lies past its end.
///
/// # Examples
1580+
///
1581+
/// Splitting an array into evens and odds, reusing the original allocation:
1582+
///
1583+
/// ```
1584+
/// use thin_vec::{ThinVec, thin_vec};
1585+
///
1586+
/// let mut numbers = thin_vec![1, 2, 3, 4, 5, 6, 8, 9, 11, 13, 14, 15];
1587+
///
1588+
/// let evens = numbers.extract_if(.., |x| *x % 2 == 0).collect::<ThinVec<_>>();
1589+
/// let odds = numbers;
1590+
///
1591+
/// assert_eq!(evens, thin_vec![2, 4, 6, 8, 14]);
1592+
/// assert_eq!(odds, thin_vec![1, 3, 5, 9, 11, 13, 15]);
1593+
/// ```
1594+
pub fn extract_if<F, R: RangeBounds<usize>>(
1595+
&mut self,
1596+
range: R,
1597+
filter: F,
1598+
) -> ExtractIf<'_, T, F>
1599+
where
1600+
F: FnMut(&mut T) -> bool,
1601+
{
1602+
// Copy of https://github.com/rust-lang/rust/blob/ee361e8fca1c30e13e7a31cc82b64c045339d3a8/library/core/src/slice/index.rs#L37
//
// Never returns: every path panics, choosing the message that best explains why
// `start..end` is not a valid range for a slice of length `len`.
1603+
fn slice_index_fail(start: usize, end: usize, len: usize) -> ! {
1604+
if start > len {
1605+
panic!(
1606+
"range start index {} out of range for slice of length {}",
1607+
start, len
1608+
)
1609+
}
1610+
1611+
if end > len {
1612+
panic!(
1613+
"range end index {} out of range for slice of length {}",
1614+
end, len
1615+
)
1616+
}
1617+
1618+
if start > end {
1619+
panic!("slice index starts at {} but ends at {}", start, end)
1620+
}
1621+
1622+
// Only reachable if the range was a `RangeInclusive` or a
1623+
// `RangeToInclusive`, with `end == len`.
// NOTE(review): as called from `slice_range` below, this fallthrough is also reached
// for an excluded start bound with `start == end <= len`; the message then names the
// end index even though the start bound is the problem.
1624+
panic!(
1625+
"range end index {} out of range for slice of length {}",
1626+
end, len
1627+
)
1628+
}
1629+
1630+
// Backport of https://github.com/rust-lang/rust/blob/ee361e8fca1c30e13e7a31cc82b64c045339d3a8/library/core/src/slice/index.rs#L855
//
// Converts any `RangeBounds<usize>` into a half-open `Range<usize>` that is guaranteed
// to satisfy `start <= end <= bounds.end`, panicking via `slice_index_fail` otherwise.
1631+
pub fn slice_range<R>(range: R, bounds: ops::RangeTo<usize>) -> ops::Range<usize>
1632+
where
1633+
R: ops::RangeBounds<usize>,
1634+
{
1635+
let len = bounds.end;
1636+
1637+
// Resolve the end bound first so the start bound can be validated against it.
let end = match range.end_bound() {
1638+
ops::Bound::Included(&end) if end >= len => slice_index_fail(0, end, len),
1639+
// Cannot overflow because `end < len` implies `end < usize::MAX`.
1640+
ops::Bound::Included(&end) => end + 1,
1641+
1642+
ops::Bound::Excluded(&end) if end > len => slice_index_fail(0, end, len),
1643+
ops::Bound::Excluded(&end) => end,
1644+
ops::Bound::Unbounded => len,
1645+
};
1646+
1647+
let start = match range.start_bound() {
1648+
ops::Bound::Excluded(&start) if start >= end => slice_index_fail(start, end, len),
1649+
// Cannot overflow because `start < end` implies `start < usize::MAX`.
1650+
ops::Bound::Excluded(&start) => start + 1,
1651+
1652+
ops::Bound::Included(&start) if start > end => slice_index_fail(start, end, len),
1653+
ops::Bound::Included(&start) => start,
1654+
1655+
ops::Bound::Unbounded => 0,
1656+
};
1657+
1658+
ops::Range { start, end }
1659+
}
1660+
1661+
let old_len = self.len();
1662+
let ops::Range { start, end } = slice_range(range, ..old_len);
1663+
1664+
// Guard against the `ExtractIf` getting leaked (leak amplification): the vector reports
// length 0 until `ExtractIf`'s `Drop` impl restores it to `old_len - del`, so if that
// destructor never runs the elements are leaked instead of being left reachable in a
// partially moved-out state.
1665+
unsafe {
1666+
self.set_len(0);
1667+
}
1668+
ExtractIf {
1669+
vec: self,
1670+
idx: start,
1671+
del: 0,
1672+
end,
1673+
old_len,
1674+
pred: filter,
1675+
}
1676+
}
1677+
15441678
/// Resize the buffer and update its capacity, without changing the length.
15451679
/// Unsafe because it can cause length to be greater than capacity.
15461680
unsafe fn reallocate(&mut self, new_cap: usize) {
@@ -2776,6 +2910,79 @@ impl<T> Drain<'_, T> {
27762910
}
27772911
}
27782912

2913+
/// An iterator for [`ThinVec`] which uses a closure to determine if an element should be removed.
///
/// This struct is created by [`ThinVec::extract_if`]. Elements for which the predicate returns
/// `true` are yielded by value; all other elements stay in the vector.
2914+
#[must_use = "iterators are lazy and do nothing unless consumed"]
2915+
pub struct ExtractIf<'a, T, F> {
2916+
/// The vector being filtered. Its stored length is set to 0 by `extract_if` and restored
/// to `old_len - del` when this iterator is dropped.
vec: &'a mut ThinVec<T>,
2917+
/// The index of the item that will be inspected by the next call to `next`.
2918+
idx: usize,
2919+
/// Elements at and beyond this point will be retained. Must be less than or equal to `old_len`.
2920+
end: usize,
2921+
/// The number of items that have been drained (removed) thus far.
2922+
del: usize,
2923+
/// The original length of `vec` prior to draining.
2924+
old_len: usize,
2925+
/// The filter test predicate.
2926+
pred: F,
2927+
}
2928+
2929+
impl<T, F> Iterator for ExtractIf<'_, T, F>
2930+
where
2931+
F: FnMut(&mut T) -> bool,
2932+
{
2933+
type Item = T;
2934+
2935+
/// Scans forward from `idx` towards `end`, calling the predicate on each element.
///
/// Returns the first element for which the predicate returns `true`, moved out of the
/// buffer. Elements the predicate rejects along the way are shifted back by `del` slots
/// so the retained elements stay contiguous. Returns `None` once `idx` reaches `end`.
fn next(&mut self) -> Option<T> {
2936+
unsafe {
2937+
while self.idx < self.end {
2938+
let i = self.idx;
2939+
// View the buffer at its original length: the vector's stored length is 0 while
// this iterator exists, so `old_len` is the only record of how many slots are in use.
let v = slice::from_raw_parts_mut(self.vec.as_mut_ptr(), self.old_len);
2940+
let drained = (self.pred)(&mut v[i]);
2941+
// Update the index *after* the predicate is called. If the index
2942+
// is updated prior and the predicate panics, the element at this
2943+
// index would be leaked.
2944+
self.idx += 1;
2945+
if drained {
2946+
self.del += 1;
2947+
// Move the element out by value; slot `i` is now a hole accounted for by `del`.
return Some(ptr::read(&v[i]));
2948+
} else if self.del > 0 {
2949+
// Retained element with holes before it: move it back by `del` slots.
// `del > 0` makes source and destination distinct slots, so a
// non-overlapping single-element copy is sufficient.
let del = self.del;
2950+
let src: *const T = &v[i];
2951+
let dst: *mut T = &mut v[i - del];
2952+
ptr::copy_nonoverlapping(src, dst, 1);
2953+
}
2954+
}
2955+
None
2956+
}
2957+
}
2958+
2959+
/// Lower bound is 0 because the predicate may reject every remaining element; upper
/// bound is the number of indices in `idx..end` that have not been inspected yet.
fn size_hint(&self) -> (usize, Option<usize>) {
2960+
(0, Some(self.end - self.idx))
2961+
}
2962+
}
2963+
2964+
impl<A, F> Drop for ExtractIf<'_, A, F> {
2965+
/// Closes the gap left by removed elements and restores the vector's length to
/// `old_len - del`. The predicate is never called from here.
fn drop(&mut self) {
2966+
unsafe {
2967+
if self.idx < self.old_len && self.del > 0 {
2968+
// This is a pretty messed up state, and there isn't really an
2969+
// obviously right thing to do. We don't want to keep trying
2970+
// to execute `pred`, so we just backshift all the unprocessed
2971+
// elements and tell the vec that they still exist. The backshift
2972+
// is required to prevent a double-drop of the last successfully
2973+
// drained item prior to a panic in the predicate.
2974+
//
// The same branch also covers the ordinary case where iteration ran to
// completion with `end < old_len`: the elements after the range were never
// visited by `next` and still have to be moved back over the holes.
let ptr = self.vec.as_mut_ptr();
2975+
let src = ptr.add(self.idx);
2976+
let dst = src.sub(self.del);
2977+
let tail_len = self.old_len - self.idx;
2978+
// `copy_to` permits overlapping regions, which happens whenever `tail_len > del`.
src.copy_to(dst, tail_len);
2979+
}
2980+
2981+
// Every original element except the `del` removed ones is still in the buffer.
self.vec.set_len(self.old_len - self.del);
2982+
}
2983+
}
2984+
}
2985+
27792986
/// Write is implemented for `ThinVec<u8>` by appending to the vector.
27802987
/// The vector will grow as needed.
27812988
/// This implementation is identical to the one for `Vec<u8>`.

0 commit comments

Comments
 (0)