Skip to content

Commit 49ed21a

Browse files
committed
Add map and set extract_if
1 parent dceb0f0 commit 49ed21a

File tree

7 files changed

+323
-5
lines changed

7 files changed

+323
-5
lines changed

src/map.rs

+41-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ mod tests;
1616
pub use self::core::raw_entry_v1::{self, RawEntryApiV1};
1717
pub use self::core::{Entry, IndexedEntry, OccupiedEntry, VacantEntry};
1818
pub use self::iter::{
19-
Drain, IntoIter, IntoKeys, IntoValues, Iter, IterMut, IterMut2, Keys, Splice, Values, ValuesMut,
19+
Drain, ExtractIf, IntoIter, IntoKeys, IntoValues, Iter, IterMut, IterMut2, Keys, Splice,
20+
Values, ValuesMut,
2021
};
2122
pub use self::mutable::MutableEntryKey;
2223
pub use self::mutable::MutableKeys;
@@ -36,7 +37,7 @@ use alloc::vec::Vec;
3637
#[cfg(feature = "std")]
3738
use std::collections::hash_map::RandomState;
3839

39-
use self::core::IndexMapCore;
40+
pub(crate) use self::core::{ExtractCore, IndexMapCore};
4041
use crate::util::{third, try_simplify_range};
4142
use crate::{Bucket, Entries, Equivalent, HashValue, TryReserveError};
4243

@@ -306,6 +307,44 @@ impl<K, V, S> IndexMap<K, V, S> {
306307
Drain::new(self.core.drain(range))
307308
}
308309

310+
/// Creates an iterator which uses a closure to determine if an element should be removed.
311+
///
312+
/// If the closure returns true, the element is removed from the map and yielded.
313+
/// If the closure returns false, or panics, the element remains in the map and will not be
314+
/// yielded.
315+
///
316+
/// Note that `extract_if` lets you mutate every value in the filter closure, regardless of
317+
/// whether you choose to keep or remove it.
318+
///
319+
/// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating
320+
/// or the iteration short-circuits, then the remaining elements will be retained.
321+
/// Use [`retain`] with a negated predicate if you do not need the returned iterator.
322+
///
323+
/// [`retain`]: IndexMap::retain
324+
///
325+
/// # Examples
326+
///
327+
/// Splitting a map into even and odd keys, reusing the original map:
328+
///
329+
/// ```
330+
/// use indexmap::IndexMap;
331+
///
332+
/// let mut map: IndexMap<i32, i32> = (0..8).map(|x| (x, x)).collect();
333+
/// let extracted: IndexMap<i32, i32> = map.extract_if(|k, _v| k % 2 == 0).collect();
334+
///
335+
/// let evens = extracted.keys().copied().collect::<Vec<_>>();
336+
/// let odds = map.keys().copied().collect::<Vec<_>>();
337+
///
338+
/// assert_eq!(evens, vec![0, 2, 4, 6]);
339+
/// assert_eq!(odds, vec![1, 3, 5, 7]);
340+
/// ```
341+
pub fn extract_if<F>(&mut self, pred: F) -> ExtractIf<'_, K, V, F>
342+
where
343+
F: FnMut(&K, &mut V) -> bool,
344+
{
345+
ExtractIf::new(&mut self.core, pred)
346+
}
347+
309348
/// Splits the collection into two at the given index.
310349
///
311350
/// Returns a newly allocated map containing the elements in the range

src/map/core.rs

+3
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
//! However, we should probably not let this show in the public API or docs.
99
1010
mod entry;
11+
mod extract;
1112

1213
pub mod raw_entry_v1;
1314

@@ -25,6 +26,7 @@ type Indices = hash_table::HashTable<usize>;
2526
type Entries<K, V> = Vec<Bucket<K, V>>;
2627

2728
pub use entry::{Entry, IndexedEntry, OccupiedEntry, VacantEntry};
29+
pub(crate) use extract::ExtractCore;
2830

2931
/// Core of the map that does not depend on S
3032
#[derive(Debug)]
@@ -163,6 +165,7 @@ impl<K, V> IndexMapCore<K, V> {
163165

164166
#[inline]
165167
pub(crate) fn len(&self) -> usize {
168+
debug_assert_eq!(self.entries.len(), self.indices.len());
166169
self.indices.len()
167170
}
168171

src/map/core/extract.rs

+96
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
#![allow(unsafe_code)]
2+
3+
use super::{Bucket, IndexMapCore};
4+
5+
impl<K, V> IndexMapCore<K, V> {
6+
pub(crate) fn extract(&mut self) -> ExtractCore<'_, K, V> {
7+
// SAFETY: We must have consistent lengths to start, so that's a hard assertion.
8+
// Then the worst `set_len(0)` can do is leak items if `ExtractCore` doesn't drop.
9+
assert_eq!(self.entries.len(), self.indices.len());
10+
unsafe {
11+
self.entries.set_len(0);
12+
}
13+
ExtractCore {
14+
map: self,
15+
current: 0,
16+
new_len: 0,
17+
}
18+
}
19+
}
20+
21+
pub(crate) struct ExtractCore<'a, K, V> {
22+
map: &'a mut IndexMapCore<K, V>,
23+
current: usize,
24+
new_len: usize,
25+
}
26+
27+
impl<K, V> Drop for ExtractCore<'_, K, V> {
28+
fn drop(&mut self) {
29+
let old_len = self.map.indices.len();
30+
let mut new_len = self.new_len;
31+
debug_assert!(new_len <= self.current);
32+
debug_assert!(self.current <= old_len);
33+
debug_assert!(old_len <= self.map.entries.capacity());
34+
35+
// SAFETY: We assume `new_len` and `current` were correctly maintained by the iterator.
36+
// So `entries[new_len..current]` were extracted, but the rest before and after are valid.
37+
unsafe {
38+
if new_len == self.current {
39+
// Nothing was extracted, so any remaining items can be left in place.
40+
new_len = old_len;
41+
} else if self.current < old_len {
42+
// Need to shift the remaining items down.
43+
let tail_len = old_len - self.current;
44+
let base = self.map.entries.as_mut_ptr();
45+
let src = base.add(self.current);
46+
let dest = base.add(new_len);
47+
src.copy_to(dest, tail_len);
48+
new_len += tail_len;
49+
}
50+
self.map.entries.set_len(new_len);
51+
}
52+
53+
if new_len != old_len {
54+
// We don't keep track of *which* items were extracted, so reindex everything.
55+
self.map.rebuild_hash_table();
56+
}
57+
}
58+
}
59+
60+
impl<K, V> ExtractCore<'_, K, V> {
61+
pub(crate) fn extract_if<F>(&mut self, mut pred: F) -> Option<Bucket<K, V>>
62+
where
63+
F: FnMut(&mut Bucket<K, V>) -> bool,
64+
{
65+
let old_len = self.map.indices.len();
66+
debug_assert!(old_len <= self.map.entries.capacity());
67+
68+
let base = self.map.entries.as_mut_ptr();
69+
while self.current < old_len {
70+
// SAFETY: We're maintaining both indices within bounds of the original entries, so
71+
// 0..new_len and current..old_len are always valid items for our Drop to keep.
72+
unsafe {
73+
let item = base.add(self.current);
74+
if pred(&mut *item) {
75+
// Extract it!
76+
self.current += 1;
77+
return Some(item.read());
78+
} else {
79+
// Keep it, shifting it down if needed.
80+
if self.new_len != self.current {
81+
debug_assert!(self.new_len < self.current);
82+
let dest = base.add(self.new_len);
83+
item.copy_to_nonoverlapping(dest, 1);
84+
}
85+
self.current += 1;
86+
self.new_len += 1;
87+
}
88+
}
89+
}
90+
None
91+
}
92+
93+
pub(crate) fn remaining(&self) -> usize {
94+
self.map.indices.len() - self.current
95+
}
96+
}

src/map/iter.rs

+54-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
use super::core::IndexMapCore;
2-
use super::{Bucket, Entries, IndexMap, Slice};
1+
use super::{Bucket, Entries, ExtractCore, IndexMap, IndexMapCore, Slice};
32

43
use alloc::vec::{self, Vec};
54
use core::fmt;
@@ -773,3 +772,56 @@ where
773772
.finish()
774773
}
775774
}
775+
776+
/// An extracting iterator for `IndexMap`.
777+
///
778+
/// This `struct` is created by [`IndexMap::extract_if()`].
779+
/// See its documentation for more.
780+
pub struct ExtractIf<'a, K, V, F>
781+
where
782+
F: FnMut(&K, &mut V) -> bool,
783+
{
784+
inner: ExtractCore<'a, K, V>,
785+
pred: F,
786+
}
787+
788+
impl<K, V, F> ExtractIf<'_, K, V, F>
789+
where
790+
F: FnMut(&K, &mut V) -> bool,
791+
{
792+
pub(super) fn new(core: &mut IndexMapCore<K, V>, pred: F) -> ExtractIf<'_, K, V, F> {
793+
ExtractIf {
794+
inner: core.extract(),
795+
pred,
796+
}
797+
}
798+
}
799+
800+
impl<K, V, F> Iterator for ExtractIf<'_, K, V, F>
801+
where
802+
F: FnMut(&K, &mut V) -> bool,
803+
{
804+
type Item = (K, V);
805+
806+
fn next(&mut self) -> Option<Self::Item> {
807+
self.inner
808+
.extract_if(|bucket| {
809+
let (key, value) = bucket.ref_mut();
810+
(self.pred)(key, value)
811+
})
812+
.map(Bucket::key_value)
813+
}
814+
815+
fn size_hint(&self) -> (usize, Option<usize>) {
816+
(0, Some(self.inner.remaining()))
817+
}
818+
}
819+
820+
impl<'a, K, V, F> fmt::Debug for ExtractIf<'a, K, V, F>
821+
where
822+
F: FnMut(&K, &mut V) -> bool,
823+
{
824+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
825+
f.debug_struct("ExtractIf").finish_non_exhaustive()
826+
}
827+
}

src/set.rs

+36-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ mod slice;
88
mod tests;
99

1010
pub use self::iter::{
11-
Difference, Drain, Intersection, IntoIter, Iter, Splice, SymmetricDifference, Union,
11+
Difference, Drain, ExtractIf, Intersection, IntoIter, Iter, Splice, SymmetricDifference, Union,
1212
};
1313
pub use self::mutable::MutableValues;
1414
pub use self::slice::Slice;
@@ -257,6 +257,41 @@ impl<T, S> IndexSet<T, S> {
257257
Drain::new(self.map.core.drain(range))
258258
}
259259

260+
/// Creates an iterator which uses a closure to determine if a value should be removed.
261+
///
262+
/// If the closure returns true, then the value is removed and yielded.
263+
/// If the closure returns false, or panics, the value remains in the set and will not be
264+
/// yielded.
265+
///
266+
/// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating
267+
/// or the iteration short-circuits, then the remaining elements will be retained.
268+
/// Use [`retain`] with a negated predicate if you do not need the returned iterator.
269+
///
270+
/// [`retain`]: IndexSet::retain
271+
///
272+
/// # Examples
273+
///
274+
/// Splitting a set into even and odd values, reusing the original set:
275+
///
276+
/// ```
277+
/// use indexmap::IndexSet;
278+
///
279+
/// let mut set: IndexSet<i32> = (0..8).collect();
280+
/// let extracted: IndexSet<i32> = set.extract_if(|v| v % 2 == 0).collect();
281+
///
282+
/// let evens = extracted.into_iter().collect::<Vec<_>>();
283+
/// let odds = set.into_iter().collect::<Vec<_>>();
284+
///
285+
/// assert_eq!(evens, vec![0, 2, 4, 6]);
286+
/// assert_eq!(odds, vec![1, 3, 5, 7]);
287+
/// ```
288+
pub fn extract_if<F>(&mut self, pred: F) -> ExtractIf<'_, T, F>
289+
where
290+
F: FnMut(&T) -> bool,
291+
{
292+
ExtractIf::new(&mut self.map.core, pred)
293+
}
294+
260295
/// Splits the collection into two at the given index.
261296
///
262297
/// Returns a newly allocated set containing the elements in the range

src/set/iter.rs

+52
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use crate::map::{ExtractCore, IndexMapCore};
2+
13
use super::{Bucket, Entries, IndexSet, Slice};
24

35
use alloc::vec::{self, Vec};
@@ -625,3 +627,53 @@ impl<I: fmt::Debug> fmt::Debug for UnitValue<I> {
625627
fmt::Debug::fmt(&self.0, f)
626628
}
627629
}
630+
631+
/// An extracting iterator for `IndexSet`.
632+
///
633+
/// This `struct` is created by [`IndexSet::extract_if()`].
634+
/// See its documentation for more.
635+
pub struct ExtractIf<'a, T, F>
636+
where
637+
F: FnMut(&T) -> bool,
638+
{
639+
inner: ExtractCore<'a, T, ()>,
640+
pred: F,
641+
}
642+
643+
impl<T, F> ExtractIf<'_, T, F>
644+
where
645+
F: FnMut(&T) -> bool,
646+
{
647+
pub(super) fn new(core: &mut IndexMapCore<T, ()>, pred: F) -> ExtractIf<'_, T, F> {
648+
ExtractIf {
649+
inner: core.extract(),
650+
pred,
651+
}
652+
}
653+
}
654+
655+
impl<T, F> Iterator for ExtractIf<'_, T, F>
656+
where
657+
F: FnMut(&T) -> bool,
658+
{
659+
type Item = T;
660+
661+
fn next(&mut self) -> Option<Self::Item> {
662+
self.inner
663+
.extract_if(|bucket| (self.pred)(bucket.key_ref()))
664+
.map(Bucket::key)
665+
}
666+
667+
fn size_hint(&self) -> (usize, Option<usize>) {
668+
(0, Some(self.inner.remaining()))
669+
}
670+
}
671+
672+
impl<'a, T, F> fmt::Debug for ExtractIf<'a, T, F>
673+
where
674+
F: FnMut(&T) -> bool,
675+
{
676+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
677+
f.debug_struct("ExtractIf").finish_non_exhaustive()
678+
}
679+
}

0 commit comments

Comments
 (0)