Skip to content

Commit f7aa64a

Browse files
committed
Further sequester Group/Tag code
1 parent eea9804 commit f7aa64a

File tree

10 files changed

+169
-136
lines changed

10 files changed

+169
-136
lines changed

src/raw/bitmask.rs renamed to src/control/bitmask.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use super::imp::{
1+
use super::group::{
22
BitMaskWord, NonZeroBitMaskWord, BITMASK_ITER_MASK, BITMASK_MASK, BITMASK_STRIDE,
33
};
44

@@ -102,7 +102,7 @@ impl IntoIterator for BitMask {
102102

103103
/// Iterator over the contents of a `BitMask`, returning the indices of set
104104
/// bits.
105-
#[derive(Copy, Clone)]
105+
#[derive(Clone)]
106106
pub(crate) struct BitMaskIter(pub(crate) BitMask);
107107

108108
impl Iterator for BitMaskIter {

src/raw/generic.rs renamed to src/control/group/generic.rs

+3-6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
use super::bitmask::BitMask;
2-
use super::Tag;
1+
use super::super::{BitMask, Tag};
32
use core::{mem, ptr};
43

54
// Use the native word size as the group size. Using a 64-bit group size on
@@ -81,8 +80,7 @@ impl Group {
8180
#[inline]
8281
#[allow(clippy::cast_ptr_alignment)]
8382
pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self {
84-
// FIXME: use align_offset once it stabilizes
85-
debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0);
83+
debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
8684
Group(ptr::read(ptr.cast()))
8785
}
8886

@@ -91,8 +89,7 @@ impl Group {
9189
#[inline]
9290
#[allow(clippy::cast_ptr_alignment)]
9391
pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) {
94-
// FIXME: use align_offset once it stabilizes
95-
debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0);
92+
debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
9693
ptr::write(ptr.cast(), self.0);
9794
}
9895

src/control/group/mod.rs

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
cfg_if! {
2+
// Use the SSE2 implementation if possible: it allows us to scan 16 buckets
3+
// at once instead of 8. We don't bother with AVX since it would require
4+
// runtime dispatch and wouldn't gain us much anyway: the probability of
5+
// finding a match drops off drastically after the first few buckets.
6+
//
7+
// I attempted an implementation on ARM using NEON instructions, but it
8+
// turns out that most NEON instructions have multi-cycle latency, which in
9+
// the end outweighs any gains over the generic implementation.
// NOTE(review): the paragraph above appears to conflict with the aarch64 NEON branch below — confirm which is current.
10+
if #[cfg(all(
11+
target_feature = "sse2",
12+
any(target_arch = "x86", target_arch = "x86_64"),
13+
not(miri),
14+
))] {
15+
mod sse2;
16+
use sse2 as imp;
17+
} else if #[cfg(all(
18+
target_arch = "aarch64",
19+
target_feature = "neon",
20+
// NEON intrinsics are currently broken on big-endian targets.
21+
// See https://github.com/rust-lang/stdarch/issues/1484.
22+
target_endian = "little",
23+
not(miri),
24+
))] {
25+
mod neon;
26+
use neon as imp;
27+
} else {
28+
mod generic;
29+
use generic as imp;
30+
}
31+
}
32+
pub(crate) use self::imp::Group;
33+
pub(super) use self::imp::{
34+
BitMaskWord, NonZeroBitMaskWord, BITMASK_ITER_MASK, BITMASK_MASK, BITMASK_STRIDE,
35+
};

src/raw/neon.rs renamed to src/control/group/neon.rs

+3-6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
use super::bitmask::BitMask;
2-
use super::Tag;
1+
use super::super::{BitMask, Tag};
32
use core::arch::aarch64 as neon;
43
use core::mem;
54
use core::num::NonZeroU64;
@@ -52,8 +51,7 @@ impl Group {
5251
#[inline]
5352
#[allow(clippy::cast_ptr_alignment)]
5453
pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self {
55-
// FIXME: use align_offset once it stabilizes
56-
debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0);
54+
debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
5755
Group(neon::vld1_u8(ptr.cast()))
5856
}
5957

@@ -62,8 +60,7 @@ impl Group {
6260
#[inline]
6361
#[allow(clippy::cast_ptr_alignment)]
6462
pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) {
65-
// FIXME: use align_offset once it stabilizes
66-
debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0);
63+
debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
6764
neon::vst1_u8(ptr.cast(), self.0);
6865
}
6966

src/raw/sse2.rs renamed to src/control/group/sse2.rs

+3-6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
use super::bitmask::BitMask;
2-
use super::Tag;
1+
use super::super::{BitMask, Tag};
32
use core::mem;
43
use core::num::NonZeroU16;
54

@@ -58,8 +57,7 @@ impl Group {
5857
#[inline]
5958
#[allow(clippy::cast_ptr_alignment)]
6059
pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self {
61-
// FIXME: use align_offset once it stabilizes
62-
debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0);
60+
debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
6361
Group(x86::_mm_load_si128(ptr.cast()))
6462
}
6563

@@ -68,8 +66,7 @@ impl Group {
6866
#[inline]
6967
#[allow(clippy::cast_ptr_alignment)]
7068
pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) {
71-
// FIXME: use align_offset once it stabilizes
72-
debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0);
69+
debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
7370
x86::_mm_store_si128(ptr.cast(), self.0);
7471
}
7572

src/control/mod.rs

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
mod bitmask;
2+
mod group;
3+
mod tag;
4+
5+
use self::bitmask::BitMask;
6+
pub(crate) use self::{
7+
bitmask::BitMaskIter,
8+
group::Group,
9+
tag::{Tag, TagSliceExt},
10+
};

src/control/tag.rs

+81
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
use core::{fmt, mem};
2+
3+
/// Single one-byte tag in a control group; the top bit is clear for full buckets and set for special ones.
4+
#[derive(Copy, Clone, PartialEq, Eq)]
5+
#[repr(transparent)]
6+
pub(crate) struct Tag(pub(super) u8);
7+
impl Tag {
8+
/// Control tag value for an empty bucket.
9+
pub(crate) const EMPTY: Tag = Tag(0b1111_1111);
10+
11+
/// Control tag value for a deleted bucket.
12+
pub(crate) const DELETED: Tag = Tag(0b1000_0000);
13+
14+
/// Checks whether a control tag represents a full bucket (top bit is clear).
15+
#[inline]
16+
pub(crate) const fn is_full(self) -> bool {
17+
self.0 & 0x80 == 0
18+
}
19+
20+
/// Checks whether a control tag represents a special value (top bit is set).
21+
#[inline]
22+
pub(crate) const fn is_special(self) -> bool {
23+
self.0 & 0x80 != 0
24+
}
25+
26+
/// Checks whether a special control value is EMPTY. Only the low bit is tested: it is set in EMPTY and clear in DELETED.
27+
#[inline]
28+
pub(crate) const fn special_is_empty(self) -> bool {
29+
debug_assert!(self.is_special());
30+
self.0 & 0x01 != 0
31+
}
32+
33+
/// Creates a control tag representing a full bucket with the given hash.
34+
#[inline]
35+
#[allow(clippy::cast_possible_truncation)]
36+
pub(crate) const fn full(hash: u64) -> Tag {
37+
// Size in bytes of the narrower of `usize` and `u64`; used below to locate the top 7 bits of the hash.
38+
const MIN_HASH_LEN: usize = if mem::size_of::<usize>() < mem::size_of::<u64>() {
39+
mem::size_of::<usize>()
40+
} else {
41+
mem::size_of::<u64>()
42+
};
43+
44+
// Grab the top 7 bits of the hash. While the hash is normally a full 64-bit
45+
// value, some hash functions (such as FxHash) produce a usize result
46+
// instead, which means that the top 32 bits are 0 on 32-bit platforms.
47+
// So we use the MIN_HASH_LEN constant to handle this.
48+
let top7 = hash >> (MIN_HASH_LEN * 8 - 7);
49+
Tag((top7 & 0x7f) as u8) // truncation is lossless: the value was just masked to 7 bits
50+
}
51+
}
52+
// Formats special tags as `EMPTY` or `DELETED`, and full tags as a `full(..)` tuple holding the low 7 bits.
impl fmt::Debug for Tag {
53+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
54+
if self.is_special() {
55+
if self.special_is_empty() {
56+
f.pad("EMPTY")
57+
} else {
58+
f.pad("DELETED")
59+
}
60+
} else {
61+
f.debug_tuple("full").field(&(self.0 & 0x7F)).finish()
62+
}
63+
}
64+
}
65+
66+
/// Extension trait for slices of tags.
67+
pub(crate) trait TagSliceExt {
68+
/// Overwrites every tag in the slice with the given tag.
69+
fn fill_tag(&mut self, tag: Tag);
70+
71+
/// Resets every tag in the slice to `Tag::EMPTY`.
72+
fn fill_empty(&mut self) {
73+
self.fill_tag(Tag::EMPTY)
74+
}
75+
}
76+
impl TagSliceExt for [Tag] {
77+
fn fill_tag(&mut self, tag: Tag) {
78+
// SAFETY: `self` is an exclusive borrow of the whole slice, so `self.len()` elements starting at `as_mut_ptr()` are writable; `Tag` is `repr(transparent)` over `u8`, so every byte written is a valid `Tag`.
79+
unsafe { self.as_mut_ptr().write_bytes(tag.0, self.len()) }
80+
}
81+
}

src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,9 @@ doc_comment::doctest!("../README.md");
6161
#[macro_use]
6262
mod macros;
6363

64+
mod control;
6465
mod raw;
66+
mod util;
6567

6668
mod external_trait_impls;
6769
mod map;

0 commit comments

Comments
 (0)