Skip to content

Commit 2098bd4

Browse files
committed
WIP Switch to a full bitwidth h2
* Changes: - Use all values of h2, not just 130 of them. - Convert the SSE2 implementation for benchmarking. * Motivation: Using 256 values instead of 130 could theoretically lower the number of false-positive residual matches by close to 50%. On the other hand, it does make h2 slightly more complicated to compute, and possibly to operate on.
1 parent 3741813 commit 2098bd4

File tree

2 files changed

+241
-28
lines changed

2 files changed

+241
-28
lines changed

src/raw/mod.rs

+207-10
Original file line numberDiff line numberDiff line change
@@ -105,28 +105,29 @@ trait SizedTypeProperties: Sized {
105105
impl<T> SizedTypeProperties for T {}
106106

107107
/// Control byte value for an empty bucket.
108-
const EMPTY: u8 = 0b1111_1111;
108+
const EMPTY: u8 = 0b0111_1111;
109109

110110
/// Control byte value for a deleted bucket.
111-
const DELETED: u8 = 0b1000_0000;
111+
const DELETED: u8 = 0b0111_1110;
112112

113113
/// Checks whether a control byte represents a full bucket (top bit is clear).
114114
#[inline]
115115
fn is_full(ctrl: u8) -> bool {
116-
ctrl & 0x80 == 0
116+
(ctrl as i8) < (DELETED as i8)
117117
}
118118

119119
/// Checks whether a control byte represents a special value (top bit is set).
120120
#[inline]
121121
fn is_special(ctrl: u8) -> bool {
122-
ctrl & 0x80 != 0
122+
(ctrl as i8) >= (DELETED as i8)
123123
}
124124

125125
/// Checks whether a special control value is EMPTY (just check 1 bit).
126126
#[inline]
127127
fn special_is_empty(ctrl: u8) -> bool {
128128
debug_assert!(is_special(ctrl));
129-
ctrl & 0x01 != 0
129+
130+
ctrl == EMPTY
130131
}
131132

132133
/// Primary hash function, used to select the initial bucket to probe from.
@@ -137,23 +138,46 @@ fn h1(hash: u64) -> usize {
137138
hash as usize
138139
}
139140

140-
// Constant for h2 function that grabing the top 7 bits of the hash.
141+
// Constant for the h2 function, which grabs the top 8 bits of the hash.
141142
const MIN_HASH_LEN: usize = if mem::size_of::<usize>() < mem::size_of::<u64>() {
142143
mem::size_of::<usize>()
143144
} else {
144145
mem::size_of::<u64>()
145146
};
146147

147-
/// Secondary hash function, saved in the low 7 bits of the control byte.
148+
/// Secondary hash function, saved in the control byte.
148149
#[inline]
149150
#[allow(clippy::cast_possible_truncation)]
150151
fn h2(hash: u64) -> u8 {
151-
// Grab the top 7 bits of the hash. While the hash is normally a full 64-bit
152+
const fn compute_control() -> [u8; 256] {
153+
let mut result = [0; 256];
154+
155+
let mut i = 0;
156+
157+
while i < 256 {
158+
result[i] = i as u8;
159+
160+
i += 1;
161+
}
162+
163+
// Avoid overlap with special values.
164+
result[EMPTY as usize] += 8;
165+
result[DELETED as usize] += 8;
166+
167+
result
168+
}
169+
170+
#[rustfmt::skip]
171+
const CONTROL: [u8; 256] = compute_control();
172+
173+
// Grab the top 8 bits of the hash. While the hash is normally a full 64-bit
152174
// value, some hash functions (such as FxHash) produce a usize result
153175
// instead, which means that the top 32 bits are 0 on 32-bit platforms.
154176
// So we use MIN_HASH_LEN constant to handle this.
155-
let top7 = hash >> (MIN_HASH_LEN * 8 - 7);
156-
(top7 & 0x7f) as u8 // truncation
177+
let top8 = hash >> (MIN_HASH_LEN * 8 - 7);
178+
179+
// Lookup matching control byte, avoid overlap with special control.
180+
CONTROL[top8 as usize]
157181
}
158182

159183
/// Probe sequence based on triangular numbers, which is guaranteed (since our
@@ -4562,6 +4586,179 @@ impl<T, A: Allocator> RawExtractIf<'_, T, A> {
45624586
}
45634587
}
45644588

4589+
#[cfg(test)]
mod test_group {
    //! Tests for the `Group` matching primitives against the control byte
    //! encoding (`EMPTY`, `DELETED`, and full bytes on either side of zero
    //! when read as `i8`).

    use super::*;
    use ::alloc::vec::Vec;

    type RawGroup = [u8; Group::WIDTH];

    /// Loads `raw` into a `Group`.
    fn load(raw: RawGroup) -> Group {
        // SAFETY:
        // - `raw.len() == Group::WIDTH`.
        unsafe { Group::load(raw.as_ptr()) }
    }

    /// Writes `group` back out as raw bytes.
    fn store(group: Group) -> RawGroup {
        #[repr(align(16))]
        struct Aligned(RawGroup);

        // NOTE(review): assumes `store_aligned` needs at most
        // `align_of::<Group>()` alignment -- confirm against its contract.
        assert!(core::mem::align_of::<Group>() <= core::mem::align_of::<Aligned>());

        let mut result = Aligned(RawGroup::default());

        // SAFETY:
        // - `result.0.len() == Group::WIDTH`.
        // - `result` is 16-byte aligned, checked above to be enough for `Group`.
        unsafe { group.store_aligned(result.0.as_mut_ptr()) };

        result.0
    }

    /// Builds a group holding `even` at even indices and `odd` at odd indices.
    fn alternating(even: u8, odd: u8) -> Group {
        let mut raw = RawGroup::default();

        for (i, slot) in raw.iter_mut().enumerate() {
            *slot = if i % 2 == 0 { even } else { odd };
        }

        load(raw)
    }

    /// Builds a group repeating `EMPTY`, full (`2`), `DELETED`, full (`255`).
    ///
    /// Special bytes land on even indices and full bytes on odd indices; the
    /// two full values sit on either side of zero when read as `i8`.
    fn mixed() -> Group {
        let pattern = [EMPTY, 2, DELETED, 255];
        let mut raw = RawGroup::default();

        for (i, slot) in raw.iter_mut().enumerate() {
            *slot = pattern[i % pattern.len()];
        }

        load(raw)
    }

    #[test]
    fn test_match_byte() {
        let group = alternating(EMPTY, 0x44);

        let matched: Vec<_> = group.match_byte(0x44).into_iter().collect();

        assert_eq!(Group::WIDTH / 2, matched.len(), "{matched:?}");
        assert!(matched.iter().all(|i| *i % 2 != 0), "{matched:?}");
    }

    #[test]
    fn test_match_empty() {
        let group = alternating(EMPTY, DELETED);

        let empty: Vec<_> = group.match_empty().into_iter().collect();

        assert_eq!(Group::WIDTH / 2, empty.len(), "{empty:?}");
        assert!(empty.iter().all(|i| *i % 2 == 0), "{empty:?}");
    }

    #[test]
    fn test_match_empty_or_deleted() {
        let group = mixed();

        let empty_or_deleted: Vec<_> = group.match_empty_or_deleted().into_iter().collect();

        assert_eq!(
            Group::WIDTH / 2,
            empty_or_deleted.len(),
            "{empty_or_deleted:?}"
        );
        assert!(
            empty_or_deleted.iter().all(|i| *i % 2 == 0),
            "{empty_or_deleted:?}"
        );
    }

    #[test]
    fn test_match_full() {
        let group = mixed();

        let full: Vec<_> = group.match_full().into_iter().collect();

        assert_eq!(Group::WIDTH / 2, full.len(), "{full:?}");
        assert!(full.iter().all(|i| *i % 2 != 0), "{full:?}");
    }

    #[test]
    fn test_convert_special_to_empty_and_full_to_deleted() {
        let converted = mixed().convert_special_to_empty_and_full_to_deleted();

        // Check the raw bytes directly, independently of the match_* helpers.
        let stored = store(converted);

        for (i, byte) in stored.iter().enumerate() {
            let expected = if i % 2 == 0 { EMPTY } else { DELETED };

            assert_eq!(expected, *byte, "index {i} of {stored:?}");
        }

        let empty: Vec<_> = converted.match_empty().into_iter().collect();

        assert_eq!(Group::WIDTH / 2, empty.len(), "{empty:?}");
        assert!(empty.iter().all(|i| *i % 2 == 0), "{empty:?}");

        let deleted: Vec<_> = converted.match_byte(DELETED).into_iter().collect();

        assert_eq!(Group::WIDTH / 2, deleted.len(), "{deleted:?}");
        assert!(deleted.iter().all(|i| *i % 2 != 0), "{deleted:?}");
    }
}
4761+
45654762
#[cfg(test)]
45664763
mod test_map {
45674764
use super::*;

src/raw/sse2.rs

+34-18
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use super::bitmask::BitMask;
2-
use super::EMPTY;
2+
use super::{DELETED, EMPTY};
33
use core::mem;
44
use core::num::NonZeroU16;
55

@@ -102,6 +102,9 @@ impl Group {
102102
/// `EMPTY` or `DELETED`.
103103
#[inline]
104104
pub(crate) fn match_empty_or_deleted(self) -> BitMask {
105+
debug_assert_eq!(127, EMPTY);
106+
debug_assert_eq!(126, DELETED);
107+
105108
#[allow(
106109
// byte: i32 as u16
107110
// note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the
@@ -110,15 +113,30 @@ impl Group {
110113
clippy::cast_possible_truncation
111114
)]
112115
unsafe {
113-
// A byte is EMPTY or DELETED iff the high bit is set
114-
BitMask(x86::_mm_movemask_epi8(self.0) as u16)
116+
// A byte is EMPTY or DELETED iff it is greater than or equal to DELETED.
117+
let is_special = x86::_mm_cmpgt_epi8(self.0, x86::_mm_set1_epi8(DELETED as i8 - 1));
118+
BitMask(x86::_mm_movemask_epi8(is_special) as u16)
115119
}
116120
}
117121

118122
/// Returns a `BitMask` indicating all bytes in the group which are full.
119123
#[inline]
120124
pub(crate) fn match_full(&self) -> BitMask {
121-
self.match_empty_or_deleted().invert()
125+
debug_assert_eq!(127, EMPTY);
126+
debug_assert_eq!(126, DELETED);
127+
128+
#[allow(
129+
// byte: i32 as u16
130+
// note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the
131+
// upper 16-bits of the i32 are zeroed:
132+
clippy::cast_sign_loss,
133+
clippy::cast_possible_truncation
134+
)]
135+
unsafe {
136+
// A byte is full iff it is strictly less than DELETED.
137+
let is_full = x86::_mm_cmplt_epi8(self.0, x86::_mm_set1_epi8(DELETED as i8));
138+
BitMask(x86::_mm_movemask_epi8(is_full) as u16)
139+
}
122140
}
123141

124142
/// Performs the following transformation on all bytes in the group:
@@ -127,22 +145,20 @@ impl Group {
127145
/// - `FULL => DELETED`
128146
#[inline]
129147
pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self {
130-
// Map high_bit = 1 (EMPTY or DELETED) to 1111_1111
131-
// and high_bit = 0 (FULL) to 1000_0000
132-
//
133-
// Here's this logic expanded to concrete values:
134-
// let special = 0 > byte = 1111_1111 (true) or 0000_0000 (false)
135-
// 1111_1111 | 1000_0000 = 1111_1111
136-
// 0000_0000 | 1000_0000 = 1000_0000
137-
#[allow(
138-
clippy::cast_possible_wrap, // byte: 0x80_u8 as i8
139-
)]
148+
debug_assert_eq!(127, EMPTY);
149+
debug_assert_eq!(126, DELETED);
150+
151+
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
140152
unsafe {
141-
let zero = x86::_mm_setzero_si128();
142-
let special = x86::_mm_cmpgt_epi8(zero, self.0);
153+
let empty = x86::_mm_set1_epi8(EMPTY as i8);
154+
let deleted = x86::_mm_set1_epi8(DELETED as i8);
155+
156+
let is_full = x86::_mm_cmplt_epi8(self.0, deleted);
157+
let is_special = x86::_mm_cmpeq_epi8(is_full, x86::_mm_set1_epi8(0));
158+
143159
Group(x86::_mm_or_si128(
144-
special,
145-
x86::_mm_set1_epi8(0x80_u8 as i8),
160+
x86::_mm_and_si128(is_full, deleted),
161+
x86::_mm_and_si128(is_special, empty),
146162
))
147163
}
148164
}

0 commit comments

Comments
 (0)