Skip to content

Commit 61ed56b

Browse files
committed
WIP Switch to a full bitwidth h2
* Changes: - Use all values of h2, not just 130 of them. - Convert the SSE2 implementation for benchmarking. * Motivation: Using 256 values instead of 130 could theoretically lower the number of false-positive residual matches by close to 50%. On the other hand, it makes h2 slightly more complicated to compute, and possibly to operate on.
1 parent 3741813 commit 61ed56b

File tree

2 files changed

+239
-28
lines changed

2 files changed

+239
-28
lines changed

src/raw/mod.rs

+200-10
Original file line numberDiff line numberDiff line change
@@ -105,28 +105,29 @@ trait SizedTypeProperties: Sized {
105105
// Blanket impl: every sized `T` implements `SizedTypeProperties`; the impl
// body is empty, so only the trait's provided items are used.
impl<T> SizedTypeProperties for T {}
106106

107107
/// Control byte value for an empty bucket.
///
/// The new value is 127, i.e. `i8::MAX` when the byte is reinterpreted as
/// `i8` (the old value was `0xFF`, with the top bit set).
108-
const EMPTY: u8 = 0b1111_1111;
108+
const EMPTY: u8 = 0b0111_1111;
109109

110110
/// Control byte value for a deleted bucket.
///
/// The new value is 126, i.e. `i8::MAX - 1` when the byte is reinterpreted as
/// `i8` (the old value was `0x80`, with only the top bit set).
111-
const DELETED: u8 = 0b1000_0000;
111+
const DELETED: u8 = 0b0111_1110;
112112

113113
/// Checks whether a control byte represents a full bucket.
///
/// With the new encoding a byte is full iff, reinterpreted as `i8`, it is
/// strictly less than `DELETED`; the former "top bit is clear" test only
/// describes the removed line.
114114
#[inline]
115115
fn is_full(ctrl: u8) -> bool {
116-
ctrl & 0x80 == 0
116+
(ctrl as i8) < (DELETED as i8)
117117
}
118118

119119
/// Checks whether a control byte represents a special value.
///
/// With the new encoding a byte is special iff, reinterpreted as `i8`, it is
/// greater than or equal to `DELETED`; the former "top bit is set" test only
/// describes the removed line.
120120
#[inline]
121121
fn is_special(ctrl: u8) -> bool {
122-
ctrl & 0x80 != 0
122+
(ctrl as i8) >= (DELETED as i8)
123123
}
124124

125125
/// Checks whether a special control value is EMPTY.
///
/// The new code compares the whole byte against `EMPTY`; the single-bit test
/// only describes the removed line. In debug builds the argument is asserted
/// to be a special value.
126126
#[inline]
127127
fn special_is_empty(ctrl: u8) -> bool {
128128
debug_assert!(is_special(ctrl));
129-
ctrl & 0x01 != 0
129+
130+
ctrl == EMPTY
130131
}
131132

132133
/// Primary hash function, used to select the initial bucket to probe from.
@@ -137,23 +138,46 @@ fn h1(hash: u64) -> usize {
137138
hash as usize
138139
}
139140

140-
// Constant for h2 function that grabing the top 7 bits of the hash.
141+
// Constant used by the h2 function when grabbing the top 8 bits of the hash:
// the smaller of `size_of::<usize>()` and `size_of::<u64>()`, in bytes.
141142
const MIN_HASH_LEN: usize = if mem::size_of::<usize>() < mem::size_of::<u64>() {
142143
mem::size_of::<usize>()
143144
} else {
144145
mem::size_of::<u64>()
145146
};
146147

147-
/// Secondary hash function, saved in the low 7 bits of the control byte.
148+
/// Secondary hash function, saved in the control byte.
148149
#[inline]
149150
#[allow(clippy::cast_possible_truncation)]
150151
fn h2(hash: u64) -> u8 {
151-
// Grab the top 7 bits of the hash. While the hash is normally a full 64-bit
152+
const fn compute_control() -> [u8; 256] {
153+
let mut result = [0; 256];
154+
155+
let mut i = 0;
156+
157+
while i < 256 {
158+
result[i] = i as u8;
159+
160+
i += 1;
161+
}
162+
163+
// Avoid overlap with special values.
164+
result[EMPTY as usize] += 8;
165+
result[DELETED as usize] += 8;
166+
167+
result
168+
}
169+
170+
#[rustfmt::skip]
171+
const CONTROL: [u8; 256] = compute_control();
172+
173+
// Grab the top 8 bits of the hash. While the hash is normally a full 64-bit
152174
// value, some hash functions (such as FxHash) produce a usize result
153175
// instead, which means that the top 32 bits are 0 on 32-bit platforms.
154176
// So we use MIN_HASH_LEN constant to handle this.
155-
let top7 = hash >> (MIN_HASH_LEN * 8 - 7);
156-
(top7 & 0x7f) as u8 // truncation
177+
let top8 = hash >> (MIN_HASH_LEN * 8 - 7);
178+
179+
// Lookup matching control byte, avoid overlap with special control.
180+
CONTROL[top8 as usize]
157181
}
158182

159183
/// Probe sequence based on triangular numbers, which is guaranteed (since our
@@ -4562,6 +4586,172 @@ impl<T, A: Allocator> RawExtractIf<'_, T, A> {
45624586
}
45634587
}
45644588

4589+
/// Tests for the `Group` primitives under the full-width control encoding.
#[cfg(test)]
mod test_group {
    use super::*;
    use ::alloc::vec::Vec;

    type RawGroup = [u8; Group::WIDTH];

    /// Loads the given control bytes into a `Group`.
    fn load(raw: RawGroup) -> Group {
        // SAFETY:
        // - `raw.len() == Group::WIDTH`.
        unsafe { Group::load(raw.as_ptr()) }
    }

    /// Copies the control bytes of `group` back out into a plain array.
    fn store(group: Group) -> RawGroup {
        #[repr(align(16))]
        struct Aligned(RawGroup);

        let mut result = Aligned(RawGroup::default());

        // SAFETY:
        // - `result.0.len() == Group::WIDTH`.
        // - `result` is 16-byte aligned through `#[repr(align(16))]`.
        unsafe { group.store_aligned(result.0.as_mut_ptr()) };

        result.0
    }

    /// Loads a group whose bytes repeat `pattern` from index 0 onwards.
    fn load_cycle(pattern: &[u8]) -> Group {
        let mut raw = RawGroup::default();

        for (slot, value) in raw.iter_mut().zip(pattern.iter().cycle()) {
            *slot = *value;
        }

        load(raw)
    }

    /// Loads the fixture shared by several tests: special bytes at even
    /// indexes, full bytes (one small, one with the top bit set) at odd ones.
    fn load_mixed() -> Group {
        load_cycle(&[EMPTY, 2, DELETED, 255])
    }

    #[test]
    fn test_match_byte() {
        let group = load_cycle(&[EMPTY, 0x44]);

        let matched: Vec<_> = group.match_byte(0x44).into_iter().collect();

        assert_eq!(Group::WIDTH / 2, matched.len(), "{matched:?}");
        assert!(matched.iter().all(|i| *i % 2 != 0), "{matched:?}");
    }

    #[test]
    fn test_match_empty() {
        let group = load_cycle(&[EMPTY, DELETED]);

        let empty: Vec<_> = group.match_empty().into_iter().collect();

        assert_eq!(Group::WIDTH / 2, empty.len(), "{empty:?}");
        assert!(empty.iter().all(|i| *i % 2 == 0), "{empty:?}");
    }

    #[test]
    fn test_match_empty_or_deleted() {
        let group = load_mixed();

        let empty_or_deleted: Vec<_> = group.match_empty_or_deleted().into_iter().collect();

        assert_eq!(Group::WIDTH / 2, empty_or_deleted.len(), "{empty_or_deleted:?}");
        assert!(empty_or_deleted.iter().all(|i| *i % 2 == 0), "{empty_or_deleted:?}");
    }

    #[test]
    fn test_match_full() {
        let group = load_mixed();

        let full: Vec<_> = group.match_full().into_iter().collect();

        assert_eq!(Group::WIDTH / 2, full.len(), "{full:?}");
        assert!(full.iter().all(|i| *i % 2 != 0), "{full:?}");
    }

    #[test]
    fn test_convert_special_to_empty_and_full_to_deleted() {
        let group = load_mixed();

        let converted = group.convert_special_to_empty_and_full_to_deleted();

        // Check the raw bytes directly instead of merely printing them:
        // special (even) slots become EMPTY, full (odd) slots become DELETED.
        let bytes = store(converted);

        for (i, byte) in bytes.iter().enumerate() {
            let expected = if i % 2 == 0 { EMPTY } else { DELETED };

            assert_eq!(expected, *byte, "index {i} of {bytes:?}");
        }

        let empty: Vec<_> = converted.match_empty().into_iter().collect();

        assert_eq!(Group::WIDTH / 2, empty.len(), "{empty:?}");
        assert!(empty.iter().all(|i| *i % 2 == 0), "{empty:?}");

        let deleted: Vec<_> = converted.match_byte(DELETED).into_iter().collect();

        assert_eq!(Group::WIDTH / 2, deleted.len(), "{deleted:?}");
        assert!(deleted.iter().all(|i| *i % 2 != 0), "{deleted:?}");
    }
}
4754+
45654755
#[cfg(test)]
45664756
mod test_map {
45674757
use super::*;

src/raw/sse2.rs

+39-18
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use super::bitmask::BitMask;
2-
use super::EMPTY;
2+
use super::{EMPTY, DELETED};
33
use core::mem;
44
use core::num::NonZeroU16;
55

@@ -102,6 +102,9 @@ impl Group {
102102
/// `EMPTY` or `DELETED`.
103103
#[inline]
104104
pub(crate) fn match_empty_or_deleted(self) -> BitMask {
105+
debug_assert_eq!(127, EMPTY);
106+
debug_assert_eq!(126, DELETED);
107+
105108
#[allow(
106109
// byte: i32 as u16
107110
// note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the
@@ -110,15 +113,30 @@ impl Group {
110113
clippy::cast_possible_truncation
111114
)]
112115
unsafe {
113-
// A byte is EMPTY or DELETED iff the high bit is set
114-
BitMask(x86::_mm_movemask_epi8(self.0) as u16)
116+
// A byte is EMPTY or DELETED iff, compared as a signed byte, it is greater than or equal to DELETED.
117+
let is_special = x86::_mm_cmpgt_epi8(self.0, x86::_mm_set1_epi8(DELETED as i8 - 1));
118+
BitMask(x86::_mm_movemask_epi8(is_special) as u16)
115119
}
116120
}
117121

118122
/// Returns a `BitMask` indicating all bytes in the group which are full.
///
/// A byte counts as full when, compared as a signed byte, it is strictly less
/// than `DELETED`.
119123
#[inline]
120124
pub(crate) fn match_full(&self) -> BitMask {
121-
self.match_empty_or_deleted().invert()
125+
// The special values must be the two largest signed bytes for the
// `< DELETED` comparison below to separate full bytes from special ones.
debug_assert_eq!(127, EMPTY);
126+
debug_assert_eq!(126, DELETED);
127+
128+
#[allow(
129+
// byte: i32 as u16
130+
// note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the
131+
// upper 16-bits of the i32 are zeroed:
132+
clippy::cast_sign_loss,
133+
clippy::cast_possible_truncation
134+
)]
135+
// NOTE(review): presumably sound because this module is only compiled when
// SSE2 is available; confirm against the cfg gating of this file.
unsafe {
136+
// A byte is full iff it is strictly less than DELETED.
// `_mm_cmplt_epi8` is a signed per-byte comparison; `_mm_movemask_epi8`
// then gathers the top bit of every lane into one mask bit per byte.
137+
let is_full = x86::_mm_cmplt_epi8(self.0, x86::_mm_set1_epi8(DELETED as i8));
138+
BitMask(x86::_mm_movemask_epi8(is_full) as u16)
139+
}
122140
}
123141

124142
/// Performs the following transformation on all bytes in the group:
@@ -127,23 +145,26 @@ impl Group {
127145
/// - `FULL => DELETED`
128146
#[inline]
129147
pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self {
130-
// Map high_bit = 1 (EMPTY or DELETED) to 1111_1111
131-
// and high_bit = 0 (FULL) to 1000_0000
132-
//
133-
// Here's this logic expanded to concrete values:
134-
// let special = 0 > byte = 1111_1111 (true) or 0000_0000 (false)
135-
// 1111_1111 | 1000_0000 = 1111_1111
136-
// 0000_0000 | 1000_0000 = 1000_0000
148+
// The special values must be the two largest signed bytes for the
// `< DELETED` comparison below to separate full bytes from special ones.
debug_assert_eq!(127, EMPTY);
149+
debug_assert_eq!(126, DELETED);
150+
137151
#[allow(
138-
clippy::cast_possible_wrap, // byte: 0x80_u8 as i8
152+
clippy::cast_sign_loss,
153+
clippy::cast_possible_truncation
139154
)]
140155
// NOTE(review): presumably sound because this module is only compiled when
// SSE2 is available; confirm against the cfg gating of this file.
unsafe {
141-
let zero = x86::_mm_setzero_si128();
142-
let special = x86::_mm_cmpgt_epi8(zero, self.0);
143-
Group(x86::_mm_or_si128(
144-
special,
145-
x86::_mm_set1_epi8(0x80_u8 as i8),
146-
))
156+
// Broadcast the two special values to every lane.
let empty = x86::_mm_set1_epi8(EMPTY as i8);
157+
let deleted = x86::_mm_set1_epi8(DELETED as i8);
158+
159+
// is_full: all-ones in lanes whose byte is (signed) below DELETED, else zero.
let is_full = x86::_mm_cmplt_epi8(self.0, deleted);
160+
// is_special: the complement, i.e. all-ones exactly where is_full is zero.
let is_special = x86::_mm_cmpeq_epi8(is_full, x86::_mm_set1_epi8(0));
161+
162+
// Select DELETED in the full lanes and EMPTY in the special lanes.
Group(
163+
x86::_mm_or_si128(
164+
x86::_mm_and_si128(is_full, deleted),
165+
x86::_mm_and_si128(is_special, empty)
166+
)
167+
)
147168
}
148169
}
149170
}

0 commit comments

Comments
 (0)