@@ -105,28 +105,29 @@ trait SizedTypeProperties: Sized {
105
105
impl < T > SizedTypeProperties for T { }
106
106
107
107
/// Control byte value for an empty bucket.
///
/// Reinterpreted as a signed byte this is `i8::MAX`, the largest possible
/// value, so it compares greater than every other control byte.
const EMPTY: u8 = 0b0111_1111;
109
109
110
110
/// Control byte value for a deleted bucket.
///
/// Reinterpreted as a signed byte this is `i8::MAX - 1`. `is_full` and
/// `is_special` compare control bytes against this value as signed bytes.
const DELETED: u8 = 0b0111_1110;
112
112
113
113
/// Checks whether a control byte represents a full bucket (top bit is clear).
114
114
#[ inline]
115
115
fn is_full ( ctrl : u8 ) -> bool {
116
- ctrl & 0x80 == 0
116
+ ( ctrl as i8 ) < ( DELETED as i8 )
117
117
}
118
118
119
119
/// Checks whether a control byte represents a special value (top bit is set).
120
120
#[ inline]
121
121
fn is_special ( ctrl : u8 ) -> bool {
122
- ctrl & 0x80 != 0
122
+ ( ctrl as i8 ) >= ( DELETED as i8 )
123
123
}
124
124
125
125
/// Checks whether a special control value is EMPTY (just check 1 bit).
126
126
#[ inline]
127
127
fn special_is_empty ( ctrl : u8 ) -> bool {
128
128
debug_assert ! ( is_special( ctrl) ) ;
129
- ctrl & 0x01 != 0
129
+
130
+ ctrl == EMPTY
130
131
}
131
132
132
133
/// Primary hash function, used to select the initial bucket to probe from.
@@ -137,23 +138,46 @@ fn h1(hash: u64) -> usize {
137
138
hash as usize
138
139
}
139
140
140
/// Size in bytes of the narrower of `usize` and `u64`.
///
/// `h2` uses this to locate the most significant bits of a hash whose
/// meaningful part may only be `usize` wide.
const MIN_HASH_LEN: usize = if mem::size_of::<usize>() < mem::size_of::<u64>() {
    mem::size_of::<usize>()
} else {
    mem::size_of::<u64>()
};
146
147
147
- /// Secondary hash function, saved in the low 7 bits of the control byte.
148
+ /// Secondary hash function, saved in the control byte.
148
149
#[ inline]
149
150
#[ allow( clippy:: cast_possible_truncation) ]
150
151
fn h2 ( hash : u64 ) -> u8 {
151
- // Grab the top 7 bits of the hash. While the hash is normally a full 64-bit
152
+ const fn compute_control ( ) -> [ u8 ; 256 ] {
153
+ let mut result = [ 0 ; 256 ] ;
154
+
155
+ let mut i = 0 ;
156
+
157
+ while i < 256 {
158
+ result[ i] = i as u8 ;
159
+
160
+ i += 1 ;
161
+ }
162
+
163
+ // Avoid overlap with special values.
164
+ result[ EMPTY as usize ] += 8 ;
165
+ result[ DELETED as usize ] += 8 ;
166
+
167
+ result
168
+ }
169
+
170
+ #[ rustfmt:: skip]
171
+ const CONTROL : [ u8 ; 256 ] = compute_control ( ) ;
172
+
173
+ // Grab the top 8 bits of the hash. While the hash is normally a full 64-bit
152
174
// value, some hash functions (such as FxHash) produce a usize result
153
175
// instead, which means that the top 32 bits are 0 on 32-bit platforms.
154
176
// So we use MIN_HASH_LEN constant to handle this.
155
- let top7 = hash >> ( MIN_HASH_LEN * 8 - 7 ) ;
156
- ( top7 & 0x7f ) as u8 // truncation
177
+ let top8 = hash >> ( MIN_HASH_LEN * 8 - 7 ) ;
178
+
179
+ // Lookup matching control byte, avoid overlap with special control.
180
+ CONTROL [ top8 as usize ]
157
181
}
158
182
159
183
/// Probe sequence based on triangular numbers, which is guaranteed (since our
@@ -4562,6 +4586,172 @@ impl<T, A: Allocator> RawExtractIf<'_, T, A> {
4562
4586
}
4563
4587
}
4564
4588
4589
/// Tests for the `Group` matching primitives against the control byte
/// encoding (`EMPTY`, `DELETED`, and full bytes on both sides of the sign bit).
#[cfg(test)]
mod test_group {
    use super::*;

    use ::alloc::vec::Vec;

    type RawGroup = [u8; Group::WIDTH];

    /// Loads a `Group` from a plain byte array.
    fn load(raw: RawGroup) -> Group {
        // SAFETY:
        // - `raw.len() == Group::WIDTH`.
        unsafe { Group::load(raw.as_ptr()) }
    }

    /// Copies the bytes of `group` back out into a plain byte array.
    fn store(group: Group) -> RawGroup {
        #[repr(align(16))]
        struct Aligned(RawGroup);

        let mut result = Aligned(RawGroup::default());

        // SAFETY:
        // - `result.0.len() == Group::WIDTH`.
        // - `result` is 16-byte aligned through `repr(align(16))`.
        //   NOTE(review): assumes `Group` never needs more than 16-byte
        //   alignment; confirm against every `Group` implementation.
        unsafe { group.store_aligned(result.0.as_mut_ptr()) };

        result.0
    }

    /// Builds a group that repeats `EMPTY`, full (`2`), `DELETED`, full (`255`),
    /// so special bytes sit at even indices and full bytes at odd indices.
    fn mixed_group() -> Group {
        let mut raw = RawGroup::default();

        for (i, slot) in raw.iter_mut().enumerate() {
            *slot = match i % 4 {
                0 => EMPTY,
                1 => 2,
                2 => DELETED,
                3 => 255,
                _ => unreachable!("i % 4 < 4"),
            };
        }

        load(raw)
    }

    #[test]
    fn test_match_byte() {
        let mut raw = RawGroup::default();

        for (i, slot) in raw.iter_mut().enumerate() {
            *slot = if i % 2 == 0 { EMPTY } else { 0x44 };
        }

        let group = load(raw);

        let matched: Vec<_> = group.match_byte(0x44).into_iter().collect();

        assert_eq!(Group::WIDTH / 2, matched.len(), "{matched:?}");
        assert!(matched.iter().all(|i| *i % 2 != 0), "{matched:?}");
    }

    #[test]
    fn test_match_empty() {
        let mut raw = RawGroup::default();

        for (i, slot) in raw.iter_mut().enumerate() {
            *slot = if i % 2 == 0 { EMPTY } else { DELETED };
        }

        let group = load(raw);

        let empty: Vec<_> = group.match_empty().into_iter().collect();

        assert_eq!(Group::WIDTH / 2, empty.len(), "{empty:?}");
        assert!(empty.iter().all(|i| *i % 2 == 0), "{empty:?}");
    }

    #[test]
    fn test_match_empty_or_deleted() {
        let group = mixed_group();

        let empty_or_deleted: Vec<_> = group.match_empty_or_deleted().into_iter().collect();

        assert_eq!(Group::WIDTH / 2, empty_or_deleted.len(), "{empty_or_deleted:?}");
        assert!(empty_or_deleted.iter().all(|i| *i % 2 == 0), "{empty_or_deleted:?}");
    }

    #[test]
    fn test_match_full() {
        let group = mixed_group();

        let full: Vec<_> = group.match_full().into_iter().collect();

        assert_eq!(Group::WIDTH / 2, full.len(), "{full:?}");
        assert!(full.iter().all(|i| *i % 2 != 0), "{full:?}");
    }

    #[test]
    fn test_convert_special_to_empty_and_full_to_deleted() {
        let group = mixed_group();

        let converted = group.convert_special_to_empty_and_full_to_deleted();

        // Special bytes (even indices) must become EMPTY and full bytes (odd
        // indices) must become DELETED, byte for byte.
        let mut expected = RawGroup::default();

        for (i, slot) in expected.iter_mut().enumerate() {
            *slot = if i % 2 == 0 { EMPTY } else { DELETED };
        }

        assert_eq!(expected, store(converted));

        let empty: Vec<_> = converted.match_empty().into_iter().collect();

        assert_eq!(Group::WIDTH / 2, empty.len(), "{empty:?}");
        assert!(empty.iter().all(|i| *i % 2 == 0), "{empty:?}");

        let deleted: Vec<_> = converted.match_byte(DELETED).into_iter().collect();

        assert_eq!(Group::WIDTH / 2, deleted.len(), "{deleted:?}");
        assert!(deleted.iter().all(|i| *i % 2 != 0), "{deleted:?}");
    }
}
4754
+
4565
4755
#[ cfg( test) ]
4566
4756
mod test_map {
4567
4757
use super :: * ;
0 commit comments