1
- use std:: arch:: x86_64:: { _pdep_u64, _pext_u64 } ;
1
+ use std:: arch:: x86_64:: _pdep_u64;
2
2
3
3
use crate :: virtual_bitrank:: VirtualBitRank ;
4
4
@@ -21,10 +21,10 @@ impl ParallelTrie {
21
21
// !("fill_bit_rank {prefix} {mask:064b} {level}");
22
22
for t in [ 0 , 64 << level] {
23
23
let mut sub_mask = 0 ;
24
- for i in 0 .. 64 {
25
- if ( 1 << i ) & mask = = 0 {
26
- continue ;
27
- }
24
+ let mut mask = mask ;
25
+ while mask ! = 0 {
26
+ let i = mask . trailing_zeros ( ) as usize ;
27
+ mask &= mask - 1 ;
28
28
if let Some ( & value) = slices[ i] . get ( 0 ) {
29
29
if ( value ^ prefix) >> ( level + 7 ) == 0 && value & ( 64 << level) == t {
30
30
if WRITE {
@@ -134,19 +134,46 @@ impl ParallelTrie {
134
134
}
135
135
} else {
136
136
let required_bits = word. count_ones ( ) ;
137
- if required_bits == 0 {
138
- return ;
137
+ let mut new_rank = self . data . rank ( rank) as usize + self . root_ones ;
138
+
139
+ if required_bits == 1 {
140
+ // TODO: simply switch to single bit recursion here instead of checking on every level again.
141
+ // NOTE: we cannot easily read a nibble here, since the rank is a multiple of 2 but not necessarily of 4.
142
+ let mut w = self . data . get_word ( rank) & 3 ;
143
+ while w != 0 {
144
+ let zeros = w. trailing_zeros ( ) ;
145
+ w &= w - 1 ;
146
+ self . recurse ( pos * 2 + zeros as usize , word, new_rank * 2 , level - 1 , v) ;
147
+ new_rank += 1 ;
148
+ }
149
+ } else if required_bits <= 32 {
150
+ let w = self . data . get_word ( rank) ;
151
+ let new_word = unsafe { _pdep_u64 ( w, word) } ;
152
+ if new_word != 0 {
153
+ self . recurse ( pos * 2 , new_word, new_rank * 2 , level - 1 , v) ;
154
+ new_rank += new_word. count_ones ( ) as usize ;
155
+ }
156
+
157
+ let w = w >> required_bits;
158
+ let new_word = unsafe { _pdep_u64 ( w, word) } ;
159
+ if new_word != 0 {
160
+ self . recurse ( pos * 2 + 1 , new_word, new_rank * 2 , level - 1 , v) ;
161
+ }
162
+ } else {
163
+ let w = self . data . get_word ( rank) ;
164
+ let new_word = unsafe { _pdep_u64 ( w, word) } ;
165
+ if new_word != 0 {
166
+ self . recurse ( pos * 2 , new_word, new_rank * 2 , level - 1 , v) ;
167
+ new_rank += new_word. count_ones ( ) as usize ;
168
+ }
169
+
170
+ let rank = rank + required_bits as usize ;
171
+ let w = self . data . get_word ( rank) ;
172
+ let new_word = unsafe { _pdep_u64 ( w, word) } ;
173
+ if new_word != 0 {
174
+ self . recurse ( pos * 2 + 1 , new_word, new_rank * 2 , level - 1 , v) ;
175
+ }
139
176
}
140
- let w = self . data . get_word ( rank) ;
141
- let new_word = unsafe { _pdep_u64 ( w, word) } ;
142
- let new_rank = self . data . rank ( rank) as usize + self . root_ones ;
143
- self . recurse ( pos * 2 , new_word, new_rank * 2 , level - 1 , v) ;
144
-
145
- let rank = rank + required_bits as usize ;
146
- let w = self . data . get_word ( rank) ;
147
- let new_word = unsafe { _pdep_u64 ( w, word) } ;
148
- let new_rank = self . data . rank ( rank) as usize + self . root_ones ;
149
- self . recurse ( pos * 2 + 1 , new_word, new_rank * 2 , level - 1 , v) ;
150
177
}
151
178
}
152
179
}
@@ -155,19 +182,15 @@ impl ParallelTrie {
155
182
mod tests {
156
183
use std:: time:: Instant ;
157
184
158
- use itertools:: { kmerge, Itertools } ;
159
185
use rand:: { thread_rng, Rng } ;
160
186
161
- use crate :: {
162
- parallel:: ParallelTrie , Intersection , Layout , QuarternaryTrie , TrieIterator , TrieTraversal ,
163
- Union ,
164
- } ;
187
+ use crate :: parallel:: ParallelTrie ;
165
188
166
189
#[ test]
167
- fn test_parallel ( ) {
190
+ fn test_parallel_large ( ) {
168
191
// let values = vec![3, 6, 7, 10, 90, 91, 120, 128, 129, 130, 231, 321, 999];
169
192
// let values = vec![3, 6, 7, 321, 999];
170
- let mut values: Vec < _ > = ( 0 ..10_000_000 )
193
+ let mut values: Vec < _ > = ( 0 ..100_000 )
171
194
. map ( |_| thread_rng ( ) . gen_range ( 0 ..100_000_000 ) )
172
195
. collect ( ) ;
173
196
values. sort ( ) ;
0 commit comments