@@ -74,13 +74,6 @@ pub struct NullState<V: SeenValues, O: GroupIndexOperations> {
74
74
}
75
75
76
76
impl < V : SeenValues , O : GroupIndexOperations > NullState < V , O > {
77
- pub fn new ( ) -> Self {
78
- Self {
79
- seen_values : V :: default ( ) ,
80
- _phantom : PhantomData { } ,
81
- }
82
- }
83
-
84
77
/// return the size of all buffers allocated by this null state, not including self
85
78
pub fn size ( & self ) -> usize {
86
79
// capacity is in bits, so convert to bytes
@@ -237,66 +230,10 @@ impl<V: SeenValues, O: GroupIndexOperations> NullState<V, O> {
237
230
pub fn build ( & mut self , emit_to : EmitTo ) -> NullBuffer {
238
231
self . seen_values . emit ( emit_to)
239
232
}
240
-
241
- /// Clone and build a single [`BooleanBuffer`] from `seen_values`,
242
- /// only used for testing.
243
- #[ cfg( test) ]
244
- fn build_cloned_seen_values ( & self ) -> BooleanBuffer {
245
- if let Some ( seen_values) =
246
- self . seen_values . as_any ( ) . downcast_ref :: < FlatSeenValues > ( )
247
- {
248
- seen_values. builder . finish_cloned ( )
249
- } else if let Some ( seen_values) = self
250
- . seen_values
251
- . as_any ( )
252
- . downcast_ref :: < BlockedSeenValues > ( )
253
- {
254
- let mut return_builder = BooleanBufferBuilder :: new ( 0 ) ;
255
- for builder in & seen_values. blocked_builders {
256
- for idx in 0 ..builder. len ( ) {
257
- return_builder. append ( builder. get_bit ( idx) ) ;
258
- }
259
- }
260
- return_builder. finish ( )
261
- } else {
262
- unreachable ! ( "unknown impl of SeenValues" )
263
- }
264
- }
265
-
266
- /// Emit a single [`NullBuffer`], only used for testing.
267
- #[ cfg( test) ]
268
- fn emit_all_in_once ( & mut self , total_num_groups : usize ) -> NullBuffer {
269
- if let Some ( seen_values) =
270
- self . seen_values . as_any ( ) . downcast_ref :: < FlatSeenValues > ( )
271
- {
272
- seen_values. emit ( EmitTo :: All )
273
- } else if let Some ( seen_values) = self
274
- . seen_values
275
- . as_any ( )
276
- . downcast_ref :: < BlockedSeenValues > ( )
277
- {
278
- let mut return_builder = BooleanBufferBuilder :: new ( 0 ) ;
279
- let num_blocks = seen_values. blocked_builders . len ( ) ;
280
- for _ in 0 ..num_blocks {
281
- let blocked_nulls = seen_values. emit ( EmitTo :: NextBlock ( true ) ) ;
282
- for bit in blocked_nulls. inner ( ) . iter ( ) {
283
- return_builder. append ( bit) ;
284
- }
285
- }
286
-
287
- NullBuffer :: new ( return_builder. finish ( ) )
288
- } else {
289
- unreachable ! ( "unknown impl of SeenValues" )
290
- }
291
- }
292
233
}
293
234
294
235
/// Structure marking whether each accumulating group has been seen at least once
295
236
pub trait SeenValues : Default + Debug + Send {
296
- fn as_any ( & self ) -> & dyn std:: any:: Any {
297
- self
298
- }
299
-
300
237
fn resize ( & mut self , total_num_groups : usize , default_value : bool ) ;
301
238
302
239
fn set_bit ( & mut self , block_id : u32 , block_offset : u64 , value : bool ) ;
@@ -401,6 +338,15 @@ pub struct BlockedSeenValues {
401
338
block_size : usize ,
402
339
}
403
340
341
+ impl BlockedSeenValues {
342
+ pub fn new ( block_size : usize ) -> Self {
343
+ Self {
344
+ blocked_builders : VecDeque :: new ( ) ,
345
+ block_size,
346
+ }
347
+ }
348
+ }
349
+
404
350
impl SeenValues for BlockedSeenValues {
405
351
fn resize ( & mut self , total_num_groups : usize , default_value : bool ) {
406
352
let block_size = self . block_size ;
@@ -471,7 +417,10 @@ impl SeenValues for BlockedSeenValues {
471
417
fn emit ( & mut self , emit_to : EmitTo ) -> NullBuffer {
472
418
assert ! ( matches!( emit_to, EmitTo :: NextBlock ( _) ) ) ;
473
419
474
- let mut block = self . blocked_builders . pop_front ( ) . expect ( "" ) ;
420
+ let mut block = self
421
+ . blocked_builders
422
+ . pop_front ( )
423
+ . expect ( "should not try to emit empty blocks" ) ;
475
424
let nulls = block. finish ( ) ;
476
425
477
426
NullBuffer :: new ( nulls)
@@ -485,9 +434,148 @@ impl SeenValues for BlockedSeenValues {
485
434
}
486
435
}
487
436
437
+ /// Adapter that dispatches at runtime between a [`FlatNullState`] and a [`BlockedNullState`].
438
+ /// Each method matches on the variant once per call (i.e. per batch), so the dispatch cost is acceptable.
439
+ pub enum NullStateAdapter {
440
+ Flat ( FlatNullState ) ,
441
+ Blocked ( BlockedNullState ) ,
442
+ }
443
+
444
+ impl NullStateAdapter {
445
+ pub fn new ( block_size : Option < usize > ) -> Self {
446
+ if let Some ( blk_size) = block_size {
447
+ Self :: Blocked ( BlockedNullState :: new ( blk_size) )
448
+ } else {
449
+ Self :: Flat ( FlatNullState :: new ( ) )
450
+ }
451
+ }
452
+
453
+ pub fn accumulate < T , F > (
454
+ & mut self ,
455
+ group_indices : & [ usize ] ,
456
+ values : & PrimitiveArray < T > ,
457
+ opt_filter : Option < & BooleanArray > ,
458
+ total_num_groups : usize ,
459
+ value_fn : F ,
460
+ ) where
461
+ T : ArrowPrimitiveType + Send ,
462
+ F : FnMut ( u32 , u64 , T :: Native ) + Send ,
463
+ {
464
+ match self {
465
+ NullStateAdapter :: Flat ( null_state) => null_state. accumulate (
466
+ group_indices,
467
+ values,
468
+ opt_filter,
469
+ total_num_groups,
470
+ value_fn,
471
+ ) ,
472
+ NullStateAdapter :: Blocked ( null_state) => null_state. accumulate (
473
+ group_indices,
474
+ values,
475
+ opt_filter,
476
+ total_num_groups,
477
+ value_fn,
478
+ ) ,
479
+ }
480
+ }
481
+
482
+ pub fn accumulate_boolean < F > (
483
+ & mut self ,
484
+ group_indices : & [ usize ] ,
485
+ values : & BooleanArray ,
486
+ opt_filter : Option < & BooleanArray > ,
487
+ total_num_groups : usize ,
488
+ value_fn : F ,
489
+ ) where
490
+ F : FnMut ( u32 , u64 , bool ) + Send ,
491
+ {
492
+ match self {
493
+ NullStateAdapter :: Flat ( null_state) => null_state. accumulate_boolean (
494
+ group_indices,
495
+ values,
496
+ opt_filter,
497
+ total_num_groups,
498
+ value_fn,
499
+ ) ,
500
+ NullStateAdapter :: Blocked ( null_state) => null_state. accumulate_boolean (
501
+ group_indices,
502
+ values,
503
+ opt_filter,
504
+ total_num_groups,
505
+ value_fn,
506
+ ) ,
507
+ }
508
+ }
509
+
510
+ pub fn build ( & mut self , emit_to : EmitTo ) -> NullBuffer {
511
+ match self {
512
+ NullStateAdapter :: Flat ( null_state) => null_state. build ( emit_to) ,
513
+ NullStateAdapter :: Blocked ( null_state) => null_state. build ( emit_to) ,
514
+ }
515
+ }
516
+
517
+ /// Clone and build a single [`BooleanBuffer`] from `seen_values`,
518
+ /// only used for testing.
519
+ #[ cfg( test) ]
520
+ fn build_cloned_seen_values ( & self ) -> BooleanBuffer {
521
+ match self {
522
+ NullStateAdapter :: Flat ( null_state) => {
523
+ null_state. seen_values . builder . finish_cloned ( )
524
+ }
525
+ NullStateAdapter :: Blocked ( null_state) => {
526
+ let mut return_builder = BooleanBufferBuilder :: new ( 0 ) ;
527
+ for builder in & null_state. seen_values . blocked_builders {
528
+ for idx in 0 ..builder. len ( ) {
529
+ return_builder. append ( builder. get_bit ( idx) ) ;
530
+ }
531
+ }
532
+ return_builder. finish ( )
533
+ }
534
+ }
535
+ }
536
+
537
+ #[ cfg( test) ]
538
+ fn build_all_in_once ( & mut self ) -> NullBuffer {
539
+ match self {
540
+ NullStateAdapter :: Flat ( null_state) => null_state. build ( EmitTo :: All ) ,
541
+ NullStateAdapter :: Blocked ( null_state) => {
542
+ let mut return_builder = BooleanBufferBuilder :: new ( 0 ) ;
543
+ let num_blocks = null_state. seen_values . blocked_builders . len ( ) ;
544
+ for _ in 0 ..num_blocks {
545
+ let blocked_nulls = null_state. build ( EmitTo :: NextBlock ( true ) ) ;
546
+ for bit in blocked_nulls. inner ( ) . iter ( ) {
547
+ return_builder. append ( bit) ;
548
+ }
549
+ }
550
+
551
+ NullBuffer :: new ( return_builder. finish ( ) )
552
+ }
553
+ }
554
+ }
555
+ }
556
+
488
557
pub type FlatNullState = NullState < FlatSeenValues , FlatGroupIndexOperations > ;
558
+
559
+ impl FlatNullState {
560
+ pub fn new ( ) -> Self {
561
+ Self {
562
+ seen_values : FlatSeenValues :: default ( ) ,
563
+ _phantom : PhantomData { } ,
564
+ }
565
+ }
566
+ }
567
+
489
568
pub type BlockedNullState = NullState < BlockedSeenValues , BlockedGroupIndexOperations > ;
490
569
570
+ impl BlockedNullState {
571
+ pub fn new ( block_size : usize ) -> Self {
572
+ Self {
573
+ seen_values : BlockedSeenValues :: new ( block_size) ,
574
+ _phantom : PhantomData { } ,
575
+ }
576
+ }
577
+ }
578
+
491
579
/// Invokes `value_fn(group_index, value)` for each non null, non
492
580
/// filtered value of `value`,
493
581
///
@@ -873,6 +961,7 @@ mod test {
873
961
values,
874
962
values_with_nulls,
875
963
filter,
964
+ block_size : None ,
876
965
}
877
966
. run ( )
878
967
}
@@ -953,6 +1042,7 @@ mod test {
953
1042
values,
954
1043
values_with_nulls,
955
1044
filter,
1045
+ block_size : None ,
956
1046
}
957
1047
}
958
1048
@@ -977,14 +1067,21 @@ mod test {
977
1067
let filter = & self . filter ;
978
1068
979
1069
// no null, no filters
980
- Self :: accumulate_test ( group_indices, & values_array, None , total_num_groups) ;
1070
+ Self :: accumulate_test (
1071
+ group_indices,
1072
+ & values_array,
1073
+ None ,
1074
+ total_num_groups,
1075
+ self . block_size ,
1076
+ ) ;
981
1077
982
1078
// nulls, no filters
983
1079
Self :: accumulate_test (
984
1080
group_indices,
985
1081
& values_with_nulls_array,
986
1082
None ,
987
1083
total_num_groups,
1084
+ self . block_size ,
988
1085
) ;
989
1086
990
1087
// no nulls, filters
@@ -993,6 +1090,7 @@ mod test {
993
1090
& values_array,
994
1091
Some ( filter) ,
995
1092
total_num_groups,
1093
+ self . block_size ,
996
1094
) ;
997
1095
998
1096
// nulls, filters
@@ -1001,6 +1099,7 @@ mod test {
1001
1099
& values_with_nulls_array,
1002
1100
Some ( filter) ,
1003
1101
total_num_groups,
1102
+ self . block_size ,
1004
1103
) ;
1005
1104
}
1006
1105
@@ -1012,12 +1111,14 @@ mod test {
1012
1111
values : & UInt32Array ,
1013
1112
opt_filter : Option < & BooleanArray > ,
1014
1113
total_num_groups : usize ,
1114
+ block_size : Option < usize > ,
1015
1115
) {
1016
1116
Self :: accumulate_values_test (
1017
1117
group_indices,
1018
1118
values,
1019
1119
opt_filter,
1020
1120
total_num_groups,
1121
+ block_size,
1021
1122
) ;
1022
1123
Self :: accumulate_indices_test ( group_indices, values. nulls ( ) , opt_filter) ;
1023
1124
@@ -1041,17 +1142,44 @@ mod test {
1041
1142
values : & UInt32Array ,
1042
1143
opt_filter : Option < & BooleanArray > ,
1043
1144
total_num_groups : usize ,
1145
+ block_size : Option < usize > ,
1044
1146
) {
1045
1147
let mut accumulated_values = vec ! [ ] ;
1046
- let mut null_state = FlatNullState :: new ( ) ;
1148
+ let ( mut null_state, block_size, acc_group_indices) = if let Some ( blk_size) =
1149
+ block_size
1150
+ {
1151
+ let acc_group_indices = group_indices
1152
+ . iter ( )
1153
+ . copied ( )
1154
+ . map ( |index| {
1155
+ let block_id = ( index / blk_size) as u32 ;
1156
+ let block_offset = ( index % blk_size) as u64 ;
1157
+ BlockedGroupIndexOperations :: pack_index ( block_id, block_offset)
1158
+ as usize
1159
+ } )
1160
+ . collect :: < Vec < _ > > ( ) ;
1161
+ (
1162
+ NullStateAdapter :: new ( Some ( blk_size) ) ,
1163
+ blk_size,
1164
+ acc_group_indices,
1165
+ )
1166
+ } else {
1167
+ (
1168
+ NullStateAdapter :: new ( None ) ,
1169
+ 0 ,
1170
+ group_indices. iter ( ) . copied ( ) . collect ( ) ,
1171
+ )
1172
+ } ;
1047
1173
1048
1174
null_state. accumulate (
1049
- group_indices ,
1175
+ & acc_group_indices ,
1050
1176
values,
1051
1177
opt_filter,
1052
1178
total_num_groups,
1053
- |_, group_index, value| {
1054
- accumulated_values. push ( ( group_index as usize , value) ) ;
1179
+ |block_id, block_offset, value| {
1180
+ let flatten_index =
1181
+ ( ( block_id as u64 * block_size as u64 ) + block_offset) as usize ;
1182
+ accumulated_values. push ( ( flatten_index as usize , value) ) ;
1055
1183
} ,
1056
1184
) ;
1057
1185
@@ -1087,13 +1215,13 @@ mod test {
1087
1215
1088
1216
assert_eq ! ( accumulated_values, expected_values,
1089
1217
"\n \n accumulated_values:{accumulated_values:#?}\n \n expected_values:{expected_values:#?}" ) ;
1090
- let seen_values = null_state. seen_values . builder . finish_cloned ( ) ;
1218
+ let seen_values = null_state. build_cloned_seen_values ( ) ;
1091
1219
mock. validate_seen_values ( & seen_values) ;
1092
1220
1093
1221
// Validate the final buffer (one value per group)
1094
1222
let expected_null_buffer = mock. expected_null_buffer ( total_num_groups) ;
1095
1223
1096
- let null_buffer = null_state. build ( EmitTo :: All ) ;
1224
+ let null_buffer = null_state. build_all_in_once ( ) ;
1097
1225
1098
1226
assert_eq ! ( null_buffer, expected_null_buffer) ;
1099
1227
}
0 commit comments