@@ -115,6 +115,9 @@ pub struct GroupValuesPrimitive<T: ArrowPrimitiveType> {
115
115
impl < T : ArrowPrimitiveType > GroupValuesPrimitive < T > {
116
116
pub fn new ( data_type : DataType ) -> Self {
117
117
assert ! ( PrimitiveArray :: <T >:: is_compatible( & data_type) ) ;
118
+
119
+ // As an optimization, we ensure the `single block` always exists
120
+ // in flat mode, which eliminates an expensive row-level emptiness check
118
121
let mut values = VecDeque :: new ( ) ;
119
122
values. push_back ( Vec :: new ( ) ) ;
120
123
@@ -245,74 +248,15 @@ where
245
248
// ===============================================
246
249
// Emitting in blocked mode
247
250
// ===============================================
248
- // TODO: we should consider if it is necessary to support indices modifying
249
- // in `EmitTo::NextBlock`. It is only used in spilling case, maybe we can
250
- // always emit all in blocked mode. So, we just need to clear the map rather
251
- // than doing expansive modification for each buck in it.
252
- EmitTo :: NextBlock ( true ) => {
251
+ EmitTo :: NextBlock => {
253
252
assert ! (
254
253
self . block_size. is_some( ) ,
255
254
"only support EmitTo::Next in blocked group values"
256
255
) ;
257
256
258
- // We only emit the first block(`block_id == 0`),
259
- // so erase the entries with `block_id == 0`, and decrease entries with `block_id > 0`
260
- self . map . retain ( |packed_idx| {
261
- let old_blk_id =
262
- BlockedGroupIndexOperations :: get_block_id ( * packed_idx) ;
263
- match old_blk_id. checked_sub ( 1 ) {
264
- // `block_id > 0`, shift `block_id` down
265
- Some ( new_blk_id) => {
266
- let blk_offset =
267
- BlockedGroupIndexOperations :: get_block_offset (
268
- * packed_idx,
269
- ) ;
270
- let new_packed_idx = BlockedGroupIndexOperations :: pack_index (
271
- new_blk_id as u32 ,
272
- blk_offset,
273
- ) ;
274
- * packed_idx = new_packed_idx;
275
-
276
- true
277
- }
278
-
279
- // `block_id == 0`, so remove from table
280
- None => false ,
281
- }
282
- } ) ;
283
-
284
- // Similar as `non-nulls`, if `block_id > 0` we decrease, and if `block_id == 0` we erase
285
- let null_block_pair_opt = self . null_group . map ( |packed_idx| {
286
- (
287
- BlockedGroupIndexOperations :: get_block_id ( packed_idx) ,
288
- BlockedGroupIndexOperations :: get_block_offset ( packed_idx) ,
289
- )
290
- } ) ;
291
- let null_idx = match null_block_pair_opt {
292
- Some ( ( blk_id, blk_offset) ) if blk_id > 0 => {
293
- let new_blk_id = blk_id - 1 ;
294
- let new_packed_idx = BlockedGroupIndexOperations :: pack_index (
295
- new_blk_id, blk_offset,
296
- ) ;
297
- self . null_group = Some ( new_packed_idx) ;
298
- None
299
- }
300
- Some ( ( _, blk_offset) ) => {
301
- self . null_group = None ;
302
- Some ( blk_offset as usize )
303
- }
304
- None => None ,
305
- } ;
306
-
307
- let emit_blk = self . values . pop_front ( ) . unwrap ( ) ;
308
- build_primitive ( emit_blk, null_idx)
309
- }
310
-
311
- EmitTo :: NextBlock ( false ) => {
312
- assert ! (
313
- self . block_size. is_some( ) ,
314
- "only support EmitTo::Next in blocked group values"
315
- ) ;
257
+ // Similar to `EmitTo::All`, we clear the old index info both
258
+ // in `map` and `null_group`
259
+ self . map . clear ( ) ;
316
260
317
261
let null_block_pair_opt = self . null_group . map ( |packed_idx| {
318
262
(
@@ -359,6 +303,25 @@ where
359
303
self . map . clear ( ) ;
360
304
self . map . shrink_to ( count, |_| 0 ) ; // hasher does not matter since the map is cleared
361
305
}
306
+
307
+ fn supports_blocked_groups ( & self ) -> bool {
308
+ true
309
+ }
310
+
311
+ fn alter_block_size ( & mut self , block_size : Option < usize > ) -> Result < ( ) > {
312
+ self . map . clear ( ) ;
313
+ self . values . clear ( ) ;
314
+ self . null_group = None ;
315
+ self . block_size = block_size;
316
+
317
+ // As mentioned above, we ensure the `single block` always exists
318
+ // in `flat mode`
319
+ if block_size. is_none ( ) {
320
+ self . values . push_back ( Vec :: new ( ) ) ;
321
+ }
322
+
323
+ Ok ( ( ) )
324
+ }
362
325
}
363
326
364
327
impl < T : ArrowPrimitiveType > GroupValuesPrimitive < T >
0 commit comments