Skip to content

Commit fbb36b6

Browse files
committed
improve speed of view take kernel
1 parent 2905ce6 commit fbb36b6

File tree

3 files changed

+78
-5
lines changed

3 files changed

+78
-5
lines changed

arrow-select/src/take.rs

+12-5
Original file line numberDiff line numberDiff line change
@@ -485,13 +485,20 @@ fn take_byte_view<T: ByteViewType, IndexType: ArrowPrimitiveType>(
485485
array: &GenericByteViewArray<T>,
486486
indices: &PrimitiveArray<IndexType>,
487487
) -> Result<GenericByteViewArray<T>, ArrowError> {
488+
let data_len = indices.len();
489+
488490
let new_views = take_native(array.views(), indices);
489491
let new_nulls = take_nulls(array.nulls(), indices);
490-
Ok(GenericByteViewArray::new(
491-
new_views,
492-
array.data_buffers().to_vec(),
493-
new_nulls,
494-
))
492+
493+
let array_data = ArrayData::builder(T::DATA_TYPE)
494+
.len(data_len)
495+
.add_buffer(new_views.into_inner())
496+
.add_buffers(array.data_buffers().to_vec())
497+
.nulls(new_nulls);
498+
499+
let array_data = unsafe { array_data.build_unchecked() };
500+
501+
Ok(GenericByteViewArray::from(array_data))
495502
}
496503

497504
/// `take` implementation for list arrays

arrow/benches/take_kernels.rs

+34
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,40 @@ fn add_benchmark(c: &mut Criterion) {
149149
b.iter(|| bench_take(&values, &indices))
150150
});
151151

152+
let values = create_string_view_array(512, 0.0);
153+
let indices = create_random_index(512, 0.0);
154+
c.bench_function("take stringview 512", |b| b.iter(|| bench_take(&values, &indices)));
155+
156+
let values = create_string_view_array(1024, 0.0);
157+
let indices = create_random_index(1024, 0.0);
158+
c.bench_function("take stringview 1024", |b| {
159+
b.iter(|| bench_take(&values, &indices))
160+
});
161+
162+
let values = create_string_view_array(512, 0.0);
163+
let indices = create_random_index(512, 0.5);
164+
c.bench_function("take stringview null indices 512", |b| {
165+
b.iter(|| bench_take(&values, &indices))
166+
});
167+
168+
let values = create_string_view_array(1024, 0.0);
169+
let indices = create_random_index(1024, 0.5);
170+
c.bench_function("take stringview null indices 1024", |b| {
171+
b.iter(|| bench_take(&values, &indices))
172+
});
173+
174+
let values = create_string_view_array(1024, 0.5);
175+
let indices = create_random_index(1024, 0.0);
176+
c.bench_function("take stringview null values 1024", |b| {
177+
b.iter(|| bench_take(&values, &indices))
178+
});
179+
180+
let values = create_string_view_array(1024, 0.5);
181+
let indices = create_random_index(1024, 0.5);
182+
c.bench_function("take stringview null values null indices 1024", |b| {
183+
b.iter(|| bench_take(&values, &indices))
184+
});
185+
152186
let values = create_primitive_run_array::<Int32Type, Int32Type>(1024, 512);
153187
let indices = create_random_index(1024, 0.0);
154188
c.bench_function(

arrow/src/util/bench_util.rs

+32
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,38 @@ pub fn create_string_array_with_len<Offset: OffsetSizeTrait>(
160160
.collect()
161161
}
162162

163+
164+
/// Creates a random (but fixed-seeded) string view array of a given size and null density.
165+
///
166+
/// See `create_string_array` above for more details.
167+
pub fn create_string_view_array(
168+
size: usize,
169+
null_density: f32,
170+
) -> StringViewArray {
171+
create_string_view_array_with_max_len(size, null_density, 400)
172+
}
173+
174+
/// Creates a random (but fixed-seeded) string view array of a given size and null density, with each string's length drawn at random from `0..max_str_len`
175+
fn create_string_view_array_with_max_len(
176+
size: usize,
177+
null_density: f32,
178+
max_str_len: usize,
179+
) -> StringViewArray {
180+
let rng = &mut seedable_rng();
181+
(0..size)
182+
.map(|_| {
183+
if rng.gen::<f32>() < null_density {
184+
None
185+
} else {
186+
let str_len = rng.gen_range(0..max_str_len);
187+
let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
188+
let value = String::from_utf8(value).unwrap();
189+
Some(value)
190+
}
191+
})
192+
.collect()
193+
}
194+
163195
/// Creates a random (but fixed-seeded) array of a given size, null density and length
164196
pub fn create_string_view_array_with_len(
165197
size: usize,

0 commit comments

Comments
 (0)