Skip to content

Commit f88921c

Browse files
authored
Add benchmarks for FromIter (PrimitiveArray and BooleanArray) (#8525)
# Which issue does this PR close?

- Relates to #8505.
- I want to detect any performance regressions in `BooleanArray::from_iter`.

# Rationale for this change

Add microbenchmarks for observing the performance of `XYZArray::from_iter`.

On my machine, executing the benchmarks back to back results in deviations within 1%.

```
Int64Array::from_iter   time:   [14.292 µs 14.297 µs 14.303 µs]
                        change: [-0.0049% +0.1290% +0.2631%] (p = 0.06 > 0.05)
                        No change in performance detected.
Found 26 outliers among 100 measurements (26.00%)
  1 (1.00%) low severe
  3 (3.00%) low mild
  9 (9.00%) high mild
  13 (13.00%) high severe

Int64Array::from_trusted_len_iter
                        time:   [6.7355 µs 6.7472 µs 6.7628 µs]
                        change: [+0.0215% +0.1868% +0.3739%] (p = 0.03 < 0.05)
                        Change within noise threshold.
Found 11 outliers among 100 measurements (11.00%)
  4 (4.00%) high mild
  7 (7.00%) high severe

BooleanArray::from_iter time:   [7.3389 µs 7.3596 µs 7.3861 µs]
                        change: [-1.3820% -0.8065% -0.2803%] (p = 0.00 < 0.05)
                        Change within noise threshold.
Found 16 outliers among 100 measurements (16.00%)
  9 (9.00%) high mild
  7 (7.00%) high severe
```

# What changes are included in this PR?

Only benchmarks.

# Are these changes tested?

Functionality is tested in the implementation file.

# Are there any user-facing changes?

None.
1 parent 4b62c80 commit f88921c

File tree

2 files changed

+41
-4
lines changed

2 files changed

+41
-4
lines changed

arrow/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ harness = false
120120
required-features = ["test_utils"]
121121

122122
[[bench]]
123-
name = "array_from_vec"
123+
name = "array_from"
124124
harness = false
125125

126126
[[bench]]

arrow/benches/array_from_vec.rs renamed to arrow/benches/array_from.rs

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ extern crate arrow;
2525
use arrow::array::*;
2626
use arrow_buffer::i256;
2727
use rand::Rng;
28+
use std::iter::repeat_n;
2829
use std::{hint, sync::Arc};
2930

3031
fn array_from_vec(n: usize) {
@@ -117,7 +118,7 @@ fn decimal256_array_from_vec(array: &[Option<i256>]) {
117118
);
118119
}
119120

120-
fn decimal_benchmark(c: &mut Criterion) {
121+
fn array_from_vec_decimal_benchmark(c: &mut Criterion) {
121122
// bench decimal32 array
122123
// create option<i32> array
123124
let size: usize = 1 << 15;
@@ -170,7 +171,7 @@ fn decimal_benchmark(c: &mut Criterion) {
170171
});
171172
}
172173

173-
fn criterion_benchmark(c: &mut Criterion) {
174+
fn array_from_vec_benchmark(c: &mut Criterion) {
174175
c.bench_function("array_from_vec 128", |b| b.iter(|| array_from_vec(128)));
175176
c.bench_function("array_from_vec 256", |b| b.iter(|| array_from_vec(256)));
176177
c.bench_function("array_from_vec 512", |b| b.iter(|| array_from_vec(512)));
@@ -206,5 +207,41 @@ fn criterion_benchmark(c: &mut Criterion) {
206207
});
207208
}
208209

209-
criterion_group!(benches, criterion_benchmark, decimal_benchmark);
210+
/// Builds a vector of `len` optional copies of `item` to use as benchmark input.
///
/// Every slot whose index is a multiple of three (0, 3, 6, ...) holds `None`;
/// all other slots hold `Some(item)`. The finished vector is passed through
/// [`hint::black_box`] before being returned, as a hint to the optimizer not
/// to make assumptions about its contents.
fn gen_option_vector<TItem: Copy>(item: TItem, len: usize) -> Vec<Option<TItem>> {
    let mut values = Vec::with_capacity(len);
    for (position, value) in repeat_n(item, len).enumerate() {
        let is_null_slot = position % 3 == 0;
        values.push((!is_null_slot).then_some(value));
    }
    hint::black_box(values)
}
218+
219+
fn from_iter_benchmark(c: &mut Criterion) {
220+
const ITER_LEN: usize = 16_384;
221+
222+
// All ArrowPrimitiveType use the same implementation
223+
c.bench_function("Int64Array::from_iter", |b| {
224+
let values = gen_option_vector(1, ITER_LEN);
225+
b.iter(|| hint::black_box(Int64Array::from_iter(values.iter())));
226+
});
227+
c.bench_function("Int64Array::from_trusted_len_iter", |b| {
228+
let values = gen_option_vector(1, ITER_LEN);
229+
b.iter(|| unsafe {
230+
// SAFETY: values.iter() is a TrustedLenIterator
231+
hint::black_box(Int64Array::from_trusted_len_iter(values.iter()))
232+
});
233+
});
234+
235+
c.bench_function("BooleanArray::from_iter", |b| {
236+
let values = gen_option_vector(true, ITER_LEN);
237+
b.iter(|| hint::black_box(BooleanArray::from_iter(values.iter())));
238+
});
239+
}
240+
241+
// Benchmark group `benches`: the two `array_from_vec` suites followed by the
// `from_iter` suite, run with Criterion's default configuration.
criterion_group! {
    name = benches;
    config = Criterion::default();
    targets =
        array_from_vec_benchmark,
        array_from_vec_decimal_benchmark,
        from_iter_benchmark
}
210247
criterion_main!(benches);

0 commit comments

Comments
 (0)