|
| 1 | +// Licensed to the Apache Software Foundation (ASF) under one |
| 2 | +// or more contributor license agreements. See the NOTICE file |
| 3 | +// distributed with this work for additional information |
| 4 | +// regarding copyright ownership. The ASF licenses this file |
| 5 | +// to you under the Apache License, Version 2.0 (the |
| 6 | +// "License"); you may not use this file except in compliance |
| 7 | +// with the License. You may obtain a copy of the License at |
| 8 | +// |
| 9 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +// |
| 11 | +// Unless required by applicable law or agreed to in writing, |
| 12 | +// software distributed under the License is distributed on an |
| 13 | +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 14 | +// KIND, either express or implied. See the License for the |
| 15 | +// specific language governing permissions and limitations |
| 16 | +// under the License. |
| 17 | + |
| 18 | +//! Utilities for working with nulls |
| 19 | +
|
| 20 | +use arrow::array::{Array, ArrowNumericType, BooleanArray, PrimitiveArray}; |
| 21 | +use arrow::buffer::NullBuffer; |
| 22 | + |
| 23 | +/// Sets the validity mask for a `PrimitiveArray` to `nulls` |
| 24 | +/// replacing any existing null mask |
| 25 | +pub fn set_nulls<T: ArrowNumericType + Send>( |
| 26 | + array: PrimitiveArray<T>, |
| 27 | + nulls: Option<NullBuffer>, |
| 28 | +) -> PrimitiveArray<T> { |
| 29 | + let (dt, values, _old_nulls) = array.into_parts(); |
| 30 | + PrimitiveArray::<T>::new(values, nulls).with_data_type(dt) |
| 31 | +} |
| 32 | + |
| 33 | +/// Converts a `BooleanBuffer` representing a filter to a `NullBuffer. |
| 34 | +/// |
| 35 | +/// The `NullBuffer` is |
| 36 | +/// * `true` (representing valid) for values that were `true` in filter |
| 37 | +/// * `false` (representing null) for values that were `false` or `null` in filter |
| 38 | +fn filter_to_nulls(filter: &BooleanArray) -> Option<NullBuffer> { |
| 39 | + let (filter_bools, filter_nulls) = filter.clone().into_parts(); |
| 40 | + let filter_bools = NullBuffer::from(filter_bools); |
| 41 | + NullBuffer::union(Some(&filter_bools), filter_nulls.as_ref()) |
| 42 | +} |
| 43 | + |
| 44 | +/// Compute an output validity mask for an array that has been filtered |
| 45 | +/// |
| 46 | +/// This can be used to compute nulls for the output of |
| 47 | +/// [`GroupsAccumulator::convert_to_state`], which quickly applies an optional |
| 48 | +/// filter to the input rows by setting any filtered rows to NULL in the output. |
| 49 | +/// Subsequent applications of aggregate functions that ignore NULLs (most of |
| 50 | +/// them) will thus ignore the filtered rows as well. |
| 51 | +/// |
| 52 | +/// # Output element is `true` |
| 53 | +/// * A `true` in the output represents non null output for all values that were both: |
| 54 | +/// |
| 55 | +/// * `true` in any `opt_filter` (aka values that passed the filter) |
| 56 | +/// |
| 57 | +/// * `non null` in `input` |
| 58 | +/// |
| 59 | +/// # Output element is `false` |
| 60 | +/// * is false (null) for all values that were false in the filter or null in the input |
| 61 | +/// |
| 62 | +/// # Example |
| 63 | +/// |
| 64 | +/// ```text |
| 65 | +/// ┌─────┐ ┌─────┐ ┌─────┐ |
| 66 | +/// │true │ │NULL │ │NULL │ |
| 67 | +/// │true │ │ │true │ │true │ |
| 68 | +/// │true │ ───┼─── │false│ ────────▶ │false│ filtered_nulls |
| 69 | +/// │false│ │ │NULL │ │NULL │ |
| 70 | +/// │false│ │true │ │true │ |
| 71 | +/// └─────┘ └─────┘ └─────┘ |
| 72 | +/// array opt_filter output nulls |
| 73 | +/// .nulls() |
| 74 | +/// |
| 75 | +/// false = NULL true = pass false = NULL Meanings |
| 76 | +/// true = valid false = filter true = valid |
| 77 | +/// NULL = filter |
| 78 | +/// ``` |
| 79 | +/// |
| 80 | +/// [`GroupsAccumulator::convert_to_state`]: datafusion_expr::groups_accumulator::GroupsAccumulator::convert_to_state |
| 81 | +pub fn filtered_null_mask( |
| 82 | + opt_filter: Option<&BooleanArray>, |
| 83 | + input: &dyn Array, |
| 84 | +) -> Option<NullBuffer> { |
| 85 | + let opt_filter = opt_filter.and_then(filter_to_nulls); |
| 86 | + NullBuffer::union(opt_filter.as_ref(), input.nulls()) |
| 87 | +} |
0 commit comments