Skip to content

Commit

Permalink
arrow-ord: add support for partitioning nested types
Browse files Browse the repository at this point in the history
This support is currently incorrectly assumed by `BoundedWindowAggExec`, so
partitioning on a nested type (e.g. struct) causes a nested comparison failure
on execution.

This commit adds a check to use distinct on non-nested types and falls back to
using make_comparator on nested types.
  • Loading branch information
asubiotto committed Feb 13, 2025
1 parent d4b9482 commit 07a5285
Showing 1 changed file with 29 additions and 2 deletions.
31 changes: 29 additions & 2 deletions arrow-ord/src/partition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@ use std::ops::Range;

use arrow_array::{Array, ArrayRef};
use arrow_buffer::BooleanBuffer;
use arrow_schema::ArrowError;
use arrow_schema::{ArrowError, SortOptions};

use crate::cmp::distinct;
use crate::ord::make_comparator;

/// A computed set of partitions, see [`partition`]
#[derive(Debug, Clone)]
Expand Down Expand Up @@ -156,7 +157,14 @@ fn find_boundaries(v: &dyn Array) -> Result<BooleanBuffer, ArrowError> {
let slice_len = v.len() - 1;
let v1 = v.slice(0, slice_len);
let v2 = v.slice(1, slice_len);
Ok(distinct(&v1, &v2)?.values().clone())

if !v.data_type().is_nested() {
return Ok(distinct(&v1, &v2)?.values().clone());
}
// Given that we're only comparing values, null ordering in the input or
// sort options do not matter.
let cmp = make_comparator(&v1, &v2, SortOptions::default())?;
Ok((0..slice_len).map(|i| !cmp(i, i).is_eq()).collect())
}

#[cfg(test)]
Expand Down Expand Up @@ -298,4 +306,23 @@ mod tests {
vec![(0..1), (1..2), (2..4), (4..5), (5..7), (7..8), (8..9)],
);
}

#[test]
fn test_partition_nested() {
let ints: ArrayRef = Arc::new(Int64Array::from(vec![
None,
None,
Some(1),
Some(2),
Some(2),
Some(2),
Some(3),
Some(4),
]));
let input = vec![Arc::new(StructArray::try_from(vec![("f1", ints)]).unwrap()) as _];
assert_eq!(
partition(&input).unwrap().ranges(),
vec![0..2, 2..3, 3..6, 6..7, 7..8]
)
}
}

0 comments on commit 07a5285

Please sign in to comment.