Skip to content

Commit

Permalink
Improve ListArray documentation for slices (#7039)
Browse files Browse the repository at this point in the history
* Improve ListArray documentation for slices

* more

* ASCII ART

* Update arrow-array/src/array/list_array.rs

Co-authored-by: Raphael Taylor-Davies <[email protected]>

* Update arrow-array/src/array/list_array.rs

Co-authored-by: Raz Luvaton <[email protected]>

* Apply suggestions from code review

Co-authored-by: Raz Luvaton <[email protected]>

* Fix diagram

---------

Co-authored-by: Raphael Taylor-Davies <[email protected]>
Co-authored-by: Raz Luvaton <[email protected]>
  • Loading branch information
3 people authored Feb 2, 2025
1 parent 8baaa8b commit fdbef3f
Showing 1 changed file with 52 additions and 4 deletions.
56 changes: 52 additions & 4 deletions arrow-array/src/array/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,41 @@ impl OffsetSizeTrait for i64 {
/// (offsets[i], │ ListArray (Array)
/// offsets[i+1]) └ ─ ─ ─ ─ ─ ─ ┘ │
/// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
/// ```
///
/// # Slicing
///
/// Slicing a `ListArray` creates a new `ListArray` without copying any data,
/// but this means the [`Self::values`] and [`Self::offsets`] may have "unused" data
///
/// For example, calling `slice(1, 3)` on the `ListArray` in the above example
/// would result in the following. Note
///
/// 1. `Values` array is unchanged
/// 2. `Offsets` do not start at `0`, nor cover all values in the Values array.
///
/// ```text
/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
/// ┌ ─ ─ ─ ─ ─ ─ ┐ │ ╔═══╗
/// │ ╔═══╗ ╔═══╗ ║ ║ Not used
/// │ ║ 1 ║ ║ A ║ │ 0 │ ╚═══╝
/// ┌─────────────┐ ┌───────┐ │ ┌───┐ ┌───┐ ╠═══╣ ╠═══╣
/// │ [] (empty) │ │ (3,3) │ │ 1 │ │ 3 │ │ ║ 1 ║ ║ B ║ │ 1 │
/// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ╠═══╣ ╠═══╣
/// │ NULL │ │ (3,4) │ │ 0 │ │ 3 │ │ ║ 1 ║ ║ C ║ │ 2 │
/// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ╠───╣ ╠───╣
/// │ [D] │ │ (4,5) │ │ 1 │ │ 4 │ │ │ 0 │ │ ? │ │ 3 │
/// └─────────────┘ └───────┘ │ └───┘ ├───┤ ├───┤ ├───┤
/// │ 5 │ │ │ 1 │ │ D │ │ 4 │
/// │ └───┘ ├───┤ ├───┤
/// │ │ 0 │ │ ? │ │ 5 │
/// │ Validity ╠═══╣ ╠═══╣
/// Logical Logical (nulls) Offsets │ ║ 1 ║ ║ F ║ │ 6 │
/// Values Offsets │ ╚═══╝ ╚═══╝
/// │ Values │ │
/// (offsets[i], │ ListArray (Array)
/// offsets[i+1]) └ ─ ─ ─ ─ ─ ─ ┘ │
/// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
/// ```
///
/// [`StringArray`]: crate::array::StringArray
Expand Down Expand Up @@ -263,13 +296,22 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
/// Returns a reference to the offsets of this list
///
/// Unlike [`Self::value_offsets`] this returns the [`OffsetBuffer`]
/// allowing for zero-copy cloning
/// allowing for zero-copy cloning.
///
/// Notes: The `offsets` may not start at 0 and may not cover all values in
/// [`Self::values`]. This can happen when the list array was sliced via
/// [`Self::slice`]. See documentation for [`Self`] for more details.
#[inline]
pub fn offsets(&self) -> &OffsetBuffer<OffsetSize> {
&self.value_offsets
}

/// Returns a reference to the values of this list
///
/// Note: The list array may not refer to all values in the `values` array.
/// For example if the list array was sliced via [`Self::slice`] values will
/// still contain values both before and after the slice. See documentation
/// for [`Self`] for more details.
#[inline]
pub fn values(&self) -> &ArrayRef {
&self.values
Expand All @@ -296,7 +338,9 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
self.values.slice(start, end - start)
}

/// Returns the offset values in the offsets buffer
/// Returns the offset values in the offsets buffer.
///
/// See [`Self::offsets`] for more details.
#[inline]
pub fn value_offsets(&self) -> &[OffsetSize] {
&self.value_offsets
Expand Down Expand Up @@ -325,6 +369,10 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
}

/// Returns a zero-copy slice of this array with the indicated offset and length.
///
/// Notes: this method does *NOT* slice the underlying values array or modify
/// the values in the offsets buffer. See [`Self::values`] and
/// [`Self::offsets`] for more information.
pub fn slice(&self, offset: usize, length: usize) -> Self {
Self {
data_type: self.data_type.clone(),
Expand Down Expand Up @@ -556,12 +604,12 @@ impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListArray<OffsetSiz

/// A [`GenericListArray`] of variable size lists, storing offsets as `i32`.
///
// See [`ListBuilder`](crate::builder::ListBuilder) for how to construct a [`ListArray`]
/// See [`ListBuilder`](crate::builder::ListBuilder) for how to construct a [`ListArray`]
pub type ListArray = GenericListArray<i32>;

/// A [`GenericListArray`] of variable size lists, storing offsets as `i64`.
///
// See [`LargeListBuilder`](crate::builder::LargeListBuilder) for how to construct a [`LargeListArray`]
/// See [`LargeListBuilder`](crate::builder::LargeListBuilder) for how to construct a [`LargeListArray`]
pub type LargeListArray = GenericListArray<i64>;

#[cfg(test)]
Expand Down

0 comments on commit fdbef3f

Please sign in to comment.