39 changes: 33 additions & 6 deletions arrow-row/src/lib.rs
@@ -1236,9 +1236,14 @@ fn row_lengths(cols: &[ArrayRef], encoders: &[Encoder]) -> Vec<usize> {
             Encoder::Struct(rows, null) => {
                 let array = as_struct_array(array);
                 lengths.iter_mut().enumerate().for_each(|(idx, length)| {
-                    match array.is_valid(idx) {
-                        true => *length += 1 + rows.row(idx).as_ref().len(),
-                        false => *length += 1 + null.data.len(),
+                    if array.is_valid(idx) {
+                        // Only calculate row length if there are rows
+                        if rows.num_rows() > 0 {
+                            *length += rows.row(idx).as_ref().len();
+                        }
+                        *length += 1;
+                    } else {
+                        *length += 1 + null.data.len();
                     }
                 });
             }
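Note: the guard above targets a struct column that has rows but no child fields. In that case the nested row set built for the children is empty, so the old unconditional `rows.row(idx)` call would index past its end; each parent row then contributes only its single validity byte. A quick illustration of such an array, using the same constructor as the new test further down (imports assumed):

```rust
use arrow_array::{Array, StructArray};

// A struct array with 5 rows and zero child fields: the parent reports a
// length of 5, but there are no child columns to encode.
let s = StructArray::new_empty_fields(5, None);
assert_eq!(s.len(), 5);
assert_eq!(s.num_columns(), 0);
```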
@@ -1330,9 +1335,18 @@ fn encode_column(
                 .skip(1)
                 .enumerate()
                 .for_each(|(idx, offset)| {
-                    let (row, sentinel) = match array.is_valid(idx) {
-                        true => (rows.row(idx), 0x01),
-                        false => (*null, null_sentinel),
+                    let (row, sentinel) = if array.is_valid(idx) {
+                        let row = if rows.num_rows() == 0 {
Contributor:

I am worried about adding a new test on each row of the conversion, as that may slow things down significantly.

I think we could check if the input array's size is greater than zero and just ignore the offsets if not:

    let array = as_struct_array(column);
    if array.len() == 0 {
        return Ok(());
    }

Or something.

+                            Row {
+                                data: &[],
+                                config: &rows.config,
+                            }
+                        } else {
+                            rows.row(idx)
+                        };
+                        (row, 0x01)
+                    } else {
+                        (*null, null_sentinel)
                     };
                     let end_offset = *offset + 1 + row.as_ref().len();
                     data[*offset] = sentinel;
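Following up on the comment above, a minimal sketch of one way to move the check out of the per-row closure; the names (`rows`, `array`, `offsets`, `null`, `null_sentinel`, `data`) follow the diff, and this is an illustration of the idea rather than the implementation in this PR:

```rust
// Sketch only: evaluate once, before the loop, whether the nested rows are
// empty instead of calling num_rows() inside the per-row closure.
let rows_is_empty = rows.num_rows() == 0;
offsets.iter_mut().skip(1).enumerate().for_each(|(idx, offset)| {
    let (row, sentinel) = if array.is_valid(idx) {
        let row = if rows_is_empty {
            Row {
                data: &[],
                config: &rows.config,
            }
        } else {
            rows.row(idx)
        };
        (row, 0x01)
    } else {
        (*null, null_sentinel)
    };
    let end_offset = *offset + 1 + row.as_ref().len();
    data[*offset] = sentinel;
    // ... rest of the loop body unchanged ...
});
```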
@@ -2539,4 +2553,17 @@ mod tests {
         let rows = converter.convert_columns(&[Arc::new(a) as _]).unwrap();
         assert_eq!(rows.row(0).cmp(&rows.row(1)), Ordering::Less);
     }
+
+    #[test]
+    fn test_empty_struct() {
+        let s = Arc::new(StructArray::new_empty_fields(5, None)) as ArrayRef;
+
+        let sort_fields = vec![SortField::new(s.data_type().clone())];
+        let converter = RowConverter::new(sort_fields).unwrap();
+        let r = converter.convert_columns(&[Arc::clone(&s)]).unwrap();
+
+        let back = converter.convert_rows(&r).unwrap();
+        assert_eq!(back.len(), 1);
+        assert_eq!(&back[0], &s);
+    }
Contributor:

Can you also verify we have tests covering empty arrays of other offset-based types (such as ListArray, StringArray, etc.)?
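A sketch of what such coverage could look like, mirroring `test_empty_struct` above; the test name, the list of types, and the use of `new_empty_array` are illustrative assumptions rather than existing tests in this crate, and imports are assumed from the surrounding test module:

```rust
#[test]
fn test_empty_offset_based_arrays() {
    // Hypothetical test: round-trip zero-length arrays of offset-based
    // types through the row format and check they convert back unchanged.
    let types = [
        DataType::Utf8,
        DataType::Binary,
        DataType::List(Arc::new(Field::new("item", DataType::Int32, true))),
    ];

    for data_type in types {
        let col = new_empty_array(&data_type);
        let converter = RowConverter::new(vec![SortField::new(data_type)]).unwrap();
        let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
        let back = converter.convert_rows(&rows).unwrap();
        assert_eq!(back.len(), 1);
        assert_eq!(&back[0], &col);
    }
}
```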

}