Skip to content

Commit 9cb6bec

Browse files
authored
Migrate arrow-row to Rust 2024 (#8488)
# Which issue does this PR close?

- Contribute to #6827

# Rationale for this change

Splitting up #8227.

# What changes are included in this PR?

Migrate `arrow-row` to Rust 2024

# Are these changes tested?

CI

# Are there any user-facing changes?

Yes
1 parent 28cf02d commit 9cb6bec

File tree

6 files changed

+49
-55
lines changed

6 files changed

+49
-55
lines changed

arrow-row/Cargo.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ authors = { workspace = true }
2525
license = { workspace = true }
2626
keywords = { workspace = true }
2727
include = { workspace = true }
28-
edition = { workspace = true }
28+
edition = "2024"
2929
rust-version = { workspace = true }
3030

3131
[lib]
@@ -47,4 +47,3 @@ half = { version = "2.1", default-features = false }
4747
arrow-cast = { workspace = true }
4848
arrow-ord = { workspace = true }
4949
rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] }
50-

arrow-row/src/fixed.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ use crate::null_sentinel;
2020
use arrow_array::builder::BufferBuilder;
2121
use arrow_array::{ArrowPrimitiveType, BooleanArray, FixedSizeBinaryArray};
2222
use arrow_buffer::{
23-
bit_util, i256, ArrowNativeType, BooleanBuffer, Buffer, IntervalDayTime, IntervalMonthDayNano,
24-
MutableBuffer, NullBuffer,
23+
ArrowNativeType, BooleanBuffer, Buffer, IntervalDayTime, IntervalMonthDayNano, MutableBuffer,
24+
NullBuffer, bit_util, i256,
2525
};
2626
use arrow_data::{ArrayData, ArrayDataBuilder};
2727
use arrow_schema::{DataType, SortOptions};
@@ -456,7 +456,7 @@ unsafe fn decode_fixed<T: FixedLengthEncoding + ArrowNativeType>(
456456
.null_bit_buffer(Some(nulls));
457457

458458
// SAFETY: Buffers correct length
459-
builder.build_unchecked()
459+
unsafe { builder.build_unchecked() }
460460
}
461461

462462
/// Decodes a `PrimitiveArray` from rows

arrow-row/src/lib.rs

Lines changed: 29 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -936,7 +936,7 @@ impl RowConverter {
936936
self.fields
937937
.iter()
938938
.zip(&self.codecs)
939-
.map(|(field, codec)| decode_column(field, rows, codec, validate_utf8))
939+
.map(|(field, codec)| unsafe { decode_column(field, rows, codec, validate_utf8) })
940940
.collect()
941941
}
942942

@@ -1685,20 +1685,20 @@ unsafe fn decode_column(
16851685
DataType::LargeBinary => Arc::new(decode_binary::<i64>(rows, options)),
16861686
DataType::BinaryView => Arc::new(decode_binary_view(rows, options)),
16871687
DataType::FixedSizeBinary(size) => Arc::new(decode_fixed_size_binary(rows, size, options)),
1688-
DataType::Utf8 => Arc::new(decode_string::<i32>(rows, options, validate_utf8)),
1689-
DataType::LargeUtf8 => Arc::new(decode_string::<i64>(rows, options, validate_utf8)),
1690-
DataType::Utf8View => Arc::new(decode_string_view(rows, options, validate_utf8)),
1688+
DataType::Utf8 => Arc::new(unsafe{ decode_string::<i32>(rows, options, validate_utf8) }),
1689+
DataType::LargeUtf8 => Arc::new(unsafe { decode_string::<i64>(rows, options, validate_utf8) }),
1690+
DataType::Utf8View => Arc::new(unsafe { decode_string_view(rows, options, validate_utf8) }),
16911691
_ => return Err(ArrowError::NotYetImplemented(format!("unsupported data type: {data_type}" )))
16921692
}
16931693
}
16941694
Codec::Dictionary(converter, _) => {
1695-
let cols = converter.convert_raw(rows, validate_utf8)?;
1695+
let cols = unsafe { converter.convert_raw(rows, validate_utf8) }?;
16961696
cols.into_iter().next().unwrap()
16971697
}
16981698
Codec::Struct(converter, _) => {
16991699
let (null_count, nulls) = fixed::decode_nulls(rows);
17001700
rows.iter_mut().for_each(|row| *row = &row[1..]);
1701-
let children = converter.convert_raw(rows, validate_utf8)?;
1701+
let children = unsafe { converter.convert_raw(rows, validate_utf8) }?;
17021702

17031703
let child_data: Vec<ArrayData> = children.iter().map(|c| c.to_data()).collect();
17041704
// Since RowConverter flattens certain data types (i.e. Dictionary),
@@ -1723,44 +1723,37 @@ unsafe fn decode_column(
17231723
.null_bit_buffer(Some(nulls))
17241724
.child_data(child_data);
17251725

1726-
Arc::new(StructArray::from(builder.build_unchecked()))
1726+
Arc::new(StructArray::from(unsafe { builder.build_unchecked() }))
17271727
}
17281728
Codec::List(converter) => match &field.data_type {
17291729
DataType::List(_) => {
1730-
Arc::new(list::decode::<i32>(converter, rows, field, validate_utf8)?)
1730+
Arc::new(unsafe { list::decode::<i32>(converter, rows, field, validate_utf8) }?)
17311731
}
17321732
DataType::LargeList(_) => {
1733-
Arc::new(list::decode::<i64>(converter, rows, field, validate_utf8)?)
1733+
Arc::new(unsafe { list::decode::<i64>(converter, rows, field, validate_utf8) }?)
17341734
}
1735-
DataType::FixedSizeList(_, value_length) => Arc::new(list::decode_fixed_size_list(
1736-
converter,
1737-
rows,
1738-
field,
1739-
validate_utf8,
1740-
value_length.as_usize(),
1741-
)?),
1742-
_ => unreachable!(),
1743-
},
1744-
Codec::RunEndEncoded(converter) => match &field.data_type {
1745-
DataType::RunEndEncoded(run_ends, _) => match run_ends.data_type() {
1746-
DataType::Int16 => Arc::new(run::decode::<Int16Type>(
1747-
converter,
1748-
rows,
1749-
field,
1750-
validate_utf8,
1751-
)?),
1752-
DataType::Int32 => Arc::new(run::decode::<Int32Type>(
1735+
DataType::FixedSizeList(_, value_length) => Arc::new(unsafe {
1736+
list::decode_fixed_size_list(
17531737
converter,
17541738
rows,
17551739
field,
17561740
validate_utf8,
1757-
)?),
1758-
DataType::Int64 => Arc::new(run::decode::<Int64Type>(
1759-
converter,
1760-
rows,
1761-
field,
1762-
validate_utf8,
1763-
)?),
1741+
value_length.as_usize(),
1742+
)
1743+
}?),
1744+
_ => unreachable!(),
1745+
},
1746+
Codec::RunEndEncoded(converter) => match &field.data_type {
1747+
DataType::RunEndEncoded(run_ends, _) => match run_ends.data_type() {
1748+
DataType::Int16 => Arc::new(unsafe {
1749+
run::decode::<Int16Type>(converter, rows, field, validate_utf8)
1750+
}?),
1751+
DataType::Int32 => Arc::new(unsafe {
1752+
run::decode::<Int32Type>(converter, rows, field, validate_utf8)
1753+
}?),
1754+
DataType::Int64 => Arc::new(unsafe {
1755+
run::decode::<Int64Type>(converter, rows, field, validate_utf8)
1756+
}?),
17641757
_ => unreachable!(),
17651758
},
17661759
_ => unreachable!(),
@@ -1773,13 +1766,13 @@ unsafe fn decode_column(
17731766
mod tests {
17741767
use rand::distr::uniform::SampleUniform;
17751768
use rand::distr::{Distribution, StandardUniform};
1776-
use rand::{rng, Rng};
1769+
use rand::{Rng, rng};
17771770

17781771
use arrow_array::builder::*;
17791772
use arrow_array::types::*;
17801773
use arrow_array::*;
1781-
use arrow_buffer::{i256, NullBuffer};
17821774
use arrow_buffer::{Buffer, OffsetBuffer};
1775+
use arrow_buffer::{NullBuffer, i256};
17831776
use arrow_cast::display::{ArrayFormatter, FormatOptions};
17841777
use arrow_ord::sort::{LexicographicalComparator, SortColumn};
17851778

arrow-row/src/list.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::{fixed, null_sentinel, LengthTracker, RowConverter, Rows, SortField};
19-
use arrow_array::{new_null_array, Array, FixedSizeListArray, GenericListArray, OffsetSizeTrait};
18+
use crate::{LengthTracker, RowConverter, Rows, SortField, fixed, null_sentinel};
19+
use arrow_array::{Array, FixedSizeListArray, GenericListArray, OffsetSizeTrait, new_null_array};
2020
use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer};
2121
use arrow_data::ArrayDataBuilder;
2222
use arrow_schema::{ArrowError, DataType, SortOptions};
@@ -174,7 +174,7 @@ pub unsafe fn decode<O: OffsetSizeTrait>(
174174
})
175175
.collect();
176176

177-
let child = converter.convert_raw(&mut child_rows, validate_utf8)?;
177+
let child = unsafe { converter.convert_raw(&mut child_rows, validate_utf8) }?;
178178
assert_eq!(child.len(), 1);
179179

180180
let child_data = child[0].to_data();
@@ -279,7 +279,7 @@ pub unsafe fn decode_fixed_size_list(
279279
_ => {
280280
return Err(ArrowError::InvalidArgumentError(format!(
281281
"Expected FixedSizeListArray, found: {list_type}",
282-
)))
282+
)));
283283
}
284284
};
285285

@@ -301,7 +301,7 @@ pub unsafe fn decode_fixed_size_list(
301301
} else {
302302
for _ in 0..value_length {
303303
let mut temp_child_rows = vec![&row[row_offset..]];
304-
converter.convert_raw(&mut temp_child_rows, validate_utf8)?;
304+
unsafe { converter.convert_raw(&mut temp_child_rows, validate_utf8) }?;
305305
let decoded_bytes = row.len() - row_offset - temp_child_rows[0].len();
306306
let next_offset = row_offset + decoded_bytes;
307307
child_rows.push(&row[row_offset..next_offset]);
@@ -311,13 +311,15 @@ pub unsafe fn decode_fixed_size_list(
311311
*row = &row[row_offset..]; // Update row for the next decoder
312312
}
313313

314-
let children = converter.convert_raw(&mut child_rows, validate_utf8)?;
314+
let children = unsafe { converter.convert_raw(&mut child_rows, validate_utf8) }?;
315315
let child_data = children.iter().map(|c| c.to_data()).collect();
316316
let builder = ArrayDataBuilder::new(list_type.clone())
317317
.len(len)
318318
.null_count(null_count)
319319
.null_bit_buffer(Some(nulls))
320320
.child_data(child_data);
321321

322-
Ok(FixedSizeListArray::from(builder.build_unchecked()))
322+
Ok(FixedSizeListArray::from(unsafe {
323+
builder.build_unchecked()
324+
}))
323325
}

arrow-row/src/run.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::{variable, RowConverter, Rows, SortField};
18+
use crate::{RowConverter, Rows, SortField, variable};
1919
use arrow_array::types::RunEndIndexType;
2020
use arrow_array::{PrimitiveArray, RunArray};
2121
use arrow_buffer::{ArrowNativeType, ScalarBuffer};
@@ -97,7 +97,7 @@ pub unsafe fn decode<R: RunEndIndexType>(
9797
validate_utf8: bool,
9898
) -> Result<RunArray<R>, ArrowError> {
9999
if rows.is_empty() {
100-
let values = converter.convert_raw(&mut [], validate_utf8)?;
100+
let values = unsafe { converter.convert_raw(&mut [], validate_utf8) }?;
101101
let run_ends_array = PrimitiveArray::<R>::try_new(ScalarBuffer::from(vec![]), None)?;
102102
return RunArray::<R>::try_new(&run_ends_array, &values[0]);
103103
}
@@ -143,9 +143,9 @@ pub unsafe fn decode<R: RunEndIndexType>(
143143
// Convert the unique decoded values using the row converter
144144
let mut unique_rows: Vec<&[u8]> = decoded_values.iter().map(|v| v.as_slice()).collect();
145145
let values = if unique_rows.is_empty() {
146-
converter.convert_raw(&mut [], validate_utf8)?
146+
unsafe { converter.convert_raw(&mut [], validate_utf8) }?
147147
} else {
148-
converter.convert_raw(&mut unique_rows, validate_utf8)?
148+
unsafe { converter.convert_raw(&mut unique_rows, validate_utf8) }?
149149
};
150150

151151
// Create run ends array

arrow-row/src/variable.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
use crate::null_sentinel;
1919
use arrow_array::builder::BufferBuilder;
2020
use arrow_array::*;
21-
use arrow_buffer::bit_util::ceil;
2221
use arrow_buffer::MutableBuffer;
22+
use arrow_buffer::bit_util::ceil;
2323
use arrow_data::{ArrayDataBuilder, MAX_INLINE_VIEW_LEN};
2424
use arrow_schema::{DataType, SortOptions};
2525
use builder::make_view;
@@ -358,7 +358,7 @@ pub unsafe fn decode_string<I: OffsetSizeTrait>(
358358

359359
// SAFETY:
360360
// Row data must have come from a valid UTF-8 array
361-
GenericStringArray::from(builder.build_unchecked())
361+
GenericStringArray::from(unsafe { builder.build_unchecked() })
362362
}
363363

364364
/// Decodes a string view array from `rows` with the provided `options`
@@ -372,5 +372,5 @@ pub unsafe fn decode_string_view(
372372
validate_utf8: bool,
373373
) -> StringViewArray {
374374
let view = decode_binary_view_inner(rows, options, validate_utf8);
375-
view.to_string_view_unchecked()
375+
unsafe { view.to_string_view_unchecked() }
376376
}

0 commit comments

Comments
 (0)