Skip to content

Commit 04cd6cf

Browse files
authored
Migrate arrow-data to Rust 2024 (#8455)
# Which issue does this PR close?

- Contribute to #6827

# Rationale for this change

Splitting up #8227.

# What changes are included in this PR?

Migrate `arrow-data` to Rust 2024

# Are these changes tested?

CI

# Are there any user-facing changes?

Yes
1 parent d1038bf commit 04cd6cf

23 files changed

66 additions (+66) and 53 deletions (-53) in total.

arrow-data/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ authors = { workspace = true }
2525
license = { workspace = true }
2626
keywords = { workspace = true }
2727
include = { workspace = true }
28-
edition = { workspace = true }
28+
edition = "2024"
2929
rust-version = { workspace = true }
3030

3131
[lib]

arrow-data/src/data.rs

Lines changed: 34 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
use crate::bit_iterator::BitSliceIterator;
2222
use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
2323
use arrow_buffer::{
24-
bit_util, i256, ArrowNativeType, Buffer, IntervalDayTime, IntervalMonthDayNano, MutableBuffer,
24+
ArrowNativeType, Buffer, IntervalDayTime, IntervalMonthDayNano, MutableBuffer, bit_util, i256,
2525
};
2626
use arrow_schema::{ArrowError, DataType, UnionMode};
2727
use std::mem;
@@ -281,7 +281,7 @@ impl ArrayData {
281281
) -> Self {
282282
let mut skip_validation = UnsafeFlag::new();
283283
// SAFETY: caller responsible for ensuring data is valid
284-
skip_validation.set(true);
284+
unsafe { skip_validation.set(true) };
285285

286286
ArrayDataBuilder {
287287
data_type,
@@ -476,21 +476,20 @@ impl ArrayData {
476476
result += buffer_size;
477477
}
478478
BufferSpec::VariableWidth => {
479-
let buffer_len: usize;
480-
match self.data_type {
479+
let buffer_len = match self.data_type {
481480
DataType::Utf8 | DataType::Binary => {
482481
let offsets = self.typed_offsets::<i32>()?;
483-
buffer_len = (offsets[self.len] - offsets[0] ) as usize;
482+
(offsets[self.len] - offsets[0]) as usize
484483
}
485484
DataType::LargeUtf8 | DataType::LargeBinary => {
486485
let offsets = self.typed_offsets::<i64>()?;
487-
buffer_len = (offsets[self.len] - offsets[0]) as usize;
486+
(offsets[self.len] - offsets[0]) as usize
488487
}
489488
_ => {
490489
return Err(ArrowError::NotYetImplemented(format!(
491-
"Invalid data type for VariableWidth buffer. Expected Utf8, LargeUtf8, Binary or LargeBinary. Got {}",
492-
self.data_type
493-
)))
490+
"Invalid data type for VariableWidth buffer. Expected Utf8, LargeUtf8, Binary or LargeBinary. Got {}",
491+
self.data_type
492+
)));
494493
}
495494
};
496495
result += buffer_len;
@@ -554,7 +553,7 @@ impl ArrayData {
554553
if let DataType::Struct(_) = self.data_type() {
555554
// Slice into children
556555
let new_offset = self.offset + offset;
557-
let new_data = ArrayData {
556+
ArrayData {
558557
data_type: self.data_type().clone(),
559558
len: length,
560559
offset: new_offset,
@@ -566,9 +565,7 @@ impl ArrayData {
566565
.map(|data| data.slice(offset, length))
567566
.collect(),
568567
nulls: self.nulls.as_ref().map(|x| x.slice(offset, length)),
569-
};
570-
571-
new_data
568+
}
572569
} else {
573570
let mut new_data = self.clone();
574571

@@ -784,15 +781,19 @@ impl ArrayData {
784781
if buffer.len() < min_buffer_size {
785782
return Err(ArrowError::InvalidArgumentError(format!(
786783
"Need at least {} bytes in buffers[{}] in array of type {:?}, but got {}",
787-
min_buffer_size, i, self.data_type, buffer.len()
784+
min_buffer_size,
785+
i,
786+
self.data_type,
787+
buffer.len()
788788
)));
789789
}
790790

791791
let align_offset = buffer.as_ptr().align_offset(*alignment);
792792
if align_offset != 0 {
793793
return Err(ArrowError::InvalidArgumentError(format!(
794794
"Misaligned buffers[{i}] in array of type {:?}, offset from expected alignment of {alignment} by {}",
795-
self.data_type, align_offset.min(alignment - align_offset)
795+
self.data_type,
796+
align_offset.min(alignment - align_offset)
796797
)));
797798
}
798799
}
@@ -806,7 +807,10 @@ impl ArrayData {
806807
if buffer.len() < min_buffer_size {
807808
return Err(ArrowError::InvalidArgumentError(format!(
808809
"Need at least {} bytes for bitmap in buffers[{}] in array of type {:?}, but got {}",
809-
min_buffer_size, i, self.data_type, buffer.len()
810+
min_buffer_size,
811+
i,
812+
self.data_type,
813+
buffer.len()
810814
)));
811815
}
812816
}
@@ -1058,7 +1062,11 @@ impl ArrayData {
10581062
if field_data.len < self.len {
10591063
return Err(ArrowError::InvalidArgumentError(format!(
10601064
"{} child array #{} for field {} has length smaller than expected for struct array ({} < {})",
1061-
self.data_type, i, field.name(), field_data.len, self.len
1065+
self.data_type,
1066+
i,
1067+
field.name(),
1068+
field_data.len,
1069+
self.len
10621070
)));
10631071
}
10641072
}
@@ -1090,7 +1098,9 @@ impl ArrayData {
10901098
if mode == &UnionMode::Sparse && field_data.len < (self.len + self.offset) {
10911099
return Err(ArrowError::InvalidArgumentError(format!(
10921100
"Sparse union child array #{} has length smaller than expected for union array ({} < {})",
1093-
i, field_data.len, self.len + self.offset
1101+
i,
1102+
field_data.len,
1103+
self.len + self.offset
10941104
)));
10951105
}
10961106
}
@@ -1282,7 +1292,7 @@ impl ArrayData {
12821292
"non-nullable child of type {} contains nulls not present in parent {}",
12831293
child.data_type, self.data_type
12841294
))),
1285-
}
1295+
};
12861296
}
12871297
};
12881298

@@ -1988,6 +1998,7 @@ impl ArrayDataBuilder {
19881998
///
19891999
/// Note: This is shorthand for
19902000
/// ```rust
2001+
/// # #[expect(unsafe_op_in_unsafe_fn)]
19912002
/// # let mut builder = arrow_data::ArrayDataBuilder::new(arrow_schema::DataType::Null);
19922003
/// # let _ = unsafe {
19932004
/// builder.skip_validation(true).build().unwrap()
@@ -1999,7 +2010,7 @@ impl ArrayDataBuilder {
19992010
/// The same caveats as [`ArrayData::new_unchecked`]
20002011
/// apply.
20012012
pub unsafe fn build_unchecked(self) -> ArrayData {
2002-
self.skip_validation(true).build().unwrap()
2013+
unsafe { self.skip_validation(true) }.build().unwrap()
20032014
}
20042015

20052016
/// Creates an `ArrayData`, consuming `self`
@@ -2098,7 +2109,9 @@ impl ArrayDataBuilder {
20982109
/// If validation is skipped, the buffers must form a valid Arrow array,
20992110
/// otherwise undefined behavior will result
21002111
pub unsafe fn skip_validation(mut self, skip_validation: bool) -> Self {
2101-
self.skip_validation.set(skip_validation);
2112+
unsafe {
2113+
self.skip_validation.set(skip_validation);
2114+
}
21022115
self
21032116
}
21042117
}

arrow-data/src/decimal.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@ use arrow_buffer::i256;
2828
use arrow_schema::ArrowError;
2929

3030
pub use arrow_schema::{
31+
DECIMAL_DEFAULT_SCALE, DECIMAL32_DEFAULT_SCALE, DECIMAL32_MAX_PRECISION, DECIMAL32_MAX_SCALE,
32+
DECIMAL64_DEFAULT_SCALE, DECIMAL64_MAX_PRECISION, DECIMAL64_MAX_SCALE,
3133
DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE,
32-
DECIMAL32_DEFAULT_SCALE, DECIMAL32_MAX_PRECISION, DECIMAL32_MAX_SCALE, DECIMAL64_DEFAULT_SCALE,
33-
DECIMAL64_MAX_PRECISION, DECIMAL64_MAX_SCALE, DECIMAL_DEFAULT_SCALE,
3434
};
3535

3636
/// `MAX_DECIMAL256_FOR_EACH_PRECISION[p]` holds the maximum [`i256`] value that can

arrow-data/src/equal/boolean.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use crate::bit_iterator::BitIndexIterator;
19-
use crate::data::{contains_nulls, ArrayData};
19+
use crate::data::{ArrayData, contains_nulls};
2020
use arrow_buffer::bit_util::get_bit;
2121

2222
use super::utils::{equal_bits, equal_len};

arrow-data/src/equal/dictionary.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::data::{contains_nulls, ArrayData};
18+
use crate::data::{ArrayData, contains_nulls};
1919
use arrow_buffer::ArrowNativeType;
2020

2121
use super::equal_range;

arrow-data/src/equal/fixed_list.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::data::{contains_nulls, ArrayData};
18+
use crate::data::{ArrayData, contains_nulls};
1919
use arrow_schema::DataType;
2020

2121
use super::equal_range;

arrow-data/src/equal/list.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::data::{count_nulls, ArrayData};
18+
use crate::data::{ArrayData, count_nulls};
1919
use arrow_buffer::ArrowNativeType;
2020
use num_integer::Integer;
2121

arrow-data/src/equal/structure.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::data::{contains_nulls, ArrayData};
18+
use crate::data::{ArrayData, contains_nulls};
1919

2020
use super::equal_range;
2121

arrow-data/src/equal/utils.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::data::{contains_nulls, ArrayData};
18+
use crate::data::{ArrayData, contains_nulls};
1919
use arrow_buffer::bit_chunk_iterator::BitChunks;
2020
use arrow_schema::DataType;
2121

arrow-data/src/equal/variable_size.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::data::{contains_nulls, ArrayData};
18+
use crate::data::{ArrayData, contains_nulls};
1919
use arrow_buffer::ArrowNativeType;
2020
use num_integer::Integer;
2121

0 commit comments

Comments
 (0)