diff --git a/Cargo.toml b/Cargo.toml index e8b277202146..cfdea869bd73 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -81,8 +81,8 @@ include = [ "LICENSE.txt", "NOTICE.txt", ] -edition = "2021" -rust-version = "1.84" +edition = "2024" +rust-version = "1.85" [workspace.dependencies] arrow = { version = "56.2.0", path = "./arrow", default-features = false } diff --git a/arrow-arith/src/arithmetic.rs b/arrow-arith/src/arithmetic.rs index 768fd798c04c..59869db96db4 100644 --- a/arrow-arith/src/arithmetic.rs +++ b/arrow-arith/src/arithmetic.rs @@ -322,7 +322,10 @@ mod tests { // `multiply` overflows on this case. let err = mul(&a, &b).unwrap_err(); - assert_eq!(err.to_string(), "Arithmetic overflow: Overflow happened on: 123456789000000000000000000 * 10000000000000000000"); + assert_eq!( + err.to_string(), + "Arithmetic overflow: Overflow happened on: 123456789000000000000000000 * 10000000000000000000" + ); // Avoid overflow by reducing the scale. let result = multiply_fixed_point(&a, &b, 28).unwrap(); diff --git a/arrow-arith/src/numeric.rs b/arrow-arith/src/numeric.rs index 198447b4db7b..404f7795a3ff 100644 --- a/arrow-arith/src/numeric.rs +++ b/arrow-arith/src/numeric.rs @@ -519,7 +519,7 @@ fn timestamp_op( "Invalid timestamp arithmetic operation: {} {op} {}", l.data_type(), r.data_type() - ))) + ))); } }; Ok(Arc::new(array.with_timezone_opt(l.timezone()))) @@ -1263,7 +1263,10 @@ mod tests { .with_precision_and_scale(37, 37) .unwrap(); let err = mul(&a, &b).unwrap_err().to_string(); - assert_eq!(err, "Invalid argument error: Output scale of Decimal128(3, 3) * Decimal128(37, 37) would exceed max scale of 38"); + assert_eq!( + err, + "Invalid argument error: Output scale of Decimal128(3, 3) * Decimal128(37, 37) would exceed max scale of 38" + ); let a = Decimal128Array::from(vec![1]) .with_precision_and_scale(3, -2) diff --git a/arrow-array/src/arithmetic.rs b/arrow-array/src/arithmetic.rs index 031864cb0809..0e2aa5a28ca9 100644 --- a/arrow-array/src/arithmetic.rs +++ b/arrow-array/src/arithmetic.rs @@ -420,13 +420,13 @@ native_type_float_op!( 1., unsafe { // Need to allow in clippy because - // current MSRV (Minimum Supported Rust Version) is `1.84.0` but this item is stable since `1.87.0` + // current MSRV (Minimum Supported Rust Version) is `1.85.0` but this item is stable since `1.87.0` #[allow(unnecessary_transmutes)] std::mem::transmute(-1_i32) }, unsafe { // Need to allow in clippy because - // current MSRV (Minimum Supported Rust Version) is `1.84.0` but this item is stable since `1.87.0` + // current MSRV (Minimum Supported Rust Version) is `1.85.0` but this item is stable since `1.87.0` #[allow(unnecessary_transmutes)] std::mem::transmute(i32::MAX) } @@ -437,13 +437,13 @@ native_type_float_op!( 1., unsafe { // Need to allow in clippy because - // current MSRV (Minimum Supported Rust Version) is `1.84.0` but this item is stable since `1.87.0` + // current MSRV (Minimum Supported Rust Version) is `1.85.0` but this item is stable since `1.87.0` #[allow(unnecessary_transmutes)] std::mem::transmute(-1_i64) }, unsafe { // Need to allow in clippy because - // current MSRV (Minimum Supported Rust Version) is `1.84.0` but this item is stable since `1.87.0` + // current MSRV (Minimum Supported Rust Version) is `1.85.0` but this item is stable since `1.87.0` #[allow(unnecessary_transmutes)] std::mem::transmute(i64::MAX) } diff --git a/arrow-array/src/array/binary_array.rs b/arrow-array/src/array/binary_array.rs index 8e2158416f49..7cfa1b52728e 100644 --- a/arrow-array/src/array/binary_array.rs +++ b/arrow-array/src/array/binary_array.rs @@ -90,7 +90,7 @@ impl GenericBinaryArray { &'a self, indexes: impl Iterator> + 'a, ) -> impl Iterator> { - indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index))) + unsafe { indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index))) } } } diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index fe7ad85b7a05..f2ce93ca350d 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -184,7 +184,7 @@ impl BooleanArray { /// # Safety /// This doesn't check bounds, the caller must ensure that index < self.len() pub unsafe fn value_unchecked(&self, i: usize) -> bool { - self.values.value_unchecked(i) + unsafe { self.values.value_unchecked(i) } } /// Returns the boolean value at index `i`. @@ -222,7 +222,7 @@ impl BooleanArray { &'a self, indexes: impl Iterator> + 'a, ) -> impl Iterator> + 'a { - indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index))) + unsafe { indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index))) } } /// Create a [`BooleanArray`] by evaluating the operation for @@ -355,7 +355,7 @@ impl ArrayAccessor for &BooleanArray { } unsafe fn value_unchecked(&self, index: usize) -> Self::Item { - BooleanArray::value_unchecked(self, index) + unsafe { BooleanArray::value_unchecked(self, index) } } } diff --git a/arrow-array/src/array/byte_array.rs b/arrow-array/src/array/byte_array.rs index 2ff9e9f4f658..fa7f67195e8d 100644 --- a/arrow-array/src/array/byte_array.rs +++ b/arrow-array/src/array/byte_array.rs @@ -283,28 +283,30 @@ impl GenericByteArray { /// # Safety /// Caller is responsible for ensuring that the index is within the bounds of the array pub unsafe fn value_unchecked(&self, i: usize) -> &T::Native { - let end = *self.value_offsets().get_unchecked(i + 1); - let start = *self.value_offsets().get_unchecked(i); - - // Soundness - // pointer alignment & location is ensured by RawPtrBox - // buffer bounds/offset is ensured by the value_offset invariants - - // Safety of `to_isize().unwrap()` - // `start` and `end` are &OffsetSize, which is a generic type that implements the - // OffsetSizeTrait. Currently, only i32 and i64 implement OffsetSizeTrait, - // both of which should cleanly cast to isize on an architecture that supports - // 32/64-bit offsets - let b = std::slice::from_raw_parts( - self.value_data - .as_ptr() - .offset(start.to_isize().unwrap_unchecked()), - (end - start).to_usize().unwrap_unchecked(), - ); - - // SAFETY: - // ArrayData is valid - T::Native::from_bytes_unchecked(b) + unsafe { + let end = *self.value_offsets().get_unchecked(i + 1); + let start = *self.value_offsets().get_unchecked(i); + + // Soundness + // pointer alignment & location is ensured by RawPtrBox + // buffer bounds/offset is ensured by the value_offset invariants + + // Safety of `to_isize().unwrap()` + // `start` and `end` are &OffsetSize, which is a generic type that implements the + // OffsetSizeTrait. Currently, only i32 and i64 implement OffsetSizeTrait, + // both of which should cleanly cast to isize on an architecture that supports + // 32/64-bit offsets + let b = std::slice::from_raw_parts( + self.value_data + .as_ptr() + .offset(start.to_isize().unwrap_unchecked()), + (end - start).to_usize().unwrap_unchecked(), + ); + + // SAFETY: + // ArrayData is valid + T::Native::from_bytes_unchecked(b) + } } /// Returns the element at index `i` @@ -509,7 +511,7 @@ impl<'a, T: ByteArrayType> ArrayAccessor for &'a GenericByteArray { } unsafe fn value_unchecked(&self, index: usize) -> Self::Item { - GenericByteArray::value_unchecked(self, index) + unsafe { GenericByteArray::value_unchecked(self, index) } } } @@ -603,14 +605,23 @@ mod tests { let nulls = NullBuffer::new_null(3); let err = StringArray::try_new(offsets.clone(), data.clone(), Some(nulls.clone())).unwrap_err(); - assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for StringArray, expected 2 got 3"); + assert_eq!( + err.to_string(), + "Invalid argument error: Incorrect length of null buffer for StringArray, expected 2 got 3" + ); let err = BinaryArray::try_new(offsets.clone(), data.clone(), Some(nulls)).unwrap_err(); - assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for BinaryArray, expected 2 got 3"); + assert_eq!( + err.to_string(), + "Invalid argument error: Incorrect length of null buffer for BinaryArray, expected 2 got 3" + ); let non_utf8_data = Buffer::from_slice_ref(b"he\xFFloworld"); let err = StringArray::try_new(offsets.clone(), non_utf8_data.clone(), None).unwrap_err(); - assert_eq!(err.to_string(), "Invalid argument error: Encountered non UTF-8 data: invalid utf-8 sequence of 1 bytes from index 2"); + assert_eq!( + err.to_string(), + "Invalid argument error: Encountered non UTF-8 data: invalid utf-8 sequence of 1 bytes from index 2" + ); BinaryArray::new(offsets, non_utf8_data, None); diff --git a/arrow-array/src/array/byte_view_array.rs b/arrow-array/src/array/byte_view_array.rs index 7c8993d6028e..3163a329e4b8 100644 --- a/arrow-array/src/array/byte_view_array.rs +++ b/arrow-array/src/array/byte_view_array.rs @@ -324,17 +324,19 @@ impl GenericByteViewArray { /// Caller is responsible for ensuring that the index is within the bounds /// of the array pub unsafe fn value_unchecked(&self, idx: usize) -> &T::Native { - let v = self.views.get_unchecked(idx); - let len = *v as u32; - let b = if len <= MAX_INLINE_VIEW_LEN { - Self::inline_value(v, len as usize) - } else { - let view = ByteView::from(*v); - let data = self.buffers.get_unchecked(view.buffer_index as usize); - let offset = view.offset as usize; - data.get_unchecked(offset..offset + len as usize) - }; - T::Native::from_bytes_unchecked(b) + unsafe { + let v = self.views.get_unchecked(idx); + let len = *v as u32; + let b = if len <= MAX_INLINE_VIEW_LEN { + Self::inline_value(v, len as usize) + } else { + let view = ByteView::from(*v); + let data = self.buffers.get_unchecked(view.buffer_index as usize); + let offset = view.offset as usize; + data.get_unchecked(offset..offset + len as usize) + }; + T::Native::from_bytes_unchecked(b) + } } /// Returns the first `len` bytes the inline value of the view. @@ -344,8 +346,10 @@ impl GenericByteViewArray { /// - The `len` must be the length of the inlined value. It should never be larger than [`MAX_INLINE_VIEW_LEN`]. #[inline(always)] pub unsafe fn inline_value(view: &u128, len: usize) -> &[u8] { - debug_assert!(len <= MAX_INLINE_VIEW_LEN as usize); - std::slice::from_raw_parts((view as *const u128 as *const u8).wrapping_add(4), len) + unsafe { + debug_assert!(len <= MAX_INLINE_VIEW_LEN as usize); + std::slice::from_raw_parts((view as *const u128 as *const u8).wrapping_add(4), len) + } } /// Constructs a new iterator for iterating over the values of this array @@ -540,28 +544,30 @@ impl GenericByteViewArray { /// into the bytes just appended at the end of `data_buf`. #[inline(always)] unsafe fn copy_view_to_buffer(&self, i: usize, data_buf: &mut Vec) -> u128 { - // SAFETY: `i < self.len()` ensures this is in‑bounds. - let raw_view = *self.views().get_unchecked(i); - let mut bv = ByteView::from(raw_view); - - // Inline‑small views stay as‑is. - if bv.length <= MAX_INLINE_VIEW_LEN { - raw_view - } else { - // SAFETY: `bv.buffer_index` and `bv.offset..bv.offset+bv.length` - // must both lie within valid ranges for `self.buffers`. - let buffer = self.buffers.get_unchecked(bv.buffer_index as usize); - let start = bv.offset as usize; - let end = start + bv.length as usize; - let slice = buffer.get_unchecked(start..end); - - // Copy out‑of‑line data into our single “0” buffer. - let new_offset = data_buf.len() as u32; - data_buf.extend_from_slice(slice); - - bv.buffer_index = 0; - bv.offset = new_offset; - bv.into() + unsafe { + // SAFETY: `i < self.len()` ensures this is in‑bounds. + let raw_view = *self.views().get_unchecked(i); + let mut bv = ByteView::from(raw_view); + + // Inline‑small views stay as‑is. + if bv.length <= MAX_INLINE_VIEW_LEN { + raw_view + } else { + // SAFETY: `bv.buffer_index` and `bv.offset..bv.offset+bv.length` + // must both lie within valid ranges for `self.buffers`. + let buffer = self.buffers.get_unchecked(bv.buffer_index as usize); + let start = bv.offset as usize; + let end = start + bv.length as usize; + let slice = buffer.get_unchecked(start..end); + + // Copy out‑of‑line data into our single “0” buffer. + let new_offset = data_buf.len() as u32; + data_buf.extend_from_slice(slice); + + bv.buffer_index = 0; + bv.offset = new_offset; + bv.into() + } } } @@ -624,36 +630,38 @@ impl GenericByteViewArray { right: &GenericByteViewArray, right_idx: usize, ) -> Ordering { - let l_view = left.views().get_unchecked(left_idx); - let l_byte_view = ByteView::from(*l_view); + unsafe { + let l_view = left.views().get_unchecked(left_idx); + let l_byte_view = ByteView::from(*l_view); - let r_view = right.views().get_unchecked(right_idx); - let r_byte_view = ByteView::from(*r_view); + let r_view = right.views().get_unchecked(right_idx); + let r_byte_view = ByteView::from(*r_view); - let l_len = l_byte_view.length; - let r_len = r_byte_view.length; + let l_len = l_byte_view.length; + let r_len = r_byte_view.length; - if l_len <= 12 && r_len <= 12 { - return Self::inline_key_fast(*l_view).cmp(&Self::inline_key_fast(*r_view)); - } + if l_len <= 12 && r_len <= 12 { + return Self::inline_key_fast(*l_view).cmp(&Self::inline_key_fast(*r_view)); + } - // one of the string is larger than 12 bytes, - // we then try to compare the inlined data first + // one of the string is larger than 12 bytes, + // we then try to compare the inlined data first - // Note: In theory, ByteView is only used for string which is larger than 12 bytes, - // but we can still use it to get the inlined prefix for shorter strings. - // The prefix is always the first 4 bytes of the view, for both short and long strings. - let l_inlined_be = l_byte_view.prefix.swap_bytes(); - let r_inlined_be = r_byte_view.prefix.swap_bytes(); - if l_inlined_be != r_inlined_be { - return l_inlined_be.cmp(&r_inlined_be); - } + // Note: In theory, ByteView is only used for string which is larger than 12 bytes, + // but we can still use it to get the inlined prefix for shorter strings. + // The prefix is always the first 4 bytes of the view, for both short and long strings. + let l_inlined_be = l_byte_view.prefix.swap_bytes(); + let r_inlined_be = r_byte_view.prefix.swap_bytes(); + if l_inlined_be != r_inlined_be { + return l_inlined_be.cmp(&r_inlined_be); + } - // unfortunately, we need to compare the full data - let l_full_data: &[u8] = unsafe { left.value_unchecked(left_idx).as_ref() }; - let r_full_data: &[u8] = unsafe { right.value_unchecked(right_idx).as_ref() }; + // unfortunately, we need to compare the full data + let l_full_data: &[u8] = left.value_unchecked(left_idx).as_ref(); + let r_full_data: &[u8] = right.value_unchecked(right_idx).as_ref(); - l_full_data.cmp(r_full_data) + l_full_data.cmp(r_full_data) + } } /// Builds a 128-bit composite key for an inline value: @@ -853,7 +861,7 @@ impl<'a, T: ByteViewType + ?Sized> ArrayAccessor for &'a GenericByteViewArray } unsafe fn value_unchecked(&self, index: usize) -> Self::Item { - GenericByteViewArray::value_unchecked(self, index) + unsafe { GenericByteViewArray::value_unchecked(self, index) } } } @@ -999,7 +1007,7 @@ impl BinaryViewArray { /// # Safety /// Caller is responsible for ensuring that items in array are utf8 data. pub unsafe fn to_string_view_unchecked(self) -> StringViewArray { - StringViewArray::new_unchecked(self.views, self.buffers, self.nulls) + unsafe { StringViewArray::new_unchecked(self.views, self.buffers, self.nulls) } } } @@ -1171,7 +1179,10 @@ mod tests { builder.finish() }; assert_eq!(array.value(0), "large payload over 12 bytes"); - assert_eq!(array.value(1), "another large payload over 12 bytes that double than the first one, so that we can trigger the in_progress in builder re-created"); + assert_eq!( + array.value(1), + "another large payload over 12 bytes that double than the first one, so that we can trigger the in_progress in builder re-created" + ); assert_eq!(2, array.buffers.len()); } diff --git a/arrow-array/src/array/dictionary_array.rs b/arrow-array/src/array/dictionary_array.rs index acbdcb8b60fa..c055fcdcf820 100644 --- a/arrow-array/src/array/dictionary_array.rs +++ b/arrow-array/src/array/dictionary_array.rs @@ -947,14 +947,16 @@ where } unsafe fn value_unchecked(&self, index: usize) -> Self::Item { - let val = self.dictionary.keys.value_unchecked(index); - let value_idx = val.as_usize(); - - // As dictionary keys are only verified for non-null indexes - // we must check the value is within bounds - match value_idx < self.values.len() { - true => self.values.value_unchecked(value_idx), - false => Default::default(), + unsafe { + let val = self.dictionary.keys.value_unchecked(index); + let value_idx = val.as_usize(); + + // As dictionary keys are only verified for non-null indexes + // we must check the value is within bounds + match value_idx < self.values.len() { + true => self.values.value_unchecked(value_idx), + false => Default::default(), + } } } } diff --git a/arrow-array/src/array/fixed_size_binary_array.rs b/arrow-array/src/array/fixed_size_binary_array.rs index 76d9db04704e..5d15bf5cf113 100644 --- a/arrow-array/src/array/fixed_size_binary_array.rs +++ b/arrow-array/src/array/fixed_size_binary_array.rs @@ -168,12 +168,14 @@ impl FixedSizeBinaryArray { /// Caller is responsible for ensuring that the index is within the bounds /// of the array pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] { - let offset = i + self.offset(); - let pos = self.value_offset_at(offset); - std::slice::from_raw_parts( - self.value_data.as_ptr().offset(pos as isize), - (self.value_offset_at(offset + 1) - pos) as usize, - ) + unsafe { + let offset = i + self.offset(); + let pos = self.value_offset_at(offset); + std::slice::from_raw_parts( + self.value_data.as_ptr().offset(pos as isize), + (self.value_offset_at(offset + 1) - pos) as usize, + ) + } } /// Returns the offset for the element at index `i`. @@ -654,7 +656,7 @@ impl<'a> ArrayAccessor for &'a FixedSizeBinaryArray { } unsafe fn value_unchecked(&self, index: usize) -> Self::Item { - FixedSizeBinaryArray::value_unchecked(self, index) + unsafe { FixedSizeBinaryArray::value_unchecked(self, index) } } } @@ -996,6 +998,9 @@ mod tests { let nulls = NullBuffer::new_null(3); let err = FixedSizeBinaryArray::try_new(2, buffer, Some(nulls)).unwrap_err(); - assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for FixedSizeBinaryArray, expected 5 got 3"); + assert_eq!( + err.to_string(), + "Invalid argument error: Incorrect length of null buffer for FixedSizeBinaryArray, expected 5 got 3" + ); } } diff --git a/arrow-array/src/array/fixed_size_list_array.rs b/arrow-array/src/array/fixed_size_list_array.rs index 4a338591e5aa..31c403442be4 100644 --- a/arrow-array/src/array/fixed_size_list_array.rs +++ b/arrow-array/src/array/fixed_size_list_array.rs @@ -350,7 +350,9 @@ impl From for FixedSizeListArray { let value_length = match data.data_type() { DataType::FixedSizeList(_, len) => *len, data_type => { - panic!("FixedSizeListArray data should contain a FixedSizeList data type, got {data_type:?}") + panic!( + "FixedSizeListArray data should contain a FixedSizeList data type, got {data_type:?}" + ) } }; @@ -685,11 +687,17 @@ mod tests { let nulls = NullBuffer::new_null(2); let err = FixedSizeListArray::try_new(field, 2, values.clone(), Some(nulls)).unwrap_err(); - assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for FixedSizeListArray, expected 3 got 2"); + assert_eq!( + err.to_string(), + "Invalid argument error: Incorrect length of null buffer for FixedSizeListArray, expected 3 got 2" + ); let field = Arc::new(Field::new_list_field(DataType::Int32, false)); let err = FixedSizeListArray::try_new(field.clone(), 2, values.clone(), None).unwrap_err(); - assert_eq!(err.to_string(), "Invalid argument error: Found unmasked nulls for non-nullable FixedSizeListArray field \"item\""); + assert_eq!( + err.to_string(), + "Invalid argument error: Found unmasked nulls for non-nullable FixedSizeListArray field \"item\"" + ); // Valid as nulls in child masked by parent let nulls = NullBuffer::new(BooleanBuffer::new(Buffer::from([0b0000101]), 0, 3)); @@ -697,7 +705,10 @@ mod tests { let field = Arc::new(Field::new_list_field(DataType::Int64, true)); let err = FixedSizeListArray::try_new(field, 2, values, None).unwrap_err(); - assert_eq!(err.to_string(), "Invalid argument error: FixedSizeListArray expected data type Int64 got Int32 for \"item\""); + assert_eq!( + err.to_string(), + "Invalid argument error: FixedSizeListArray expected data type Int64 got Int32 for \"item\"" + ); } #[test] diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index 8836b5b0f73d..6a14367b39b7 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -334,9 +334,11 @@ impl GenericListArray { /// # Safety /// Caller must ensure that the index is within the array bounds pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef { - let end = self.value_offsets().get_unchecked(i + 1).as_usize(); - let start = self.value_offsets().get_unchecked(i).as_usize(); - self.values.slice(start, end - start) + unsafe { + let end = self.value_offsets().get_unchecked(i + 1).as_usize(); + let start = self.value_offsets().get_unchecked(i).as_usize(); + self.values.slice(start, end - start) + } } /// Returns ith value of this list array. diff --git a/arrow-array/src/array/list_view_array.rs b/arrow-array/src/array/list_view_array.rs index 7d66d10d263c..a3e40283e17f 100644 --- a/arrow-array/src/array/list_view_array.rs +++ b/arrow-array/src/array/list_view_array.rs @@ -154,7 +154,8 @@ impl GenericListViewArray { if len != sizes.len() { return Err(ArrowError::InvalidArgumentError(format!( "Length of offsets buffer and sizes buffer must be equal for {}ListViewArray, got {len} and {}", - OffsetSize::PREFIX, sizes.len() + OffsetSize::PREFIX, + sizes.len() ))); } @@ -290,9 +291,11 @@ impl GenericListViewArray { /// # Safety /// Caller must ensure that the index is within the array bounds pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef { - let offset = self.value_offsets().get_unchecked(i).as_usize(); - let length = self.value_sizes().get_unchecked(i).as_usize(); - self.values.slice(offset, length) + unsafe { + let offset = self.value_offsets().get_unchecked(i).as_usize(); + let length = self.value_sizes().get_unchecked(i).as_usize(); + self.values.slice(offset, length) + } } /// Returns ith value of this list view array. @@ -366,7 +369,7 @@ impl ArrayAccessor for &GenericListViewArray Self::Item { - GenericListViewArray::value_unchecked(self, index) + unsafe { GenericListViewArray::value_unchecked(self, index) } } } diff --git a/arrow-array/src/array/map_array.rs b/arrow-array/src/array/map_array.rs index 9a1e04c7f1c0..a7714ff7cd0b 100644 --- a/arrow-array/src/array/map_array.rs +++ b/arrow-array/src/array/map_array.rs @@ -191,10 +191,12 @@ impl MapArray { /// # Safety /// Caller must ensure that the index is within the array bounds pub unsafe fn value_unchecked(&self, i: usize) -> StructArray { - let end = *self.value_offsets().get_unchecked(i + 1); - let start = *self.value_offsets().get_unchecked(i); - self.entries - .slice(start.to_usize().unwrap(), (end - start).to_usize().unwrap()) + unsafe { + let end = *self.value_offsets().get_unchecked(i + 1); + let start = *self.value_offsets().get_unchecked(i); + self.entries + .slice(start.to_usize().unwrap(), (end - start).to_usize().unwrap()) + } } /// Returns ith value of this map array. diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs index 5fdfb9fb2244..df2a60da0e73 100644 --- a/arrow-array/src/array/mod.rs +++ b/arrow-array/src/array/mod.rs @@ -815,7 +815,7 @@ pub fn make_array(data: ArrayData) -> ArrayRef { DataType::Map(_, _) => Arc::new(MapArray::from(data)) as ArrayRef, DataType::Union(_, _) => Arc::new(UnionArray::from(data)) as ArrayRef, DataType::FixedSizeList(_, _) => Arc::new(FixedSizeListArray::from(data)) as ArrayRef, - DataType::Dictionary(ref key_type, _) => match key_type.as_ref() { + DataType::Dictionary(key_type, _) => match key_type.as_ref() { DataType::Int8 => Arc::new(DictionaryArray::::from(data)) as ArrayRef, DataType::Int16 => Arc::new(DictionaryArray::::from(data)) as ArrayRef, DataType::Int32 => Arc::new(DictionaryArray::::from(data)) as ArrayRef, @@ -826,7 +826,7 @@ pub fn make_array(data: ArrayData) -> ArrayRef { DataType::UInt64 => Arc::new(DictionaryArray::::from(data)) as ArrayRef, dt => panic!("Unexpected dictionary key type {dt:?}"), }, - DataType::RunEndEncoded(ref run_ends_type, _) => match run_ends_type.data_type() { + DataType::RunEndEncoded(run_ends_type, _) => match run_ends_type.data_type() { DataType::Int16 => Arc::new(RunArray::::from(data)) as ArrayRef, DataType::Int32 => Arc::new(RunArray::::from(data)) as ArrayRef, DataType::Int64 => Arc::new(RunArray::::from(data)) as ArrayRef, diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index 42594e7a129d..78f1d41e3b2d 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -728,7 +728,7 @@ impl PrimitiveArray { /// caller must ensure that the passed in offset is less than the array len() #[inline] pub unsafe fn value_unchecked(&self, i: usize) -> T::Native { - *self.values.get_unchecked(i) + unsafe { *self.values.get_unchecked(i) } } /// Returns the primitive value at index `i`. @@ -796,7 +796,7 @@ impl PrimitiveArray { &'a self, indexes: impl Iterator> + 'a, ) -> impl Iterator> + 'a { - indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index))) + unsafe { indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index))) } } /// Returns a zero-copy slice of this array with the indicated offset and length. @@ -1230,7 +1230,7 @@ impl ArrayAccessor for &PrimitiveArray { #[inline] unsafe fn value_unchecked(&self, index: usize) -> Self::Item { - PrimitiveArray::value_unchecked(self, index) + unsafe { PrimitiveArray::value_unchecked(self, index) } } } @@ -1467,15 +1467,24 @@ impl PrimitiveArray { P: std::borrow::Borrow::Native>>, I: IntoIterator, { - let iterator = iter.into_iter(); - let (_, upper) = iterator.size_hint(); - let len = upper.expect("trusted_len_unzip requires an upper limit"); + unsafe { + let iterator = iter.into_iter(); + let (_, upper) = iterator.size_hint(); + let len = upper.expect("trusted_len_unzip requires an upper limit"); - let (null, buffer) = trusted_len_unzip(iterator); + let (null, buffer) = trusted_len_unzip(iterator); - let data = - ArrayData::new_unchecked(T::DATA_TYPE, len, None, Some(null), 0, vec![buffer], vec![]); - PrimitiveArray::from(data) + let data = ArrayData::new_unchecked( + T::DATA_TYPE, + len, + None, + Some(null), + 0, + vec![buffer], + vec![], + ); + PrimitiveArray::from(data) + } } } @@ -2197,7 +2206,7 @@ mod tests { // chrono::NaiveDatetime::from_timestamp_opt returns None while input is invalid let arr: PrimitiveArray = vec![-7201, -60054].into(); assert_eq!( - "PrimitiveArray\n[\n Cast error: Failed to convert -7201 to temporal for Time32(Second),\n Cast error: Failed to convert -60054 to temporal for Time32(Second),\n]", + "PrimitiveArray\n[\n Cast error: Failed to convert -7201 to temporal for Time32(Second),\n Cast error: Failed to convert -60054 to temporal for Time32(Second),\n]", // "PrimitiveArray\n[\n null,\n null,\n]", format!("{arr:?}") ) @@ -2849,9 +2858,10 @@ mod tests { ] .into(); let debug_str = format!("{array:?}"); - assert_eq!("PrimitiveArray\n[\n Cast error: Failed to convert -1 to temporal for Time32(Second),\n 00:00:00,\n 23:59:59,\n Cast error: Failed to convert 86400 to temporal for Time32(Second),\n Cast error: Failed to convert 86401 to temporal for Time32(Second),\n null,\n]", - debug_str - ); + assert_eq!( + "PrimitiveArray\n[\n Cast error: Failed to convert -1 to temporal for Time32(Second),\n 00:00:00,\n 23:59:59,\n Cast error: Failed to convert 86400 to temporal for Time32(Second),\n Cast error: Failed to convert 86401 to temporal for Time32(Second),\n null,\n]", + debug_str + ); } #[test] @@ -2866,7 +2876,8 @@ mod tests { ] .into(); let debug_str = format!("{array:?}"); - assert_eq!("PrimitiveArray\n[\n Cast error: Failed to convert -1 to temporal for Time32(Millisecond),\n 00:00:00,\n 23:59:59,\n Cast error: Failed to convert 86400000 to temporal for Time32(Millisecond),\n Cast error: Failed to convert 86401000 to temporal for Time32(Millisecond),\n null,\n]", + assert_eq!( + "PrimitiveArray\n[\n Cast error: Failed to convert -1 to temporal for Time32(Millisecond),\n 00:00:00,\n 23:59:59,\n Cast error: Failed to convert 86400000 to temporal for Time32(Millisecond),\n Cast error: Failed to convert 86401000 to temporal for Time32(Millisecond),\n null,\n]", debug_str ); } @@ -2884,7 +2895,7 @@ mod tests { .into(); let debug_str = format!("{array:?}"); assert_eq!( - "PrimitiveArray\n[\n Cast error: Failed to convert -1 to temporal for Time64(Nanosecond),\n 00:00:00,\n 23:59:59,\n Cast error: Failed to convert 86400000000000 to temporal for Time64(Nanosecond),\n Cast error: Failed to convert 86401000000000 to temporal for Time64(Nanosecond),\n null,\n]", + "PrimitiveArray\n[\n Cast error: Failed to convert -1 to temporal for Time64(Nanosecond),\n 00:00:00,\n 23:59:59,\n Cast error: Failed to convert 86400000000000 to temporal for Time64(Nanosecond),\n Cast error: Failed to convert 86401000000000 to temporal for Time64(Nanosecond),\n null,\n]", debug_str ); } @@ -2901,7 +2912,10 @@ mod tests { ] .into(); let debug_str = format!("{array:?}"); - assert_eq!("PrimitiveArray\n[\n Cast error: Failed to convert -1 to temporal for Time64(Microsecond),\n 00:00:00,\n 23:59:59,\n Cast error: Failed to convert 86400000000 to temporal for Time64(Microsecond),\n Cast error: Failed to convert 86401000000 to temporal for Time64(Microsecond),\n null,\n]", debug_str); + assert_eq!( + "PrimitiveArray\n[\n Cast error: Failed to convert -1 to temporal for Time64(Microsecond),\n 00:00:00,\n 23:59:59,\n Cast error: Failed to convert 86400000000 to temporal for Time64(Microsecond),\n Cast error: Failed to convert 86401000000 to temporal for Time64(Microsecond),\n null,\n]", + debug_str + ); } #[test] diff --git a/arrow-array/src/array/run_array.rs b/arrow-array/src/array/run_array.rs index 05cfa2d17135..3332b4970040 100644 --- a/arrow-array/src/array/run_array.rs +++ b/arrow-array/src/array/run_array.rs @@ -259,7 +259,9 @@ impl From for RunArray { match data.data_type() { DataType::RunEndEncoded(_, _) => {} _ => { - panic!("Invalid data type for RunArray. The data type should be DataType::RunEndEncoded"); + panic!( + "Invalid data type for RunArray. The data type should be DataType::RunEndEncoded" + ); } } @@ -640,8 +642,10 @@ where } unsafe fn value_unchecked(&self, logical_index: usize) -> Self::Item { - let physical_index = self.run_array.get_physical_index(logical_index); - self.values().value_unchecked(physical_index) + unsafe { + let physical_index = self.run_array.get_physical_index(logical_index); + self.values().value_unchecked(physical_index) + } } } diff --git a/arrow-array/src/array/string_array.rs b/arrow-array/src/array/string_array.rs index ed70e5744fff..a85a8451c249 100644 --- a/arrow-array/src/array/string_array.rs +++ b/arrow-array/src/array/string_array.rs @@ -48,7 +48,7 @@ impl GenericStringArray { &'a self, indexes: impl Iterator> + 'a, ) -> impl Iterator> { - indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index))) + unsafe { indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index))) } } /// Fallibly creates a [`GenericStringArray`] from a [`GenericBinaryArray`] returning diff --git a/arrow-array/src/array/struct_array.rs b/arrow-array/src/array/struct_array.rs index fbc34ef0c85b..109a6459c0d9 100644 --- a/arrow-array/src/array/struct_array.rs +++ b/arrow-array/src/array/struct_array.rs @@ -922,7 +922,10 @@ mod tests { (0..30).map(|i| i % 2 == 0).collect::>(), ))), ); - assert_eq!(format!("{arr:?}"), "StructArray\n-- validity:\n[\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n ...10 elements...,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n]\n[\n-- child 0: \"c\" (Int32)\nPrimitiveArray\n[\n 0,\n 1,\n 2,\n 3,\n 4,\n 5,\n 6,\n 7,\n 8,\n 9,\n ...10 elements...,\n 20,\n 21,\n 22,\n 23,\n 24,\n 25,\n 26,\n 27,\n 28,\n 29,\n]\n]") + assert_eq!( + format!("{arr:?}"), + "StructArray\n-- validity:\n[\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n ...10 elements...,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n]\n[\n-- child 0: \"c\" (Int32)\nPrimitiveArray\n[\n 0,\n 1,\n 2,\n 3,\n 4,\n 5,\n 6,\n 7,\n 8,\n 9,\n ...10 elements...,\n 20,\n 21,\n 22,\n 23,\n 24,\n 25,\n 26,\n 27,\n 28,\n 29,\n]\n]" + ) } #[test] diff --git a/arrow-array/src/array/union_array.rs b/arrow-array/src/array/union_array.rs index d105876723da..7caf367b31ba 100644 --- a/arrow-array/src/array/union_array.rs +++ b/arrow-array/src/array/union_array.rs @@ -152,23 +152,25 @@ impl UnionArray { offsets: Option>, children: Vec, ) -> Self { - let mode = if offsets.is_some() { - UnionMode::Dense - } else { - UnionMode::Sparse - }; - - let len = type_ids.len(); - let builder = ArrayData::builder(DataType::Union(fields, mode)) - .add_buffer(type_ids.into_inner()) - .child_data(children.into_iter().map(Array::into_data).collect()) - .len(len); - - let data = match offsets { - Some(offsets) => builder.add_buffer(offsets.into_inner()).build_unchecked(), - None => builder.build_unchecked(), - }; - Self::from(data) + unsafe { + let mode = if offsets.is_some() { + UnionMode::Dense + } else { + UnionMode::Sparse + }; + + let len = type_ids.len(); + let builder = ArrayData::builder(DataType::Union(fields, mode)) + .add_buffer(type_ids.into_inner()) + .child_data(children.into_iter().map(Array::into_data).collect()) + .len(len); + + let data = match offsets { + Some(offsets) => builder.add_buffer(offsets.into_inner()).build_unchecked(), + None => builder.build_unchecked(), + }; + Self::from(data) + } } /// Attempts to create a new `UnionArray`, validating the inputs provided. @@ -219,7 +221,7 @@ impl UnionArray { _ => { return Err(ArrowError::InvalidArgumentError( "Type Ids values must match one of the field type ids".to_owned(), - )) + )); } } } diff --git a/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs b/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs index 852ba680227f..d68f36d9c54c 100644 --- a/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs +++ b/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs @@ -405,9 +405,15 @@ mod tests { fn test_fixed_size_dictionary_builder_wrong_size() { let mut b = FixedSizeBinaryDictionaryBuilder::::new(3); let err = b.append(b"too long").unwrap_err().to_string(); - assert_eq!(err, "Invalid argument error: Invalid input length passed to FixedSizeBinaryBuilder. Expected 3 got 8"); + assert_eq!( + err, + "Invalid argument error: Invalid input length passed to FixedSizeBinaryBuilder. Expected 3 got 8" + ); let err = b.append("").unwrap_err().to_string(); - assert_eq!(err, "Invalid argument error: Invalid input length passed to FixedSizeBinaryBuilder. Expected 3 got 0"); + assert_eq!( + err, + "Invalid argument error: Invalid input length passed to FixedSizeBinaryBuilder. Expected 3 got 0" + ); } #[test] diff --git a/arrow-array/src/builder/fixed_size_list_builder.rs b/arrow-array/src/builder/fixed_size_list_builder.rs index 5c142b277d14..3bf8caac1b33 100644 --- a/arrow-array/src/builder/fixed_size_list_builder.rs +++ b/arrow-array/src/builder/fixed_size_list_builder.rs @@ -172,7 +172,8 @@ where let nulls = self.null_buffer_builder.finish(); assert_eq!( - values.len(), len * self.list_len as usize, + values.len(), + len * self.list_len as usize, "Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).", values.len(), self.list_len, @@ -194,7 +195,8 @@ where let nulls = self.null_buffer_builder.finish_cloned(); assert_eq!( - values.len(), len * self.list_len as usize, + values.len(), + len * self.list_len as usize, "Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).", values.len(), self.list_len, diff --git a/arrow-array/src/builder/generic_bytes_view_builder.rs b/arrow-array/src/builder/generic_bytes_view_builder.rs index cba2bb428e53..81dcf6bdfe6f 100644 --- a/arrow-array/src/builder/generic_bytes_view_builder.rs +++ b/arrow-array/src/builder/generic_bytes_view_builder.rs @@ -121,7 +121,7 @@ impl GenericByteViewBuilder { /// growing buffer size exponentially from 8KB up to 2MB. The /// first buffer allocated is 8KB, then 16KB, then 32KB, etc up to 2MB. /// - /// If this method is used, any new buffers allocated are + /// If this method is used, any new buffers allocated are /// exactly this size. This can be useful for advanced users /// that want to control the memory usage and buffer count. /// @@ -188,14 +188,16 @@ impl GenericByteViewBuilder { /// (2) The range `offset..offset+length` must be within the bounds of the block /// (3) The data in the block must be valid of type `T` pub unsafe fn append_view_unchecked(&mut self, block: u32, offset: u32, len: u32) { - let b = self.completed.get_unchecked(block as usize); - let start = offset as usize; - let end = start.saturating_add(len as usize); - let b = b.get_unchecked(start..end); + unsafe { + let b = self.completed.get_unchecked(block as usize); + let start = offset as usize; + let end = start.saturating_add(len as usize); + let b = b.get_unchecked(start..end); - let view = make_view(b, block, offset); - self.views_buffer.push(view); - self.null_buffer_builder.append_non_null(); + let view = make_view(b, block, offset); + self.views_buffer.push(view); + self.null_buffer_builder.append_non_null(); + } } /// Appends an array to the builder. @@ -390,7 +392,7 @@ impl GenericByteViewBuilder { self.flush_in_progress(); let completed = std::mem::take(&mut self.completed); let nulls = self.null_buffer_builder.finish(); - if let Some((ref mut ht, _)) = self.string_tracker.as_mut() { + if let Some((ht, _)) = self.string_tracker.as_mut() { ht.clear(); } let views = std::mem::take(&mut self.views_buffer); @@ -688,7 +690,10 @@ mod tests { ); let err = v.try_append_view(0, u32::MAX, 1).unwrap_err(); - assert_eq!(err.to_string(), "Invalid argument error: Range 4294967295..4294967296 out of bounds for block of length 17"); + assert_eq!( + err.to_string(), + "Invalid argument error: Range 4294967295..4294967296 out of bounds for block of length 17" + ); let err = v.try_append_view(0, 1, u32::MAX).unwrap_err(); assert_eq!( diff --git a/arrow-array/src/ffi.rs b/arrow-array/src/ffi.rs index 83eaa3d6544a..3e8836b40d31 100644 --- a/arrow-array/src/ffi.rs +++ b/arrow-array/src/ffi.rs @@ -130,14 +130,16 @@ pub unsafe fn export_array_into_raw( out_array: *mut FFI_ArrowArray, out_schema: *mut FFI_ArrowSchema, ) -> Result<()> { - let data = src.to_data(); - let array = FFI_ArrowArray::new(&data); - let schema = FFI_ArrowSchema::try_from(data.data_type())?; + unsafe { + let data = src.to_data(); + let array = FFI_ArrowArray::new(&data); + let schema = FFI_ArrowSchema::try_from(data.data_type())?; - std::ptr::write_unaligned(out_array, array); - std::ptr::write_unaligned(out_schema, schema); + std::ptr::write_unaligned(out_array, array); + std::ptr::write_unaligned(out_schema, schema); - Ok(()) + Ok(()) + } } // returns the number of bits that buffer `i` (in the C data interface) is expected to have. @@ -160,49 +162,54 @@ fn bit_width(data_type: &DataType, i: usize) -> Result { (DataType::Boolean, _) => { return Err(ArrowError::CDataInterface(format!( "The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented." - ))) + ))); } (DataType::FixedSizeBinary(num_bytes), 1) => *num_bytes as usize * u8::BITS as usize, (DataType::FixedSizeList(f, num_elems), 1) => { let child_bit_width = bit_width(f.data_type(), 1)?; child_bit_width * (*num_elems as usize) - }, + } (DataType::FixedSizeBinary(_), _) | (DataType::FixedSizeList(_, _), _) => { return Err(ArrowError::CDataInterface(format!( "The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented." - ))) - }, + ))); + } // Variable-size list and map have one i32 buffer. // Variable-sized binaries: have two buffers. // "small": first buffer is i32, second is in bytes - (DataType::Utf8, 1) | (DataType::Binary, 1) | (DataType::List(_), 1) | (DataType::Map(_, _), 1) => i32::BITS as _, + (DataType::Utf8, 1) + | (DataType::Binary, 1) + | (DataType::List(_), 1) + | (DataType::Map(_, _), 1) => i32::BITS as _, (DataType::Utf8, 2) | (DataType::Binary, 2) => u8::BITS as _, (DataType::List(_), _) | (DataType::Map(_, _), _) => { return Err(ArrowError::CDataInterface(format!( "The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented." - ))) + ))); } (DataType::Utf8, _) | (DataType::Binary, _) => { return Err(ArrowError::CDataInterface(format!( "The datatype \"{data_type:?}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented." - ))) + ))); } // Variable-sized binaries: have two buffers. // LargeUtf8: first buffer is i64, second is in bytes - (DataType::LargeUtf8, 1) | (DataType::LargeBinary, 1) | (DataType::LargeList(_), 1) => i64::BITS as _, - (DataType::LargeUtf8, 2) | (DataType::LargeBinary, 2) | (DataType::LargeList(_), 2)=> u8::BITS as _, - (DataType::LargeUtf8, _) | (DataType::LargeBinary, _) | (DataType::LargeList(_), _)=> { + (DataType::LargeUtf8, 1) | (DataType::LargeBinary, 1) | (DataType::LargeList(_), 1) => { + i64::BITS as _ + } + (DataType::LargeUtf8, 2) | (DataType::LargeBinary, 2) | (DataType::LargeList(_), 2) => { + u8::BITS as _ + } + (DataType::LargeUtf8, _) | (DataType::LargeBinary, _) | (DataType::LargeList(_), _) => { return Err(ArrowError::CDataInterface(format!( "The datatype \"{data_type:?}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented." - ))) + ))); } // Variable-sized views: have 3 or more buffers. // Buffer 1 are the u128 views // Buffers 2...N-1 are u8 byte buffers - (DataType::Utf8View, 1) | (DataType::BinaryView,1) => u128::BITS as _, - (DataType::Utf8View, _) | (DataType::BinaryView, _) => { - u8::BITS as _ - } + (DataType::Utf8View, 1) | (DataType::BinaryView, 1) => u128::BITS as _, + (DataType::Utf8View, _) | (DataType::BinaryView, _) => u8::BITS as _, // type ids. UnionArray doesn't have null bitmap so buffer index begins with 0. (DataType::Union(_, _), 0) => i8::BITS as _, // Only DenseUnion has 2nd buffer @@ -210,24 +217,24 @@ fn bit_width(data_type: &DataType, i: usize) -> Result { (DataType::Union(_, UnionMode::Sparse), _) => { return Err(ArrowError::CDataInterface(format!( "The datatype \"{data_type:?}\" expects 1 buffer, but requested {i}. Please verify that the C data interface is correctly implemented." - ))) + ))); } (DataType::Union(_, UnionMode::Dense), _) => { return Err(ArrowError::CDataInterface(format!( "The datatype \"{data_type:?}\" expects 2 buffer, but requested {i}. Please verify that the C data interface is correctly implemented." - ))) + ))); } (_, 0) => { // We don't call this `bit_width` to compute buffer length for null buffer. If any types that don't have null buffer like // UnionArray, they should be handled above. return Err(ArrowError::CDataInterface(format!( "The datatype \"{data_type:?}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented." - ))) + ))); } _ => { return Err(ArrowError::CDataInterface(format!( "The datatype \"{data_type:?}\" is still not supported in Rust implementation" - ))) + ))); } }) } @@ -245,11 +252,13 @@ unsafe fn create_buffer( index: usize, len: usize, ) -> Option { - if array.num_buffers() == 0 { - return None; + unsafe { + if array.num_buffers() == 0 { + return None; + } + NonNull::new(array.buffer(index) as _) + .map(|ptr| Buffer::from_custom_allocation(ptr, len, owner)) } - NonNull::new(array.buffer(index) as _) - .map(|ptr| Buffer::from_custom_allocation(ptr, len, owner)) } /// Export to the C Data Interface diff --git a/arrow-array/src/ffi_stream.rs b/arrow-array/src/ffi_stream.rs index 3d4e89e80b89..bf1b91b9e483 100644 --- a/arrow-array/src/ffi_stream.rs +++ b/arrow-array/src/ffi_stream.rs @@ -102,19 +102,21 @@ unsafe impl Send for FFI_ArrowArrayStream {} // callback used to drop [FFI_ArrowArrayStream] when it is exported. unsafe extern "C" fn release_stream(stream: *mut FFI_ArrowArrayStream) { - if stream.is_null() { - return; - } - let stream = &mut *stream; + unsafe { + if stream.is_null() { + return; + } + let stream = &mut *stream; - stream.get_schema = None; - stream.get_next = None; - stream.get_last_error = None; + stream.get_schema = None; + stream.get_next = None; + stream.get_last_error = None; - let private_data = Box::from_raw(stream.private_data as *mut StreamPrivateData); - drop(private_data); + let private_data = Box::from_raw(stream.private_data as *mut StreamPrivateData); + drop(private_data); - stream.release = None; + stream.release = None; + } } struct StreamPrivateData { @@ -188,7 +190,7 @@ impl FFI_ArrowArrayStream { /// [move]: https://arrow.apache.org/docs/format/CDataInterface.html#moving-an-array /// [valid]: https://doc.rust-lang.org/std/ptr/index.html#safety pub unsafe fn from_raw(raw_stream: *mut FFI_ArrowArrayStream) -> Self { - std::ptr::replace(raw_stream, Self::empty()) + unsafe { std::ptr::replace(raw_stream, Self::empty()) } } /// Creates a new empty [FFI_ArrowArrayStream]. Used to import from the C Stream Interface. @@ -330,7 +332,7 @@ impl ArrowArrayStreamReader { /// /// See [`FFI_ArrowArrayStream::from_raw`] pub unsafe fn from_raw(raw_stream: *mut FFI_ArrowArrayStream) -> Result { - Self::try_new(FFI_ArrowArrayStream::from_raw(raw_stream)) + unsafe { Self::try_new(FFI_ArrowArrayStream::from_raw(raw_stream)) } } /// Get the last error from `ArrowArrayStreamReader` diff --git a/arrow-array/src/record_batch.rs b/arrow-array/src/record_batch.rs index c1023b739081..01886110e986 100644 --- a/arrow-array/src/record_batch.rs +++ b/arrow-array/src/record_batch.rs @@ -360,7 +360,8 @@ impl RecordBatch { if let Some((i, (col_type, field_type))) = not_match { return Err(ArrowError::InvalidArgumentError(format!( - "column types must match schema types, expected {field_type:?} but found {col_type:?} at column index {i}"))); + "column types must match schema types, expected {field_type:?} but found {col_type:?} at column index {i}" + ))); } Ok(RecordBatch { @@ -1100,7 +1101,10 @@ mod tests { let a = Int64Array::from(vec![1, 2, 3, 4, 5]); let err = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)]).unwrap_err(); - assert_eq!(err.to_string(), "Invalid argument error: column types must match schema types, expected Int32 but found Int64 at column index 0"); + assert_eq!( + err.to_string(), + "Invalid argument error: column types must match schema types, expected Int32 but found Int64 at column index 0" + ); } #[test] @@ -1600,7 +1604,10 @@ mod tests { schema, vec![Arc::new(Int32Array::from(vec![Some(1), None]))], ); - assert_eq!("Invalid argument error: Column 'a' is declared as non-nullable but contains null values", format!("{}", maybe_batch.err().unwrap())); + assert_eq!( + "Invalid argument error: Column 'a' is declared as non-nullable but contains null values", + format!("{}", maybe_batch.err().unwrap()) + ); } #[test] fn test_record_batch_options() { diff --git a/arrow-array/src/trusted_len.rs b/arrow-array/src/trusted_len.rs index 781cad38f7e9..1864a8463be7 100644 --- a/arrow-array/src/trusted_len.rs +++ b/arrow-array/src/trusted_len.rs @@ -29,32 +29,34 @@ where P: std::borrow::Borrow>, I: Iterator, { - let (_, upper) = iterator.size_hint(); - let upper = upper.expect("trusted_len_unzip requires an upper limit"); - let len = upper * std::mem::size_of::(); + unsafe { + let (_, upper) = iterator.size_hint(); + let upper = upper.expect("trusted_len_unzip requires an upper limit"); + let len = upper * std::mem::size_of::(); - let mut null = MutableBuffer::from_len_zeroed(upper.saturating_add(7) / 8); - let mut buffer = MutableBuffer::new(len); + let mut null = MutableBuffer::from_len_zeroed(upper.saturating_add(7) / 8); + let mut buffer = MutableBuffer::new(len); - let dst_null = null.as_mut_ptr(); - let mut dst = buffer.as_mut_ptr() as *mut T; - for (i, item) in iterator.enumerate() { - let item = item.borrow(); - if let Some(item) = item { - std::ptr::write(dst, *item); - bit_util::set_bit_raw(dst_null, i); - } else { - std::ptr::write(dst, T::default()); + let dst_null = null.as_mut_ptr(); + let mut dst = buffer.as_mut_ptr() as *mut T; + for (i, item) in iterator.enumerate() { + let item = item.borrow(); + if let Some(item) = item { + std::ptr::write(dst, *item); + bit_util::set_bit_raw(dst_null, i); + } else { + std::ptr::write(dst, T::default()); + } + dst = dst.add(1); } - dst = dst.add(1); + assert_eq!( + dst.offset_from(buffer.as_ptr() as *mut T) as usize, + upper, + "Trusted iterator length was not accurately reported" + ); + buffer.set_len(len); + (null.into(), buffer.into()) } - assert_eq!( - dst.offset_from(buffer.as_ptr() as *mut T) as usize, - upper, - "Trusted iterator length was not accurately reported" - ); - buffer.set_len(len); - (null.into(), buffer.into()) } #[cfg(test)] diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs index 144de8dbecbd..ebc082a55651 100644 --- a/arrow-array/src/types.rs +++ b/arrow-array/src/types.rs @@ -1579,7 +1579,7 @@ pub(crate) mod bytes { #[inline] unsafe fn from_bytes_unchecked(b: &[u8]) -> &Self { - std::str::from_utf8_unchecked(b) + unsafe { std::str::from_utf8_unchecked(b) } } } } diff --git a/arrow-avro/src/codec.rs b/arrow-avro/src/codec.rs index 64fc0488e301..1263241edc7f 100644 --- a/arrow-avro/src/codec.rs +++ b/arrow-avro/src/codec.rs @@ -1545,7 +1545,7 @@ impl<'a> Maker<'a> { _ => { return Err(ArrowError::ParseError(format!( "Illegal promotion {write_primitive:?} to {read_primitive:?}" - ))) + ))); } }; let mut datatype = self.parse_type(reader_schema, None)?; diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs index 57f30edf1eb8..c8dd809aab15 100644 --- a/arrow-buffer/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -172,7 +172,7 @@ impl Buffer { len: usize, owner: Arc, ) -> Self { - Buffer::build_with_arguments(ptr, len, Deallocation::Custom(owner, len)) + unsafe { Buffer::build_with_arguments(ptr, len, Deallocation::Custom(owner, len)) } } /// Auxiliary method to create a new Buffer @@ -181,12 +181,14 @@ impl Buffer { len: usize, deallocation: Deallocation, ) -> Self { - let bytes = Bytes::new(ptr, len, deallocation); - let ptr = bytes.as_ptr(); - Buffer { - ptr, - data: Arc::new(bytes), - length: len, + unsafe { + let bytes = Bytes::new(ptr, len, deallocation); + let ptr = bytes.as_ptr(); + Buffer { + ptr, + data: Arc::new(bytes), + length: len, + } } } @@ -561,7 +563,7 @@ impl Buffer { pub unsafe fn from_trusted_len_iter>( iterator: I, ) -> Self { - MutableBuffer::from_trusted_len_iter(iterator).into() + unsafe { MutableBuffer::from_trusted_len_iter(iterator).into() } } /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length or errors @@ -578,7 +580,7 @@ impl Buffer { >( iterator: I, ) -> Result { - Ok(MutableBuffer::try_from_trusted_len_iter(iterator)?.into()) + unsafe { Ok(MutableBuffer::try_from_trusted_len_iter(iterator)?.into()) } } } diff --git a/arrow-buffer/src/buffer/mutable.rs b/arrow-buffer/src/buffer/mutable.rs index 63fdbf598bdb..f69bc305bfbc 100644 --- a/arrow-buffer/src/buffer/mutable.rs +++ b/arrow-buffer/src/buffer/mutable.rs @@ -458,11 +458,13 @@ impl MutableBuffer { /// Caller must ensure that the capacity()-len()>=`size_of`() #[inline] pub unsafe fn push_unchecked(&mut self, item: T) { - let additional = std::mem::size_of::(); - let src = item.to_byte_slice().as_ptr(); - let dst = self.data.as_ptr().add(self.len); - std::ptr::copy_nonoverlapping(src, dst, additional); - self.len += additional; + unsafe { + let additional = std::mem::size_of::(); + let src = item.to_byte_slice().as_ptr(); + let dst = self.data.as_ptr().add(self.len); + std::ptr::copy_nonoverlapping(src, dst, additional); + self.len += additional; + } } /// Extends the buffer by `additional` bytes equal to `0u8`, incrementing its capacity if needed. @@ -629,27 +631,29 @@ impl MutableBuffer { pub unsafe fn from_trusted_len_iter>( iterator: I, ) -> Self { - let item_size = std::mem::size_of::(); - let (_, upper) = iterator.size_hint(); - let upper = upper.expect("from_trusted_len_iter requires an upper limit"); - let len = upper * item_size; - - let mut buffer = MutableBuffer::new(len); - - let mut dst = buffer.data.as_ptr(); - for item in iterator { - // note how there is no reserve here (compared with `extend_from_iter`) - let src = item.to_byte_slice().as_ptr(); - std::ptr::copy_nonoverlapping(src, dst, item_size); - dst = dst.add(item_size); + unsafe { + let item_size = std::mem::size_of::(); + let (_, upper) = iterator.size_hint(); + let upper = upper.expect("from_trusted_len_iter requires an upper limit"); + let len = upper * item_size; + + let mut buffer = MutableBuffer::new(len); + + let mut dst = buffer.data.as_ptr(); + for item in iterator { + // note how there is no reserve here (compared with `extend_from_iter`) + let src = item.to_byte_slice().as_ptr(); + std::ptr::copy_nonoverlapping(src, dst, item_size); + dst = dst.add(item_size); + } + assert_eq!( + dst.offset_from(buffer.data.as_ptr()) as usize, + len, + "Trusted iterator length was not accurately reported" + ); + buffer.len = len; + buffer } - assert_eq!( - dst.offset_from(buffer.data.as_ptr()) as usize, - len, - "Trusted iterator length was not accurately reported" - ); - buffer.len = len; - buffer } /// Creates a [`MutableBuffer`] from a boolean [`Iterator`] with a trusted (upper) length. @@ -691,33 +695,37 @@ impl MutableBuffer { >( iterator: I, ) -> Result { - let item_size = std::mem::size_of::(); - let (_, upper) = iterator.size_hint(); - let upper = upper.expect("try_from_trusted_len_iter requires an upper limit"); - let len = upper * item_size; - - let mut buffer = MutableBuffer::new(len); - - let mut dst = buffer.data.as_ptr(); - for item in iterator { - let item = item?; - // note how there is no reserve here (compared with `extend_from_iter`) - let src = item.to_byte_slice().as_ptr(); - std::ptr::copy_nonoverlapping(src, dst, item_size); - dst = dst.add(item_size); - } - // try_from_trusted_len_iter is instantiated a lot, so we extract part of it into a less - // generic method to reduce compile time - unsafe fn finalize_buffer(dst: *mut u8, buffer: &mut MutableBuffer, len: usize) { - assert_eq!( - dst.offset_from(buffer.data.as_ptr()) as usize, - len, - "Trusted iterator length was not accurately reported" - ); - buffer.len = len; + unsafe { + let item_size = std::mem::size_of::(); + let (_, upper) = iterator.size_hint(); + let upper = upper.expect("try_from_trusted_len_iter requires an upper limit"); + let len = upper * item_size; + + let mut buffer = MutableBuffer::new(len); + + let mut dst = buffer.data.as_ptr(); + for item in iterator { + let item = item?; + // note how there is no reserve here (compared with `extend_from_iter`) + let src = item.to_byte_slice().as_ptr(); + std::ptr::copy_nonoverlapping(src, dst, item_size); + dst = dst.add(item_size); + } + // try_from_trusted_len_iter is instantiated a lot, so we extract part of it into a less + // generic method to reduce compile time + unsafe fn finalize_buffer(dst: *mut u8, buffer: &mut MutableBuffer, len: usize) { + unsafe { + assert_eq!( + dst.offset_from(buffer.data.as_ptr()) as usize, + len, + "Trusted iterator length was not accurately reported" + ); + buffer.len = len; + } + } + finalize_buffer(dst, &mut buffer, len); + Ok(buffer) } - finalize_buffer(dst, &mut buffer, len); - Ok(buffer) } } diff --git a/arrow-buffer/src/buffer/scalar.rs b/arrow-buffer/src/buffer/scalar.rs index 4dd516c708ac..c058fbdce986 100644 --- a/arrow-buffer/src/buffer/scalar.rs +++ b/arrow-buffer/src/buffer/scalar.rs @@ -162,8 +162,10 @@ impl From for ScalarBuffer { is_aligned, "Memory pointer is not aligned with the specified scalar type" ), - Deallocation::Custom(_, _) => - assert!(is_aligned, "Memory pointer from external source (e.g, FFI) is not aligned with the specified scalar type. Before importing buffer through FFI, please make sure the allocation is aligned."), + Deallocation::Custom(_, _) => assert!( + is_aligned, + "Memory pointer from external source (e.g, FFI) is not aligned with the specified scalar type. Before importing buffer through FFI, please make sure the allocation is aligned." + ), } Self { diff --git a/arrow-buffer/src/util/bit_mask.rs b/arrow-buffer/src/util/bit_mask.rs index 6030cb4b1b8c..349e94498deb 100644 --- a/arrow-buffer/src/util/bit_mask.rs +++ b/arrow-buffer/src/util/bit_mask.rs @@ -130,21 +130,23 @@ unsafe fn set_upto_64bits( /// The caller must ensure `data` has `offset..(offset + 8)` range, and `count <= 8`. #[inline] unsafe fn read_bytes_to_u64(data: &[u8], offset: usize, count: usize) -> u64 { - debug_assert!(count <= 8); - let mut tmp: u64 = 0; - let src = data.as_ptr().add(offset); unsafe { + debug_assert!(count <= 8); + let mut tmp: u64 = 0; + let src = data.as_ptr().add(offset); std::ptr::copy_nonoverlapping(src, &mut tmp as *mut _ as *mut u8, count); + tmp } - tmp } /// # Safety /// The caller must ensure `data` has `offset..(offset + 8)` range #[inline] unsafe fn write_u64_bytes(data: &mut [u8], offset: usize, chunk: u64) { - let ptr = data.as_mut_ptr().add(offset) as *mut u64; - ptr.write_unaligned(chunk); + unsafe { + let ptr = data.as_mut_ptr().add(offset) as *mut u64; + ptr.write_unaligned(chunk); + } } /// Similar to `write_u64_bytes`, but this method ORs the offset addressed `data` and `chunk` @@ -154,9 +156,11 @@ unsafe fn write_u64_bytes(data: &mut [u8], offset: usize, chunk: u64) { /// The caller must ensure `data` has `offset..(offset + 8)` range #[inline] unsafe fn or_write_u64_bytes(data: &mut [u8], offset: usize, chunk: u64) { - let ptr = data.as_mut_ptr().add(offset); - let chunk = chunk | (*ptr) as u64; - (ptr as *mut u64).write_unaligned(chunk); + unsafe { + let ptr = data.as_mut_ptr().add(offset); + let chunk = chunk | (*ptr) as u64; + (ptr as *mut u64).write_unaligned(chunk); + } } #[cfg(test)] diff --git a/arrow-buffer/src/util/bit_util.rs b/arrow-buffer/src/util/bit_util.rs index c297321bdcf9..f00a33aca96d 100644 --- a/arrow-buffer/src/util/bit_util.rs +++ b/arrow-buffer/src/util/bit_util.rs @@ -47,7 +47,7 @@ pub fn get_bit(data: &[u8], i: usize) -> bool { /// responsible to guarantee that `i` is within bounds. #[inline] pub unsafe fn get_bit_raw(data: *const u8, i: usize) -> bool { - (*data.add(i / 8) & (1 << (i % 8))) != 0 + unsafe { (*data.add(i / 8) & (1 << (i % 8))) != 0 } } /// Sets bit at position `i` for `data` to 1 @@ -64,7 +64,9 @@ pub fn set_bit(data: &mut [u8], i: usize) { /// responsible to guarantee that `i` is within bounds. #[inline] pub unsafe fn set_bit_raw(data: *mut u8, i: usize) { - *data.add(i / 8) |= 1 << (i % 8); + unsafe { + *data.add(i / 8) |= 1 << (i % 8); + } } /// Sets bit at position `i` for `data` to 0 @@ -81,7 +83,9 @@ pub fn unset_bit(data: &mut [u8], i: usize) { /// responsible to guarantee that `i` is within bounds. #[inline] pub unsafe fn unset_bit_raw(data: *mut u8, i: usize) { - *data.add(i / 8) &= !(1 << (i % 8)); + unsafe { + *data.add(i / 8) &= !(1 << (i % 8)); + } } /// Returns the ceil of `value`/`divisor` diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index 095e31274887..99cafdb505ba 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -580,7 +580,7 @@ where other => { return Err(ArrowError::ComputeError(format!( "Cannot cast {other:?} to decimal", - ))) + ))); } }; diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 72b2de99bd40..851edaace7a2 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -868,9 +868,9 @@ pub fn cast_with_options( array.nulls().cloned(), )?)) } - (_, List(ref to)) => cast_values_to_list::(array, to, cast_options), - (_, LargeList(ref to)) => cast_values_to_list::(array, to, cast_options), - (_, FixedSizeList(ref to, size)) if *size == 1 => { + (_, List(to)) => cast_values_to_list::(array, to, cast_options), + (_, LargeList(to)) => cast_values_to_list::(array, to, cast_options), + (_, FixedSizeList(to, size)) if *size == 1 => { cast_values_to_fixed_size_list(array, to, *size, cast_options) } (FixedSizeList(_, size), _) if *size == 1 => { @@ -2921,8 +2921,10 @@ mod tests { }; let result_unsafe = cast_with_options(&array, &DataType::Decimal32(2, 2), &options); - assert_eq!("Invalid argument error: 12345600 is too large to store in a Decimal32 of precision 2. Max is 99", - result_unsafe.unwrap_err().to_string()); + assert_eq!( + "Invalid argument error: 12345600 is too large to store in a Decimal32 of precision 2. Max is 99", + result_unsafe.unwrap_err().to_string() + ); } #[test] @@ -2955,8 +2957,10 @@ mod tests { }; let result_unsafe = cast_with_options(&array, &DataType::Decimal64(2, 2), &options); - assert_eq!("Invalid argument error: 12345600 is too large to store in a Decimal64 of precision 2. Max is 99", - result_unsafe.unwrap_err().to_string()); + assert_eq!( + "Invalid argument error: 12345600 is too large to store in a Decimal64 of precision 2. Max is 99", + result_unsafe.unwrap_err().to_string() + ); } #[test] @@ -2989,8 +2993,10 @@ mod tests { }; let result_unsafe = cast_with_options(&array, &DataType::Decimal128(2, 2), &options); - assert_eq!("Invalid argument error: 12345600 is too large to store in a Decimal128 of precision 2. Max is 99", - result_unsafe.unwrap_err().to_string()); + assert_eq!( + "Invalid argument error: 12345600 is too large to store in a Decimal128 of precision 2. Max is 99", + result_unsafe.unwrap_err().to_string() + ); } #[test] @@ -3146,8 +3152,10 @@ mod tests { format_options: FormatOptions::default(), }, ); - assert_eq!("Cast error: Cannot cast to Decimal128(38, 38). Overflowing on 170141183460469231731687303715884105727", - result.unwrap_err().to_string()); + assert_eq!( + "Cast error: Cannot cast to Decimal128(38, 38). Overflowing on 170141183460469231731687303715884105727", + result.unwrap_err().to_string() + ); } #[test] @@ -3166,8 +3174,10 @@ mod tests { format_options: FormatOptions::default(), }, ); - assert_eq!("Cast error: Cannot cast to Decimal256(76, 76). Overflowing on 170141183460469231731687303715884105727", - result.unwrap_err().to_string()); + assert_eq!( + "Cast error: Cannot cast to Decimal256(76, 76). Overflowing on 170141183460469231731687303715884105727", + result.unwrap_err().to_string() + ); } #[test] @@ -3243,8 +3253,10 @@ mod tests { format_options: FormatOptions::default(), }, ); - assert_eq!("Cast error: Cannot cast to Decimal128(38, 7). Overflowing on 170141183460469231731687303715884105727", - result.unwrap_err().to_string()); + assert_eq!( + "Cast error: Cannot cast to Decimal128(38, 7). Overflowing on 170141183460469231731687303715884105727", + result.unwrap_err().to_string() + ); } #[test] @@ -3262,8 +3274,10 @@ mod tests { format_options: FormatOptions::default(), }, ); - assert_eq!("Cast error: Cannot cast to Decimal256(76, 55). Overflowing on 170141183460469231731687303715884105727", - result.unwrap_err().to_string()); + assert_eq!( + "Cast error: Cannot cast to Decimal256(76, 55). Overflowing on 170141183460469231731687303715884105727", + result.unwrap_err().to_string() + ); } #[test] @@ -4905,7 +4919,10 @@ mod tests { format_options: FormatOptions::default(), }; let err = cast_with_options(array, &to_type, &options).unwrap_err(); - assert_eq!(err.to_string(), "Cast error: Cannot cast string '08:08:61.091323414' to value of Time32(Second) type"); + assert_eq!( + err.to_string(), + "Cast error: Cannot cast string '08:08:61.091323414' to value of Time32(Second) type" + ); } } @@ -4947,7 +4964,10 @@ mod tests { format_options: FormatOptions::default(), }; let err = cast_with_options(array, &to_type, &options).unwrap_err(); - assert_eq!(err.to_string(), "Cast error: Cannot cast string '08:08:61.091323414' to value of Time32(Millisecond) type"); + assert_eq!( + err.to_string(), + "Cast error: Cannot cast string '08:08:61.091323414' to value of Time32(Millisecond) type" + ); } } @@ -4981,7 +5001,10 @@ mod tests { format_options: FormatOptions::default(), }; let err = cast_with_options(array, &to_type, &options).unwrap_err(); - assert_eq!(err.to_string(), "Cast error: Cannot cast string 'Not a valid time' to value of Time64(Microsecond) type"); + assert_eq!( + err.to_string(), + "Cast error: Cannot cast string 'Not a valid time' to value of Time64(Microsecond) type" + ); } } @@ -5015,7 +5038,10 @@ mod tests { format_options: FormatOptions::default(), }; let err = cast_with_options(array, &to_type, &options).unwrap_err(); - assert_eq!(err.to_string(), "Cast error: Cannot cast string 'Not a valid time' to value of Time64(Nanosecond) type"); + assert_eq!( + err.to_string(), + "Cast error: Cannot cast string 'Not a valid time' to value of Time64(Nanosecond) type" + ); } } @@ -9685,7 +9711,10 @@ mod tests { format_options: FormatOptions::default(), }, ); - assert_eq!("Invalid argument error: 100000000000 is too large to store in a Decimal128 of precision 10. Max is 9999999999", err.unwrap_err().to_string()); + assert_eq!( + "Invalid argument error: 100000000000 is too large to store in a Decimal128 of precision 10. Max is 9999999999", + err.unwrap_err().to_string() + ); } #[test] @@ -9768,7 +9797,10 @@ mod tests { format_options: FormatOptions::default(), }, ); - assert_eq!("Invalid argument error: 100000000000 is too large to store in a Decimal256 of precision 10. Max is 9999999999", err.unwrap_err().to_string()); + assert_eq!( + "Invalid argument error: 100000000000 is too large to store in a Decimal256 of precision 10. Max is 9999999999", + err.unwrap_err().to_string() + ); } #[test] @@ -10173,7 +10205,10 @@ mod tests { format_options: FormatOptions::default(), }, ); - assert_eq!("Invalid argument error: 1234567000 is too large to store in a Decimal128 of precision 7. Max is 9999999", err.unwrap_err().to_string()); + assert_eq!( + "Invalid argument error: 1234567000 is too large to store in a Decimal128 of precision 7. Max is 9999999", + err.unwrap_err().to_string() + ); } #[test] @@ -10199,7 +10234,10 @@ mod tests { format_options: FormatOptions::default(), }, ); - assert_eq!("Invalid argument error: 1234567000 is too large to store in a Decimal256 of precision 7. Max is 9999999", err.unwrap_err().to_string()); + assert_eq!( + "Invalid argument error: 1234567000 is too large to store in a Decimal256 of precision 7. Max is 9999999", + err.unwrap_err().to_string() + ); } /// helper function to test casting from duration to interval @@ -11114,8 +11152,10 @@ mod tests { ..Default::default() }; let result = cast_with_options(&array, &output_type, &options); - assert_eq!(result.unwrap_err().to_string(), - "Invalid argument error: 123456789 is too large to store in a Decimal128 of precision 6. Max is 999999"); + assert_eq!( + result.unwrap_err().to_string(), + "Invalid argument error: 123456789 is too large to store in a Decimal128 of precision 6. Max is 999999" + ); } #[test] @@ -11160,8 +11200,10 @@ mod tests { ..Default::default() }; let result = cast_with_options(&array, &output_type, &options); - assert_eq!(result.unwrap_err().to_string(), - "Invalid argument error: 1234568 is too large to store in a Decimal128 of precision 6. Max is 999999"); + assert_eq!( + result.unwrap_err().to_string(), + "Invalid argument error: 1234568 is too large to store in a Decimal128 of precision 6. Max is 999999" + ); } #[test] @@ -11177,8 +11219,10 @@ mod tests { ..Default::default() }; let result = cast_with_options(&array, &output_type, &options); - assert_eq!(result.unwrap_err().to_string(), - "Invalid argument error: 1234567890 is too large to store in a Decimal128 of precision 6. Max is 999999"); + assert_eq!( + result.unwrap_err().to_string(), + "Invalid argument error: 1234567890 is too large to store in a Decimal128 of precision 6. Max is 999999" + ); } #[test] @@ -11194,8 +11238,10 @@ mod tests { ..Default::default() }; let result = cast_with_options(&array, &output_type, &options); - assert_eq!(result.unwrap_err().to_string(), - "Invalid argument error: 123456789 is too large to store in a Decimal256 of precision 6. Max is 999999"); + assert_eq!( + result.unwrap_err().to_string(), + "Invalid argument error: 123456789 is too large to store in a Decimal256 of precision 6. Max is 999999" + ); } #[test] diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs index 890719964d38..0d7d6fce6aff 100644 --- a/arrow-cast/src/parse.rs +++ b/arrow-cast/src/parse.rs @@ -794,7 +794,7 @@ fn parse_e_notation( None => { return Err(ArrowError::ParseError(format!( "can't parse the string value {s} to decimal" - ))) + ))); } }; @@ -2689,26 +2689,10 @@ mod tests { 0i128, 15, ), - ( - "1.016744e-320", - 0i128, - 15, - ), - ( - "-1e3", - -1000000000i128, - 6, - ), - ( - "+1e3", - 1000000000i128, - 6, - ), - ( - "-1e31", - -10000000000000000000000000000000000000i128, - 6, - ), + ("1.016744e-320", 0i128, 15), + ("-1e3", -1000000000i128, 6), + ("+1e3", 1000000000i128, 6), + ("-1e31", -10000000000000000000000000000000000000i128, 6), ]; for (s, i, scale) in edge_tests_128 { let result_128 = parse_decimal::(s, 38, scale); diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs index eee1bd959198..1fe5b94957b7 100644 --- a/arrow-cast/src/pretty.rs +++ b/arrow-cast/src/pretty.rs @@ -60,7 +60,7 @@ use crate::display::{ArrayFormatter, FormatOptions}; /// | 5 | e | /// +---+---+"#); /// ``` -pub fn pretty_format_batches(results: &[RecordBatch]) -> Result { +pub fn pretty_format_batches(results: &[RecordBatch]) -> Result, ArrowError> { let options = FormatOptions::default().with_display_error(true); pretty_format_batches_with_options(results, &options) } @@ -92,7 +92,7 @@ pub fn pretty_format_batches(results: &[RecordBatch]) -> Result Result { +) -> Result, ArrowError> { let options = FormatOptions::default().with_display_error(true); create_table(Some(schema), results, &options) } @@ -130,7 +130,7 @@ pub fn pretty_format_batches_with_schema( pub fn pretty_format_batches_with_options( results: &[RecordBatch], options: &FormatOptions, -) -> Result { +) -> Result, ArrowError> { create_table(None, results, options) } @@ -142,7 +142,7 @@ pub fn pretty_format_batches_with_options( pub fn pretty_format_columns( col_name: &str, results: &[ArrayRef], -) -> Result { +) -> Result, ArrowError> { let options = FormatOptions::default().with_display_error(true); pretty_format_columns_with_options(col_name, results, &options) } @@ -154,7 +154,7 @@ pub fn pretty_format_columns_with_options( col_name: &str, results: &[ArrayRef], options: &FormatOptions, -) -> Result { +) -> Result, ArrowError> { create_column(col_name, results, options) } diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs index 7b69df51b541..e197ed9e054b 100644 --- a/arrow-csv/src/reader/mod.rs +++ b/arrow-csv/src/reader/mod.rs @@ -1853,7 +1853,10 @@ mod tests { let file_name = "test/data/various_invalid_types/invalid_float.csv"; let error = invalid_csv_helper(file_name); - assert_eq!("Parser error: Error while parsing value '4.x4' as type 'Float32' for column 1 at line 4. Row data: '[4,4.x4,,false]'", error); + assert_eq!( + "Parser error: Error while parsing value '4.x4' as type 'Float32' for column 1 at line 4. Row data: '[4,4.x4,,false]'", + error + ); } #[test] @@ -1861,7 +1864,10 @@ mod tests { let file_name = "test/data/various_invalid_types/invalid_int.csv"; let error = invalid_csv_helper(file_name); - assert_eq!("Parser error: Error while parsing value '2.3' as type 'UInt64' for column 0 at line 2. Row data: '[2.3,2.2,2.22,false]'", error); + assert_eq!( + "Parser error: Error while parsing value '2.3' as type 'UInt64' for column 0 at line 2. Row data: '[2.3,2.2,2.22,false]'", + error + ); } #[test] @@ -1869,7 +1875,10 @@ mod tests { let file_name = "test/data/various_invalid_types/invalid_bool.csv"; let error = invalid_csv_helper(file_name); - assert_eq!("Parser error: Error while parsing value 'none' as type 'Boolean' for column 3 at line 2. Row data: '[2,2.2,2.22,none]'", error); + assert_eq!( + "Parser error: Error while parsing value 'none' as type 'Boolean' for column 3 at line 2. Row data: '[2,2.2,2.22,none]'", + error + ); } /// Infer the data type of a record @@ -2697,7 +2706,10 @@ mod tests { .infer_schema(&mut read, None); assert!(result.is_err()); // Include line number in the error message to help locate and fix the issue - assert_eq!(result.err().unwrap().to_string(), "Csv error: Encountered unequal lengths between records on CSV file. Expected 3 records, found 2 records at line 3"); + assert_eq!( + result.err().unwrap().to_string(), + "Csv error: Encountered unequal lengths between records on CSV file. Expected 3 records, found 2 records at line 3" + ); } #[test] diff --git a/arrow-csv/src/writer.rs b/arrow-csv/src/writer.rs index e10943a6a91c..30c3aa360138 100644 --- a/arrow-csv/src/writer.rs +++ b/arrow-csv/src/writer.rs @@ -717,7 +717,10 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo for batch in batches { let err = writer.write(batch).unwrap_err().to_string(); - assert_eq!(err, "Csv error: Error processing row 2, col 2: Cast error: Failed to convert 1926632005177685347 to temporal for Date64") + assert_eq!( + err, + "Csv error: Error processing row 2, col 2: Cast error: Failed to convert 1926632005177685347 to temporal for Date64" + ) } drop(writer); } diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs index fca19bc3aafe..b95763ae5304 100644 --- a/arrow-data/src/data.rs +++ b/arrow-data/src/data.rs @@ -279,24 +279,26 @@ impl ArrayData { buffers: Vec, child_data: Vec, ) -> Self { - let mut skip_validation = UnsafeFlag::new(); - // SAFETY: caller responsible for ensuring data is valid - skip_validation.set(true); + unsafe { + let mut skip_validation = UnsafeFlag::new(); + // SAFETY: caller responsible for ensuring data is valid + skip_validation.set(true); - ArrayDataBuilder { - data_type, - len, - null_count, - null_bit_buffer, - nulls: None, - offset, - buffers, - child_data, - align_buffers: false, - skip_validation, + ArrayDataBuilder { + data_type, + len, + null_count, + null_bit_buffer, + nulls: None, + offset, + buffers, + child_data, + align_buffers: false, + skip_validation, + } + .build() + .unwrap() } - .build() - .unwrap() } /// Create a new ArrayData, validating that the provided buffers form a valid @@ -476,21 +478,20 @@ impl ArrayData { result += buffer_size; } BufferSpec::VariableWidth => { - let buffer_len: usize; - match self.data_type { + let buffer_len = match self.data_type { DataType::Utf8 | DataType::Binary => { let offsets = self.typed_offsets::()?; - buffer_len = (offsets[self.len] - offsets[0] ) as usize; + (offsets[self.len] - offsets[0]) as usize } DataType::LargeUtf8 | DataType::LargeBinary => { let offsets = self.typed_offsets::()?; - buffer_len = (offsets[self.len] - offsets[0]) as usize; + (offsets[self.len] - offsets[0]) as usize } _ => { return Err(ArrowError::NotYetImplemented(format!( - "Invalid data type for VariableWidth buffer. Expected Utf8, LargeUtf8, Binary or LargeBinary. Got {}", - self.data_type - ))) + "Invalid data type for VariableWidth buffer. Expected Utf8, LargeUtf8, Binary or LargeBinary. Got {}", + self.data_type + ))); } }; result += buffer_len; @@ -554,7 +555,7 @@ impl ArrayData { if let DataType::Struct(_) = self.data_type() { // Slice into children let new_offset = self.offset + offset; - let new_data = ArrayData { + ArrayData { data_type: self.data_type().clone(), len: length, offset: new_offset, @@ -566,9 +567,7 @@ impl ArrayData { .map(|data| data.slice(offset, length)) .collect(), nulls: self.nulls.as_ref().map(|x| x.slice(offset, length)), - }; - - new_data + } } else { let mut new_data = self.clone(); @@ -784,7 +783,10 @@ impl ArrayData { if buffer.len() < min_buffer_size { return Err(ArrowError::InvalidArgumentError(format!( "Need at least {} bytes in buffers[{}] in array of type {:?}, but got {}", - min_buffer_size, i, self.data_type, buffer.len() + min_buffer_size, + i, + self.data_type, + buffer.len() ))); } @@ -792,7 +794,8 @@ impl ArrayData { if align_offset != 0 { return Err(ArrowError::InvalidArgumentError(format!( "Misaligned buffers[{i}] in array of type {:?}, offset from expected alignment of {alignment} by {}", - self.data_type, align_offset.min(alignment - align_offset) + self.data_type, + align_offset.min(alignment - align_offset) ))); } } @@ -806,7 +809,10 @@ impl ArrayData { if buffer.len() < min_buffer_size { return Err(ArrowError::InvalidArgumentError(format!( "Need at least {} bytes for bitmap in buffers[{}] in array of type {:?}, but got {}", - min_buffer_size, i, self.data_type, buffer.len() + min_buffer_size, + i, + self.data_type, + buffer.len() ))); } } @@ -1058,7 +1064,11 @@ impl ArrayData { if field_data.len < self.len { return Err(ArrowError::InvalidArgumentError(format!( "{} child array #{} for field {} has length smaller than expected for struct array ({} < {})", - self.data_type, i, field.name(), field_data.len, self.len + self.data_type, + i, + field.name(), + field_data.len, + self.len ))); } } @@ -1090,7 +1100,9 @@ impl ArrayData { if mode == &UnionMode::Sparse && field_data.len < (self.len + self.offset) { return Err(ArrowError::InvalidArgumentError(format!( "Sparse union child array #{} has length smaller than expected for union array ({} < {})", - i, field_data.len, self.len + self.offset + i, + field_data.len, + self.len + self.offset ))); } } @@ -1282,7 +1294,7 @@ impl ArrayData { "non-nullable child of type {} contains nulls not present in parent {}", child.data_type, self.data_type ))), - } + }; } }; @@ -1999,7 +2011,7 @@ impl ArrayDataBuilder { /// The same caveats as [`ArrayData::new_unchecked`] /// apply. pub unsafe fn build_unchecked(self) -> ArrayData { - self.skip_validation(true).build().unwrap() + unsafe { self.skip_validation(true).build().unwrap() } } /// Creates an `ArrayData`, consuming `self` @@ -2098,8 +2110,10 @@ impl ArrayDataBuilder { /// If validation is skipped, the buffers must form a valid Arrow array, /// otherwise undefined behavior will result pub unsafe fn skip_validation(mut self, skip_validation: bool) -> Self { - self.skip_validation.set(skip_validation); - self + unsafe { + self.skip_validation.set(skip_validation); + self + } } } diff --git a/arrow-data/src/ffi.rs b/arrow-data/src/ffi.rs index 3b446ef255fe..8d10599859cf 100644 --- a/arrow-data/src/ffi.rs +++ b/arrow-data/src/ffi.rs @@ -68,21 +68,23 @@ unsafe impl Sync for FFI_ArrowArray {} // callback used to drop [FFI_ArrowArray] when it is exported unsafe extern "C" fn release_array(array: *mut FFI_ArrowArray) { - if array.is_null() { - return; - } - let array = &mut *array; + unsafe { + if array.is_null() { + return; + } + let array = &mut *array; - // take ownership of `private_data`, therefore dropping it` - let private = Box::from_raw(array.private_data as *mut ArrayPrivateData); - for child in private.children.iter() { - let _ = Box::from_raw(*child); - } - if !private.dictionary.is_null() { - let _ = Box::from_raw(private.dictionary); - } + // take ownership of `private_data`, therefore dropping it` + let private = Box::from_raw(array.private_data as *mut ArrayPrivateData); + for child in private.children.iter() { + let _ = Box::from_raw(*child); + } + if !private.dictionary.is_null() { + let _ = Box::from_raw(private.dictionary); + } - array.release = None; + array.release = None; + } } /// Aligns the provided `nulls` to the provided `data_offset` @@ -222,7 +224,7 @@ impl FFI_ArrowArray { /// [move]: https://arrow.apache.org/docs/format/CDataInterface.html#moving-an-array /// [valid]: https://doc.rust-lang.org/std/ptr/index.html#safety pub unsafe fn from_raw(array: *mut FFI_ArrowArray) -> Self { - std::ptr::replace(array, Self::empty()) + unsafe { std::ptr::replace(array, Self::empty()) } } /// create an empty `FFI_ArrowArray`, which can be used to import data into diff --git a/arrow-data/src/transform/utils.rs b/arrow-data/src/transform/utils.rs index 5407f68e0d0c..e9fd39087781 100644 --- a/arrow-data/src/transform/utils.rs +++ b/arrow-data/src/transform/utils.rs @@ -46,15 +46,17 @@ pub(super) fn extend_offsets( #[inline] pub(super) unsafe fn get_last_offset(offset_buffer: &MutableBuffer) -> T { - // JUSTIFICATION - // Benefit - // 20% performance improvement extend of variable sized arrays (see bench `mutable_array`) - // Soundness - // * offset buffer is always extended in slices of T and aligned accordingly. - // * Buffer[0] is initialized with one element, 0, and thus `mutable_offsets.len() - 1` is always valid. - let (prefix, offsets, suffix) = offset_buffer.as_slice().align_to::(); - debug_assert!(prefix.is_empty() && suffix.is_empty()); - *offsets.get_unchecked(offsets.len() - 1) + unsafe { + // JUSTIFICATION + // Benefit + // 20% performance improvement extend of variable sized arrays (see bench `mutable_array`) + // Soundness + // * offset buffer is always extended in slices of T and aligned accordingly. + // * Buffer[0] is initialized with one element, 0, and thus `mutable_offsets.len() - 1` is always valid. + let (prefix, offsets, suffix) = offset_buffer.as_slice().align_to::(); + debug_assert!(prefix.is_empty() && suffix.is_empty()); + *offsets.get_unchecked(offsets.len() - 1) + } } #[cfg(test)] diff --git a/arrow-flight/src/client.rs b/arrow-flight/src/client.rs index 9b4c10e9a093..51d63eca4c63 100644 --- a/arrow-flight/src/client.rs +++ b/arrow-flight/src/client.rs @@ -18,7 +18,7 @@ use crate::{ decode::FlightRecordBatchStream, flight_service_client::FlightServiceClient, - gen::{CancelFlightInfoRequest, CancelFlightInfoResult, RenewFlightEndpointRequest}, + r#gen::{CancelFlightInfoRequest, CancelFlightInfoResult, RenewFlightEndpointRequest}, trailers::extract_lazy_trailers, Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightEndpoint, FlightInfo, HandshakeRequest, PollInfo, PutResult, Ticket, diff --git a/arrow-flight/src/lib.rs b/arrow-flight/src/lib.rs index 8043d5b4a72b..ebdfc73129c7 100644 --- a/arrow-flight/src/lib.rs +++ b/arrow-flight/src/lib.rs @@ -60,7 +60,7 @@ use std::{fmt, ops::Deref}; type ArrowResult = std::result::Result; #[allow(clippy::all)] -mod gen { +mod r#gen { // Since this file is auto-generated, we suppress all warnings #![allow(missing_docs)] include!("arrow.flight.protocol.rs"); @@ -68,22 +68,22 @@ mod gen { /// Defines a `Flight` for generation or retrieval. pub mod flight_descriptor { - use super::gen; - pub use gen::flight_descriptor::DescriptorType; + use super::r#gen; + pub use r#gen::flight_descriptor::DescriptorType; } /// Low Level [tonic] [`FlightServiceClient`](gen::flight_service_client::FlightServiceClient). pub mod flight_service_client { - use super::gen; - pub use gen::flight_service_client::FlightServiceClient; + use super::r#gen; + pub use r#gen::flight_service_client::FlightServiceClient; } /// Low Level [tonic] [`FlightServiceServer`](gen::flight_service_server::FlightServiceServer) /// and [`FlightService`](gen::flight_service_server::FlightService). pub mod flight_service_server { - use super::gen; - pub use gen::flight_service_server::FlightService; - pub use gen::flight_service_server::FlightServiceServer; + use super::r#gen; + pub use r#gen::flight_service_server::FlightService; + pub use r#gen::flight_service_server::FlightServiceServer; } /// Mid Level [`FlightClient`] @@ -101,27 +101,27 @@ pub mod encode; /// Common error types pub mod error; -pub use gen::Action; -pub use gen::ActionType; -pub use gen::BasicAuth; -pub use gen::CancelFlightInfoRequest; -pub use gen::CancelFlightInfoResult; -pub use gen::CancelStatus; -pub use gen::Criteria; -pub use gen::Empty; -pub use gen::FlightData; -pub use gen::FlightDescriptor; -pub use gen::FlightEndpoint; -pub use gen::FlightInfo; -pub use gen::HandshakeRequest; -pub use gen::HandshakeResponse; -pub use gen::Location; -pub use gen::PollInfo; -pub use gen::PutResult; -pub use gen::RenewFlightEndpointRequest; -pub use gen::Result; -pub use gen::SchemaResult; -pub use gen::Ticket; +pub use r#gen::Action; +pub use r#gen::ActionType; +pub use r#gen::BasicAuth; +pub use r#gen::CancelFlightInfoRequest; +pub use r#gen::CancelFlightInfoResult; +pub use r#gen::CancelStatus; +pub use r#gen::Criteria; +pub use r#gen::Empty; +pub use r#gen::FlightData; +pub use r#gen::FlightDescriptor; +pub use r#gen::FlightEndpoint; +pub use r#gen::FlightInfo; +pub use r#gen::HandshakeRequest; +pub use r#gen::HandshakeResponse; +pub use r#gen::Location; +pub use r#gen::PollInfo; +pub use r#gen::PutResult; +pub use r#gen::RenewFlightEndpointRequest; +pub use r#gen::Result; +pub use r#gen::SchemaResult; +pub use r#gen::Ticket; /// Helper to extract HTTP/gRPC trailers from a tonic stream. mod trailers; diff --git a/arrow-flight/src/sql/client.rs b/arrow-flight/src/sql/client.rs index 6791b68b757d..5ced71145f34 100644 --- a/arrow-flight/src/sql/client.rs +++ b/arrow-flight/src/sql/client.rs @@ -28,7 +28,7 @@ use crate::decode::FlightRecordBatchStream; use crate::encode::FlightDataEncoderBuilder; use crate::error::FlightError; use crate::flight_service_client::FlightServiceClient; -use crate::sql::gen::action_end_transaction_request::EndTransaction; +use crate::sql::r#gen::action_end_transaction_request::EndTransaction; use crate::sql::server::{ BEGIN_TRANSACTION, CLOSE_PREPARED_STATEMENT, CREATE_PREPARED_STATEMENT, END_TRANSACTION, }; diff --git a/arrow-flight/src/sql/mod.rs b/arrow-flight/src/sql/mod.rs index 955f1904a6d6..90fd41266dc5 100644 --- a/arrow-flight/src/sql/mod.rs +++ b/arrow-flight/src/sql/mod.rs @@ -44,70 +44,70 @@ use paste::paste; use prost::Message; #[allow(clippy::all)] -mod gen { +mod r#gen { // Since this file is auto-generated, we suppress all warnings #![allow(missing_docs)] include!("arrow.flight.protocol.sql.rs"); } -pub use gen::action_end_transaction_request::EndTransaction; -pub use gen::command_statement_ingest::table_definition_options::{ +pub use r#gen::action_end_transaction_request::EndTransaction; +pub use r#gen::command_statement_ingest::table_definition_options::{ TableExistsOption, TableNotExistOption, }; -pub use gen::command_statement_ingest::TableDefinitionOptions; -pub use gen::ActionBeginSavepointRequest; -pub use gen::ActionBeginSavepointResult; -pub use gen::ActionBeginTransactionRequest; -pub use gen::ActionBeginTransactionResult; -pub use gen::ActionCancelQueryRequest; -pub use gen::ActionCancelQueryResult; -pub use gen::ActionClosePreparedStatementRequest; -pub use gen::ActionCreatePreparedStatementRequest; -pub use gen::ActionCreatePreparedStatementResult; -pub use gen::ActionCreatePreparedSubstraitPlanRequest; -pub use gen::ActionEndSavepointRequest; -pub use gen::ActionEndTransactionRequest; -pub use gen::CommandGetCatalogs; -pub use gen::CommandGetCrossReference; -pub use gen::CommandGetDbSchemas; -pub use gen::CommandGetExportedKeys; -pub use gen::CommandGetImportedKeys; -pub use gen::CommandGetPrimaryKeys; -pub use gen::CommandGetSqlInfo; -pub use gen::CommandGetTableTypes; -pub use gen::CommandGetTables; -pub use gen::CommandGetXdbcTypeInfo; -pub use gen::CommandPreparedStatementQuery; -pub use gen::CommandPreparedStatementUpdate; -pub use gen::CommandStatementIngest; -pub use gen::CommandStatementQuery; -pub use gen::CommandStatementSubstraitPlan; -pub use gen::CommandStatementUpdate; -pub use gen::DoPutPreparedStatementResult; -pub use gen::DoPutUpdateResult; -pub use gen::Nullable; -pub use gen::Searchable; -pub use gen::SqlInfo; -pub use gen::SqlNullOrdering; -pub use gen::SqlOuterJoinsSupportLevel; -pub use gen::SqlSupportedCaseSensitivity; -pub use gen::SqlSupportedElementActions; -pub use gen::SqlSupportedGroupBy; -pub use gen::SqlSupportedPositionedCommands; -pub use gen::SqlSupportedResultSetConcurrency; -pub use gen::SqlSupportedResultSetType; -pub use gen::SqlSupportedSubqueries; -pub use gen::SqlSupportedTransaction; -pub use gen::SqlSupportedTransactions; -pub use gen::SqlSupportedUnions; -pub use gen::SqlSupportsConvert; -pub use gen::SqlTransactionIsolationLevel; -pub use gen::SubstraitPlan; -pub use gen::SupportedSqlGrammar; -pub use gen::TicketStatementQuery; -pub use gen::UpdateDeleteRules; -pub use gen::XdbcDataType; -pub use gen::XdbcDatetimeSubcode; +pub use r#gen::command_statement_ingest::TableDefinitionOptions; +pub use r#gen::ActionBeginSavepointRequest; +pub use r#gen::ActionBeginSavepointResult; +pub use r#gen::ActionBeginTransactionRequest; +pub use r#gen::ActionBeginTransactionResult; +pub use r#gen::ActionCancelQueryRequest; +pub use r#gen::ActionCancelQueryResult; +pub use r#gen::ActionClosePreparedStatementRequest; +pub use r#gen::ActionCreatePreparedStatementRequest; +pub use r#gen::ActionCreatePreparedStatementResult; +pub use r#gen::ActionCreatePreparedSubstraitPlanRequest; +pub use r#gen::ActionEndSavepointRequest; +pub use r#gen::ActionEndTransactionRequest; +pub use r#gen::CommandGetCatalogs; +pub use r#gen::CommandGetCrossReference; +pub use r#gen::CommandGetDbSchemas; +pub use r#gen::CommandGetExportedKeys; +pub use r#gen::CommandGetImportedKeys; +pub use r#gen::CommandGetPrimaryKeys; +pub use r#gen::CommandGetSqlInfo; +pub use r#gen::CommandGetTableTypes; +pub use r#gen::CommandGetTables; +pub use r#gen::CommandGetXdbcTypeInfo; +pub use r#gen::CommandPreparedStatementQuery; +pub use r#gen::CommandPreparedStatementUpdate; +pub use r#gen::CommandStatementIngest; +pub use r#gen::CommandStatementQuery; +pub use r#gen::CommandStatementSubstraitPlan; +pub use r#gen::CommandStatementUpdate; +pub use r#gen::DoPutPreparedStatementResult; +pub use r#gen::DoPutUpdateResult; +pub use r#gen::Nullable; +pub use r#gen::Searchable; +pub use r#gen::SqlInfo; +pub use r#gen::SqlNullOrdering; +pub use r#gen::SqlOuterJoinsSupportLevel; +pub use r#gen::SqlSupportedCaseSensitivity; +pub use r#gen::SqlSupportedElementActions; +pub use r#gen::SqlSupportedGroupBy; +pub use r#gen::SqlSupportedPositionedCommands; +pub use r#gen::SqlSupportedResultSetConcurrency; +pub use r#gen::SqlSupportedResultSetType; +pub use r#gen::SqlSupportedSubqueries; +pub use r#gen::SqlSupportedTransaction; +pub use r#gen::SqlSupportedTransactions; +pub use r#gen::SqlSupportedUnions; +pub use r#gen::SqlSupportsConvert; +pub use r#gen::SqlTransactionIsolationLevel; +pub use r#gen::SubstraitPlan; +pub use r#gen::SupportedSqlGrammar; +pub use r#gen::TicketStatementQuery; +pub use r#gen::UpdateDeleteRules; +pub use r#gen::XdbcDataType; +pub use r#gen::XdbcDatetimeSubcode; pub mod client; pub mod metadata; diff --git a/arrow-flight/src/sql/server.rs b/arrow-flight/src/sql/server.rs index da5dc9945eee..a8746425dba0 100644 --- a/arrow-flight/src/sql/server.rs +++ b/arrow-flight/src/sql/server.rs @@ -34,7 +34,7 @@ use super::{ SqlInfo, TicketStatementQuery, }; use crate::{ - flight_service_server::FlightService, gen::PollInfo, Action, ActionType, Criteria, Empty, + flight_service_server::FlightService, r#gen::PollInfo, Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse, PutResult, SchemaResult, Ticket, }; @@ -628,7 +628,7 @@ where self.get_flight_info_catalogs(token, request).await } Command::CommandGetDbSchemas(token) => { - return self.get_flight_info_schemas(token, request).await + return self.get_flight_info_schemas(token, request).await; } Command::CommandGetTables(token) => self.get_flight_info_tables(token, request).await, Command::CommandGetTableTypes(token) => { @@ -879,7 +879,7 @@ where let stmt = self .do_action_create_prepared_statement(cmd, request) .await?; - let output = futures::stream::iter(vec![Ok(super::super::gen::Result { + let output = futures::stream::iter(vec![Ok(super::super::r#gen::Result { body: stmt.as_any().encode_to_vec().into(), })]); return Ok(Response::new(Box::pin(output))); @@ -921,7 +921,7 @@ where Status::invalid_argument("Unable to unpack ActionBeginTransactionRequest.") })?; let stmt = self.do_action_begin_transaction(cmd, request).await?; - let output = futures::stream::iter(vec![Ok(super::super::gen::Result { + let output = futures::stream::iter(vec![Ok(super::super::r#gen::Result { body: stmt.as_any().encode_to_vec().into(), })]); return Ok(Response::new(Box::pin(output))); @@ -946,7 +946,7 @@ where Status::invalid_argument("Unable to unpack ActionBeginSavepointRequest.") })?; let stmt = self.do_action_begin_savepoint(cmd, request).await?; - let output = futures::stream::iter(vec![Ok(super::super::gen::Result { + let output = futures::stream::iter(vec![Ok(super::super::r#gen::Result { body: stmt.as_any().encode_to_vec().into(), })]); return Ok(Response::new(Box::pin(output))); @@ -971,7 +971,7 @@ where Status::invalid_argument("Unable to unpack ActionCancelQueryRequest.") })?; let stmt = self.do_action_cancel_query(cmd, request).await?; - let output = futures::stream::iter(vec![Ok(super::super::gen::Result { + let output = futures::stream::iter(vec![Ok(super::super::r#gen::Result { body: stmt.as_any().encode_to_vec().into(), })]); return Ok(Response::new(Box::pin(output))); diff --git a/arrow-integration-test/src/field.rs b/arrow-integration-test/src/field.rs index 4b896ed391be..8b0ca264e02e 100644 --- a/arrow-integration-test/src/field.rs +++ b/arrow-integration-test/src/field.rs @@ -142,7 +142,7 @@ pub fn field_from_json(json: &serde_json::Value) -> Result { Some(_) => { return Err(ArrowError::ParseError( "Field 'children' must be an array".to_string(), - )) + )); } None => { return Err(ArrowError::ParseError( @@ -158,7 +158,7 @@ pub fn field_from_json(json: &serde_json::Value) -> Result { Some(_) => { return Err(ArrowError::ParseError( "Field 'children' must be an array".to_string(), - )) + )); } None => { return Err(ArrowError::ParseError( @@ -177,15 +177,15 @@ pub fn field_from_json(json: &serde_json::Value) -> Result { } t => { return Err(ArrowError::ParseError(format!( - "Map children should be a struct with 2 fields, found {t:?}" - ))) + "Map children should be a struct with 2 fields, found {t:?}" + ))); } } } Some(_) => { return Err(ArrowError::ParseError( "Field 'children' must be an array with 1 element".to_string(), - )) + )); } None => { return Err(ArrowError::ParseError( @@ -207,7 +207,7 @@ pub fn field_from_json(json: &serde_json::Value) -> Result { Some(_) => { return Err(ArrowError::ParseError( "Field 'children' must be an array".to_string(), - )) + )); } None => { return Err(ArrowError::ParseError( @@ -275,7 +275,7 @@ pub fn field_to_json(field: &Field) -> serde_json::Value { }; match field.data_type() { - DataType::Dictionary(ref index_type, ref value_type) => { + DataType::Dictionary(index_type, value_type) => { #[allow(deprecated)] let dict_id = field.dict_id().unwrap(); serde_json::json!({ diff --git a/arrow-integration-test/src/schema.rs b/arrow-integration-test/src/schema.rs index 512f0aed8e54..7777c48c1f4b 100644 --- a/arrow-integration-test/src/schema.rs +++ b/arrow-integration-test/src/schema.rs @@ -40,7 +40,7 @@ pub fn schema_from_json(json: &serde_json::Value) -> Result { _ => { return Err(ArrowError::ParseError( "Schema fields should be an array".to_string(), - )) + )); } }; diff --git a/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs b/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs index d608a4753723..798f25a23054 100644 --- a/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs +++ b/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs @@ -378,7 +378,7 @@ async fn save_uploaded_chunks( ipc::MessageHeader::Schema => { return Err(Status::internal( "Not expecting a schema when messages are read", - )) + )); } ipc::MessageHeader::RecordBatch => { send_app_metadata(&mut response_tx, &data.app_metadata).await?; diff --git a/arrow-integration-testing/src/lib.rs b/arrow-integration-testing/src/lib.rs index 10512a00eb9d..533d7d6464d8 100644 --- a/arrow-integration-testing/src/lib.rs +++ b/arrow-integration-testing/src/lib.rs @@ -261,7 +261,7 @@ fn result_to_c_error(result: &std::result::Result /// # Safety /// /// The pointer is assumed to have been obtained using CString::into_raw. -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn arrow_rs_free_error(c_error: *mut c_char) { if !c_error.is_null() { drop(unsafe { CString::from_raw(c_error) }); @@ -269,7 +269,7 @@ pub unsafe extern "C" fn arrow_rs_free_error(c_error: *mut c_char) { } /// A C-ABI for exporting an Arrow schema from a JSON file -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn arrow_rs_cdata_integration_export_schema_from_json( c_json_name: *const c_char, out: *mut FFI_ArrowSchema, @@ -279,7 +279,7 @@ pub extern "C" fn arrow_rs_cdata_integration_export_schema_from_json( } /// A C-ABI to compare an Arrow schema against a JSON file -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn arrow_rs_cdata_integration_import_schema_and_compare_to_json( c_json_name: *const c_char, c_schema: *mut FFI_ArrowSchema, @@ -289,7 +289,7 @@ pub extern "C" fn arrow_rs_cdata_integration_import_schema_and_compare_to_json( } /// A C-ABI for exporting a RecordBatch from a JSON file -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn arrow_rs_cdata_integration_export_batch_from_json( c_json_name: *const c_char, batch_num: c_int, @@ -300,7 +300,7 @@ pub extern "C" fn arrow_rs_cdata_integration_export_batch_from_json( } /// A C-ABI to compare a RecordBatch against a JSON file -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn arrow_rs_cdata_integration_import_batch_and_compare_to_json( c_json_name: *const c_char, batch_num: c_int, diff --git a/arrow-ipc/benches/ipc_reader.rs b/arrow-ipc/benches/ipc_reader.rs index ab77449eeb7d..81ed28d8e7da 100644 --- a/arrow-ipc/benches/ipc_reader.rs +++ b/arrow-ipc/benches/ipc_reader.rs @@ -240,8 +240,10 @@ impl IPCBufferDecoder { } unsafe fn with_skip_validation(mut self, skip_validation: bool) -> Self { - self.decoder = self.decoder.with_skip_validation(skip_validation); - self + unsafe { + self.decoder = self.decoder.with_skip_validation(skip_validation); + self + } } fn num_batches(&self) -> usize { diff --git a/arrow-ipc/regen.sh b/arrow-ipc/regen.sh index b368bd1bc7cc..676ec9933c55 100755 --- a/arrow-ipc/regen.sh +++ b/arrow-ipc/regen.sh @@ -88,9 +88,9 @@ use flatbuffers::EndianScalar; HEREDOC ) -SCHEMA_IMPORT="\nuse crate::gen::Schema::*;" -SPARSE_TENSOR_IMPORT="\nuse crate::gen::SparseTensor::*;" -TENSOR_IMPORT="\nuse crate::gen::Tensor::*;" +SCHEMA_IMPORT="\nuse crate::r#gen::Schema::*;" +SPARSE_TENSOR_IMPORT="\nuse crate::r#gen::SparseTensor::*;" +TENSOR_IMPORT="\nuse crate::r#gen::Tensor::*;" # For flatbuffer(1.12.0+), remove: use crate::${name}::\*; names=("File" "Message" "Schema" "SparseTensor" "Tensor") @@ -129,7 +129,7 @@ for f in `ls *.rs`; do sed --in-place='' 's/TYPE__/TYPE_/g' $f # Some files need prefixes - if [[ $f == "File.rs" ]]; then + if [[ $f == "File.rs" ]]; then # Now prefix the file with the static contents echo -e "${PREFIX}" "${SCHEMA_IMPORT}" | cat - $f > temp && mv temp $f elif [[ $f == "Message.rs" ]]; then diff --git a/arrow-ipc/src/convert.rs b/arrow-ipc/src/convert.rs index af0bdb1df3eb..8d492d8b594f 100644 --- a/arrow-ipc/src/convert.rs +++ b/arrow-ipc/src/convert.rs @@ -279,9 +279,9 @@ pub fn try_schema_from_ipc_buffer(buffer: &[u8]) -> Result { if buffer.len() < len as usize { let actual_len = buffer.len(); - return Err(ArrowError::ParseError( - format!("The buffer length ({actual_len}) is less than the encapsulated message's reported length ({len})") - )); + return Err(ArrowError::ParseError(format!( + "The buffer length ({actual_len}) is less than the encapsulated message's reported length ({len})" + ))); } let msg = crate::root_as_message(buffer) @@ -760,7 +760,7 @@ pub(crate) fn get_fb_field_type<'a>( children: Some(fbb.create_vector(&empty_fields[..])), } } - List(ref list_type) => { + List(list_type) => { let child = build_field(fbb, dictionary_tracker, list_type); FBFieldType { type_type: crate::Type::List, @@ -769,7 +769,7 @@ pub(crate) fn get_fb_field_type<'a>( } } ListView(_) | LargeListView(_) => unimplemented!("ListView/LargeListView not implemented"), - LargeList(ref list_type) => { + LargeList(list_type) => { let child = build_field(fbb, dictionary_tracker, list_type); FBFieldType { type_type: crate::Type::LargeList, @@ -777,7 +777,7 @@ pub(crate) fn get_fb_field_type<'a>( children: Some(fbb.create_vector(&[child])), } } - FixedSizeList(ref list_type, len) => { + FixedSizeList(list_type, len) => { let child = build_field(fbb, dictionary_tracker, list_type); let mut builder = crate::FixedSizeListBuilder::new(fbb); builder.add_listSize(*len); diff --git a/arrow-ipc/src/gen/File.rs b/arrow-ipc/src/gen/File.rs index 427cf75de096..ab2273614759 100644 --- a/arrow-ipc/src/gen/File.rs +++ b/arrow-ipc/src/gen/File.rs @@ -18,7 +18,7 @@ #![allow(dead_code)] #![allow(unused_imports)] -use crate::gen::Schema::*; +use crate::r#gen::Schema::*; use flatbuffers::EndianScalar; use std::{cmp::Ordering, mem}; // automatically generated by the FlatBuffers compiler, do not modify @@ -49,21 +49,26 @@ impl<'a> flatbuffers::Follow<'a> for Block { type Inner = &'a Block; #[inline] unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { - <&'a Block>::follow(buf, loc) + unsafe { <&'a Block>::follow(buf, loc) } } } impl<'a> flatbuffers::Follow<'a> for &'a Block { type Inner = &'a Block; #[inline] unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { - flatbuffers::follow_cast_ref::(buf, loc) + unsafe { flatbuffers::follow_cast_ref::(buf, loc) } } } impl<'b> flatbuffers::Push for Block { type Output = Block; #[inline] unsafe fn push(&self, dst: &mut [u8], _written_len: usize) { - let src = ::core::slice::from_raw_parts(self as *const Block as *const u8, Self::size()); + let src = unsafe { + ::core::slice::from_raw_parts( + self as *const Block as *const u8, + ::size(), + ) + }; dst.copy_from_slice(src); } #[inline] @@ -200,7 +205,7 @@ impl<'a> flatbuffers::Follow<'a> for Footer<'a> { #[inline] unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { Self { - _tab: flatbuffers::Table::new(buf, loc), + _tab: unsafe { flatbuffers::Table::new(buf, loc) }, } } } @@ -470,14 +475,14 @@ pub fn size_prefixed_root_as_footer_with_opts<'b, 'o>( /// # Safety /// Callers must trust the given bytes do indeed contain a valid `Footer`. pub unsafe fn root_as_footer_unchecked(buf: &[u8]) -> Footer { - flatbuffers::root_unchecked::