Skip to content

Commit 2d900a4

Browse files
authored
Migrate parquet-variant-compute to Rust 2024 (#8511)
# Which issue does this PR close?

- Contribute to #6827

# Rationale for this change

Splitting up #8227.

# What changes are included in this PR?

Migrate `parquet-variant-compute` to Rust 2024

# Are these changes tested?

CI

# Are there any user-facing changes?

Yes
1 parent f88921c commit 2d900a4

File tree

9 files changed

+51
-45
lines changed

9 files changed

+51
-45
lines changed

parquet-variant-compute/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ homepage = { workspace = true }
2626
repository = { workspace = true }
2727
authors = { workspace = true }
2828
keywords = ["arrow", "parquet", "variant"]
29-
edition = { workspace = true }
29+
edition = "2024"
3030
rust-version = { workspace = true }
3131

3232

parquet-variant-compute/benches/variant_kernels.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@
1717

1818
use arrow::array::{Array, ArrayRef, StringArray};
1919
use arrow::util::test_util::seedable_rng;
20-
use criterion::{criterion_group, criterion_main, Criterion};
20+
use criterion::{Criterion, criterion_group, criterion_main};
2121
use parquet_variant::{Variant, VariantBuilder};
22-
use parquet_variant_compute::variant_get::{variant_get, GetOptions};
23-
use parquet_variant_compute::{json_to_variant, VariantArray, VariantArrayBuilder};
24-
use rand::distr::Alphanumeric;
25-
use rand::rngs::StdRng;
22+
use parquet_variant_compute::variant_get::{GetOptions, variant_get};
23+
use parquet_variant_compute::{VariantArray, VariantArrayBuilder, json_to_variant};
2624
use rand::Rng;
2725
use rand::SeedableRng;
26+
use rand::distr::Alphanumeric;
27+
use rand::rngs::StdRng;
2828
use std::fmt::Write;
2929
use std::sync::Arc;
3030
fn benchmark_batch_json_string_to_variant(c: &mut Criterion) {

parquet-variant-compute/src/arrow_to_variant.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,25 +15,25 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::type_conversion::{decimal_to_variant_decimal, CastOptions};
18+
use crate::type_conversion::{CastOptions, decimal_to_variant_decimal};
1919
use arrow::array::{
2020
Array, AsArray, FixedSizeListArray, GenericBinaryArray, GenericListArray, GenericListViewArray,
2121
GenericStringArray, OffsetSizeTrait, PrimitiveArray,
2222
};
2323
use arrow::compute::kernels::cast;
2424
use arrow::datatypes::{
2525
ArrowNativeType, ArrowPrimitiveType, ArrowTemporalType, ArrowTimestampType, Date32Type,
26-
Date64Type, Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type,
26+
Date64Type, Float16Type, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type,
2727
RunEndIndexType, Time32MillisecondType, Time32SecondType, Time64MicrosecondType,
2828
Time64NanosecondType, TimestampMicrosecondType, TimestampMillisecondType,
29-
TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
29+
TimestampNanosecondType, TimestampSecondType, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
3030
};
3131
use arrow::temporal_conversions::{as_date, as_datetime, as_time};
3232
use arrow_schema::{ArrowError, DataType, TimeUnit};
3333
use chrono::{DateTime, TimeZone, Utc};
3434
use parquet_variant::{
35-
ObjectFieldBuilder, Variant, VariantBuilderExt, VariantDecimal16, VariantDecimal4,
36-
VariantDecimal8,
35+
ObjectFieldBuilder, Variant, VariantBuilderExt, VariantDecimal4, VariantDecimal8,
36+
VariantDecimal16,
3737
};
3838
use std::collections::HashMap;
3939
use std::ops::Range;
@@ -213,7 +213,7 @@ pub(crate) fn make_arrow_to_variant_row_builder<'a>(
213213
_ => {
214214
return Err(ArrowError::CastError(format!(
215215
"Unsupported Time32 unit: {time_unit:?}"
216-
)))
216+
)));
217217
}
218218
},
219219
DataType::Time64(time_unit) => match time_unit {
@@ -226,15 +226,15 @@ pub(crate) fn make_arrow_to_variant_row_builder<'a>(
226226
_ => {
227227
return Err(ArrowError::CastError(format!(
228228
"Unsupported Time64 unit: {time_unit:?}"
229-
)))
229+
)));
230230
}
231231
},
232232
DataType::Duration(_) | DataType::Interval(_) => {
233233
return Err(ArrowError::InvalidArgumentError(
234234
"Casting duration/interval types to Variant is not supported. \
235235
The Variant format does not define duration/interval types."
236236
.to_string(),
237-
))
237+
));
238238
}
239239
DataType::Binary => Binary(BinaryArrowToVariantBuilder::new(array)),
240240
DataType::LargeBinary => LargeBinary(BinaryArrowToVariantBuilder::new(array)),

parquet-variant-compute/src/cast_to_variant.rs

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -84,33 +84,34 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
8484
mod tests {
8585
use super::*;
8686
use arrow::array::{
87-
ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array, Decimal128Array,
88-
Decimal256Array, Decimal32Array, Decimal64Array, DictionaryArray, DurationMicrosecondArray,
89-
DurationMillisecondArray, DurationNanosecondArray, DurationSecondArray,
90-
FixedSizeBinaryBuilder, FixedSizeListBuilder, Float16Array, Float32Array, Float64Array,
91-
GenericByteBuilder, GenericByteViewBuilder, Int16Array, Int32Array, Int64Array, Int8Array,
92-
IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeListArray,
93-
LargeListViewBuilder, LargeStringArray, ListArray, ListViewBuilder, MapArray, NullArray,
94-
StringArray, StringRunBuilder, StringViewArray, StructArray, Time32MillisecondArray,
95-
Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
96-
TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
97-
TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, UnionArray,
87+
ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array, Decimal32Array,
88+
Decimal64Array, Decimal128Array, Decimal256Array, DictionaryArray,
89+
DurationMicrosecondArray, DurationMillisecondArray, DurationNanosecondArray,
90+
DurationSecondArray, FixedSizeBinaryBuilder, FixedSizeListBuilder, Float16Array,
91+
Float32Array, Float64Array, GenericByteBuilder, GenericByteViewBuilder, Int8Array,
92+
Int16Array, Int32Array, Int64Array, IntervalDayTimeArray, IntervalMonthDayNanoArray,
93+
IntervalYearMonthArray, LargeListArray, LargeListViewBuilder, LargeStringArray, ListArray,
94+
ListViewBuilder, MapArray, NullArray, StringArray, StringRunBuilder, StringViewArray,
95+
StructArray, Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray,
96+
Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray,
97+
TimestampNanosecondArray, TimestampSecondArray, UInt8Array, UInt16Array, UInt32Array,
98+
UInt64Array, UnionArray,
9899
};
99100
use arrow::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer};
100101
use arrow::datatypes::{
101-
i256, BinaryType, BinaryViewType, Date32Type, Date64Type, Int32Type, Int64Type, Int8Type,
102-
IntervalDayTime, IntervalMonthDayNano, LargeBinaryType,
102+
BinaryType, BinaryViewType, Date32Type, Date64Type, Int8Type, Int32Type, Int64Type,
103+
IntervalDayTime, IntervalMonthDayNano, LargeBinaryType, i256,
103104
};
104105
use arrow::temporal_conversions::timestamp_s_to_datetime;
105-
use arrow_schema::{DataType, Field, Fields, UnionFields};
106106
use arrow_schema::{
107-
DECIMAL128_MAX_PRECISION, DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION,
107+
DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION, DECIMAL128_MAX_PRECISION,
108108
};
109+
use arrow_schema::{DataType, Field, Fields, UnionFields};
109110
use chrono::{DateTime, NaiveDate, NaiveTime};
110111
use half::f16;
111112
use parquet_variant::{
112-
Variant, VariantBuilder, VariantBuilderExt, VariantDecimal16, VariantDecimal4,
113-
VariantDecimal8,
113+
Variant, VariantBuilder, VariantBuilderExt, VariantDecimal4, VariantDecimal8,
114+
VariantDecimal16,
114115
};
115116
use std::{sync::Arc, vec};
116117

parquet-variant-compute/src/shred_variant.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
2020
use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder};
2121
use crate::variant_to_arrow::{
22-
make_primitive_variant_to_arrow_row_builder, PrimitiveVariantToArrowRowBuilder,
22+
PrimitiveVariantToArrowRowBuilder, make_primitive_variant_to_arrow_row_builder,
2323
};
2424
use crate::{VariantArray, VariantValueArrayBuilder};
2525
use arrow::array::{ArrayRef, BinaryViewArray, NullBufferBuilder};

parquet-variant-compute/src/to_json.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ pub fn variant_to_json(input: &ArrayRef) -> Result<StringArray, ArrowError> {
4949
_ => {
5050
return Err(ArrowError::CastError(
5151
"Expected StructArray with known fields".into(),
52-
))
52+
));
5353
}
5454
}
5555

parquet-variant-compute/src/unshred_variant.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ use arrow::array::{
2525
};
2626
use arrow::buffer::NullBuffer;
2727
use arrow::datatypes::{
28-
ArrowPrimitiveType, DataType, Date32Type, Float32Type, Float64Type, Int16Type, Int32Type,
29-
Int64Type, Int8Type, Time64MicrosecondType, TimeUnit, TimestampMicrosecondType,
28+
ArrowPrimitiveType, DataType, Date32Type, Float32Type, Float64Type, Int8Type, Int16Type,
29+
Int32Type, Int64Type, Time64MicrosecondType, TimeUnit, TimestampMicrosecondType,
3030
TimestampNanosecondType,
3131
};
3232
use arrow::error::{ArrowError, Result};

parquet-variant-compute/src/variant_array.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray, StructArray};
2222
use arrow::buffer::NullBuffer;
2323
use arrow::compute::cast;
2424
use arrow::datatypes::{
25-
Date32Type, Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type,
25+
Date32Type, Float16Type, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type,
2626
TimestampMicrosecondType, TimestampNanosecondType,
2727
};
2828
use arrow_schema::extension::ExtensionType;

parquet-variant-compute/src/variant_get.rs

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@ use arrow::{
2323
use arrow_schema::{ArrowError, DataType, FieldRef};
2424
use parquet_variant::{VariantPath, VariantPathElement};
2525

26+
use crate::VariantArray;
2627
use crate::variant_array::BorrowedShreddingState;
2728
use crate::variant_to_arrow::make_variant_to_arrow_row_builder;
28-
use crate::VariantArray;
2929

3030
use arrow::array::AsArray;
3131
use std::sync::Arc;
@@ -295,20 +295,20 @@ impl<'a> GetOptions<'a> {
295295
mod test {
296296
use std::sync::Arc;
297297

298-
use super::{variant_get, GetOptions};
298+
use super::{GetOptions, variant_get};
299+
use crate::VariantArray;
299300
use crate::json_to_variant;
300301
use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder};
301-
use crate::VariantArray;
302302
use arrow::array::{
303303
Array, ArrayRef, AsArray, BinaryViewArray, Date32Array, Float32Array, Float64Array,
304-
Int16Array, Int32Array, Int64Array, Int8Array, StringArray, StructArray,
304+
Int8Array, Int16Array, Int32Array, Int64Array, StringArray, StructArray,
305305
};
306306
use arrow::buffer::NullBuffer;
307307
use arrow::compute::CastOptions;
308308
use arrow::datatypes::DataType::{Int16, Int32, Int64};
309309
use arrow_schema::{DataType, Field, FieldRef, Fields};
310310
use chrono::DateTime;
311-
use parquet_variant::{Variant, VariantPath, EMPTY_VARIANT_METADATA_BYTES};
311+
use parquet_variant::{EMPTY_VARIANT_METADATA_BYTES, Variant, VariantPath};
312312

313313
fn single_variant_get_test(input_json: &str, path: VariantPath, expected_json: &str) {
314314
// Create input array from JSON string
@@ -602,7 +602,10 @@ mod test {
602602

603603
let err = variant_get(&array, options).unwrap_err();
604604
// TODO make this error message nicer (not Debug format)
605-
assert_eq!(err.to_string(), "Cast error: Failed to extract primitive of type Int32 from variant ShortString(ShortString(\"n/a\")) at path VariantPath([])");
605+
assert_eq!(
606+
err.to_string(),
607+
"Cast error: Failed to extract primitive of type Int32 from variant ShortString(ShortString(\"n/a\")) at path VariantPath([])"
608+
);
606609
}
607610

608611
/// Perfect Shredding: extract the typed value as a VariantArray
@@ -1926,9 +1929,11 @@ mod test {
19261929
assert!(result.is_err());
19271930
let error = result.unwrap_err();
19281931
assert!(matches!(error, ArrowError::CastError(_)));
1929-
assert!(error
1930-
.to_string()
1931-
.contains("Cannot access field 'nonexistent_field' on non-struct type"));
1932+
assert!(
1933+
error
1934+
.to_string()
1935+
.contains("Cannot access field 'nonexistent_field' on non-struct type")
1936+
);
19321937
}
19331938

19341939
#[test]

0 commit comments

Comments
 (0)