Skip to content

Commit 883380b

Browse files
authored
Migrate arrow-schema to Rust 2024 (#8489)
# Which issue does this PR close?

- Contributes to #6827

# Rationale for this change

Splitting up #8227.

# What changes are included in this PR?

Migrate `arrow-schema` to Rust 2024

# Are these changes tested?

CI

# Are there any user-facing changes?

Yes
1 parent 422da15 commit 883380b

File tree

13 files changed

+225
-159
lines changed

13 files changed

+225
-159
lines changed

arrow-schema/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ authors = { workspace = true }
2525
license = { workspace = true }
2626
keywords = { workspace = true }
2727
include = { workspace = true }
28-
edition = { workspace = true }
28+
edition = "2024"
2929
rust-version = { workspace = true }
3030

3131
[lib]

arrow-schema/src/datatype_parse.rs

Lines changed: 63 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@ pub(crate) fn parse_data_type(val: &str) -> ArrowResult<DataType> {
2626
type ArrowResult<T> = Result<T, ArrowError>;
2727

2828
fn make_error(val: &str, msg: &str) -> ArrowError {
29-
let msg = format!("Unsupported type '{val}'. Must be a supported arrow type name such as 'Int32' or 'Timestamp(ns)'. Error {msg}" );
29+
let msg = format!(
30+
"Unsupported type '{val}'. Must be a supported arrow type name such as 'Int32' or 'Timestamp(ns)'. Error {msg}"
31+
);
3032
ArrowError::ParseError(msg)
3133
}
3234

@@ -246,7 +248,7 @@ impl<'a> Parser<'a> {
246248
return Err(make_error(
247249
self.val,
248250
&format!("finding IntervalUnit for Interval, got {tok}"),
249-
))
251+
));
250252
}
251253
};
252254
self.expect_token(Token::RParen)?;
@@ -328,7 +330,7 @@ impl<'a> Parser<'a> {
328330
return Err(make_error(
329331
self.val,
330332
&format!("Expected a quoted string for a field name; got {tok:?}"),
331-
))
333+
));
332334
}
333335
};
334336
self.expect_token(Token::Colon)?;
@@ -345,8 +347,10 @@ impl<'a> Parser<'a> {
345347
tok => {
346348
return Err(make_error(
347349
self.val,
348-
&format!("Unexpected token while parsing Struct fields. Expected ',' or ')', but got '{tok}'"),
349-
))
350+
&format!(
351+
"Unexpected token while parsing Struct fields. Expected ',' or ')', but got '{tok}'"
352+
),
353+
));
350354
}
351355
}
352356
}
@@ -841,18 +845,12 @@ mod test {
841845
("", "Error finding next token"),
842846
("null", "Unsupported type 'null'"),
843847
("Nu", "Unsupported type 'Nu'"),
844-
(
845-
r#"Timestamp(ns, +00:00)"#,
846-
"Error unknown token: +00",
847-
),
848+
(r#"Timestamp(ns, +00:00)"#, "Error unknown token: +00"),
848849
(
849850
r#"Timestamp(ns, "+00:00)"#,
850851
r#"Unterminated string at: "+00:00)"#,
851852
),
852-
(
853-
r#"Timestamp(ns, "")"#,
854-
r#"empty strings aren't allowed"#,
855-
),
853+
(r#"Timestamp(ns, "")"#, r#"empty strings aren't allowed"#),
856854
(
857855
r#"Timestamp(ns, "+00:00"")"#,
858856
r#"Parser error: Unterminated string at: ")"#,
@@ -864,22 +862,58 @@ mod test {
864862
),
865863
("Int32, ", "trailing content after parsing 'Int32'"),
866864
("Int32(3), ", "trailing content after parsing 'Int32'"),
867-
("FixedSizeBinary(Int32), ", "Error finding i64 for FixedSizeBinary, got 'Int32'"),
868-
("FixedSizeBinary(3.0), ", "Error parsing 3.0 as integer: invalid digit found in string"),
865+
(
866+
"FixedSizeBinary(Int32), ",
867+
"Error finding i64 for FixedSizeBinary, got 'Int32'",
868+
),
869+
(
870+
"FixedSizeBinary(3.0), ",
871+
"Error parsing 3.0 as integer: invalid digit found in string",
872+
),
869873
// too large for i32
870-
("FixedSizeBinary(4000000000), ", "Error converting 4000000000 into i32 for FixedSizeBinary: out of range integral type conversion attempted"),
874+
(
875+
"FixedSizeBinary(4000000000), ",
876+
"Error converting 4000000000 into i32 for FixedSizeBinary: out of range integral type conversion attempted",
877+
),
871878
// can't have negative precision
872-
("Decimal32(-3, 5)", "Error converting -3 into u8 for Decimal32: out of range integral type conversion attempted"),
873-
("Decimal64(-3, 5)", "Error converting -3 into u8 for Decimal64: out of range integral type conversion attempted"),
874-
("Decimal128(-3, 5)", "Error converting -3 into u8 for Decimal128: out of range integral type conversion attempted"),
875-
("Decimal256(-3, 5)", "Error converting -3 into u8 for Decimal256: out of range integral type conversion attempted"),
876-
("Decimal32(3, 500)", "Error converting 500 into i8 for Decimal32: out of range integral type conversion attempted"),
877-
("Decimal64(3, 500)", "Error converting 500 into i8 for Decimal64: out of range integral type conversion attempted"),
878-
("Decimal128(3, 500)", "Error converting 500 into i8 for Decimal128: out of range integral type conversion attempted"),
879-
("Decimal256(3, 500)", "Error converting 500 into i8 for Decimal256: out of range integral type conversion attempted"),
879+
(
880+
"Decimal32(-3, 5)",
881+
"Error converting -3 into u8 for Decimal32: out of range integral type conversion attempted",
882+
),
883+
(
884+
"Decimal64(-3, 5)",
885+
"Error converting -3 into u8 for Decimal64: out of range integral type conversion attempted",
886+
),
887+
(
888+
"Decimal128(-3, 5)",
889+
"Error converting -3 into u8 for Decimal128: out of range integral type conversion attempted",
890+
),
891+
(
892+
"Decimal256(-3, 5)",
893+
"Error converting -3 into u8 for Decimal256: out of range integral type conversion attempted",
894+
),
895+
(
896+
"Decimal32(3, 500)",
897+
"Error converting 500 into i8 for Decimal32: out of range integral type conversion attempted",
898+
),
899+
(
900+
"Decimal64(3, 500)",
901+
"Error converting 500 into i8 for Decimal64: out of range integral type conversion attempted",
902+
),
903+
(
904+
"Decimal128(3, 500)",
905+
"Error converting 500 into i8 for Decimal128: out of range integral type conversion attempted",
906+
),
907+
(
908+
"Decimal256(3, 500)",
909+
"Error converting 500 into i8 for Decimal256: out of range integral type conversion attempted",
910+
),
880911
("Struct(f1 Int64)", "Error unknown token: f1"),
881912
("Struct(\"f1\" Int64)", "Expected ':'"),
882-
("Struct(\"f1\": )", "Error finding next type, got unexpected ')'"),
913+
(
914+
"Struct(\"f1\": )",
915+
"Error finding next type, got unexpected ')'",
916+
),
883917
];
884918

885919
for (data_type_string, expected_message) in cases {
@@ -906,6 +940,9 @@ mod test {
906940
fn parse_error_type() {
907941
let err = parse_data_type("foobar").unwrap_err();
908942
assert!(matches!(err, ArrowError::ParseError(_)));
909-
assert_eq!(err.to_string(), "Parser error: Unsupported type 'foobar'. Must be a supported arrow type name such as 'Int32' or 'Timestamp(ns)'. Error unknown token: foobar");
943+
assert_eq!(
944+
err.to_string(),
945+
"Parser error: Unsupported type 'foobar'. Must be a supported arrow type name such as 'Int32' or 'Timestamp(ns)'. Error unknown token: foobar"
946+
);
910947
}
911948
}

arrow-schema/src/extension/canonical/bool8.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
//!
2020
//! <https://arrow.apache.org/docs/format/CanonicalExtensions.html#bit-boolean>
2121
22-
use crate::{extension::ExtensionType, ArrowError, DataType};
22+
use crate::{ArrowError, DataType, extension::ExtensionType};
2323

2424
/// The extension type for `8-bit Boolean`.
2525
///
@@ -75,8 +75,8 @@ mod tests {
7575
#[cfg(feature = "canonical_extension_types")]
7676
use crate::extension::CanonicalExtensionType;
7777
use crate::{
78-
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
7978
Field,
79+
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
8080
};
8181

8282
use super::*;

arrow-schema/src/extension/canonical/fixed_shape_tensor.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
2222
use serde::{Deserialize, Serialize};
2323

24-
use crate::{extension::ExtensionType, ArrowError, DataType};
24+
use crate::{ArrowError, DataType, extension::ExtensionType};
2525

2626
/// The extension type for fixed shape tensor.
2727
///
@@ -297,8 +297,8 @@ mod tests {
297297
#[cfg(feature = "canonical_extension_types")]
298298
use crate::extension::CanonicalExtensionType;
299299
use crate::{
300-
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
301300
Field,
301+
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
302302
};
303303

304304
use super::*;

arrow-schema/src/extension/canonical/json.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
2222
use serde::{Deserialize, Serialize};
2323

24-
use crate::{extension::ExtensionType, ArrowError, DataType};
24+
use crate::{ArrowError, DataType, extension::ExtensionType};
2525

2626
/// The extension type for `JSON`.
2727
///
@@ -109,8 +109,8 @@ mod tests {
109109
#[cfg(feature = "canonical_extension_types")]
110110
use crate::extension::CanonicalExtensionType;
111111
use crate::{
112-
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
113112
Field,
113+
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
114114
};
115115

116116
use super::*;

arrow-schema/src/extension/canonical/mod.rs

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,20 +87,28 @@ impl TryFrom<&Field> for CanonicalExtensionType {
8787
match value.extension_type_name() {
8888
// An extension type name with an `arrow.` prefix
8989
Some(name) if name.starts_with("arrow.") => match name {
90-
FixedShapeTensor::NAME => value.try_extension_type::<FixedShapeTensor>().map(Into::into),
91-
VariableShapeTensor::NAME => value.try_extension_type::<VariableShapeTensor>().map(Into::into),
90+
FixedShapeTensor::NAME => value
91+
.try_extension_type::<FixedShapeTensor>()
92+
.map(Into::into),
93+
VariableShapeTensor::NAME => value
94+
.try_extension_type::<VariableShapeTensor>()
95+
.map(Into::into),
9296
Json::NAME => value.try_extension_type::<Json>().map(Into::into),
9397
Uuid::NAME => value.try_extension_type::<Uuid>().map(Into::into),
9498
Opaque::NAME => value.try_extension_type::<Opaque>().map(Into::into),
9599
Bool8::NAME => value.try_extension_type::<Bool8>().map(Into::into),
96-
_ => Err(ArrowError::InvalidArgumentError(format!("Unsupported canonical extension type: {name}"))),
100+
_ => Err(ArrowError::InvalidArgumentError(format!(
101+
"Unsupported canonical extension type: {name}"
102+
))),
97103
},
98104
// Name missing the expected prefix
99105
Some(name) => Err(ArrowError::InvalidArgumentError(format!(
100106
"Field extension type name mismatch, expected a name with an `arrow.` prefix, found {name}"
101107
))),
102108
// Name missing
103-
None => Err(ArrowError::InvalidArgumentError("Field extension type name missing".to_owned())),
109+
None => Err(ArrowError::InvalidArgumentError(
110+
"Field extension type name missing".to_owned(),
111+
)),
104112
}
105113
}
106114
}

arrow-schema/src/extension/canonical/opaque.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
2222
use serde::{Deserialize, Serialize};
2323

24-
use crate::{extension::ExtensionType, ArrowError, DataType};
24+
use crate::{ArrowError, DataType, extension::ExtensionType};
2525

2626
/// The extension type for `Opaque`.
2727
///
@@ -135,8 +135,8 @@ mod tests {
135135
#[cfg(feature = "canonical_extension_types")]
136136
use crate::extension::CanonicalExtensionType;
137137
use crate::{
138-
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
139138
Field,
139+
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
140140
};
141141

142142
use super::*;

arrow-schema/src/extension/canonical/uuid.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
//!
2020
//! <https://arrow.apache.org/docs/format/CanonicalExtensions.html#uuid>
2121
22-
use crate::{extension::ExtensionType, ArrowError, DataType};
22+
use crate::{ArrowError, DataType, extension::ExtensionType};
2323

2424
/// The extension type for `UUID`.
2525
///
@@ -80,8 +80,8 @@ mod tests {
8080
#[cfg(feature = "canonical_extension_types")]
8181
use crate::extension::CanonicalExtensionType;
8282
use crate::{
83-
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
8483
Field,
84+
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
8585
};
8686

8787
use super::*;

arrow-schema/src/extension/canonical/variable_shape_tensor.rs

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
2222
use serde::{Deserialize, Serialize};
2323

24-
use crate::{extension::ExtensionType, ArrowError, DataType, Field};
24+
use crate::{ArrowError, DataType, Field, extension::ExtensionType};
2525

2626
/// The extension type for `VariableShapeTensor`.
2727
///
@@ -310,16 +310,19 @@ impl ExtensionType for VariableShapeTensor {
310310
DataType::FixedSizeList(_, list_size) => {
311311
let dimensions = usize::try_from(*list_size).expect("conversion failed");
312312
// Make sure the metadata is valid.
313-
let metadata = VariableShapeTensorMetadata::try_new(dimensions, metadata.dim_names, metadata.permutations, metadata.uniform_shape)?;
313+
let metadata = VariableShapeTensorMetadata::try_new(
314+
dimensions,
315+
metadata.dim_names,
316+
metadata.permutations,
317+
metadata.uniform_shape,
318+
)?;
314319
let data_field = &fields[0];
315320
match data_field.data_type() {
316-
DataType::List(field) => {
317-
Ok(Self {
318-
value_type: field.data_type().clone(),
319-
dimensions,
320-
metadata
321-
})
322-
}
321+
DataType::List(field) => Ok(Self {
322+
value_type: field.data_type().clone(),
323+
dimensions,
324+
metadata,
325+
}),
323326
data_type => Err(ArrowError::InvalidArgumentError(format!(
324327
"VariableShapeTensor data type mismatch, expected List for data field, found {data_type}"
325328
))),
@@ -342,8 +345,8 @@ mod tests {
342345
#[cfg(feature = "canonical_extension_types")]
343346
use crate::extension::CanonicalExtensionType;
344347
use crate::{
345-
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
346348
Field,
349+
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
347350
};
348351

349352
use super::*;

0 commit comments

Comments
 (0)