Skip to content

Commit 8baaa8b

Browse files
mbrobbel and alamb authored
Add ExtensionType trait and CanonicalExtensionType enum (#5822)
* Add `ExtensionType` for `uuid` and map to parquet logical type
* Fix docs
* Use an `ExtensionType` trait instead
* Fix clippy warnings
* Add type annotation to fix build
* Update `ExtensionType` trait to support more canonical extension types
* Add `Json` support to parquet, schema roundtrip not working yet
* Fix some clippy warnings
* Add explicit lifetime, resolving elided lifetime to static in assoc const was added in 1.81
* Replace use of deprecated method, mark roundtrip as todo
* Add more tests and missing impls
* Add missing type annotations
* Fix doc warning
* Add the feature to the `arrow` crate and use underscores
* Update feature name in `parquet` crate
* Add experimental warning to `extensions` module docs
* Add a note about the associated metadata type
* Fix `Json` canonical extension type empty string metadata
* Simplify `Bool8::deserialize_metadata`
* Use `Empty` instead of `serde_json::Map` in `JsonMetadata`
* Use `map_or` instead of `is_some_and` (msrv)

---------

Co-authored-by: Andrew Lamb <[email protected]>
1 parent 43617b2 commit 8baaa8b

File tree

17 files changed

+2350
-12
lines changed

17 files changed

+2350
-12
lines changed

arrow-array/src/array/list_view_array.rs

+4-4
Original file line number | Diff line number | Diff line change
@@ -895,8 +895,8 @@ mod tests {
895895
.build()
896896
.unwrap(),
897897
);
898-
assert_eq!(string.value_offsets(), &[]);
899-
assert_eq!(string.value_sizes(), &[]);
898+
assert_eq!(string.value_offsets(), &[] as &[i32; 0]);
899+
assert_eq!(string.value_sizes(), &[] as &[i32; 0]);
900900

901901
let string = LargeListViewArray::from(
902902
ArrayData::builder(DataType::LargeListView(f))
@@ -906,8 +906,8 @@ mod tests {
906906
.unwrap(),
907907
);
908908
assert_eq!(string.len(), 0);
909-
assert_eq!(string.value_offsets(), &[]);
910-
assert_eq!(string.value_sizes(), &[]);
909+
assert_eq!(string.value_offsets(), &[] as &[i64; 0]);
910+
assert_eq!(string.value_sizes(), &[] as &[i64; 0]);
911911
}
912912

913913
#[test]

arrow-schema/Cargo.toml

+9-3
Original file line number | Diff line number | Diff line change
@@ -34,21 +34,27 @@ path = "src/lib.rs"
3434
bench = false
3535

3636
[dependencies]
37-
serde = { version = "1.0", default-features = false, features = ["derive", "std", "rc"], optional = true }
37+
serde = { version = "1.0", default-features = false, features = [
38+
"derive",
39+
"std",
40+
"rc",
41+
], optional = true }
3842
bitflags = { version = "2.0.0", default-features = false, optional = true }
43+
serde_json = { version = "1.0", optional = true }
3944

4045
[features]
46+
canonical_extension_types = ["dep:serde", "dep:serde_json"]
4147
# Enable ffi support
4248
ffi = ["bitflags"]
49+
serde = ["dep:serde"]
4350

4451
[package.metadata.docs.rs]
4552
features = ["ffi"]
4653

4754
[dev-dependencies]
48-
serde_json = "1.0"
4955
bincode = { version = "1.3.3", default-features = false }
5056
criterion = { version = "0.5", default-features = false }
5157

5258
[[bench]]
5359
name = "ffi"
54-
harness = false
60+
harness = false
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,142 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! 8-bit Boolean
19+
//!
20+
//! <https://arrow.apache.org/docs/format/CanonicalExtensions.html#bit-boolean>
21+
22+
use crate::{extension::ExtensionType, ArrowError, DataType};
23+
24+
/// The extension type for `8-bit Boolean`.
25+
///
26+
/// Extension name: `arrow.bool8`.
27+
///
28+
/// The storage type of the extension is `Int8` where:
29+
/// - false is denoted by the value 0.
30+
/// - true can be specified using any non-zero value. Preferably 1.
31+
///
32+
/// <https://arrow.apache.org/docs/format/CanonicalExtensions.html#bit-boolean>
33+
#[derive(Debug, Default, Clone, Copy, PartialEq)]
34+
pub struct Bool8;
35+
36+
impl ExtensionType for Bool8 {
37+
const NAME: &'static str = "arrow.bool8";
38+
39+
type Metadata = &'static str;
40+
41+
fn metadata(&self) -> &Self::Metadata {
42+
&""
43+
}
44+
45+
fn serialize_metadata(&self) -> Option<String> {
46+
Some(String::default())
47+
}
48+
49+
fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
50+
if metadata.map_or(false, str::is_empty) {
51+
Ok("")
52+
} else {
53+
Err(ArrowError::InvalidArgumentError(
54+
"Bool8 extension type expects an empty string as metadata".to_owned(),
55+
))
56+
}
57+
}
58+
59+
fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
60+
match data_type {
61+
DataType::Int8 => Ok(()),
62+
data_type => Err(ArrowError::InvalidArgumentError(format!(
63+
"Bool8 data type mismatch, expected Int8, found {data_type}"
64+
))),
65+
}
66+
}
67+
68+
fn try_new(data_type: &DataType, _metadata: Self::Metadata) -> Result<Self, ArrowError> {
69+
Self.supports_data_type(data_type).map(|_| Self)
70+
}
71+
}
72+
73+
#[cfg(test)]
74+
mod tests {
75+
#[cfg(feature = "canonical_extension_types")]
76+
use crate::extension::CanonicalExtensionType;
77+
use crate::{
78+
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
79+
Field,
80+
};
81+
82+
use super::*;
83+
84+
#[test]
85+
fn valid() -> Result<(), ArrowError> {
86+
let mut field = Field::new("", DataType::Int8, false);
87+
field.try_with_extension_type(Bool8)?;
88+
field.try_extension_type::<Bool8>()?;
89+
#[cfg(feature = "canonical_extension_types")]
90+
assert_eq!(
91+
field.try_canonical_extension_type()?,
92+
CanonicalExtensionType::Bool8(Bool8)
93+
);
94+
95+
Ok(())
96+
}
97+
98+
#[test]
99+
#[should_panic(expected = "Field extension type name missing")]
100+
fn missing_name() {
101+
let field = Field::new("", DataType::Int8, false).with_metadata(
102+
[(EXTENSION_TYPE_METADATA_KEY.to_owned(), "".to_owned())]
103+
.into_iter()
104+
.collect(),
105+
);
106+
field.extension_type::<Bool8>();
107+
}
108+
109+
#[test]
110+
#[should_panic(expected = "expected Int8, found Boolean")]
111+
fn invalid_type() {
112+
Field::new("", DataType::Boolean, false).with_extension_type(Bool8);
113+
}
114+
115+
#[test]
116+
#[should_panic(expected = "Bool8 extension type expects an empty string as metadata")]
117+
fn missing_metadata() {
118+
let field = Field::new("", DataType::Int8, false).with_metadata(
119+
[(EXTENSION_TYPE_NAME_KEY.to_owned(), Bool8::NAME.to_owned())]
120+
.into_iter()
121+
.collect(),
122+
);
123+
field.extension_type::<Bool8>();
124+
}
125+
126+
#[test]
127+
#[should_panic(expected = "Bool8 extension type expects an empty string as metadata")]
128+
fn invalid_metadata() {
129+
let field = Field::new("", DataType::Int8, false).with_metadata(
130+
[
131+
(EXTENSION_TYPE_NAME_KEY.to_owned(), Bool8::NAME.to_owned()),
132+
(
133+
EXTENSION_TYPE_METADATA_KEY.to_owned(),
134+
"non-empty".to_owned(),
135+
),
136+
]
137+
.into_iter()
138+
.collect(),
139+
);
140+
field.extension_type::<Bool8>();
141+
}
142+
}

0 commit comments

Comments
 (0)