Skip to content

Commit 12fa7d8

Browse files
committed
use a single trait for extension types
1 parent 6132502 commit 12fa7d8

File tree

3 files changed

+86
-87
lines changed

3 files changed

+86
-87
lines changed

extensions/geo/src/arrow.rs

Lines changed: 33 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use vortex_array::arrow::ArrowArray;
1919
use vortex_array::arrow::compute::{ToArrowArgs, ToArrowKernelRef};
2020
use vortex_array::compute::{InvocationArgs, Kernel, Output, cast};
2121
use vortex_array::{Array, ToCanonical, register_kernel};
22-
use vortex_dtype::arrow::{ArrowMetadata, ArrowMetadataRef, ArrowToDType, ArrowToDTypeRef};
22+
use vortex_dtype::arrow::{TypeConversion, TypeConversionRef};
2323
use vortex_dtype::{DType, ExtDType, PType, register_extension_type};
2424
use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
2525

@@ -114,26 +114,25 @@ fn to_arrow_linestring(
114114
pub struct GeoArrowConversion;
115115

116116
// Conversion between Vortex geospatial extension types and GeoArrow extension types.
117-
impl ArrowToDType for GeoArrowConversion {
118-
fn can_convert(&self, field: &Field) -> bool {
119-
// The field needs to support all of our types
120-
if let Some(ext_type) = field.extension_type_name() {
121-
if ext_type == PointType::NAME
122-
|| ext_type == LineStringType::NAME
123-
|| ext_type == PolygonType::NAME
124-
|| ext_type == WkbType::NAME
125-
{
126-
return true;
127-
}
128-
}
129-
false
130-
}
131-
132-
fn to_vortex(&self, field: &Field) -> VortexResult<DType> {
117+
impl TypeConversion for GeoArrowConversion {
118+
fn to_vortex(&self, field: &Field) -> VortexResult<Option<DType>> {
119+
// Validate that the field is one of the supported geospatial
120+
// extension types.
133121
let Some(ext_type) = field.extension_type_name() else {
134-
vortex_bail!("field does not have an extension type")
122+
println!("no extension meta on field {field:?}, bailing");
123+
return Ok(None);
135124
};
136125

126+
println!("ext_type = {}", ext_type);
127+
128+
if ext_type != PointType::NAME
129+
&& ext_type != LineStringType::NAME
130+
&& ext_type != PolygonType::NAME
131+
&& ext_type != WkbType::NAME
132+
{
133+
return Ok(None);
134+
}
135+
137136
macro_rules! dim_from_fields {
138137
($fields:expr) => {{
139138
match $fields.len() {
@@ -232,13 +231,14 @@ impl ArrowToDType for GeoArrowConversion {
232231
_ => vortex_bail!("extension type {} not supported", ext_type),
233232
};
234233

235-
Ok(DType::Extension(Arc::new(ext_dtype)))
234+
Ok(Some(DType::Extension(Arc::new(ext_dtype))))
236235
}
237-
}
238236

239-
impl ArrowMetadata for GeoArrowConversion {
240-
#[allow(clippy::disallowed_types)]
241-
fn arrow_metadata(&self, vortex_extension_type: &ExtDType) -> Option<HashMap<String, String>> {
237+
fn to_arrow(
238+
&self,
239+
vortex_extension_type: &ExtDType,
240+
field: &mut Field,
241+
) -> VortexResult<Option<()>> {
242242
if let Ok(geometry) = GeometryType::try_from(vortex_extension_type) {
243243
let mut extension_metadata = HashMap::new();
244244
let ext_type_name = match geometry {
@@ -249,7 +249,7 @@ impl ArrowMetadata for GeoArrowConversion {
249249
};
250250
extension_metadata.insert(EXTENSION_TYPE_NAME_KEY.to_string(), ext_type_name);
251251

252-
match geometry {
252+
let extension_metadata = match geometry {
253253
GeometryType::Point(meta)
254254
| GeometryType::LineString(meta)
255255
| GeometryType::Polygon(meta)
@@ -264,19 +264,23 @@ impl ArrowMetadata for GeoArrowConversion {
264264
.vortex_expect("failed to serialize geoarrow metadata");
265265
extension_metadata
266266
.insert(EXTENSION_TYPE_METADATA_KEY.to_string(), metadata_json);
267+
extension_metadata
268+
} else {
269+
extension_metadata
267270
}
268271
}
269272
};
270273

271-
Some(extension_metadata)
274+
field.set_metadata(extension_metadata);
275+
276+
Ok(Some(()))
272277
} else {
273-
None
278+
Ok(None)
274279
}
275280
}
276281
}
277282

278-
register_extension_type!(ArrowToDTypeRef(ArcRef::new_ref(&GeoArrowConversion)));
279-
register_extension_type!(ArrowMetadataRef(ArcRef::new_ref(&GeoArrowConversion)));
283+
register_extension_type!(TypeConversionRef::new(ArcRef::new_ref(&GeoArrowConversion)));
280284

281285
/// Unpack the geoarrow CoordBuffer. Errors if the dimensions specified in the metadata do not
282286
/// match the actual encoding.
@@ -400,7 +404,7 @@ mod tests {
400404
dimension: crate::Dimension::XY,
401405
crs: None,
402406
})
403-
.into_ext_dtype(false.into());
407+
.into_ext_dtype(true.into());
404408
assert_eq!(dtype, DType::Extension(Arc::new(owned_type)));
405409

406410
// round trip back to Arrow type.
@@ -424,7 +428,4 @@ mod tests {
424428
let exported = imported.into_arrow_preferred().unwrap();
425429
assert_eq!(exported.data_type(), struct_array.data_type());
426430
}
427-
428-
#[test]
429-
fn test_roundtrip() {}
430431
}

vortex-array/src/arrow/convert.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ use arrow_buffer::buffer::{NullBuffer, OffsetBuffer};
1818
use arrow_buffer::{ArrowNativeType, BooleanBuffer, Buffer as ArrowBuffer, ScalarBuffer};
1919
use arrow_schema::{DataType, TimeUnit as ArrowTimeUnit};
2020
use vortex_buffer::{Alignment, Buffer, ByteBuffer};
21-
use vortex_dtype::arrow::ArrowToDTypeRef;
21+
use vortex_dtype::arrow::geo_field_to_dtype;
2222
use vortex_dtype::datetime::TimeUnit;
2323
use vortex_dtype::{DType, DecimalDType, FieldNames, NativePType, PType};
2424
use vortex_error::{VortexExpect as _, vortex_panic};
@@ -243,7 +243,7 @@ impl FromArrowArray<&ArrowStructArray> for ArrayRef {
243243
for (field, column) in value.fields().iter().zip(value.columns()) {
244244
if field.extension_type_name().is_some() {
245245
// Check if we have a conversion function. Wrap using the ExtensionArray.
246-
if let Some(Ok(DType::Extension(ext_dtype))) = ArrowToDTypeRef::convert(field) {
246+
if let Ok(Some(DType::Extension(ext_dtype))) = geo_field_to_dtype(field) {
247247
let storage = Self::from_arrow(column.clone(), field.is_nullable());
248248
field_names.push(field.name().to_string().into());
249249
field_arrays.push(ExtensionArray::new(ext_dtype, storage).into_array());

vortex-dtype/src/arrow.rs

Lines changed: 51 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@
1010
//! For this reason, it's recommended to do as much computation as possible within Vortex, and then
1111
//! materialize an Arrow ArrayRef at the very end of the processing chain.
1212
13-
#[allow(clippy::disallowed_types)]
14-
use std::collections::HashMap;
1513
use std::sync::Arc;
1614

1715
use arcref::ArcRef;
@@ -137,12 +135,13 @@ impl FromArrowType<&Field> for DType {
137135
Some(ext_type) => {
138136
// Check the registry for any Vortex extension types that represent the Arrow
139137
// extension type named here.
140-
for converter in inventory::iter::<ArrowToDTypeRef> {
141-
if converter.0.can_convert(field) {
142-
return converter
143-
.0
144-
.to_vortex(field)
145-
.vortex_expect("arrow extension type to vortex dtype");
138+
for converter in inventory::iter::<TypeConversionRef> {
139+
if let Some(converted) = converter
140+
.0
141+
.to_vortex(field)
142+
.vortex_expect("Conversion from GeoArrow type to GeoVortex")
143+
{
144+
return converted;
146145
}
147146
}
148147

@@ -210,9 +209,8 @@ impl DType {
210209
// Optionally: attach any Arrow extension type metadata, if an ArrowMetadata
211210
// kernel is defined for the extension type.
212211
if let DType::Extension(ext_type) = field_dt {
213-
for kernel in inventory::iter::<ArrowMetadataRef>() {
214-
if let Some(datum) = kernel.0.arrow_metadata(ext_type.as_ref()) {
215-
field = field.with_metadata(datum);
212+
for kernel in inventory::iter::<TypeConversionRef> {
213+
if kernel.0.to_arrow(ext_type.as_ref(), &mut field)?.is_some() {
216214
break;
217215
}
218216
}
@@ -242,60 +240,60 @@ impl DType {
242240
}
243241
}
244242

245-
/// Type-erased pointer to a [`ArrowToDType`] implementation.
246-
pub struct ArrowToDTypeRef(pub ArcRef<dyn ArrowToDType>);
247-
inventory::collect!(ArrowToDTypeRef);
248-
249-
impl ArrowToDTypeRef {
250-
/// Attempt to convert a field to a DType. If the there is no registered converter that
251-
/// can handle the field type, `None` is returned.
252-
///
253-
/// If a converter is resolved, it is used to convert the Field and the result is returned in
254-
/// a `Some`.
255-
pub fn convert(field: impl AsRef<Field>) -> Option<VortexResult<DType>> {
256-
for converter in inventory::iter::<ArrowToDTypeRef> {
257-
if converter.0.can_convert(field.as_ref()) {
258-
return Some(converter.0.to_vortex(field.as_ref()));
259-
}
243+
/// Attempt to convert a field to a DType. If the there is no registered converter that
244+
/// can handle the field type, `None` is returned.
245+
///
246+
/// If a converter is resolved, it is used to convert the Field and the result is returned in
247+
/// a `Some`.
248+
pub fn geo_field_to_dtype(field: impl AsRef<Field>) -> VortexResult<Option<DType>> {
249+
for converter in inventory::iter::<TypeConversionRef> {
250+
if let Some(converted) = converter.0.to_vortex(field.as_ref())? {
251+
return Ok(Some(converted));
260252
}
261-
262-
None
263253
}
264-
}
265254

266-
/// Get Arrow extension type metadata for a type.
267-
pub trait ArrowMetadata: 'static + Send + Sync {
268-
/// Optionally get the Arrow metadata for this type. None indicates it is not an Arrow
269-
/// extension type, Some indicates that it is.
270-
#[allow(clippy::disallowed_types)]
271-
fn arrow_metadata(&self, vortex_extension_type: &ExtDType) -> Option<HashMap<String, String>>;
255+
Ok(None)
272256
}
273257

274-
/// Type-erased pointer to a thing that can implement the DType conversion instead.
275-
pub struct ArrowMetadataRef(pub ArcRef<dyn ArrowMetadata>);
276-
inventory::collect!(ArrowMetadataRef);
277-
278-
/// Conversion for extension types.
258+
/// Conversions between Arrow [`Field`] type and Vortex logical `DType`.
279259
///
280-
/// If we have custom conversions we can register them via a plugin. This is something that is
281-
/// determined elsewhere however.
282-
pub trait ArrowToDType: Send + Sync {
283-
/// If this returns `true`, the implementor is able to convert the given Vortex [`DType`] to
284-
/// an Arrow [`Field`]. The caller can then call the `to_vortex` method with the argument.
285-
fn can_convert(&self, data_type: &Field) -> bool;
286-
260+
/// This crate provides infra for registering and discovering extension types.
261+
/// Once you implement this trait, you need to register it using [`register_extension_type`]
262+
/// to have it get discovered.
263+
///
264+
/// See also: [`register_extension_type`]
265+
pub trait TypeConversion: 'static + Send + Sync {
287266
/// Convert the given Arrow [`Field`] to a Vortex [`DType`].
288-
fn to_vortex(&self, field: &Field) -> VortexResult<DType>;
267+
fn to_vortex(&self, _field: &Field) -> VortexResult<Option<DType>> {
268+
Ok(None)
269+
}
270+
271+
/// Convert a Vortex [`ExtDType`] to an Arrow schema [`Field`].
272+
///
273+
/// The returned field will have the type, nullability, and metadata
274+
/// that should be propagated forward, but its name may be changed.
275+
fn to_arrow(&self, _dtype: &ExtDType, _field_builder: &mut Field) -> VortexResult<Option<()>> {
276+
Ok(None)
277+
}
289278
}
290279

291-
/// Register an extension type globally. This should be a type that implements
292-
/// the [`ArrowToDType`] trait.
280+
/// Conversion token
281+
pub struct TypeConversionRef(ArcRef<dyn TypeConversion>);
282+
inventory::collect!(TypeConversionRef);
283+
284+
impl TypeConversionRef {
285+
/// Create a new `TypeConversionRef` from a pointer to the implementation.
286+
pub const fn new(conversion: ArcRef<dyn TypeConversion>) -> Self {
287+
Self(conversion)
288+
}
289+
}
290+
291+
/// Register an extension type for lookup.
293292
#[macro_export]
294293
macro_rules! register_extension_type {
295294
($extension:expr) => {
296-
$crate::inventory::submit! {
297-
$extension
298-
}
295+
const _: $crate::arrow::TypeConversionRef = $extension;
296+
$crate::inventory::submit! { $extension }
299297
};
300298
}
301299

0 commit comments

Comments
 (0)