Skip to content

Commit e7f8cd7

Browse files
committed
work around arrow cast feature gap
1 parent dca498a commit e7f8cd7

File tree

1 file changed

+36
-3
lines changed

1 file changed

+36
-3
lines changed

datafusion/core/src/datasource/schema_adapter.rs

+36-3
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,10 @@
2121
//! physical format into how they should be used by DataFusion. For instance, a schema
2222
//! can be stored external to a parquet file that maps parquet logical types to arrow types.
2323
24-
use arrow::compute::{can_cast_types, cast};
25-
use arrow_array::{new_null_array, RecordBatch, RecordBatchOptions};
26-
use arrow_schema::{Schema, SchemaRef};
24+
use arrow_array::builder::StringBuilder;
25+
use arrow_array::cast::AsArray;
26+
use arrow_array::{new_null_array, Array, ArrayRef, RecordBatch, RecordBatchOptions};
27+
use arrow_schema::{ArrowError, DataType, Schema, SchemaRef};
2728
use datafusion_common::plan_err;
2829
use std::fmt::Debug;
2930
use std::sync::Arc;
@@ -165,6 +166,38 @@ impl SchemaAdapter for DefaultSchemaAdapter {
165166
}
166167
}
167168

169+
// Workaround arrow-rs bug in can_cast_types
170+
// External error: query failed: DataFusion error: Arrow error: Cast error: Casting from BinaryView to Utf8 not supported
171+
fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
172+
arrow::compute::can_cast_types(from_type, to_type)
173+
|| matches!(
174+
(from_type, to_type),
175+
(DataType::BinaryView, DataType::Utf8 | DataType::LargeUtf8)
176+
| (DataType::Utf8 | DataType::LargeUtf8, DataType::BinaryView)
177+
)
178+
}
179+
180+
// Work around arrow-rs casting bug
181+
// External error: query failed: DataFusion error: Arrow error: Cast error: Casting from BinaryView to Utf8 not supported
182+
fn cast(array: &dyn Array, to_type: &DataType) -> Result<ArrayRef, ArrowError> {
183+
match (array.data_type(), to_type) {
184+
(DataType::BinaryView, DataType::Utf8) => {
185+
let array = array.as_binary_view();
186+
let mut builder = StringBuilder::with_capacity(array.len(), 8 * 1024);
187+
for value in array.iter() {
188+
// check if the value is valid utf8 (should do this once, not each value)
189+
let value = value.map(|value| std::str::from_utf8(value)).transpose()?;
190+
191+
builder.append_option(value);
192+
}
193+
194+
Ok(Arc::new(builder.finish()))
195+
}
196+
// fallback to arrow kernel
197+
(_, _) => arrow::compute::cast(array, to_type),
198+
}
199+
}
200+
168201
/// The SchemaMapping struct holds a mapping from the file schema to the table schema
169202
/// and any necessary type conversions that need to be applied.
170203
#[derive(Debug)]

0 commit comments

Comments
 (0)