|
21 | 21 | //! physical format into how they should be used by DataFusion. For instance, a schema
|
22 | 22 | //! can be stored external to a parquet file that maps parquet logical types to arrow types.
|
23 | 23 |
|
24 |
| -use arrow::compute::{can_cast_types, cast}; |
25 |
| -use arrow_array::{new_null_array, RecordBatch, RecordBatchOptions}; |
26 |
| -use arrow_schema::{Schema, SchemaRef}; |
| 24 | +use arrow_array::builder::StringBuilder; |
| 25 | +use arrow_array::cast::AsArray; |
| 26 | +use arrow_array::{new_null_array, Array, ArrayRef, RecordBatch, RecordBatchOptions}; |
| 27 | +use arrow_schema::{ArrowError, DataType, Schema, SchemaRef}; |
27 | 28 | use datafusion_common::plan_err;
|
28 | 29 | use std::fmt::Debug;
|
29 | 30 | use std::sync::Arc;
|
@@ -165,6 +166,38 @@ impl SchemaAdapter for DefaultSchemaAdapter {
|
165 | 166 | }
|
166 | 167 | }
|
167 | 168 |
|
| 169 | +// Workaround arrow-rs bug in can_cast_types |
| 170 | +// External error: query failed: DataFusion error: Arrow error: Cast error: Casting from BinaryView to Utf8 not supported |
| 171 | +fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { |
| 172 | + arrow::compute::can_cast_types(from_type, to_type) |
| 173 | + || matches!( |
| 174 | + (from_type, to_type), |
| 175 | + (DataType::BinaryView, DataType::Utf8 | DataType::LargeUtf8) |
| 176 | + | (DataType::Utf8 | DataType::LargeUtf8, DataType::BinaryView) |
| 177 | + ) |
| 178 | +} |
| 179 | + |
| 180 | +// Work around arrow-rs casting bug |
| 181 | +// External error: query failed: DataFusion error: Arrow error: Cast error: Casting from BinaryView to Utf8 not supported |
| 182 | +fn cast(array: &dyn Array, to_type: &DataType) -> Result<ArrayRef, ArrowError> { |
| 183 | + match (array.data_type(), to_type) { |
| 184 | + (DataType::BinaryView, DataType::Utf8) => { |
| 185 | + let array = array.as_binary_view(); |
| 186 | + let mut builder = StringBuilder::with_capacity(array.len(), 8 * 1024); |
| 187 | + for value in array.iter() { |
| 188 | + // check if the value is valid utf8 (should do this once, not each value) |
| 189 | + let value = value.map(|value| std::str::from_utf8(value)).transpose()?; |
| 190 | + |
| 191 | + builder.append_option(value); |
| 192 | + } |
| 193 | + |
| 194 | + Ok(Arc::new(builder.finish())) |
| 195 | + } |
| 196 | + // fallback to arrow kernel |
| 197 | + (_, _) => arrow::compute::cast(array, to_type), |
| 198 | + } |
| 199 | +} |
| 200 | + |
168 | 201 | /// The SchemaMapping struct holds a mapping from the file schema to the table schema
|
169 | 202 | /// and any necessary type conversions that need to be applied.
|
170 | 203 | #[derive(Debug)]
|
|
0 commit comments