Skip to content

Commit

Permalink
refactor: remove uses of arrow_buffer & arrow_array and use reexport in arrow instead (#14503)
Browse files Browse the repository at this point in the history

* refactor: replace uses of arrow_buffer and arrow_array with reexport in arrow

* Remove arrow-buffer in common

* Remove dependency in core

* remove another one

* remove from functions-nested

* remove from physical-expr

* remove from physical-expr-common

* Remove from physical-plan

* Remove from substrait

* fix datafusion-cli/Cargo.lock

---------

Co-authored-by: Ian Lai <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
3 people authored Feb 5, 2025
1 parent fe8ab01 commit d5ff3e7
Show file tree
Hide file tree
Showing 60 changed files with 114 additions and 124 deletions.
6 changes: 0 additions & 6 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion datafusion/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ apache-avro = { version = "0.17", default-features = false, features = [
], optional = true }
arrow = { workspace = true }
arrow-array = { workspace = true }
arrow-buffer = { workspace = true }
arrow-ipc = { workspace = true }
arrow-schema = { workspace = true }
base64 = "0.22.1"
Expand Down
5 changes: 2 additions & 3 deletions datafusion/common/src/hash_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,11 @@
use std::sync::Arc;

use ahash::RandomState;
use arrow::array::types::{IntervalDayTime, IntervalMonthDayNano};
use arrow::array::*;
use arrow::datatypes::*;
#[cfg(not(feature = "force_hash_collisions"))]
use arrow::{downcast_dictionary_array, downcast_primitive_array};
use arrow_buffer::IntervalDayTime;
use arrow_buffer::IntervalMonthDayNano;

#[cfg(not(feature = "force_hash_collisions"))]
use crate::cast::{
Expand Down Expand Up @@ -700,7 +699,7 @@ mod tests {
// Tests actual values of hashes, which are different if forcing collisions
#[cfg(not(feature = "force_hash_collisions"))]
fn create_hashes_for_struct_arrays() {
use arrow_buffer::Buffer;
use arrow::buffer::Buffer;

let boolarr = Arc::new(BooleanArray::from(vec![
false, false, true, true, true, true,
Expand Down
8 changes: 4 additions & 4 deletions datafusion/common/src/scalar/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ use crate::cast::{
use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err};
use crate::hash_utils::create_hashes;
use crate::utils::SingleRowListArrayBuilder;
use arrow::array::types::{IntervalDayTime, IntervalMonthDayNano};
use arrow::buffer::ScalarBuffer;
use arrow::compute::kernels::numeric::*;
use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions};
use arrow::{
Expand All @@ -54,7 +56,6 @@ use arrow::{
UInt16Type, UInt32Type, UInt64Type, UInt8Type, DECIMAL128_MAX_PRECISION,
},
};
use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, ScalarBuffer};
use arrow_schema::{UnionFields, UnionMode};

use crate::format::DEFAULT_CAST_OPTIONS;
Expand Down Expand Up @@ -3958,12 +3959,11 @@ mod tests {
};

use crate::assert_batches_eq;
use arrow::buffer::OffsetBuffer;
use arrow::array::{types::Float64Type, NullBufferBuilder};
use arrow::buffer::{Buffer, OffsetBuffer};
use arrow::compute::{is_null, kernels};
use arrow::error::ArrowError;
use arrow::util::pretty::pretty_format_columns;
use arrow_array::types::Float64Type;
use arrow_buffer::{Buffer, NullBufferBuilder};
use arrow_schema::Fields;
use chrono::NaiveDate;
use rand::Rng;
Expand Down
1 change: 0 additions & 1 deletion datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,6 @@ xz2 = { version = "0.1", optional = true, features = ["static"] }
zstd = { version = "0.13", optional = true, default-features = false }

[dev-dependencies]
arrow-buffer = { workspace = true }
async-trait = { workspace = true }
criterion = { version = "0.5", features = ["async_tokio"] }
ctor = { workspace = true }
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/dataframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
mod dataframe_functions;
mod describe;

use arrow::buffer::ScalarBuffer;
use arrow::datatypes::{DataType, Field, Float32Type, Int32Type, Schema, UInt64Type};
use arrow::util::pretty::pretty_format_batches;
use arrow::{
Expand All @@ -33,7 +34,6 @@ use arrow_array::{
record_batch, Array, BooleanArray, DictionaryArray, Float32Array, Float64Array,
Int8Array, UnionArray,
};
use arrow_buffer::ScalarBuffer;
use arrow_schema::{ArrowError, SchemaRef, UnionFields, UnionMode};
use datafusion_functions_aggregate::count::count_udaf;
use datafusion_functions_aggregate::expr_fn::{
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/expr_api/simplification.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@

//! This program demonstrates the DataFusion expression simplification API.
use arrow::array::types::IntervalDayTime;
use arrow::datatypes::{DataType, Field, Schema};
use arrow_array::{ArrayRef, Int32Array};
use arrow_buffer::IntervalDayTime;
use chrono::{DateTime, TimeZone, Utc};
use datafusion::{error::Result, execution::context::ExecutionProps, prelude::*};
use datafusion_common::cast::as_int32_array;
Expand Down
1 change: 0 additions & 1 deletion datafusion/functions-aggregate/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ path = "src/lib.rs"
[dependencies]
ahash = { workspace = true }
arrow = { workspace = true }
arrow-buffer = { workspace = true }
arrow-schema = { workspace = true }
datafusion-common = { workspace = true }
datafusion-doc = { workspace = true }
Expand Down
6 changes: 4 additions & 2 deletions datafusion/functions-aggregate/benches/array_agg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,18 @@

use std::sync::Arc;

use arrow::array::{Array, ArrayRef, ArrowPrimitiveType, AsArray, ListArray};
use arrow::array::{
Array, ArrayRef, ArrowPrimitiveType, AsArray, ListArray, NullBufferBuilder,
};
use arrow::datatypes::Int64Type;
use arrow::util::bench_util::create_primitive_array;
use arrow_schema::Field;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use datafusion_expr::Accumulator;
use datafusion_functions_aggregate::array_agg::ArrayAggAccumulator;

use arrow::buffer::OffsetBuffer;
use arrow::util::test_util::seedable_rng;
use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
use rand::distributions::{Distribution, Standard};
use rand::Rng;

Expand Down
4 changes: 2 additions & 2 deletions datafusion/functions-aggregate/src/correlation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@ use std::mem::size_of_val;
use std::sync::Arc;

use arrow::array::{
downcast_array, Array, AsArray, BooleanArray, Float64Array, UInt64Array,
downcast_array, Array, AsArray, BooleanArray, Float64Array, NullBufferBuilder,
UInt64Array,
};
use arrow::compute::{and, filter, is_not_null, kernels::cast};
use arrow::datatypes::{Float64Type, UInt64Type};
use arrow::{
array::ArrayRef,
datatypes::{DataType, Field},
};
use arrow_buffer::NullBufferBuilder;
use datafusion_expr::{EmitTo, GroupsAccumulator};
use datafusion_functions_aggregate_common::aggregate::groups_accumulator::accumulate::accumulate_multiple;
use log::debug;
Expand Down
1 change: 0 additions & 1 deletion datafusion/functions-nested/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ path = "src/lib.rs"
[dependencies]
arrow = { workspace = true }
arrow-array = { workspace = true }
arrow-buffer = { workspace = true }
arrow-ord = { workspace = true }
arrow-schema = { workspace = true }
datafusion-common = { workspace = true }
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions-nested/benches/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

extern crate criterion;

use arrow::buffer::{OffsetBuffer, ScalarBuffer};
use arrow_array::{Int32Array, ListArray, StringArray};
use arrow_buffer::{OffsetBuffer, ScalarBuffer};
use arrow_schema::{DataType, Field};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rand::prelude::ThreadRng;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions-nested/src/array_has.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
//! [`ScalarUDFImpl`] definitions for array_has, array_has_all and array_has_any functions.
use arrow::array::{Array, ArrayRef, BooleanArray, OffsetSizeTrait};
use arrow::buffer::BooleanBuffer;
use arrow::datatypes::DataType;
use arrow::row::{RowConverter, Rows, SortField};
use arrow_array::{Datum, GenericListArray, Scalar};
use arrow_buffer::BooleanBuffer;
use datafusion_common::cast::as_generic_list_array;
use datafusion_common::utils::string_utils::string_array_to_vec;
use datafusion_common::{exec_err, Result, ScalarValue};
Expand Down
8 changes: 5 additions & 3 deletions datafusion/functions-nested/src/concat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@
use std::sync::Arc;
use std::{any::Any, cmp::Ordering};

use arrow::array::{Capacities, MutableArrayData};
use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
use arrow::array::{
Array, ArrayRef, Capacities, GenericListArray, MutableArrayData, NullBufferBuilder,
OffsetSizeTrait,
};
use arrow::buffer::OffsetBuffer;
use arrow_schema::{DataType, Field};
use datafusion_common::Result;
use datafusion_common::{
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions-nested/src/except.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
//! [`ScalarUDFImpl`] definitions for array_except function.
use crate::utils::{check_datatypes, make_scalar_function};
use arrow::buffer::OffsetBuffer;
use arrow::row::{RowConverter, SortField};
use arrow_array::cast::AsArray;
use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
use arrow_buffer::OffsetBuffer;
use arrow_schema::{DataType, FieldRef};
use datafusion_common::{exec_err, internal_err, HashSet, Result};
use datafusion_expr::{
Expand Down
13 changes: 4 additions & 9 deletions datafusion/functions-nested/src/extract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,12 @@

//! [`ScalarUDFImpl`] definitions for array_element, array_slice, array_pop_front, array_pop_back, and array_any_value functions.
use arrow::array::Array;
use arrow::array::ArrayRef;
use arrow::array::ArrowNativeTypeOp;
use arrow::array::Capacities;
use arrow::array::GenericListArray;
use arrow::array::Int64Array;
use arrow::array::MutableArrayData;
use arrow::array::OffsetSizeTrait;
use arrow::array::{
Array, ArrayRef, ArrowNativeTypeOp, Capacities, GenericListArray, Int64Array,
MutableArrayData, NullBufferBuilder, OffsetSizeTrait,
};
use arrow::buffer::OffsetBuffer;
use arrow::datatypes::DataType;
use arrow_buffer::NullBufferBuilder;
use arrow_schema::DataType::{FixedSizeList, LargeList, List};
use arrow_schema::Field;
use datafusion_common::cast::as_int64_array;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions-nested/src/flatten.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
//! [`ScalarUDFImpl`] definitions for flatten function.
use crate::utils::make_scalar_function;
use arrow::buffer::OffsetBuffer;
use arrow_array::{ArrayRef, GenericListArray, OffsetSizeTrait};
use arrow_buffer::OffsetBuffer;
use arrow_schema::DataType;
use arrow_schema::DataType::{FixedSizeList, LargeList, List, Null};
use datafusion_common::cast::{
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions-nested/src/make_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ use std::vec;

use crate::utils::make_scalar_function;
use arrow::array::{ArrayData, Capacities, MutableArrayData};
use arrow::buffer::OffsetBuffer;
use arrow_array::{
new_null_array, Array, ArrayRef, GenericListArray, NullArray, OffsetSizeTrait,
};
use arrow_buffer::OffsetBuffer;
use arrow_schema::DataType::{List, Null};
use arrow_schema::{DataType, Field};
use datafusion_common::utils::SingleRowListArrayBuilder;
Expand Down
3 changes: 2 additions & 1 deletion datafusion/functions-nested/src/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ use std::collections::VecDeque;
use std::sync::Arc;

use arrow::array::ArrayData;
use arrow::buffer::Buffer;
use arrow::datatypes::ToByteSlice;
use arrow_array::{Array, ArrayRef, MapArray, OffsetSizeTrait, StructArray};
use arrow_buffer::{Buffer, ToByteSlice};
use arrow_schema::{DataType, Field, SchemaBuilder};

use datafusion_common::utils::{fixed_size_list_to_arrays, list_to_arrays};
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions-nested/src/map_extract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
use arrow::array::{ArrayRef, Capacities, MutableArrayData};
use arrow_array::{make_array, ListArray};

use arrow::buffer::OffsetBuffer;
use arrow::datatypes::DataType;
use arrow_array::{Array, MapArray};
use arrow_buffer::OffsetBuffer;
use arrow_schema::Field;

use datafusion_common::{cast::as_map_array, exec_err, Result};
Expand Down
18 changes: 9 additions & 9 deletions datafusion/functions-nested/src/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@
//! [`ScalarUDFImpl`] definitions for range and gen_series functions.
use crate::utils::make_scalar_function;
use arrow::array::{Array, ArrayRef, Int64Array, ListArray, ListBuilder};
use arrow::datatypes::{DataType, Field};
use arrow_array::builder::{Date32Builder, TimestampNanosecondBuilder};
use arrow_array::temporal_conversions::as_datetime_with_timezone;
use arrow_array::timezone::Tz;
use arrow_array::types::{
Date32Type, IntervalMonthDayNanoType, TimestampNanosecondType as TSNT,
use arrow::array::{
builder::{Date32Builder, TimestampNanosecondBuilder},
temporal_conversions::as_datetime_with_timezone,
timezone::Tz,
types::{Date32Type, IntervalMonthDayNanoType, TimestampNanosecondType as TSNT},
Array, ArrayRef, Int64Array, ListArray, ListBuilder, NullArray, NullBufferBuilder,
TimestampNanosecondArray,
};
use arrow_array::{NullArray, TimestampNanosecondArray};
use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
use arrow::buffer::OffsetBuffer;
use arrow::datatypes::{DataType, Field};
use arrow_schema::DataType::*;
use arrow_schema::IntervalUnit::MonthDayNano;
use arrow_schema::TimeUnit::Nanosecond;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions-nested/src/remove.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@
use crate::utils;
use crate::utils::make_scalar_function;
use arrow::buffer::OffsetBuffer;
use arrow_array::cast::AsArray;
use arrow_array::{
new_empty_array, Array, ArrayRef, BooleanArray, GenericListArray, OffsetSizeTrait,
};
use arrow_buffer::OffsetBuffer;
use arrow_schema::{DataType, Field};
use datafusion_common::cast::as_int64_array;
use datafusion_common::{exec_err, Result};
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions-nested/src/repeat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@
use crate::utils::make_scalar_function;
use arrow::array::{Capacities, MutableArrayData};
use arrow::buffer::OffsetBuffer;
use arrow::compute;
use arrow::compute::cast;
use arrow_array::{
new_null_array, Array, ArrayRef, GenericListArray, ListArray, OffsetSizeTrait,
UInt64Array,
};
use arrow_buffer::OffsetBuffer;
use arrow_schema::DataType::{LargeList, List};
use arrow_schema::{DataType, Field};
use datafusion_common::cast::{as_large_list_array, as_list_array, as_uint64_array};
Expand Down
6 changes: 3 additions & 3 deletions datafusion/functions-nested/src/replace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
//! [`ScalarUDFImpl`] definitions for array_replace, array_replace_n and array_replace_all functions.
use arrow::array::{
Array, ArrayRef, AsArray, Capacities, MutableArrayData, OffsetSizeTrait,
Array, ArrayRef, AsArray, Capacities, GenericListArray, MutableArrayData,
NullBufferBuilder, OffsetSizeTrait,
};
use arrow::datatypes::DataType;

use arrow_array::GenericListArray;
use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
use arrow::buffer::OffsetBuffer;
use arrow_schema::Field;
use datafusion_common::cast::as_int64_array;
use datafusion_common::{exec_err, Result};
Expand Down
Loading

0 comments on commit d5ff3e7

Please sign in to comment.