Skip to content

Commit f0323f2

Browse files
authored
Upgrade to datafusion 47 (#3016)
Getting ready for the [datafusion 47 release](apache/datafusion#15072).

## Current issues

- [x] apache/datafusion#15072
- [x] duckdb-rs depends on arrow 54; opened a PR to fix it: duckdb/duckdb-rs#496.
- [ ] object-store 0.12 has a regression on Azure. The priority is unclear, but the root cause shouldn't be too hard to find if we care. See apache/arrow-rs-object-store#320.
1 parent 57b9ccb commit f0323f2

File tree

24 files changed

+390
-397
lines changed

24 files changed

+390
-397
lines changed

Cargo.lock

+309-152
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+24-24
Original file line numberDiff line numberDiff line change
@@ -55,17 +55,17 @@ version = "0.32.0"
5555
anyhow = "1.0.95"
5656
arbitrary = "1.3.2"
5757
arrayref = "0.3.7"
58-
arrow = "54.3.1"
59-
arrow-arith = "54.3.1"
60-
arrow-array = "54.3.1"
61-
arrow-buffer = "54.3.1"
62-
arrow-cast = "54.3.1"
63-
arrow-ord = "54.3.1"
64-
arrow-schema = "54.3.1"
65-
arrow-select = "54.3.1"
66-
arrow-string = "54.3.1"
58+
arrow = "55"
59+
arrow-arith = "55"
60+
arrow-array = "55"
61+
arrow-buffer = "55"
62+
arrow-cast = "55"
63+
arrow-ord = "55"
64+
arrow-schema = "55"
65+
arrow-select = "55"
66+
arrow-string = "55"
6767
async-once-cell = "0.5.4"
68-
async-trait = "0.1.86"
68+
async-trait = "0.1.88"
6969
backtrace = "0.3.74"
7070
bit-vec = "0.8.0"
7171
bytes = "1.10"
@@ -76,12 +76,13 @@ clap = "4.5"
7676
compio = { version = "0.14", features = ["io-uring"], default-features = false }
7777
crossterm = "0.28"
7878
dashmap = "6.1.0"
79-
datafusion = { version = "46", default-features = false }
80-
datafusion-common = { version = "46" }
81-
datafusion-execution = { version = "46" }
82-
datafusion-expr = { version = "46" }
83-
datafusion-physical-expr = { version = "46" }
84-
datafusion-physical-plan = { version = "46" }
79+
datafusion = { version = "47", default-features = false }
80+
datafusion-common = { version = "47" }
81+
datafusion-datasource = { version = "47" }
82+
datafusion-execution = { version = "47" }
83+
datafusion-expr = { version = "47" }
84+
datafusion-physical-expr = { version = "47" }
85+
datafusion-physical-plan = { version = "47" }
8586
divan = { package = "codspeed-divan-compat", version = "2.8.0" }
8687
duckdb = { path = "duckdb-vortex/duckdb-rs/crates/duckdb", features = [
8788
"vtab-full",
@@ -90,16 +91,16 @@ duckdb = { path = "duckdb-vortex/duckdb-rs/crates/duckdb", features = [
9091
dyn-hash = "0.2.0"
9192
enum-iterator = "2.0.0"
9293
fastlanes = "0.1.8"
93-
flatbuffers = "25"
94-
flexbuffers = "25"
94+
flatbuffers = "25.2.10"
95+
flexbuffers = "25.2.10"
9596
flume = "0.11"
9697
fsst-rs = "0.5.2"
9798
futures = { version = "0.3.31", default-features = false }
9899
futures-util = "0.3.31"
99100
getrandom = "0.3"
100101
goldenfile = "1"
101102
governor = "0.10"
102-
half = { version = "2.2.1", features = ["std", "num-traits"] }
103+
half = { version = "2.5", features = ["std", "num-traits"] }
103104
hashbrown = "0.15.1"
104105
homedir = "0.3.3"
105106
humansize = "2.1.3"
@@ -116,20 +117,19 @@ mimalloc = "0.1.42"
116117
moka = { version = "0.12.10", default-features = false }
117118
num-traits = "0.2.19"
118119
num_enum = "0.7.2"
119-
object_store = "0.11.0"
120+
object_store = "0.12"
120121
opentelemetry = "0.29.0"
121122
opentelemetry-otlp = "0.29.0"
122123
opentelemetry_sdk = "0.29.0"
123124
parking_lot = "0.12.3"
124-
parquet = "54.3.1"
125+
parquet = "55"
125126
paste = "1.0.15"
126127
pin-project = "1.1.5"
127128
pin-project-lite = "0.2.15"
128129
prost = "0.13.4"
129130
prost-build = "0.13.4"
130131
prost-types = "0.13.4"
131-
# bump pyo3 to >0.24 on next arrow release, for RUSTSEC-2025-0020
132-
pyo3 = { version = "0.23.4", features = ["extension-module", "abi3-py310"] }
132+
pyo3 = { version = "0.24.1", features = ["extension-module", "abi3-py310"] }
133133
pyo3-log = "0.12.1"
134134
rancor = "0.1.0"
135135
rand = "0.9.0"
@@ -165,7 +165,7 @@ tracing-chrome = "0.7.2"
165165
tracing-futures = "0.2.5"
166166
tracing-subscriber = "0.3.19"
167167
url = "2.5.4"
168-
uuid = { version = "1.15", features = ["js"] }
168+
uuid = { version = "1.16", features = ["js"] }
169169
wasm-bindgen-futures = "0.4.39"
170170
witchcraft-metrics = "1.0.1"
171171

bench-vortex/src/bin/clickbench.rs

+1-6
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,6 @@ struct Args {
5858
#[arg(long)]
5959
queries_file: Option<PathBuf>,
6060
#[arg(long, default_value_t = false)]
61-
emulate_object_store: bool,
62-
#[arg(long, default_value_t = false)]
6361
disable_datafusion_cache: bool,
6462
#[arg(long)]
6563
export_spans: bool,
@@ -218,10 +216,7 @@ fn main() -> anyhow::Result<()> {
218216

219217
let mut engine_ctx = match engine {
220218
Engine::DataFusion => {
221-
let session_ctx = df::get_session_context(
222-
args.emulate_object_store,
223-
args.disable_datafusion_cache,
224-
);
219+
let session_ctx = df::get_session_context(args.disable_datafusion_cache);
225220
// Register object store to the session.
226221
df::make_object_store(&session_ctx, &base_url)?;
227222

bench-vortex/src/bin/public_bi.rs

+1-4
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,6 @@ struct Args {
3939
#[arg(long, default_value_t, value_enum)]
4040
display_format: DisplayFormat,
4141
#[arg(long, default_value_t = false)]
42-
emulate_object_store: bool,
43-
#[arg(long, default_value_t = false)]
4442
disable_datafusion_cache: bool,
4543
#[arg(short, long, value_delimiter = ',')]
4644
dataset: PBIDataset,
@@ -107,8 +105,7 @@ fn main() -> anyhow::Result<()> {
107105

108106
for target in &args.targets {
109107
let format = target.format();
110-
let session =
111-
df::get_session_context(args.emulate_object_store, args.disable_datafusion_cache);
108+
let session = df::get_session_context(args.disable_datafusion_cache);
112109

113110
let file_type = match format {
114111
Format::Csv => FileType::Csv,

bench-vortex/src/bin/tpch.rs

+1-7
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,6 @@ struct Args {
6565
#[arg(short, long, default_value_t, value_enum)]
6666
display_format: DisplayFormat,
6767
#[arg(long, default_value_t = false)]
68-
emulate_object_store: bool,
69-
#[arg(long, default_value_t = false)]
7068
disable_datafusion_cache: bool,
7169
#[arg(long, default_value_t, value_enum)]
7270
data_generator: DataGenerator,
@@ -176,7 +174,6 @@ fn main() -> anyhow::Result<()> {
176174
args.iterations,
177175
args.targets,
178176
args.display_format,
179-
args.emulate_object_store,
180177
args.disable_datafusion_cache,
181178
args.scale_factor,
182179
url,
@@ -295,7 +292,6 @@ async fn bench_main(
295292
iterations: usize,
296293
targets: Vec<Target>,
297294
display_format: DisplayFormat,
298-
emulate_object_store: bool,
299295
disable_datafusion_cache: bool,
300296
scale_factor: u8,
301297
url: Url,
@@ -351,9 +347,7 @@ async fn bench_main(
351347
let format = target.format();
352348
match engine {
353349
Engine::DataFusion => {
354-
let ctx =
355-
load_datasets(&url, format, emulate_object_store, disable_datafusion_cache)
356-
.await?;
350+
let ctx = load_datasets(&url, format, disable_datafusion_cache).await?;
357351

358352
let mut plans = Vec::new();
359353

bench-vortex/src/blob.rs

-157
This file was deleted.

bench-vortex/src/engines/df/mod.rs

+2-14
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ use datafusion::datasource::provider::DefaultTableFactory;
77
use datafusion::execution::SessionStateBuilder;
88
use datafusion::execution::cache::cache_manager::CacheManagerConfig;
99
use datafusion::execution::cache::cache_unit::{DefaultFileStatisticsCache, DefaultListFilesCache};
10-
use datafusion::execution::object_store::DefaultObjectStoreRegistry;
1110
use datafusion::execution::runtime_env::RuntimeEnvBuilder;
1211
use datafusion::physical_plan::collect;
1312
use datafusion::physical_plan::execution_plan::ExecutionPlan;
@@ -22,8 +21,6 @@ use url::Url;
2221
use vortex::error::VortexResult;
2322
use vortex_datafusion::persistent::VortexFormatFactory;
2423

25-
use crate::blob::SlowObjectStoreRegistry;
26-
2724
pub static GIT_COMMIT_ID: LazyLock<String> = LazyLock::new(|| {
2825
String::from_utf8(
2926
Command::new("git")
@@ -37,17 +34,8 @@ pub static GIT_COMMIT_ID: LazyLock<String> = LazyLock::new(|| {
3734
.to_string()
3835
});
3936

40-
pub fn get_session_context(
41-
emulate_object_store: bool,
42-
disable_datafusion_cache: bool,
43-
) -> SessionContext {
44-
let registry = if emulate_object_store {
45-
Arc::new(SlowObjectStoreRegistry::default()) as _
46-
} else {
47-
Arc::new(DefaultObjectStoreRegistry::new()) as _
48-
};
49-
50-
let mut rt_builder = RuntimeEnvBuilder::new().with_object_store_registry(registry);
37+
pub fn get_session_context(disable_datafusion_cache: bool) -> SessionContext {
38+
let mut rt_builder = RuntimeEnvBuilder::new();
5139

5240
if !disable_datafusion_cache {
5341
let file_static_cache = Arc::new(DefaultFileStatisticsCache::default());

bench-vortex/src/lib.rs

-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ use itertools::Itertools;
1111
use serde::Serialize;
1212

1313
pub mod bench_run;
14-
pub mod blob;
1514
pub mod clickbench;
1615
pub mod compress;
1716
pub mod conversions;

bench-vortex/src/tpch/mod.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,9 @@ pub const EXPECTED_ROW_COUNTS_SF10: [usize; TPC_H_ROW_COUNT_ARRAY_LENGTH] = [
4040
pub async fn load_datasets(
4141
base_dir: &Url,
4242
format: Format,
43-
emulate_object_store: bool,
4443
disable_datafusion_cache: bool,
4544
) -> anyhow::Result<SessionContext> {
46-
let context = get_session_context(emulate_object_store, disable_datafusion_cache);
45+
let context = get_session_context(disable_datafusion_cache);
4746

4847
let object_store = make_object_store(&context, base_dir)?;
4948

0 commit comments

Comments
 (0)