diff --git a/Cargo.toml b/Cargo.toml index f73ada89c7..5987eafd0f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,5 +26,5 @@ keywords = ["deltalake", "delta", "datalake"] license = "Apache-2.0" repository = "https://github.com/delta-io/delta-kernel-rs" readme = "README.md" -rust-version = "1.84" +rust-version = "1.85" version = "0.16.0" diff --git a/README.md b/README.md index 19e6388401..503afae559 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ -# Delta Kernel (rust)   [![build-status]][actions] [![latest-version]][crates.io] [![docs]][docs.rs] [![rustc-version-1.84+]][rustc] +# Delta Kernel (rust)   [![build-status]][actions] [![latest-version]][crates.io] [![docs]][docs.rs] [![rustc-version-1.85+]][rustc] [build-status]: https://img.shields.io/github/actions/workflow/status/delta-io/delta-kernel-rs/build.yml?branch=main [actions]: https://github.com/delta-io/delta-kernel-rs/actions/workflows/build.yml?query=branch%3Amain [latest-version]: https://img.shields.io/crates/v/delta_kernel.svg [crates.io]: https://crates.io/crates/delta\_kernel -[rustc-version-1.84+]: https://img.shields.io/badge/rustc-1.84+-lightgray.svg -[rustc]: https://blog.rust-lang.org/2025/01/09/Rust-1.84.0/ +[rustc-version-1.85+]: https://img.shields.io/badge/rustc-1.85+-lightgray.svg +[rustc]: https://blog.rust-lang.org/2025/02/20/Rust-1.85.0/ [docs]: https://img.shields.io/docsrs/delta_kernel [docs.rs]: https://docs.rs/delta_kernel/latest/delta_kernel/ @@ -85,8 +85,8 @@ arrow versions as we can. We allow selecting the version of arrow to use via feature flags. Currently we support the following flags: -- `arrow-55`: Use arrow version 55 - `arrow-56`: Use arrow version 56 +- `arrow-57`: Use arrow version 57 - `arrow`: Use the latest arrow version. Note that this is an _unstable_ flag: we will bump this to the latest arrow version at every arrow version release. Only removing old arrow versions will cause a breaking change for kernel. If you require a specific version N of arrow, you should diff --git a/ffi/src/transaction/mod.rs b/ffi/src/transaction/mod.rs index 9242719242..80b0f9c8bd 100644 --- a/ffi/src/transaction/mod.rs +++ b/ffi/src/transaction/mod.rs @@ -241,7 +241,7 @@ mod tests { // writer must be closed to write footer let res = writer.close().unwrap(); - create_file_metadata(file_path, res.num_rows, metadata_schema) + create_file_metadata(file_path, res.file_metadata().num_rows(), metadata_schema) } #[tokio::test] diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml index 8e5190754d..dedc56a610 100644 --- a/kernel/Cargo.toml +++ b/kernel/Cargo.toml @@ -67,27 +67,27 @@ object_store = { version = "0.12.3", optional = true, features = ["aws", "azure" # TODO: Remove this once https://github.com/apache/arrow-rs/pull/8244 ships comfy-table = { version = "~7.1", optional = true } -# arrow 55 -[dependencies.arrow_55] +# arrow 56 +[dependencies.arrow_56] package = "arrow" -version = "55" +version = "56" features = ["chrono-tz", "ffi", "json", "prettyprint"] optional = true -[dependencies.parquet_55] +[dependencies.parquet_56] package = "parquet" -version = "55" +version = "56" features = ["async", "object_store"] optional = true -# arrow 56 -[dependencies.arrow_56] +# arrow 57 +[dependencies.arrow_57] package = "arrow" -version = "56" +version = "57" features = ["chrono-tz", "ffi", "json", "prettyprint"] optional = true -[dependencies.parquet_56] +[dependencies.parquet_57] package = "parquet" -version = "56" +version = "57" features = ["async", "object_store"] optional = true @@ -100,11 +100,11 @@ internal-api = [] integration-test = ["hdfs-native-object-store/integration-test"] # The default versions for arrow/parquet/object_store -arrow = ["arrow-56"] # latest arrow version +arrow = ["arrow-57"] # latest arrow version need-arrow = [] # need-arrow is a marker that the feature needs arrow dep -arrow-55 = ["dep:arrow_55", "dep:parquet_55", "object_store", "comfy-table"] arrow-56 = ["dep:arrow_56", "dep:parquet_56", "object_store", "comfy-table"] +arrow-57 = ["dep:arrow_57", "dep:parquet_57", "object_store", "comfy-table"] arrow-conversion = ["need-arrow"] arrow-expression = ["need-arrow"] diff --git a/kernel/examples/read-table-multi-threaded/Cargo.toml b/kernel/examples/read-table-multi-threaded/Cargo.toml index 17253b98dc..0dbbae99ab 100644 --- a/kernel/examples/read-table-multi-threaded/Cargo.toml +++ b/kernel/examples/read-table-multi-threaded/Cargo.toml @@ -5,11 +5,12 @@ edition = "2021" publish = false [dependencies] -arrow = { version = "56", features = ["prettyprint", "chrono-tz"] } +arrow = { version = "57", features = ["prettyprint", "chrono-tz"] } clap = { version = "4.5", features = ["derive"] } +# common pulls in arrow latest so we have to keep all these in sync here common = { path = "../common" } delta_kernel = { path = "../../../kernel", features = [ - "arrow-56", + "arrow", "default-engine-rustls", "internal-api", ] } diff --git a/kernel/examples/read-table-single-threaded/Cargo.toml b/kernel/examples/read-table-single-threaded/Cargo.toml index d13457a5a1..8ecc5733b5 100644 --- a/kernel/examples/read-table-single-threaded/Cargo.toml +++ b/kernel/examples/read-table-single-threaded/Cargo.toml @@ -5,11 +5,11 @@ edition = "2021" publish = false [dependencies] -arrow = { version = "56", features = ["prettyprint", "chrono-tz"] } +arrow = { version = "57", features = ["prettyprint", "chrono-tz"] } clap = { version = "4.5", features = ["derive"] } common = { path = "../common" } delta_kernel = { path = "../../../kernel", features = [ - "arrow-56", + "arrow", "default-engine-rustls", "internal-api", ] } diff --git a/kernel/examples/write-table/Cargo.toml b/kernel/examples/write-table/Cargo.toml index 3291944f63..8e1200d390 100644 --- a/kernel/examples/write-table/Cargo.toml +++ b/kernel/examples/write-table/Cargo.toml @@ -5,11 +5,12 @@ edition = "2021" publish = false [dependencies] -arrow = { version = "56", features = ["prettyprint", "chrono-tz"] } +arrow = { version = "57", features = ["prettyprint", "chrono-tz"] } clap = { version = "4.5", features = ["derive"] } +# NB: common depends on 'arrow' (latest) so have to match here common = { path = "../common" } delta_kernel = { path = "../../../kernel", features = [ - "arrow-56", + "arrow", "default-engine-rustls", "internal-api", ] } diff --git a/kernel/src/arrow_compat.rs b/kernel/src/arrow_compat.rs index e57e634045..b0cafa7f72 100644 --- a/kernel/src/arrow_compat.rs +++ b/kernel/src/arrow_compat.rs @@ -1,25 +1,25 @@ //! This module re-exports the different versions of arrow, parquet, and object_store we support. -#[cfg(feature = "arrow-56")] +#[cfg(feature = "arrow-57")] mod arrow_compat_shims { - pub use arrow_56 as arrow; - pub use parquet_56 as parquet; + pub use arrow_57 as arrow; + pub use parquet_57 as parquet; } -#[cfg(all(feature = "arrow-55", not(feature = "arrow-56")))] +#[cfg(all(feature = "arrow-56", not(feature = "arrow-57")))] mod arrow_compat_shims { - pub use arrow_55 as arrow; - pub use parquet_55 as parquet; + pub use arrow_56 as arrow; + pub use parquet_56 as parquet; } // if nothing is enabled but we need arrow because of some other feature flag, throw compile-time // error #[cfg(all( feature = "need-arrow", - not(feature = "arrow-55"), - not(feature = "arrow-56") + not(feature = "arrow-56"), + not(feature = "arrow-57") ))] -compile_error!("Requested a feature that needs arrow without enabling arrow. Please enable the `arrow-55` or `arrow-56` feature"); +compile_error!("Requested a feature that needs arrow without enabling arrow. Please enable the `arrow-56` or `arrow-57` feature"); -#[cfg(any(feature = "arrow-55", feature = "arrow-56"))] +#[cfg(any(feature = "arrow-56", feature = "arrow-57"))] pub use arrow_compat_shims::*; diff --git a/kernel/src/checkpoint/tests.rs b/kernel/src/checkpoint/tests.rs index bcae3fcbbf..fa04c40292 100644 --- a/kernel/src/checkpoint/tests.rs +++ b/kernel/src/checkpoint/tests.rs @@ -6,6 +6,10 @@ use crate::action_reconciliation::{ use crate::actions::{Add, Metadata, Protocol, Remove}; use crate::arrow::array::{ArrayRef, StructArray}; use crate::arrow::datatypes::{DataType, Schema}; +use crate::arrow::{ + array::{create_array, RecordBatch}, + datatypes::Field, +}; use crate::checkpoint::create_last_checkpoint_data; use crate::engine::arrow_data::ArrowEngineData; use crate::engine::default::{executor::tokio::TokioBackgroundExecutor, DefaultEngine}; @@ -14,11 +18,6 @@ use crate::schema::{DataType as KernelDataType, StructField, StructType}; use crate::utils::test_utils::Action; use crate::{DeltaResult, FileMeta, LogPath, Snapshot}; -use arrow_56::{ - array::{create_array, RecordBatch}, - datatypes::Field, -}; - use object_store::{memory::InMemory, path::Path, ObjectStore}; use serde_json::{from_slice, json, Value}; use test_utils::delta_path_for_version; diff --git a/kernel/src/engine/ensure_data_types.rs b/kernel/src/engine/ensure_data_types.rs index 2d5a660c36..54a16d03a8 100644 --- a/kernel/src/engine/ensure_data_types.rs +++ b/kernel/src/engine/ensure_data_types.rs @@ -352,7 +352,7 @@ mod tests { &incorrect_variant_arrow_type(), true, ), - "Invalid argument error: Incorrect datatype. Expected Struct(metadata Binary, value Binary), got Struct(field_1 Binary, field_2 Binary)", + "Invalid argument error: Incorrect datatype. Expected Struct(\"metadata\": Binary, \"value\": Binary), got Struct(\"field_1\": nullable Binary, \"field_2\": nullable Binary)", ) } diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index 1c36da8c11..2230c7b278 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -108,7 +108,7 @@ pub use log_path::LogPath; mod row_tracking; mod arrow_compat; -#[cfg(any(feature = "arrow-55", feature = "arrow-56"))] +#[cfg(any(feature = "arrow-56", feature = "arrow-57"))] pub use arrow_compat::*; pub mod kernel_predicates; diff --git a/mem-test/Cargo.toml b/mem-test/Cargo.toml index 81e3d2724e..d79e773705 100644 --- a/mem-test/Cargo.toml +++ b/mem-test/Cargo.toml @@ -14,7 +14,6 @@ version.workspace = true release = false [dependencies] -arrow = "56" delta_kernel = { path = "../kernel", features = ["arrow", "default-engine-rustls"] } dhat = "0.3" object_store = "0.12.3" diff --git a/mem-test/tests/dhat_large_table_data.rs b/mem-test/tests/dhat_large_table_data.rs index 1096e64908..8df54c38ab 100644 --- a/mem-test/tests/dhat_large_table_data.rs +++ b/mem-test/tests/dhat_large_table_data.rs @@ -45,9 +45,9 @@ fn write_large_parquet_to(path: &Path) -> Result<(), Box> let metadata = std::fs::metadata(&path)?; let file_size = metadata.len(); let total_row_group_size: i64 = parquet_metadata - .row_groups + .row_groups() .iter() - .map(|rg| rg.total_byte_size) + .map(|rg| rg.total_byte_size()) .sum(); println!("File size (compressed file size): {} bytes", file_size); println!(