Skip to content

Commit

Permalink
minor: Move file compression to datafusion-catalog-listing (#14555)
Browse files: browse the repository at this point in the history
* move file_compression

* old removal

* fix taplo

* proper format

* fix: some CI

* for CI

* fix: CI

* add compression feature to sqllogictest

* fix: CI

* cargo lock + toml

* proper way of doing things
  • Loading branch information
logan-keede authored Feb 10, 2025
1 parent 603721d commit 7fde24a
Show file tree
Hide file tree
Showing 7 changed files with 24 additions and 19 deletions.
9 changes: 6 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ ctor = "0.2.9"
dashmap = "6.0.1"
datafusion = { path = "datafusion/core", version = "45.0.0", default-features = false }
datafusion-catalog = { path = "datafusion/catalog", version = "45.0.0" }
datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "45.0.0" }
datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "45.0.0", default-features = false }
datafusion-common = { path = "datafusion/common", version = "45.0.0", default-features = false }
datafusion-common-runtime = { path = "datafusion/common-runtime", version = "45.0.0" }
datafusion-doc = { path = "datafusion/doc", version = "45.0.0" }
Expand Down
12 changes: 11 additions & 1 deletion datafusion/catalog-listing/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ repository.workspace = true
rust-version.workspace = true
version.workspace = true

[features]
compression = ["async-compression", "xz2", "bzip2", "flate2", "zstd", "tokio-util"]
default = ["compression"]

[dependencies]
arrow = { workspace = true }
arrow-schema = { workspace = true }
Expand All @@ -37,6 +41,8 @@ async-compression = { version = "0.4.0", features = [
"zstd",
"tokio",
], optional = true }
bytes = { workspace = true }
bzip2 = { version = "0.5.0", optional = true }
chrono = { workspace = true }
datafusion-catalog = { workspace = true }
datafusion-common = { workspace = true, features = ["object_store"] }
Expand All @@ -45,17 +51,21 @@ datafusion-expr = { workspace = true }
datafusion-physical-expr = { workspace = true }
datafusion-physical-expr-common = { workspace = true }
datafusion-physical-plan = { workspace = true }
flate2 = { version = "1.0.24", optional = true }
futures = { workspace = true }
glob = "0.3.0"
itertools = { workspace = true }
log = { workspace = true }
object_store = { workspace = true }
tokio = { workspace = true }
tokio-util = { version = "0.7.4", features = ["io"], optional = true }
url = { workspace = true }
xz2 = { version = "0.1", optional = true, features = ["static"] }
zstd = { version = "0.13", optional = true, default-features = false }

[dev-dependencies]
async-trait = { workspace = true }
tempfile = { workspace = true }
tokio = { workspace = true }

[lints]
workspace = true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
use std::str::FromStr;

use crate::error::{DataFusionError, Result};
use datafusion_common::error::{DataFusionError, Result};

use datafusion_common::parsers::CompressionTypeVariant::{self, *};
use datafusion_common::GetExt;
Expand Down Expand Up @@ -254,8 +254,8 @@ pub trait FileTypeExt {
mod tests {
use std::str::FromStr;

use crate::datasource::file_format::file_compression_type::FileCompressionType;
use crate::error::DataFusionError;
use super::FileCompressionType;
use datafusion_common::error::DataFusionError;

use bytes::Bytes;
use futures::StreamExt;
Expand Down
1 change: 1 addition & 0 deletions datafusion/catalog-listing/src/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
//! A table that uses the `ObjectStore` listing capability
//! to get the list of files to process.
pub mod file_compression_type;
pub mod file_groups;
pub mod helpers;
pub mod url;
Expand Down
11 changes: 1 addition & 10 deletions datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ array_expressions = ["nested_expressions"]
# Used to enable the avro format
avro = ["apache-avro", "num-traits", "datafusion-common/avro"]
backtrace = ["datafusion-common/backtrace"]
compression = ["xz2", "bzip2", "flate2", "zstd", "async-compression", "tokio-util"]
compression = ["xz2", "bzip2", "flate2", "zstd", "datafusion-catalog-listing/compression"]
crypto_expressions = ["datafusion-functions/crypto_expressions"]
datetime_expressions = ["datafusion-functions/datetime_expressions"]
default = [
Expand Down Expand Up @@ -87,13 +87,6 @@ apache-avro = { version = "0.17", optional = true }
arrow = { workspace = true }
arrow-ipc = { workspace = true }
arrow-schema = { workspace = true }
async-compression = { version = "0.4.0", features = [
"bzip2",
"gzip",
"xz",
"zstd",
"tokio",
], optional = true }
async-trait = { workspace = true }
bytes = { workspace = true }
bzip2 = { version = "0.5.0", optional = true }
Expand All @@ -117,7 +110,6 @@ datafusion-physical-plan = { workspace = true }
datafusion-sql = { workspace = true }
flate2 = { version = "1.0.24", optional = true }
futures = { workspace = true }
glob = "0.3.0"
itertools = { workspace = true }
log = { workspace = true }
num-traits = { version = "0.2", optional = true }
Expand All @@ -129,7 +121,6 @@ regex = { workspace = true }
sqlparser = { workspace = true }
tempfile = { workspace = true }
tokio = { workspace = true }
tokio-util = { version = "0.7.4", features = ["io"], optional = true }
url = { workspace = true }
uuid = { version = "1.7", features = ["v4", "js"] }
xz2 = { version = "0.1", optional = true, features = ["static"] }
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/file_format/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ pub const DEFAULT_SCHEMA_INFER_MAX_RECORD: usize = 1000;
pub mod arrow;
pub mod avro;
pub mod csv;
pub mod file_compression_type;
pub mod json;
pub mod options;
#[cfg(feature = "parquet")]
pub mod parquet;
pub mod write;
pub use datafusion_catalog_listing::file_compression_type;

use std::any::Any;
use std::collections::{HashMap, VecDeque};
Expand Down

0 comments on commit 7fde24a

Please sign in to comment.