Skip to content

Commit 979d07a

Browse files
committed
Make statistics computation opt-in for IcebergTableProvider
1 parent 98bdd8a commit 979d07a

File tree

5 files changed

+20
-12
lines changed

5 files changed

+20
-12
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/integration_tests/tests/datafusion.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,8 @@ async fn test_statistics() -> Result<()> {
3838

3939
let stats = IcebergTableProvider::try_new_from_table(table)
4040
.await?
41+
.with_computed_statistics()
42+
.await
4143
.statistics();
4244

4345
assert_eq!(

crates/integrations/datafusion/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ async-trait = { workspace = true }
3434
datafusion = { version = "44" }
3535
futures = { workspace = true }
3636
iceberg = { workspace = true }
37+
log = { workspace = true }
3738
tokio = { workspace = true }
3839

3940
[dev-dependencies]

crates/integrations/datafusion/src/table/mod.rs

Lines changed: 15 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -50,13 +50,12 @@ pub struct IcebergTableProvider {
5050
}
5151

5252
impl IcebergTableProvider {
53-
pub(crate) async fn new(table: Table, schema: ArrowSchemaRef) -> Self {
54-
let statistics = compute_statistics(&table, None).await.ok();
53+
pub(crate) fn new(table: Table, schema: ArrowSchemaRef) -> Self {
5554
IcebergTableProvider {
5655
table,
5756
snapshot_id: None,
5857
schema,
59-
statistics,
58+
statistics: None,
6059
}
6160
}
6261
/// Asynchronously tries to construct a new [`IcebergTableProvider`]
@@ -72,25 +71,23 @@ impl IcebergTableProvider {
7271

7372
let schema = Arc::new(schema_to_arrow_schema(table.metadata().current_schema())?);
7473

75-
let statistics = compute_statistics(&table, None).await.ok();
7674
Ok(IcebergTableProvider {
7775
table,
7876
snapshot_id: None,
7977
schema,
80-
statistics,
78+
statistics: None,
8179
})
8280
}
8381

8482
/// Asynchronously tries to construct a new [`IcebergTableProvider`]
8583
/// using the given table. Can be used to create a table provider from an existing table regardless of the catalog implementation.
8684
pub async fn try_new_from_table(table: Table) -> Result<Self> {
8785
let schema = Arc::new(schema_to_arrow_schema(table.metadata().current_schema())?);
88-
let statistics = compute_statistics(&table, None).await.ok();
8986
Ok(IcebergTableProvider {
9087
table,
9188
snapshot_id: None,
9289
schema,
93-
statistics,
90+
statistics: None,
9491
})
9592
}
9693

@@ -111,14 +108,23 @@ impl IcebergTableProvider {
111108
})?;
112109
let schema = snapshot.schema(table.metadata())?;
113110
let schema = Arc::new(schema_to_arrow_schema(&schema)?);
114-
let statistics = compute_statistics(&table, Some(snapshot_id)).await.ok();
115111
Ok(IcebergTableProvider {
116112
table,
117113
snapshot_id: Some(snapshot_id),
118114
schema,
119-
statistics,
115+
statistics: None,
120116
})
121117
}
118+
119+
/// Tries to compute the underlying table statistics directly from the manifest/data files; if that fails, a warning is logged and the statistics are left as `None`.
120+
pub async fn with_computed_statistics(mut self) -> Self {
121+
let statistics = compute_statistics(&self.table, self.snapshot_id)
122+
.await
123+
.inspect_err(|err| log::warn!("Failed computing table statistics: {err}"))
124+
.ok();
125+
self.statistics = statistics;
126+
self
127+
}
122128
}
123129

124130
#[async_trait]

crates/integrations/datafusion/src/table/table_provider_factory.rs

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -129,9 +129,7 @@ impl TableProviderFactory for IcebergTableProviderFactory {
129129
let schema = schema_to_arrow_schema(table.metadata().current_schema())
130130
.map_err(to_datafusion_error)?;
131131

132-
Ok(Arc::new(
133-
IcebergTableProvider::new(table, Arc::new(schema)).await,
134-
))
132+
Ok(Arc::new(IcebergTableProvider::new(table, Arc::new(schema))))
135133
}
136134
}
137135

0 commit comments

Comments
 (0)