Skip to content

Commit c94163f

Browse files
committed
Implement crude stats for mem
1 parent 96af0af commit c94163f

File tree

1 file changed

+13
-1
lines changed

1 file changed

+13
-1
lines changed

datafusion/core/src/datasource/memory.rs

+13-1
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,11 @@ use crate::physical_planner::create_physical_sort_exprs;
3737
use arrow::datatypes::SchemaRef;
3838
use arrow::record_batch::RecordBatch;
3939
use datafusion_catalog::Session;
40-
use datafusion_common::{not_impl_err, plan_err, Constraints, DFSchema, SchemaExt};
40+
use datafusion_common::stats::Precision;
41+
use datafusion_common::{not_impl_err, plan_err, Constraints, DFSchema, SchemaExt, Statistics};
4142
pub use datafusion_datasource::memory::MemorySourceConfig;
4243
pub use datafusion_datasource::source::DataSourceExec;
44+
4345
use datafusion_execution::TaskContext;
4446
use datafusion_expr::dml::InsertOp;
4547
use datafusion_expr::SortExpr;
@@ -67,11 +69,13 @@ pub struct MemTable {
6769
/// Optional pre-known sort order(s). Must be `SortExpr`s.
6870
/// Inserting data into this table removes the order.
6971
pub sort_order: Arc<Mutex<Vec<Vec<SortExpr>>>>,
72+
num_rows: usize,
7073
}
7174

7275
impl MemTable {
7376
/// Create a new in-memory table from the provided schema and record batches
7477
pub fn try_new(schema: SchemaRef, partitions: Vec<Vec<RecordBatch>>) -> Result<Self> {
78+
let mut num_rows = 0_usize;
7579
for batches in partitions.iter().flatten() {
7680
let batches_schema = batches.schema();
7781
if !schema.contains(&batches_schema) {
@@ -81,6 +85,7 @@ impl MemTable {
8185
);
8286
return plan_err!("Mismatch between schema and batches");
8387
}
88+
num_rows += batches.num_rows();
8489
}
8590

8691
Ok(Self {
@@ -92,6 +97,7 @@ impl MemTable {
9297
constraints: Constraints::empty(),
9398
column_defaults: HashMap::new(),
9499
sort_order: Arc::new(Mutex::new(vec![])),
100+
num_rows
95101
})
96102
}
97103

@@ -215,6 +221,12 @@ impl TableProvider for MemTable {
215221
TableType::Base
216222
}
217223

224+
/// Reports table-level statistics; only the row count is filled in.
///
/// Always returns `Some`: the statistics are built by `Statistics::new_unknown`
/// for this table's schema, then `num_rows` is overwritten with the stored
/// `self.num_rows` wrapped in `Precision::Inexact`.
fn statistics(&self) -> Option<Statistics> {
225+
// Baseline statistics for the schema (presumably every field unknown; confirm against `Statistics::new_unknown`).
let mut stats = Statistics::new_unknown(&self.schema);
226+
// NOTE(review): reported as `Inexact` rather than `Exact`, presumably because the count is captured at construction and can go stale after inserts; confirm.
stats.num_rows = Precision::Inexact(self.num_rows);
227+
Some(stats)
228+
}
229+
218230
async fn scan(
219231
&self,
220232
state: &dyn Session,

0 commit comments

Comments
 (0)