Skip to content

Commit

Permalink
Add debug print statement to DataFrame.collect for improved traceability
Browse files Browse the repository at this point in the history
  • Loading branch information
kosiew committed Jan 13, 2025
1 parent a8d4077 commit 42f4e7d
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 0 deletions.
1 change: 1 addition & 0 deletions datafusion/core/src/dataframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1208,6 +1208,7 @@ impl DataFrame {
/// # }
/// ```
pub async fn collect(self) -> Result<Vec<RecordBatch>> {
println!("==> Datafrrame.collect");
let task_ctx = Arc::new(self.task_ctx());
let plan = self.create_physical_plan().await?;
collect(plan, task_ctx).await
Expand Down
2 changes: 2 additions & 0 deletions datafusion/physical-plan/src/execution_plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,8 @@ pub fn execute_stream(
plan: Arc<dyn ExecutionPlan>,
context: Arc<TaskContext>,
) -> Result<SendableRecordBatchStream> {
let num_partitions = plan.output_partitioning().partition_count();
println!("==> num_partitions: {}", num_partitions);
match plan.output_partitioning().partition_count() {
0 => Ok(Box::pin(EmptyRecordBatchStream::new(plan.schema()))),
1 => plan.execute(0, context),
Expand Down

0 comments on commit 42f4e7d

Please sign in to comment.