Skip to content

Commit afadd93

Browse files
committed
Add comments
1 parent 7255837 commit afadd93

File tree

4 files changed

+28
-4
lines changed

4 files changed

+28
-4
lines changed

datafusion/datasource-avro/src/file_format.rs

+4
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,10 @@ impl FileFormat for AvroFormat {
135135
Ok(Arc::new(merged_schema))
136136
}
137137

138+
async fn transform_schema(&self, schema: SchemaRef) -> Result<SchemaRef> {
139+
Ok(schema)
140+
}
141+
138142
async fn infer_stats(
139143
&self,
140144
_state: &dyn Session,

datafusion/datasource-csv/src/file_format.rs

+4
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,10 @@ impl FileFormat for CsvFormat {
393393
Ok(Arc::new(merged_schema))
394394
}
395395

396+
async fn transform_schema(&self, schema: SchemaRef) -> Result<SchemaRef> {
397+
Ok(schema)
398+
}
399+
396400
async fn infer_stats(
397401
&self,
398402
_state: &dyn Session,

datafusion/datasource-json/src/file_format.rs

+4
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,10 @@ impl FileFormat for JsonFormat {
233233
Ok(Arc::new(schema))
234234
}
235235

236+
async fn transform_schema(&self, schema: SchemaRef) -> Result<SchemaRef> {
237+
Ok(schema)
238+
}
239+
236240
async fn infer_stats(
237241
&self,
238242
_state: &dyn Session,

datafusion/datasource-parquet/src/file_format.rs

+16-4
Original file line numberDiff line numberDiff line change
@@ -359,19 +359,29 @@ impl FileFormat for ParquetFormat {
359359
Schema::try_merge(schemas)
360360
}?;
361361

362+
self.transform_schema(Arc::new(schema)).await
363+
}
364+
365+
/// Transforms the schema for the Parquet format in up to two steps:
366+
///
367+
/// 1. If `binary_as_string` is enabled, transform the schema so that any binary types are strings
368+
/// See [transform_binary_to_string] for details
369+
///
370+
/// 2. If `force_view_types` is enabled, transform the schema to use view types for Utf8 and Binary
371+
/// See [transform_schema_to_view] for details
372+
async fn transform_schema(&self, schema: SchemaRef) -> Result<SchemaRef> {
362373
let schema = if self.binary_as_string() {
363-
transform_binary_to_string(&schema)
374+
Arc::new(transform_binary_to_string(schema.as_ref()))
364375
} else {
365376
schema
366377
};
367378

368379
let schema = if self.force_view_types() {
369-
transform_schema_to_view(&schema)
380+
Arc::new(transform_schema_to_view(schema.as_ref()))
370381
} else {
371382
schema
372383
};
373-
374-
Ok(Arc::new(schema))
384+
Ok(schema)
375385
}
376386

377387
async fn infer_stats(
@@ -598,6 +608,8 @@ pub fn transform_schema_to_view(schema: &Schema) -> Schema {
598608
}
599609

600610
/// Transform a schema so that any binary types are strings
611+
///
612+
/// See [ParquetFormat::binary_as_string] for details
601613
pub fn transform_binary_to_string(schema: &Schema) -> Schema {
602614
let transformed_fields: Vec<Arc<Field>> = schema
603615
.fields

0 commit comments

Comments
 (0)