File tree 4 files changed +28
-4
lines changed
4 files changed +28
-4
lines changed Original file line number Diff line number Diff line change @@ -135,6 +135,10 @@ impl FileFormat for AvroFormat {
135
135
Ok ( Arc :: new ( merged_schema) )
136
136
}
137
137
138
+ async fn transform_schema ( & self , schema : SchemaRef ) -> Result < SchemaRef > {
139
+ Ok ( schema)
140
+ }
141
+
138
142
async fn infer_stats (
139
143
& self ,
140
144
_state : & dyn Session ,
Original file line number Diff line number Diff line change @@ -393,6 +393,10 @@ impl FileFormat for CsvFormat {
393
393
Ok ( Arc :: new ( merged_schema) )
394
394
}
395
395
396
+ async fn transform_schema ( & self , schema : SchemaRef ) -> Result < SchemaRef > {
397
+ Ok ( schema)
398
+ }
399
+
396
400
async fn infer_stats (
397
401
& self ,
398
402
_state : & dyn Session ,
Original file line number Diff line number Diff line change @@ -233,6 +233,10 @@ impl FileFormat for JsonFormat {
233
233
Ok ( Arc :: new ( schema) )
234
234
}
235
235
236
+ async fn transform_schema ( & self , schema : SchemaRef ) -> Result < SchemaRef > {
237
+ Ok ( schema)
238
+ }
239
+
236
240
async fn infer_stats (
237
241
& self ,
238
242
_state : & dyn Session ,
Original file line number Diff line number Diff line change @@ -359,19 +359,29 @@ impl FileFormat for ParquetFormat {
359
359
Schema :: try_merge ( schemas)
360
360
} ?;
361
361
362
+ self . transform_schema ( Arc :: new ( schema) ) . await
363
+ }
364
+
365
+ /// For the Parquet format, `transform_schema` applies two steps:
366
+ ///
367
+ /// 1. Transform a schema so that any binary types are strings
368
+ /// See [transform_binary_to_string]
369
+ ///
370
+ /// 2. Transform a schema to use view types for Utf8 and Binary
371
+ /// See [transform_schema_to_view] for details
372
+ async fn transform_schema ( & self , schema : SchemaRef ) -> Result < SchemaRef > {
362
373
let schema = if self . binary_as_string ( ) {
363
- transform_binary_to_string ( & schema)
374
+ Arc :: new ( transform_binary_to_string ( schema. as_ref ( ) ) )
364
375
} else {
365
376
schema
366
377
} ;
367
378
368
379
let schema = if self . force_view_types ( ) {
369
- transform_schema_to_view ( & schema)
380
+ Arc :: new ( transform_schema_to_view ( schema. as_ref ( ) ) )
370
381
} else {
371
382
schema
372
383
} ;
373
-
374
- Ok ( Arc :: new ( schema) )
384
+ Ok ( schema)
375
385
}
376
386
377
387
async fn infer_stats (
@@ -598,6 +608,8 @@ pub fn transform_schema_to_view(schema: &Schema) -> Schema {
598
608
}
599
609
600
610
/// Transform a schema so that any binary types are strings
611
+ ///
612
+ /// See [ParquetFormat::binary_as_string] for details
601
613
pub fn transform_binary_to_string ( schema : & Schema ) -> Schema {
602
614
let transformed_fields: Vec < Arc < Field > > = schema
603
615
. fields
You can’t perform that action at this time.
0 commit comments