@@ -758,7 +758,7 @@ macro_rules! get_data_page_statistics {
758
758
( $stat_type_prefix: ident, $data_type: ident, $iterator: ident) => {
759
759
paste! {
760
760
match $data_type {
761
- Some ( DataType :: Boolean ) => {
761
+ DataType :: Boolean => {
762
762
let iterator = [ <$stat_type_prefix BooleanDataPageStatsIterator >] :: new( $iterator) ;
763
763
let mut builder = BooleanBuilder :: new( ) ;
764
764
for x in iterator {
@@ -772,7 +772,7 @@ macro_rules! get_data_page_statistics {
772
772
}
773
773
Ok ( Arc :: new( builder. finish( ) ) )
774
774
} ,
775
- Some ( DataType :: UInt8 ) => Ok ( Arc :: new(
775
+ DataType :: UInt8 => Ok ( Arc :: new(
776
776
UInt8Array :: from_iter(
777
777
[ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator)
778
778
. map( |x| {
@@ -783,7 +783,7 @@ macro_rules! get_data_page_statistics {
783
783
. flatten( )
784
784
)
785
785
) ) ,
786
- Some ( DataType :: UInt16 ) => Ok ( Arc :: new(
786
+ DataType :: UInt16 => Ok ( Arc :: new(
787
787
UInt16Array :: from_iter(
788
788
[ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator)
789
789
. map( |x| {
@@ -794,7 +794,7 @@ macro_rules! get_data_page_statistics {
794
794
. flatten( )
795
795
)
796
796
) ) ,
797
- Some ( DataType :: UInt32 ) => Ok ( Arc :: new(
797
+ DataType :: UInt32 => Ok ( Arc :: new(
798
798
UInt32Array :: from_iter(
799
799
[ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator)
800
800
. map( |x| {
@@ -804,7 +804,7 @@ macro_rules! get_data_page_statistics {
804
804
} )
805
805
. flatten( )
806
806
) ) ) ,
807
- Some ( DataType :: UInt64 ) => Ok ( Arc :: new(
807
+ DataType :: UInt64 => Ok ( Arc :: new(
808
808
UInt64Array :: from_iter(
809
809
[ <$stat_type_prefix Int64DataPageStatsIterator >] :: new( $iterator)
810
810
. map( |x| {
@@ -814,7 +814,7 @@ macro_rules! get_data_page_statistics {
814
814
} )
815
815
. flatten( )
816
816
) ) ) ,
817
- Some ( DataType :: Int8 ) => Ok ( Arc :: new(
817
+ DataType :: Int8 => Ok ( Arc :: new(
818
818
Int8Array :: from_iter(
819
819
[ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator)
820
820
. map( |x| {
@@ -825,7 +825,7 @@ macro_rules! get_data_page_statistics {
825
825
. flatten( )
826
826
)
827
827
) ) ,
828
- Some ( DataType :: Int16 ) => Ok ( Arc :: new(
828
+ DataType :: Int16 => Ok ( Arc :: new(
829
829
Int16Array :: from_iter(
830
830
[ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator)
831
831
. map( |x| {
@@ -836,9 +836,9 @@ macro_rules! get_data_page_statistics {
836
836
. flatten( )
837
837
)
838
838
) ) ,
839
- Some ( DataType :: Int32 ) => Ok ( Arc :: new( Int32Array :: from_iter( [ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator) . flatten( ) ) ) ) ,
840
- Some ( DataType :: Int64 ) => Ok ( Arc :: new( Int64Array :: from_iter( [ <$stat_type_prefix Int64DataPageStatsIterator >] :: new( $iterator) . flatten( ) ) ) ) ,
841
- Some ( DataType :: Float16 ) => Ok ( Arc :: new(
839
+ DataType :: Int32 => Ok ( Arc :: new( Int32Array :: from_iter( [ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator) . flatten( ) ) ) ) ,
840
+ DataType :: Int64 => Ok ( Arc :: new( Int64Array :: from_iter( [ <$stat_type_prefix Int64DataPageStatsIterator >] :: new( $iterator) . flatten( ) ) ) ) ,
841
+ DataType :: Float16 => Ok ( Arc :: new(
842
842
Float16Array :: from_iter(
843
843
[ <$stat_type_prefix Float16DataPageStatsIterator >] :: new( $iterator)
844
844
. map( |x| {
@@ -849,11 +849,11 @@ macro_rules! get_data_page_statistics {
849
849
. flatten( )
850
850
)
851
851
) ) ,
852
- Some ( DataType :: Float32 ) => Ok ( Arc :: new( Float32Array :: from_iter( [ <$stat_type_prefix Float32DataPageStatsIterator >] :: new( $iterator) . flatten( ) ) ) ) ,
853
- Some ( DataType :: Float64 ) => Ok ( Arc :: new( Float64Array :: from_iter( [ <$stat_type_prefix Float64DataPageStatsIterator >] :: new( $iterator) . flatten( ) ) ) ) ,
854
- Some ( DataType :: Binary ) => Ok ( Arc :: new( BinaryArray :: from_iter( [ <$stat_type_prefix ByteArrayDataPageStatsIterator >] :: new( $iterator) . flatten( ) ) ) ) ,
855
- Some ( DataType :: LargeBinary ) => Ok ( Arc :: new( LargeBinaryArray :: from_iter( [ <$stat_type_prefix ByteArrayDataPageStatsIterator >] :: new( $iterator) . flatten( ) ) ) ) ,
856
- Some ( DataType :: Utf8 ) => {
852
+ DataType :: Float32 => Ok ( Arc :: new( Float32Array :: from_iter( [ <$stat_type_prefix Float32DataPageStatsIterator >] :: new( $iterator) . flatten( ) ) ) ) ,
853
+ DataType :: Float64 => Ok ( Arc :: new( Float64Array :: from_iter( [ <$stat_type_prefix Float64DataPageStatsIterator >] :: new( $iterator) . flatten( ) ) ) ) ,
854
+ DataType :: Binary => Ok ( Arc :: new( BinaryArray :: from_iter( [ <$stat_type_prefix ByteArrayDataPageStatsIterator >] :: new( $iterator) . flatten( ) ) ) ) ,
855
+ DataType :: LargeBinary => Ok ( Arc :: new( LargeBinaryArray :: from_iter( [ <$stat_type_prefix ByteArrayDataPageStatsIterator >] :: new( $iterator) . flatten( ) ) ) ) ,
856
+ DataType :: Utf8 => {
857
857
let mut builder = StringBuilder :: new( ) ;
858
858
let iterator = [ <$stat_type_prefix ByteArrayDataPageStatsIterator >] :: new( $iterator) ;
859
859
for x in iterator {
@@ -873,7 +873,7 @@ macro_rules! get_data_page_statistics {
873
873
}
874
874
Ok ( Arc :: new( builder. finish( ) ) )
875
875
} ,
876
- Some ( DataType :: LargeUtf8 ) => {
876
+ DataType :: LargeUtf8 => {
877
877
let mut builder = LargeStringBuilder :: new( ) ;
878
878
let iterator = [ <$stat_type_prefix ByteArrayDataPageStatsIterator >] :: new( $iterator) ;
879
879
for x in iterator {
@@ -893,10 +893,10 @@ macro_rules! get_data_page_statistics {
893
893
}
894
894
Ok ( Arc :: new( builder. finish( ) ) )
895
895
} ,
896
- Some ( DataType :: Dictionary ( _, value_type) ) => {
897
- [ <$stat_type_prefix: lower _ page_statistics>] ( Some ( value_type) , $iterator)
896
+ DataType :: Dictionary ( _, value_type) => {
897
+ [ <$stat_type_prefix: lower _ page_statistics>] ( value_type, $iterator)
898
898
} ,
899
- Some ( DataType :: Timestamp ( unit, timezone) ) => {
899
+ DataType :: Timestamp ( unit, timezone) => {
900
900
let iter = [ <$stat_type_prefix Int64DataPageStatsIterator >] :: new( $iterator) . flatten( ) ;
901
901
Ok ( match unit {
902
902
TimeUnit :: Second => Arc :: new( TimestampSecondArray :: from_iter( iter) . with_timezone_opt( timezone. clone( ) ) ) ,
@@ -905,8 +905,8 @@ macro_rules! get_data_page_statistics {
905
905
TimeUnit :: Nanosecond => Arc :: new( TimestampNanosecondArray :: from_iter( iter) . with_timezone_opt( timezone. clone( ) ) ) ,
906
906
} )
907
907
} ,
908
- Some ( DataType :: Date32 ) => Ok ( Arc :: new( Date32Array :: from_iter( [ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator) . flatten( ) ) ) ) ,
909
- Some ( DataType :: Date64 ) => Ok (
908
+ DataType :: Date32 => Ok ( Arc :: new( Date32Array :: from_iter( [ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator) . flatten( ) ) ) ) ,
909
+ DataType :: Date64 => Ok (
910
910
Arc :: new(
911
911
Date64Array :: from_iter( [ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator)
912
912
. map( |x| {
@@ -919,11 +919,11 @@ macro_rules! get_data_page_statistics {
919
919
)
920
920
)
921
921
) ,
922
- Some ( DataType :: Decimal128 ( precision, scale) ) => Ok ( Arc :: new(
922
+ DataType :: Decimal128 ( precision, scale) => Ok ( Arc :: new(
923
923
Decimal128Array :: from_iter( [ <$stat_type_prefix Decimal128DataPageStatsIterator >] :: new( $iterator) . flatten( ) ) . with_precision_and_scale( * precision, * scale) ?) ) ,
924
- Some ( DataType :: Decimal256 ( precision, scale) ) => Ok ( Arc :: new(
924
+ DataType :: Decimal256 ( precision, scale) => Ok ( Arc :: new(
925
925
Decimal256Array :: from_iter( [ <$stat_type_prefix Decimal256DataPageStatsIterator >] :: new( $iterator) . flatten( ) ) . with_precision_and_scale( * precision, * scale) ?) ) ,
926
- Some ( DataType :: Time32 ( unit) ) => {
926
+ DataType :: Time32 ( unit) => {
927
927
Ok ( match unit {
928
928
TimeUnit :: Second => Arc :: new( Time32SecondArray :: from_iter(
929
929
[ <$stat_type_prefix Int32DataPageStatsIterator >] :: new( $iterator) . flatten( ) ,
@@ -937,7 +937,7 @@ macro_rules! get_data_page_statistics {
937
937
}
938
938
} )
939
939
}
940
- Some ( DataType :: Time64 ( unit) ) => {
940
+ DataType :: Time64 ( unit) => {
941
941
Ok ( match unit {
942
942
TimeUnit :: Microsecond => Arc :: new( Time64MicrosecondArray :: from_iter(
943
943
[ <$stat_type_prefix Int64DataPageStatsIterator >] :: new( $iterator) . flatten( ) ,
@@ -951,7 +951,7 @@ macro_rules! get_data_page_statistics {
951
951
}
952
952
} )
953
953
} ,
954
- Some ( DataType :: FixedSizeBinary ( size) ) => {
954
+ DataType :: FixedSizeBinary ( size) => {
955
955
let mut builder = FixedSizeBinaryBuilder :: new( * size) ;
956
956
let iterator = [ <$stat_type_prefix FixedLenByteArrayDataPageStatsIterator >] :: new( $iterator) ;
957
957
for x in iterator {
@@ -964,18 +964,13 @@ macro_rules! get_data_page_statistics {
964
964
if x. len( ) == * size as usize {
965
965
let _ = builder. append_value( x. data( ) ) ;
966
966
} else {
967
- // log::debug!(
968
- // "FixedSizeBinary({}) statistics is a binary of size {}, ignoring it.",
969
- // size,
970
- // x.len(),
971
- // );
972
967
builder. append_null( ) ;
973
968
}
974
969
}
975
970
}
976
971
Ok ( Arc :: new( builder. finish( ) ) )
977
972
} ,
978
- Some ( DataType :: Utf8View ) => {
973
+ DataType :: Utf8View => {
979
974
let mut builder = StringViewBuilder :: new( ) ;
980
975
let iterator = [ <$stat_type_prefix ByteArrayDataPageStatsIterator >] :: new( $iterator) ;
981
976
for x in iterator {
@@ -995,7 +990,7 @@ macro_rules! get_data_page_statistics {
995
990
}
996
991
Ok ( Arc :: new( builder. finish( ) ) )
997
992
} ,
998
- Some ( DataType :: BinaryView ) => {
993
+ DataType :: BinaryView => {
999
994
let mut builder = BinaryViewBuilder :: new( ) ;
1000
995
let iterator = [ <$stat_type_prefix ByteArrayDataPageStatsIterator >] :: new( $iterator) ;
1001
996
for x in iterator {
@@ -1010,23 +1005,22 @@ macro_rules! get_data_page_statistics {
1010
1005
}
1011
1006
Ok ( Arc :: new( builder. finish( ) ) )
1012
1007
} ,
1013
- Some ( DataType :: Null ) |
1014
- Some ( DataType :: Duration ( _) ) |
1015
- Some ( DataType :: Interval ( _) ) |
1016
- Some ( DataType :: List ( _) ) |
1017
- Some ( DataType :: ListView ( _) ) |
1018
- Some ( DataType :: FixedSizeList ( _, _) ) |
1019
- Some ( DataType :: LargeList ( _) ) |
1020
- Some ( DataType :: LargeListView ( _) ) |
1021
- Some ( DataType :: Struct ( _) ) |
1022
- Some ( DataType :: Union ( _, _) ) |
1023
- Some ( DataType :: Map ( _, _) ) |
1024
- Some ( DataType :: RunEndEncoded ( _, _) ) => {
1008
+ DataType :: Null |
1009
+ DataType :: Duration ( _) |
1010
+ DataType :: Interval ( _) |
1011
+ DataType :: List ( _) |
1012
+ DataType :: ListView ( _) |
1013
+ DataType :: FixedSizeList ( _, _) |
1014
+ DataType :: LargeList ( _) |
1015
+ DataType :: LargeListView ( _) |
1016
+ DataType :: Struct ( _) |
1017
+ DataType :: Union ( _, _) |
1018
+ DataType :: Map ( _, _) |
1019
+ DataType :: RunEndEncoded ( _, _) => {
1025
1020
let len = $iterator. count( ) ;
1026
1021
// don't know how to extract statistics, so return a null array
1027
- Ok ( new_null_array( $data_type. unwrap ( ) , len) )
1022
+ Ok ( new_null_array( $data_type, len) )
1028
1023
} ,
1029
- None => unimplemented!( ) // not sure how to handle this
1030
1024
}
1031
1025
}
1032
1026
}
@@ -1054,10 +1048,7 @@ fn max_statistics<'a, I: Iterator<Item = Option<&'a ParquetStatistics>>>(
1054
1048
1055
1049
/// Extracts the min statistics from an iterator
1056
1050
/// of parquet page [`Index`]'es to an [`ArrayRef`]
1057
- pub ( crate ) fn min_page_statistics < ' a , I > (
1058
- data_type : Option < & DataType > ,
1059
- iterator : I ,
1060
- ) -> Result < ArrayRef >
1051
+ pub ( crate ) fn min_page_statistics < ' a , I > ( data_type : & DataType , iterator : I ) -> Result < ArrayRef >
1061
1052
where
1062
1053
I : Iterator < Item = ( usize , & ' a Index ) > ,
1063
1054
{
@@ -1066,10 +1057,7 @@ where
1066
1057
1067
1058
/// Extracts the max statistics from an iterator
1068
1059
/// of parquet page [`Index`]'es to an [`ArrayRef`]
1069
- pub ( crate ) fn max_page_statistics < ' a , I > (
1070
- data_type : Option < & DataType > ,
1071
- iterator : I ,
1072
- ) -> Result < ArrayRef >
1060
+ pub ( crate ) fn max_page_statistics < ' a , I > ( data_type : & DataType , iterator : I ) -> Result < ArrayRef >
1073
1061
where
1074
1062
I : Iterator < Item = ( usize , & ' a Index ) > ,
1075
1063
{
@@ -1439,7 +1427,7 @@ impl<'a> StatisticsConverter<'a> {
1439
1427
( * num_data_pages, column_page_index_per_row_group_per_column)
1440
1428
} ) ;
1441
1429
1442
- min_page_statistics ( Some ( data_type) , iter)
1430
+ min_page_statistics ( data_type, iter)
1443
1431
}
1444
1432
1445
1433
/// Extract the maximum values from Data Page statistics.
@@ -1470,7 +1458,7 @@ impl<'a> StatisticsConverter<'a> {
1470
1458
( * num_data_pages, column_page_index_per_row_group_per_column)
1471
1459
} ) ;
1472
1460
1473
- max_page_statistics ( Some ( data_type) , iter)
1461
+ max_page_statistics ( data_type, iter)
1474
1462
}
1475
1463
1476
1464
/// Returns a [`UInt64Array`] with null counts for each data page.
0 commit comments