|
21 | 21 |
|
22 | 22 | use arrow::array::{
|
23 | 23 | BooleanBuilder, FixedSizeBinaryBuilder, LargeStringBuilder, StringBuilder,
|
| 24 | + StringViewBuilder, |
24 | 25 | };
|
25 | 26 | use arrow::datatypes::i256;
|
26 | 27 | use arrow::{array::ArrayRef, datatypes::DataType};
|
@@ -438,6 +439,25 @@ macro_rules! get_statistics {
|
438 | 439 | }
|
439 | 440 | Ok(Arc::new(builder.finish()))
|
440 | 441 | },
|
| 442 | + DataType::Utf8View => { |
| 443 | + let iterator = [<$stat_type_prefix ByteArrayStatsIterator>]::new($iterator); |
| 444 | + let mut builder = StringViewBuilder::new(); |
| 445 | + for x in iterator { |
| 446 | + let Some(x) = x else { |
| 447 | + builder.append_null(); // no statistics value |
| 448 | + continue; |
| 449 | + }; |
| 450 | + |
| 451 | + let Ok(x) = std::str::from_utf8(x) else { |
| 452 | + log::debug!("Utf8 statistics is a non-UTF8 value, ignoring it."); |
| 453 | + builder.append_null(); |
| 454 | + continue; |
| 455 | + }; |
| 456 | + |
| 457 | + builder.append_value(x); |
| 458 | + } |
| 459 | + Ok(Arc::new(builder.finish())) |
| 460 | + }, |
441 | 461 | DataType::FixedSizeBinary(size) => {
|
442 | 462 | let iterator = [<$stat_type_prefix FixedLenByteArrayStatsIterator>]::new($iterator);
|
443 | 463 | let mut builder = FixedSizeBinaryBuilder::new(*size);
|
@@ -482,8 +502,8 @@ macro_rules! get_statistics {
|
482 | 502 | DataType::Duration(_) |
|
483 | 503 | DataType::Interval(_) |
|
484 | 504 | DataType::Null |
|
| 505 | + // TODO: support BinaryView statistics |
485 | 506 | DataType::BinaryView |
|
486 |
| - DataType::Utf8View | |
487 | 507 | DataType::List(_) |
|
488 | 508 | DataType::ListView(_) |
|
489 | 509 | DataType::FixedSizeList(_, _) |
|
@@ -901,6 +921,29 @@ macro_rules! get_data_page_statistics {
|
901 | 921 | }
|
902 | 922 | Ok(Arc::new(builder.finish()))
|
903 | 923 | },
|
| 924 | + // TODO file upstream in arrow-rs -- |
| 925 | + // support Utf8View and BinaryView in statistics |
| 926 | + Some(DataType::Utf8View) => { |
| 927 | + let mut builder = StringViewBuilder::new(); |
| 928 | + let iterator = [<$stat_type_prefix ByteArrayDataPageStatsIterator>]::new($iterator); |
| 929 | + for x in iterator { |
| 930 | + for x in x.into_iter() { |
| 931 | + let Some(x) = x else { |
| 932 | + builder.append_null(); // no statistics value |
| 933 | + continue; |
| 934 | + }; |
| 935 | + |
| 936 | + let Ok(x) = std::str::from_utf8(x.data()) else { |
| 937 | + log::debug!("Utf8 statistics is a non-UTF8 value, ignoring it."); |
| 938 | + builder.append_null(); |
| 939 | + continue; |
| 940 | + }; |
| 941 | + |
| 942 | + builder.append_value(x); |
| 943 | + } |
| 944 | + } |
| 945 | + Ok(Arc::new(builder.finish())) |
| 946 | + }, |
904 | 947 | Some(DataType::Dictionary(_, value_type)) => {
|
905 | 948 | [<$stat_type_prefix:lower _ page_statistics>](Some(value_type), $iterator)
|
906 | 949 | },
|
@@ -983,6 +1026,7 @@ macro_rules! get_data_page_statistics {
|
983 | 1026 | }
|
984 | 1027 | Ok(Arc::new(builder.finish()))
|
985 | 1028 | },
|
| 1029 | + // TODO file upstream in arrow-rs -- return not implemented for unsupported types rather than panic |
986 | 1030 | _ => unimplemented!()
|
987 | 1031 | }
|
988 | 1032 | }
|
@@ -1104,6 +1148,7 @@ where
|
1104 | 1148 | .iter()
|
1105 | 1149 | .map(|x| x.null_count.map(|x| x as u64))
|
1106 | 1150 | .collect::<Vec<_>>(),
|
| 1151 | + // TODO file upstream in arrow-rs -- return a not-implemented error rather than panic |
1107 | 1152 | _ => unimplemented!(),
|
1108 | 1153 | });
|
1109 | 1154 |
|
|
0 commit comments