@@ -28,7 +28,10 @@ use arrow::compute::{cast_with_options, CastOptions};
28
28
use arrow:: datatypes:: DataType ;
29
29
use arrow:: datatypes:: { IntervalUnit , TimeUnit } ;
30
30
use datafusion_common:: rounding:: { alter_fp_rounding_mode, next_down, next_up} ;
31
- use datafusion_common:: { internal_err, Result , ScalarValue } ;
31
+ use datafusion_common:: stats:: Precision ;
32
+ use datafusion_common:: {
33
+ internal_err, ColumnStatistics , DataFusionError , Result , ScalarValue ,
34
+ } ;
32
35
33
36
macro_rules! get_extreme_value {
34
37
( $extreme: ident, $value: expr) => {
@@ -1469,6 +1472,8 @@ pub enum NullableInterval {
1469
1472
MaybeNull { values : Interval } ,
1470
1473
/// The value is definitely not null, and is within the specified range.
1471
1474
NotNull { values : Interval } ,
1475
+ /// Neither the nullability nor the value range is known, e.g. because the column statistics needed to derive them are absent.
1476
+ Unknown ,
1472
1477
}
1473
1478
1474
1479
impl Display for NullableInterval {
@@ -1479,6 +1484,7 @@ impl Display for NullableInterval {
1479
1484
write ! ( f, "NullableInterval: {} U {{NULL}}" , values)
1480
1485
}
1481
1486
Self :: NotNull { values } => write ! ( f, "NullableInterval: {}" , values) ,
1487
+ Self :: Unknown => write ! ( f, "NullableInterval: Unknown" ) ,
1482
1488
}
1483
1489
}
1484
1490
}
@@ -1501,36 +1507,82 @@ impl From<ScalarValue> for NullableInterval {
1501
1507
}
1502
1508
}
1503
1509
1510
+ impl From < ColumnStatistics > for NullableInterval {
1511
+ fn from ( stats : ColumnStatistics ) -> Self {
1512
+ let null_count = match stats. null_count {
1513
+ Precision :: Exact ( value) | Precision :: Inexact ( value) => value,
1514
+ Precision :: Absent => return NullableInterval :: Unknown , // Insufficient data
1515
+ } ;
1516
+
1517
+ let distinct_count = match stats. distinct_count {
1518
+ Precision :: Exact ( value) | Precision :: Inexact ( value) => value,
1519
+ Precision :: Absent => return NullableInterval :: Unknown , // Insufficient data
1520
+ } ;
1521
+
1522
+ let lower_value = match stats. min_value {
1523
+ Precision :: Exact ( ref value) | Precision :: Inexact ( ref value) => value. clone ( ) ,
1524
+ Precision :: Absent => return NullableInterval :: Unknown , // Insufficient data
1525
+ } ;
1526
+
1527
+ let upper_value = match stats. max_value {
1528
+ Precision :: Exact ( ref value) | Precision :: Inexact ( ref value) => value. clone ( ) ,
1529
+ Precision :: Absent => return NullableInterval :: Unknown ,
1530
+ } ;
1531
+
1532
+ let datatype = lower_value. data_type ( ) ;
1533
+
1534
+ if null_count == 0 {
1535
+ NullableInterval :: NotNull {
1536
+ values : Interval {
1537
+ lower : lower_value,
1538
+ upper : upper_value,
1539
+ } ,
1540
+ }
1541
+ } else if null_count == distinct_count {
1542
+ NullableInterval :: Null { datatype }
1543
+ } else {
1544
+ NullableInterval :: MaybeNull {
1545
+ values : Interval {
1546
+ lower : lower_value,
1547
+ upper : upper_value,
1548
+ } ,
1549
+ }
1550
+ }
1551
+ }
1552
+ }
1553
+
1504
1554
impl NullableInterval {
1505
1555
/// Get the values interval, or None if this interval is definitely null.
1506
1556
pub fn values ( & self ) -> Option < & Interval > {
1507
1557
match self {
1508
- Self :: Null { .. } => None ,
1558
+ Self :: Null { .. } | Self :: Unknown => None ,
1509
1559
Self :: MaybeNull { values } | Self :: NotNull { values } => Some ( values) ,
1510
1560
}
1511
1561
}
1512
1562
1513
1563
/// Get the data type
1514
- pub fn data_type ( & self ) -> DataType {
1564
+ pub fn data_type ( & self ) -> Option < DataType > {
1515
1565
match self {
1516
- Self :: Null { datatype } => datatype. clone ( ) ,
1517
- Self :: MaybeNull { values } | Self :: NotNull { values } => values. data_type ( ) ,
1566
+ Self :: Null { datatype } => Some ( datatype. clone ( ) ) ,
1567
+ Self :: MaybeNull { values } | Self :: NotNull { values } => {
1568
+ Some ( values. data_type ( ) )
1569
+ }
1570
+ Self :: Unknown => None ,
1518
1571
}
1519
1572
}
1520
1573
1521
1574
/// Return true if the value is definitely true (and not null).
1522
1575
pub fn is_certainly_true ( & self ) -> bool {
1523
1576
match self {
1524
- Self :: Null { .. } | Self :: MaybeNull { .. } => false ,
1577
+ Self :: Null { .. } | Self :: MaybeNull { .. } | Self :: Unknown => false ,
1525
1578
Self :: NotNull { values } => values == & Interval :: CERTAINLY_TRUE ,
1526
1579
}
1527
1580
}
1528
1581
1529
1582
/// Return true if the value is definitely false (and not null).
1530
1583
pub fn is_certainly_false ( & self ) -> bool {
1531
1584
match self {
1532
- Self :: Null { .. } => false ,
1533
- Self :: MaybeNull { .. } => false ,
1585
+ Self :: Null { .. } | Self :: MaybeNull { .. } | Self :: Unknown => false ,
1534
1586
Self :: NotNull { values } => values == & Interval :: CERTAINLY_FALSE ,
1535
1587
}
1536
1588
}
@@ -1547,6 +1599,7 @@ impl NullableInterval {
1547
1599
Self :: NotNull { values } => Ok ( Self :: NotNull {
1548
1600
values : values. not ( ) ?,
1549
1601
} ) ,
1602
+ Self :: Unknown => Ok ( Self :: Unknown ) ,
1550
1603
}
1551
1604
}
1552
1605
@@ -1640,9 +1693,12 @@ impl NullableInterval {
1640
1693
datatype : DataType :: Boolean ,
1641
1694
} )
1642
1695
} else {
1643
- Ok ( Self :: Null {
1644
- datatype : self . data_type ( ) ,
1645
- } )
1696
+ match self . data_type ( ) {
1697
+ Some ( datatype) => Ok ( Self :: Null { datatype } ) ,
1698
+ None => Err ( DataFusionError :: NotImplemented (
1699
+ "Cannot determine data type for operation" . to_string ( ) ,
1700
+ ) ) ,
1701
+ }
1646
1702
}
1647
1703
}
1648
1704
}
@@ -1714,10 +1770,13 @@ impl NullableInterval {
1714
1770
1715
1771
#[ cfg( test) ]
1716
1772
mod tests {
1717
- use crate :: interval_arithmetic:: { next_value, prev_value, satisfy_greater, Interval } ;
1773
+ use crate :: interval_arithmetic:: {
1774
+ next_value, prev_value, satisfy_greater, Interval , NullableInterval ,
1775
+ } ;
1718
1776
1719
1777
use arrow:: datatypes:: DataType ;
1720
- use datafusion_common:: { Result , ScalarValue } ;
1778
+ use datafusion_common:: stats:: Precision ;
1779
+ use datafusion_common:: { ColumnStatistics , Result , ScalarValue } ;
1721
1780
1722
1781
#[ test]
1723
1782
fn test_next_prev_value ( ) -> Result < ( ) > {
@@ -3212,7 +3271,64 @@ mod tests {
3212
3271
3213
3272
Ok ( ( ) )
3214
3273
}
3274
/// Checks each outcome of converting `ColumnStatistics` into a
/// `NullableInterval`: `Null`, `NotNull`, `MaybeNull` and `Unknown`.
#[test]
fn test_interval_from_column_statistics() {
    // Statistics for an Int32 column bounded by [1, 100]; only the null count
    // and the (exact) distinct count differ between the cases below.
    let stats_with = |null_count: Precision<usize>, distinct: usize| ColumnStatistics {
        null_count,
        max_value: Precision::Exact(ScalarValue::Int32(Some(100))),
        min_value: Precision::Exact(ScalarValue::Int32(Some(1))),
        distinct_count: Precision::Exact(distinct),
    };
    // The value interval expected whenever the bounds are carried over.
    let expected_bounds = || Interval {
        lower: ScalarValue::Int32(Some(1)),
        upper: ScalarValue::Int32(Some(100)),
    };

    // Null count equal to the distinct count: reported as definitely null.
    let stats_null = stats_with(Precision::Exact(10), 10);
    assert_eq!(
        NullableInterval::from(stats_null),
        NullableInterval::Null {
            datatype: DataType::Int32
        }
    );

    // No nulls at all: definitely not null.
    let stats_not_null = stats_with(Precision::Exact(0), 20);
    assert_eq!(
        NullableInterval::from(stats_not_null),
        NullableInterval::NotNull {
            values: expected_bounds()
        }
    );

    // Some nulls, but fewer than the distinct count: possibly null.
    let stats_maybe_null = stats_with(Precision::Exact(5), 20);
    assert_eq!(
        NullableInterval::from(stats_maybe_null),
        NullableInterval::MaybeNull {
            values: expected_bounds()
        }
    );

    // Missing null count: nothing can be concluded.
    let stats_unknown = stats_with(Precision::Absent, 20);
    assert_eq!(
        NullableInterval::from(stats_unknown),
        NullableInterval::Unknown
    );
}
3216
3332
#[ test]
3217
3333
fn test_interval_display ( ) {
3218
3334
let interval = Interval :: make ( Some ( 0.25_f32 ) , Some ( 0.50_f32 ) ) . unwrap ( ) ;
0 commit comments