@@ -39,9 +39,10 @@ WITH HEADER ROW
39
39
LOCATION '../../testing/data/csv/aggregate_test_100.csv'
40
40
41
41
statement ok
42
- CREATE TABLE d_table (c1 decimal(10,3)) as values
43
- (110.000), (110.001), (110.002), (110.003), (110.004), (110.005), (110.006), (110.007), (110.008), (110.009),
44
- (-100.000),(-100.001),(-100.002),(-100.003),(-100.004),(-100.005),(-100.006),(-100.007),(-100.008),(-100.009)
42
+ CREATE TABLE d_table (c1 decimal(10,3), c2 varchar)
43
+ as values
44
+ (110.000, 'A'), (110.001, 'A'), (110.002, 'A'), (110.003, 'A'), (110.004, 'A'), (110.005, 'A'), (110.006, 'A'), (110.007, 'A'), (110.008, 'A'), (110.009, 'A'),
45
+ (-100.000, 'B'),(-100.001, 'B'),(-100.002, 'B'),(-100.003, 'B'),(-100.004, 'B'),(-100.005, 'B'),(-100.006, 'B'),(-100.007, 'B'),(-100.008, 'B'),(-100.009, 'B')
45
46
46
47
statement ok
47
48
CREATE TABLE median_table (
@@ -448,7 +449,7 @@ drop table cpu;
448
449
449
450
# this test is to show create table as and select into works in the same way
450
451
statement ok
451
- SELECT * INTO cpu
452
+ SELECT * INTO cpu
452
453
FROM (VALUES
453
454
('host0', 90.1),
454
455
('host1', 90.2),
@@ -1483,22 +1484,6 @@ NULL 2
1483
1484
statement ok
1484
1485
drop table the_nulls;
1485
1486
1486
- # All supported timestamp types
1487
-
1488
- # "nanos" --> TimestampNanosecondArray
1489
- # "micros" --> TimestampMicrosecondArray
1490
- # "millis" --> TimestampMillisecondArray
1491
- # "secs" --> TimestampSecondArray
1492
- # "names" --> StringArray
1493
-
1494
- statement ok
1495
- create table t_source
1496
- as values
1497
- ('2018-11-13T17:11:10.011375885995', 'Row 0'),
1498
- ('2011-12-13T11:13:10.12345', 'Row 1'),
1499
- (null, 'Row 2'),
1500
- ('2021-01-01T05:11:10.432', 'Row 3');
1501
-
1502
1487
statement ok
1503
1488
create table bit_aggregate_functions (
1504
1489
c1 SMALLINT NOT NULL,
@@ -1568,62 +1553,196 @@ SELECT bool_or(distinct c1), bool_or(distinct c2), bool_or(distinct c3), bool_or
1568
1553
----
1569
1554
true true true false true true false NULL
1570
1555
1556
+ # All supported timestamp types
1557
+
1558
+ # "nanos" --> TimestampNanosecondArray
1559
+ # "micros" --> TimestampMicrosecondArray
1560
+ # "millis" --> TimestampMillisecondArray
1561
+ # "secs" --> TimestampSecondArray
1562
+ # "names" --> StringArray
1563
+
1564
+ statement ok
1565
+ create table t_source
1566
+ as values
1567
+ ('2018-11-13T17:11:10.011375885995', 'Row 0', 'X'),
1568
+ ('2011-12-13T11:13:10.12345', 'Row 1', 'X'),
1569
+ (null, 'Row 2', 'Y'),
1570
+ ('2021-01-01T05:11:10.432', 'Row 3', 'Y');
1571
+
1571
1572
statement ok
1572
1573
create table t as
1573
1574
select
1574
1575
arrow_cast(column1, 'Timestamp(Nanosecond, None)') as nanos,
1575
1576
arrow_cast(column1, 'Timestamp(Microsecond, None)') as micros,
1576
1577
arrow_cast(column1, 'Timestamp(Millisecond, None)') as millis,
1577
1578
arrow_cast(column1, 'Timestamp(Second, None)') as secs,
1578
- column2 as names
1579
+ column2 as names,
1580
+ column3 as tag
1579
1581
from t_source;
1580
1582
1581
1583
# Demonstrate the contents
1582
- query PPPPT
1584
+ query PPPPTT
1583
1585
select * from t;
1584
1586
----
1585
- 2018-11-13T17:11:10.011375885 2018-11-13T17:11:10.011375 2018-11-13T17:11:10.011 2018-11-13T17:11:10 Row 0
1586
- 2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123 2011-12-13T11:13:10 Row 1
1587
- NULL NULL NULL NULL Row 2
1588
- 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10 Row 3
1587
+ 2018-11-13T17:11:10.011375885 2018-11-13T17:11:10.011375 2018-11-13T17:11:10.011 2018-11-13T17:11:10 Row 0 X
1588
+ 2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123 2011-12-13T11:13:10 Row 1 X
1589
+ NULL NULL NULL NULL Row 2 Y
1590
+ 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10 Row 3 Y
1589
1591
1590
1592
1591
1593
# aggregate_timestamps_sum
1592
- statement error Error during planning: The function Sum does not support inputs of type Timestamp\(Nanosecond, None\)
1594
+ statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Timestamp\(Nanosecond, None\)\.
1593
1595
SELECT sum(nanos), sum(micros), sum(millis), sum(secs) FROM t;
1594
1596
1597
+ statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Timestamp\(Nanosecond, None\)\.
1598
+ SELECT tag, sum(nanos), sum(micros), sum(millis), sum(secs) FROM t GROUP BY tag ORDER BY tag;
1599
+
1595
1600
# aggregate_timestamps_count
1596
1601
query IIII
1597
1602
SELECT count(nanos), count(micros), count(millis), count(secs) FROM t;
1598
1603
----
1599
1604
3 3 3 3
1600
1605
1606
+ query TIIII
1607
+ SELECT tag, count(nanos), count(micros), count(millis), count(secs) FROM t GROUP BY tag ORDER BY tag;
1608
+ ----
1609
+ X 2 2 2 2
1610
+ Y 1 1 1 1
1601
1611
1602
1612
# aggregate_timestamps_min
1603
1613
query PPPP
1604
1614
SELECT min(nanos), min(micros), min(millis), min(secs) FROM t;
1605
1615
----
1606
1616
2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123 2011-12-13T11:13:10
1607
1617
1618
+ query TPPPP
1619
+ SELECT tag, min(nanos), min(micros), min(millis), min(secs) FROM t GROUP BY tag ORDER BY tag;
1620
+ ----
1621
+ X 2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123 2011-12-13T11:13:10
1622
+ Y 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10
1623
+
1608
1624
# aggregate_timestamps_max
1609
1625
query PPPP
1610
1626
SELECT max(nanos), max(micros), max(millis), max(secs) FROM t;
1611
1627
----
1612
1628
2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10
1613
1629
1630
+ query TPPPP
1631
+ SELECT tag, max(nanos), max(micros), max(millis), max(secs) FROM t GROUP BY tag ORDER BY tag
1632
+ ----
1633
+ X 2018-11-13T17:11:10.011375885 2018-11-13T17:11:10.011375 2018-11-13T17:11:10.011 2018-11-13T17:11:10
1634
+ Y 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10
1614
1635
1615
1636
1616
1637
# aggregate_timestamps_avg: avg() over timestamp columns must be rejected at planning time
1617
- statement error Error during planning: The function Avg does not support inputs of type Timestamp\(Nanosecond, None\).
1638
+ statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Timestamp\(Nanosecond, None\)\.
1618
1639
SELECT avg(nanos), avg(micros), avg(millis), avg(secs) FROM t
1619
1640
1641
+ statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Timestamp\(Nanosecond, None\)\.
1642
+ SELECT tag, avg(nanos), avg(micros), avg(millis), avg(secs) FROM t GROUP BY tag ORDER BY tag;
1643
+
1620
1644
1621
1645
statement ok
1622
1646
drop table t_source;
1623
1647
1624
1648
statement ok
1625
1649
drop table t;
1626
1650
1651
+
1652
+ # All supported Date types
1653
+
1654
+ # "date32" --> Date32Array
1655
+ # "date64" --> Date64Array
1656
+ # "names" --> StringArray
1657
+
1658
+ statement ok
1659
+ create table t_source
1660
+ as values
1661
+ ('2018-11-13', 'Row 0', 'X'),
1662
+ ('2011-12-13', 'Row 1', 'X'),
1663
+ (null, 'Row 2', 'Y'),
1664
+ ('2021-01-01', 'Row 3', 'Y');
1665
+
1666
+ statement ok
1667
+ create table t as
1668
+ select
1669
+ arrow_cast(column1, 'Date32') as date32,
1670
+ -- Workaround: cast via Date32 first until https://github.com/apache/arrow-rs/issues/4512 is fixed; once fixed, the direct cast below can be used instead
1671
+ -- arrow_cast(column1, 'Date64') as date64,
1672
+ arrow_cast(arrow_cast(column1, 'Date32'), 'Date64') as date64,
1673
+ column2 as names,
1674
+ column3 as tag
1675
+ from t_source;
1676
+
1677
+ # Demonstrate the contents
1678
+ query DDTT
1679
+ select * from t;
1680
+ ----
1681
+ 2018-11-13 2018-11-13T00:00:00 Row 0 X
1682
+ 2011-12-13 2011-12-13T00:00:00 Row 1 X
1683
+ NULL NULL Row 2 Y
1684
+ 2021-01-01 2021-01-01T00:00:00 Row 3 Y
1685
+
1686
+
1687
+ # aggregate_dates_sum
1688
+ statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Date32\.
1689
+ SELECT sum(date32), sum(date64) FROM t;
1690
+
1691
+ statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Date32\.
1692
+ SELECT tag, sum(date32), sum(date64) FROM t GROUP BY tag ORDER BY tag;
1693
+
1694
+ # aggregate_dates_count
1695
+ query II
1696
+ SELECT count(date32), count(date64) FROM t;
1697
+ ----
1698
+ 3 3
1699
+
1700
+ query TII
1701
+ SELECT tag, count(date32), count(date64) FROM t GROUP BY tag ORDER BY tag;
1702
+ ----
1703
+ X 2 2
1704
+ Y 1 1
1705
+
1706
+ # aggregate_dates_min
1707
+ query DD
1708
+ SELECT min(date32), min(date64) FROM t;
1709
+ ----
1710
+ 2011-12-13 2011-12-13T00:00:00
1711
+
1712
+ query TDD
1713
+ SELECT tag, min(date32), min(date64) FROM t GROUP BY tag ORDER BY tag;
1714
+ ----
1715
+ X 2011-12-13 2011-12-13T00:00:00
1716
+ Y 2021-01-01 2021-01-01T00:00:00
1717
+
1718
+ # aggregate_dates_max
1719
+ query DD
1720
+ SELECT max(date32), max(date64) FROM t;
1721
+ ----
1722
+ 2021-01-01 2021-01-01T00:00:00
1723
+
1724
+ query TDD
1725
+ SELECT tag, max(date32), max(date64) FROM t GROUP BY tag ORDER BY tag
1726
+ ----
1727
+ X 2018-11-13 2018-11-13T00:00:00
1728
+ Y 2021-01-01 2021-01-01T00:00:00
1729
+
1730
+
1731
+ # aggregate_dates_avg
1732
+ statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Date32\.
1733
+ SELECT avg(date32), avg(date64) FROM t
1734
+
1735
+ statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Date32\.
1736
+ SELECT tag, avg(date32), avg(date64) FROM t GROUP BY tag ORDER BY tag;
1737
+
1738
+
1739
+ statement ok
1740
+ drop table t_source;
1741
+
1742
+ statement ok
1743
+ drop table t;
1744
+
1745
+
1627
1746
# All supported time types
1628
1747
1629
1748
# Columns are named:
@@ -1636,10 +1755,10 @@ drop table t;
1636
1755
statement ok
1637
1756
create table t_source
1638
1757
as values
1639
- ('18:06:30.243620451', 'Row 0'),
1640
- ('20:08:28.161121654', 'Row 1'),
1641
- ('19:11:04.156423842', 'Row 2'),
1642
- ('21:06:28.247821084', 'Row 3');
1758
+ ('18:06:30.243620451', 'Row 0', 'A' ),
1759
+ ('20:08:28.161121654', 'Row 1', 'A' ),
1760
+ ('19:11:04.156423842', 'Row 2', 'B' ),
1761
+ ('21:06:28.247821084', 'Row 3', 'B' );
1643
1762
1644
1763
1645
1764
statement ok
@@ -1649,46 +1768,71 @@ select
1649
1768
arrow_cast(column1, 'Time64(Microsecond)') as micros,
1650
1769
arrow_cast(column1, 'Time32(Millisecond)') as millis,
1651
1770
arrow_cast(column1, 'Time32(Second)') as secs,
1652
- column2 as names
1771
+ column2 as names,
1772
+ column3 as tag
1653
1773
from t_source;
1654
1774
1655
1775
# Demonstrate the contents
1656
- query DDDDT
1776
+ query DDDDTT
1657
1777
select * from t;
1658
1778
----
1659
- 18:06:30.243620451 18:06:30.243620 18:06:30.243 18:06:30 Row 0
1660
- 20:08:28.161121654 20:08:28.161121 20:08:28.161 20:08:28 Row 1
1661
- 19:11:04.156423842 19:11:04.156423 19:11:04.156 19:11:04 Row 2
1662
- 21:06:28.247821084 21:06:28.247821 21:06:28.247 21:06:28 Row 3
1779
+ 18:06:30.243620451 18:06:30.243620 18:06:30.243 18:06:30 Row 0 A
1780
+ 20:08:28.161121654 20:08:28.161121 20:08:28.161 20:08:28 Row 1 A
1781
+ 19:11:04.156423842 19:11:04.156423 19:11:04.156 19:11:04 Row 2 B
1782
+ 21:06:28.247821084 21:06:28.247821 21:06:28.247 21:06:28 Row 3 B
1663
1783
1664
1784
# aggregate_times_sum: sum() over time columns must be rejected at planning time
1665
1785
statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Time64\(Nanosecond\)\.
1666
1786
SELECT sum(nanos), sum(micros), sum(millis), sum(secs) FROM t
1667
1787
1788
+ statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Time64\(Nanosecond\)\.
1789
+ SELECT tag, sum(nanos), sum(micros), sum(millis), sum(secs) FROM t GROUP BY tag ORDER BY tag
1790
+
1668
1791
# aggregate_times_count
1669
1792
query IIII
1670
1793
SELECT count(nanos), count(micros), count(millis), count(secs) FROM t
1671
1794
----
1672
1795
4 4 4 4
1673
1796
1797
+ query TIIII
1798
+ SELECT tag, count(nanos), count(micros), count(millis), count(secs) FROM t GROUP BY tag ORDER BY tag
1799
+ ----
1800
+ A 2 2 2 2
1801
+ B 2 2 2 2
1802
+
1674
1803
1675
1804
# aggregate_times_min
1676
1805
query DDDD
1677
1806
SELECT min(nanos), min(micros), min(millis), min(secs) FROM t
1678
1807
----
1679
1808
18:06:30.243620451 18:06:30.243620 18:06:30.243 18:06:30
1680
1809
1810
+ query TDDDD
1811
+ SELECT tag, min(nanos), min(micros), min(millis), min(secs) FROM t GROUP BY tag ORDER BY tag
1812
+ ----
1813
+ A 18:06:30.243620451 18:06:30.243620 18:06:30.243 18:06:30
1814
+ B 19:11:04.156423842 19:11:04.156423 19:11:04.156 19:11:04
1815
+
1681
1816
# aggregate_times_max
1682
1817
query DDDD
1683
1818
SELECT max(nanos), max(micros), max(millis), max(secs) FROM t
1684
1819
----
1685
1820
21:06:28.247821084 21:06:28.247821 21:06:28.247 21:06:28
1686
1821
1822
+ query TDDDD
1823
+ SELECT tag, max(nanos), max(micros), max(millis), max(secs) FROM t GROUP BY tag ORDER BY tag
1824
+ ----
1825
+ A 20:08:28.161121654 20:08:28.161121 20:08:28.161 20:08:28
1826
+ B 21:06:28.247821084 21:06:28.247821 21:06:28.247 21:06:28
1827
+
1687
1828
1688
1829
# aggregate_times_avg: avg() over time columns must be rejected at planning time
1689
1830
statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Time64\(Nanosecond\)\.
1690
1831
SELECT avg(nanos), avg(micros), avg(millis), avg(secs) FROM t
1691
1832
1833
+ statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Time64\(Nanosecond\)\.
1834
+ SELECT tag, avg(nanos), avg(micros), avg(millis), avg(secs) FROM t GROUP BY tag ORDER BY tag;
1835
+
1692
1836
statement ok
1693
1837
drop table t_source;
1694
1838
@@ -1710,13 +1854,25 @@ select sum(c1), arrow_typeof(sum(c1)) from d_table;
1710
1854
----
1711
1855
100 Decimal128(20, 3)
1712
1856
1857
+ query TRT
1858
+ select c2, sum(c1), arrow_typeof(sum(c1)) from d_table GROUP BY c2 ORDER BY c2;
1859
+ ----
1860
+ A 1100.045 Decimal128(20, 3)
1861
+ B -1000.045 Decimal128(20, 3)
1862
+
1713
1863
1714
1864
# aggregate_decimal_avg
1715
1865
query RT
1716
1866
select avg(c1), arrow_typeof(avg(c1)) from d_table
1717
1867
----
1718
1868
5 Decimal128(14, 7)
1719
1869
1870
+ query TRT
1871
+ select c2, avg(c1), arrow_typeof(avg(c1)) from d_table GROUP BY c2 ORDER BY c2
1872
+ ----
1873
+ A 110.0045 Decimal128(14, 7)
1874
+ B -100.0045 Decimal128(14, 7)
1875
+
1720
1876
# Use PostgreSQL dialect
1721
1877
statement ok
1722
1878
set datafusion.sql_parser.dialect = 'Postgres';
0 commit comments