Skip to content

Commit 44e3876

Browse files
authored
Add additional test coverage for aggregates using dates/times/timestamps/decimals (#6939)
* Add additional test coverage for aggregates using dates/times/timestamps/decimals * Add coverage for date32/date64
1 parent ad3b8f6 commit 44e3876

File tree

1 file changed

+194
-38
lines changed

1 file changed

+194
-38
lines changed

datafusion/core/tests/sqllogictests/test_files/aggregate.slt

+194-38
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,10 @@ WITH HEADER ROW
3939
LOCATION '../../testing/data/csv/aggregate_test_100.csv'
4040

4141
statement ok
42-
CREATE TABLE d_table (c1 decimal(10,3)) as values
43-
(110.000), (110.001), (110.002), (110.003), (110.004), (110.005), (110.006), (110.007), (110.008), (110.009),
44-
(-100.000),(-100.001),(-100.002),(-100.003),(-100.004),(-100.005),(-100.006),(-100.007),(-100.008),(-100.009)
42+
CREATE TABLE d_table (c1 decimal(10,3), c2 varchar)
43+
as values
44+
(110.000, 'A'), (110.001, 'A'), (110.002, 'A'), (110.003, 'A'), (110.004, 'A'), (110.005, 'A'), (110.006, 'A'), (110.007, 'A'), (110.008, 'A'), (110.009, 'A'),
45+
(-100.000, 'B'),(-100.001, 'B'),(-100.002, 'B'),(-100.003, 'B'),(-100.004, 'B'),(-100.005, 'B'),(-100.006, 'B'),(-100.007, 'B'),(-100.008, 'B'),(-100.009, 'B')
4546

4647
statement ok
4748
CREATE TABLE median_table (
@@ -448,7 +449,7 @@ drop table cpu;
448449

449450
# this test is to show create table as and select into works in the same way
450451
statement ok
451-
SELECT * INTO cpu
452+
SELECT * INTO cpu
452453
FROM (VALUES
453454
('host0', 90.1),
454455
('host1', 90.2),
@@ -1483,22 +1484,6 @@ NULL 2
14831484
statement ok
14841485
drop table the_nulls;
14851486

1486-
# All supported timestamp types
1487-
1488-
# "nanos" --> TimestampNanosecondArray
1489-
# "micros" --> TimestampMicrosecondArray
1490-
# "millis" --> TimestampMillisecondArray
1491-
# "secs" --> TimestampSecondArray
1492-
# "names" --> StringArray
1493-
1494-
statement ok
1495-
create table t_source
1496-
as values
1497-
('2018-11-13T17:11:10.011375885995', 'Row 0'),
1498-
('2011-12-13T11:13:10.12345', 'Row 1'),
1499-
(null, 'Row 2'),
1500-
('2021-01-01T05:11:10.432', 'Row 3');
1501-
15021487
statement ok
15031488
create table bit_aggregate_functions (
15041489
c1 SMALLINT NOT NULL,
@@ -1568,62 +1553,196 @@ SELECT bool_or(distinct c1), bool_or(distinct c2), bool_or(distinct c3), bool_or
15681553
----
15691554
true true true false true true false NULL
15701555

1556+
# All supported timestamp types
1557+
1558+
# "nanos" --> TimestampNanosecondArray
1559+
# "micros" --> TimestampMicrosecondArray
1560+
# "millis" --> TimestampMillisecondArray
1561+
# "secs" --> TimestampSecondArray
1562+
# "names" --> StringArray
1563+
1564+
statement ok
1565+
create table t_source
1566+
as values
1567+
('2018-11-13T17:11:10.011375885995', 'Row 0', 'X'),
1568+
('2011-12-13T11:13:10.12345', 'Row 1', 'X'),
1569+
(null, 'Row 2', 'Y'),
1570+
('2021-01-01T05:11:10.432', 'Row 3', 'Y');
1571+
15711572
statement ok
15721573
create table t as
15731574
select
15741575
arrow_cast(column1, 'Timestamp(Nanosecond, None)') as nanos,
15751576
arrow_cast(column1, 'Timestamp(Microsecond, None)') as micros,
15761577
arrow_cast(column1, 'Timestamp(Millisecond, None)') as millis,
15771578
arrow_cast(column1, 'Timestamp(Second, None)') as secs,
1578-
column2 as names
1579+
column2 as names,
1580+
column3 as tag
15791581
from t_source;
15801582

15811583
# Demonstrate the contents
1582-
query PPPPT
1584+
query PPPPTT
15831585
select * from t;
15841586
----
1585-
2018-11-13T17:11:10.011375885 2018-11-13T17:11:10.011375 2018-11-13T17:11:10.011 2018-11-13T17:11:10 Row 0
1586-
2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123 2011-12-13T11:13:10 Row 1
1587-
NULL NULL NULL NULL Row 2
1588-
2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10 Row 3
1587+
2018-11-13T17:11:10.011375885 2018-11-13T17:11:10.011375 2018-11-13T17:11:10.011 2018-11-13T17:11:10 Row 0 X
1588+
2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123 2011-12-13T11:13:10 Row 1 X
1589+
NULL NULL NULL NULL Row 2 Y
1590+
2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10 Row 3 Y
15891591

15901592

15911593
# aggregate_timestamps_sum
1592-
statement error Error during planning: The function Sum does not support inputs of type Timestamp\(Nanosecond, None\)
1594+
statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Timestamp\(Nanosecond, None\)\.
15931595
SELECT sum(nanos), sum(micros), sum(millis), sum(secs) FROM t;
15941596

1597+
statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Timestamp\(Nanosecond, None\)\.
1598+
SELECT tag, sum(nanos), sum(micros), sum(millis), sum(secs) FROM t GROUP BY tag ORDER BY tag;
1599+
15951600
# aggregate_timestamps_count
15961601
query IIII
15971602
SELECT count(nanos), count(micros), count(millis), count(secs) FROM t;
15981603
----
15991604
3 3 3 3
16001605

1606+
query TIIII
1607+
SELECT tag, count(nanos), count(micros), count(millis), count(secs) FROM t GROUP BY tag ORDER BY tag;
1608+
----
1609+
X 2 2 2 2
1610+
Y 1 1 1 1
16011611

16021612
# aggregate_timestamps_min
16031613
query PPPP
16041614
SELECT min(nanos), min(micros), min(millis), min(secs) FROM t;
16051615
----
16061616
2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123 2011-12-13T11:13:10
16071617

1618+
query TPPPP
1619+
SELECT tag, min(nanos), min(micros), min(millis), min(secs) FROM t GROUP BY tag ORDER BY tag;
1620+
----
1621+
X 2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123450 2011-12-13T11:13:10.123 2011-12-13T11:13:10
1622+
Y 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10
1623+
16081624
# aggregate_timestamps_max
16091625
query PPPP
16101626
SELECT max(nanos), max(micros), max(millis), max(secs) FROM t;
16111627
----
16121628
2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10
16131629

1630+
query TPPPP
1631+
SELECT tag, max(nanos), max(micros), max(millis), max(secs) FROM t GROUP BY tag ORDER BY tag
1632+
----
1633+
X 2018-11-13T17:11:10.011375885 2018-11-13T17:11:10.011375 2018-11-13T17:11:10.011 2018-11-13T17:11:10
1634+
Y 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10
16141635

16151636

16161637
# aggregate_timestamps_avg
1617-
statement error Error during planning: The function Avg does not support inputs of type Timestamp\(Nanosecond, None\).
1638+
statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Timestamp\(Nanosecond, None\)\.
16181639
SELECT avg(nanos), avg(micros), avg(millis), avg(secs) FROM t
16191640

1641+
statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Timestamp\(Nanosecond, None\)\.
1642+
SELECT tag, avg(nanos), avg(micros), avg(millis), avg(secs) FROM t GROUP BY tag ORDER BY tag;
1643+
16201644

16211645
statement ok
16221646
drop table t_source;
16231647

16241648
statement ok
16251649
drop table t;
16261650

1651+
1652+
# All supported Date types
1653+
1654+
# "date32" --> Date32Array
1655+
# "date64" --> Date64Array
1656+
# "names" --> StringArray
1657+
1658+
statement ok
1659+
create table t_source
1660+
as values
1661+
('2018-11-13', 'Row 0', 'X'),
1662+
('2011-12-13', 'Row 1', 'X'),
1663+
(null, 'Row 2', 'Y'),
1664+
('2021-01-01', 'Row 3', 'Y');
1665+
1666+
statement ok
1667+
create table t as
1668+
select
1669+
arrow_cast(column1, 'Date32') as date32,
1670+
-- Workaround: once https://github.com/apache/arrow-rs/issues/4512 is fixed, this can be used instead:
1671+
-- arrow_cast(column1, 'Date64') as date64,
1672+
arrow_cast(arrow_cast(column1, 'Date32'), 'Date64') as date64,
1673+
column2 as names,
1674+
column3 as tag
1675+
from t_source;
1676+
1677+
# Demonstrate the contents
1678+
query DDTT
1679+
select * from t;
1680+
----
1681+
2018-11-13 2018-11-13T00:00:00 Row 0 X
1682+
2011-12-13 2011-12-13T00:00:00 Row 1 X
1683+
NULL NULL Row 2 Y
1684+
2021-01-01 2021-01-01T00:00:00 Row 3 Y
1685+
1686+
1687+
# aggregate_dates_sum
1688+
statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Date32\.
1689+
SELECT sum(date32), sum(date64) FROM t;
1690+
1691+
statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Date32\.
1692+
SELECT tag, sum(date32), sum(date64) FROM t GROUP BY tag ORDER BY tag;
1693+
1694+
# aggregate_dates_count
1695+
query II
1696+
SELECT count(date32), count(date64) FROM t;
1697+
----
1698+
3 3
1699+
1700+
query TII
1701+
SELECT tag, count(date32), count(date64) FROM t GROUP BY tag ORDER BY tag;
1702+
----
1703+
X 2 2
1704+
Y 1 1
1705+
1706+
# aggregate_dates_min
1707+
query DD
1708+
SELECT min(date32), min(date64) FROM t;
1709+
----
1710+
2011-12-13 2011-12-13T00:00:00
1711+
1712+
query TDD
1713+
SELECT tag, min(date32), min(date64) FROM t GROUP BY tag ORDER BY tag;
1714+
----
1715+
X 2011-12-13 2011-12-13T00:00:00
1716+
Y 2021-01-01 2021-01-01T00:00:00
1717+
1718+
# aggregate_dates_max
1719+
query DD
1720+
SELECT max(date32), max(date64) FROM t;
1721+
----
1722+
2021-01-01 2021-01-01T00:00:00
1723+
1724+
query TDD
1725+
SELECT tag, max(date32), max(date64) FROM t GROUP BY tag ORDER BY tag
1726+
----
1727+
X 2018-11-13 2018-11-13T00:00:00
1728+
Y 2021-01-01 2021-01-01T00:00:00
1729+
1730+
1731+
# aggregate_dates_avg
1732+
statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Date32\.
1733+
SELECT avg(date32), avg(date64) FROM t
1734+
1735+
statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Date32\.
1736+
SELECT tag, avg(date32), avg(date64) FROM t GROUP BY tag ORDER BY tag;
1737+
1738+
1739+
statement ok
1740+
drop table t_source;
1741+
1742+
statement ok
1743+
drop table t;
1744+
1745+
16271746
# All supported time types
16281747

16291748
# Columns are named:
@@ -1636,10 +1755,10 @@ drop table t;
16361755
statement ok
16371756
create table t_source
16381757
as values
1639-
('18:06:30.243620451', 'Row 0'),
1640-
('20:08:28.161121654', 'Row 1'),
1641-
('19:11:04.156423842', 'Row 2'),
1642-
('21:06:28.247821084', 'Row 3');
1758+
('18:06:30.243620451', 'Row 0', 'A'),
1759+
('20:08:28.161121654', 'Row 1', 'A'),
1760+
('19:11:04.156423842', 'Row 2', 'B'),
1761+
('21:06:28.247821084', 'Row 3', 'B');
16431762

16441763

16451764
statement ok
@@ -1649,46 +1768,71 @@ select
16491768
arrow_cast(column1, 'Time64(Microsecond)') as micros,
16501769
arrow_cast(column1, 'Time32(Millisecond)') as millis,
16511770
arrow_cast(column1, 'Time32(Second)') as secs,
1652-
column2 as names
1771+
column2 as names,
1772+
column3 as tag
16531773
from t_source;
16541774

16551775
# Demonstrate the contents
1656-
query DDDDT
1776+
query DDDDTT
16571777
select * from t;
16581778
----
1659-
18:06:30.243620451 18:06:30.243620 18:06:30.243 18:06:30 Row 0
1660-
20:08:28.161121654 20:08:28.161121 20:08:28.161 20:08:28 Row 1
1661-
19:11:04.156423842 19:11:04.156423 19:11:04.156 19:11:04 Row 2
1662-
21:06:28.247821084 21:06:28.247821 21:06:28.247 21:06:28 Row 3
1779+
18:06:30.243620451 18:06:30.243620 18:06:30.243 18:06:30 Row 0 A
1780+
20:08:28.161121654 20:08:28.161121 20:08:28.161 20:08:28 Row 1 A
1781+
19:11:04.156423842 19:11:04.156423 19:11:04.156 19:11:04 Row 2 B
1782+
21:06:28.247821084 21:06:28.247821 21:06:28.247 21:06:28 Row 3 B
16631783

16641784
# aggregate_times_sum
16651785
statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Time64\(Nanosecond\).
16661786
SELECT sum(nanos), sum(micros), sum(millis), sum(secs) FROM t
16671787

1788+
statement error DataFusion error: Error during planning: The function Sum does not support inputs of type Time64\(Nanosecond\)\.
1789+
SELECT tag, sum(nanos), sum(micros), sum(millis), sum(secs) FROM t GROUP BY tag ORDER BY tag
1790+
16681791
# aggregate_times_count
16691792
query IIII
16701793
SELECT count(nanos), count(micros), count(millis), count(secs) FROM t
16711794
----
16721795
4 4 4 4
16731796

1797+
query TIIII
1798+
SELECT tag, count(nanos), count(micros), count(millis), count(secs) FROM t GROUP BY tag ORDER BY tag
1799+
----
1800+
A 2 2 2 2
1801+
B 2 2 2 2
1802+
16741803

16751804
# aggregate_times_min
16761805
query DDDD
16771806
SELECT min(nanos), min(micros), min(millis), min(secs) FROM t
16781807
----
16791808
18:06:30.243620451 18:06:30.243620 18:06:30.243 18:06:30
16801809

1810+
query TDDDD
1811+
SELECT tag, min(nanos), min(micros), min(millis), min(secs) FROM t GROUP BY tag ORDER BY tag
1812+
----
1813+
A 18:06:30.243620451 18:06:30.243620 18:06:30.243 18:06:30
1814+
B 19:11:04.156423842 19:11:04.156423 19:11:04.156 19:11:04
1815+
16811816
# aggregate_times_max
16821817
query DDDD
16831818
SELECT max(nanos), max(micros), max(millis), max(secs) FROM t
16841819
----
16851820
21:06:28.247821084 21:06:28.247821 21:06:28.247 21:06:28
16861821

1822+
query TDDDD
1823+
SELECT tag, max(nanos), max(micros), max(millis), max(secs) FROM t GROUP BY tag ORDER BY tag
1824+
----
1825+
A 20:08:28.161121654 20:08:28.161121 20:08:28.161 20:08:28
1826+
B 21:06:28.247821084 21:06:28.247821 21:06:28.247 21:06:28
1827+
16871828

16881829
# aggregate_times_avg
16891830
statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Time64\(Nanosecond\).
16901831
SELECT avg(nanos), avg(micros), avg(millis), avg(secs) FROM t
16911832

1833+
statement error DataFusion error: Error during planning: The function Avg does not support inputs of type Time64\(Nanosecond\)\.
1834+
SELECT tag, avg(nanos), avg(micros), avg(millis), avg(secs) FROM t GROUP BY tag ORDER BY tag;
1835+
16921836
statement ok
16931837
drop table t_source;
16941838

@@ -1710,13 +1854,25 @@ select sum(c1), arrow_typeof(sum(c1)) from d_table;
17101854
----
17111855
100 Decimal128(20, 3)
17121856

1857+
query TRT
1858+
select c2, sum(c1), arrow_typeof(sum(c1)) from d_table GROUP BY c2 ORDER BY c2;
1859+
----
1860+
A 1100.045 Decimal128(20, 3)
1861+
B -1000.045 Decimal128(20, 3)
1862+
17131863

17141864
# aggregate_decimal_avg
17151865
query RT
17161866
select avg(c1), arrow_typeof(avg(c1)) from d_table
17171867
----
17181868
5 Decimal128(14, 7)
17191869

1870+
query TRT
1871+
select c2, avg(c1), arrow_typeof(avg(c1)) from d_table GROUP BY c2 ORDER BY c2
1872+
----
1873+
A 110.0045 Decimal128(14, 7)
1874+
B -100.0045 Decimal128(14, 7)
1875+
17201876
# Use PostgreSQL dialect
17211877
statement ok
17221878
set datafusion.sql_parser.dialect = 'Postgres';

0 commit comments

Comments
 (0)