-
-
Notifications
You must be signed in to change notification settings - Fork 262
New function LISTAGG #8689
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
New function LISTAGG #8689
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,144 @@ | ||||||
SQL Language Extension: LISTAGG | ||||||
|
||||||
Function: | ||||||
The current implementation has an aggregate function LIST which concatenates multiple row | ||||||
fields into a blob. The SQL standard has a similar function called LISTAGG. The major | ||||||
difference is that it also supports the ordered concatenation. | ||||||
|
||||||
Authors: | ||||||
Chudaykin Alex <[email protected]> | ||||||
|
||||||
Format: | ||||||
<listagg set function> ::= | ||||||
LISTAGG <left paren> [ <set quantifier> ] <character value expression> <comma> <listagg separator> [ <listagg overflow clause> ] <right paren> <within group specification> | ||||||
|
||||||
<listagg separator> ::= | ||||||
<character string literal> | ||||||
|
||||||
<listagg overflow clause> ::= | ||||||
ON OVERFLOW <overflow behavior> | ||||||
|
||||||
<overflow behavior> ::= | ||||||
ERROR | TRUNCATE [ <listagg truncation filler> ] <listagg count indication> | ||||||
|
||||||
<listagg truncation filler> ::= | ||||||
<character string literal> | ||||||
|
||||||
<listagg count indication> ::= | ||||||
WITH COUNT | WITHOUT COUNT | ||||||
|
||||||
<within group specification> ::= | ||||||
WITHIN GROUP <left paren> ORDER BY <sort specification list> <right paren> | ||||||
|
||||||
Syntax Rules: | ||||||
The legacy LIST syntax is preserved for backward compatibility, LISTAGG is added to cover the | ||||||
standard features. | ||||||
|
||||||
There is a <listagg overflow clause> rule in the standard, which is intended to output an error | ||||||
when the output value overflows. Since the LIST function always returns a BLOB, it was decided | ||||||
that this rule would be meaningless. It was not implemented and silently ignored if specified. | ||||||
|
||||||
If DISTINCT is specified for LISTAGG, then ORDER BY <sort specification list> must fully match | ||||||
<character value expression> | ||||||
|
||||||
Notes: | ||||||
If DISTINCT is specified, the presence of WITHIN GROUP must obey the restriction and will not | ||||||
affect the subsequent code execution. | ||||||
|
||||||
Examples: | ||||||
CREATE TABLE TEST_T | ||||||
(COL1 INT, COL2 VARCHAR(2), COL3 VARCHAR(2), COL4 VARCHAR(2), COL5 BOOLEAN, COL6 VARCHAR(2) | ||||||
CHARACTER SET WIN1251); | ||||||
COMMIT; | ||||||
INSERT INTO TEST_T values(1, 'A', 'A', 'J', false, 'П'); | ||||||
INSERT INTO TEST_T values(2, 'B', 'B', 'I', false, 'Д'); | ||||||
INSERT INTO TEST_T values(3, 'C', 'A', 'L', true, 'Ж'); | ||||||
INSERT INTO TEST_T values(4, 'D', 'B', 'K', true, 'Й'); | ||||||
COMMIT; | ||||||
|
||||||
SELECT LISTAGG (ALL COL4, ':') AS FROM TEST_T; | ||||||
======= | ||||||
J:I:L:K | ||||||
|
||||||
SELECT LISTAGG (DISTINCT COL4, ':') FROM TEST_T; | ||||||
======== | ||||||
I:J:K:L | ||||||
|
||||||
SELECT LISTAGG (DISTINCT COL3, ':') FROM TEST_T; | ||||||
==== | ||||||
A:B | ||||||
|
||||||
SELECT LISTAGG (DISTINCT COL3, ':') WITHIN GROUP (ORDER BY COL2) FROM TEST_T; | ||||||
==== | ||||||
A:B | ||||||
|
||||||
SELECT LISTAGG (DISTINCT COL3, ':') WITHIN GROUP (ORDER BY COL2 DESCENDING) FROM TEST_T; | ||||||
==== | ||||||
A:B | ||||||
|
||||||
SELECT LISTAGG (COL2, ':') WITHIN GROUP (ORDER BY COL2 DESCENDING) FROM TEST_T; | ||||||
======= | ||||||
D:C:B:A | ||||||
|
||||||
SELECT LISTAGG (COL4, ':') WITHIN GROUP (ORDER BY COL3 DESC) FROM TEST_T; | ||||||
======= | ||||||
I:K:J:L | ||||||
|
||||||
SELECT LISTAGG (COL3, ':') WITHIN GROUP (ORDER BY COL5 ASCENDING) FROM TEST_T; | ||||||
======= | ||||||
A:B:A:B | ||||||
|
||||||
SELECT LISTAGG (COL4, ':') WITHIN GROUP (ORDER BY COL3 ASC) FROM TEST_T; | ||||||
======= | ||||||
J:L:I:K | ||||||
|
||||||
SELECT LISTAGG (ALL COL2) WITHIN GROUP (ORDER BY COL4) FROM TEST_T; | ||||||
======= | ||||||
B,A,D,C | ||||||
|
||||||
SELECT LISTAGG (COL2, ':') WITHIN GROUP (ORDER BY COL3 DESC, COL4 ASC) FROM TEST_T; | ||||||
======= | ||||||
B:D:A:C | ||||||
|
||||||
SELECT LISTAGG (COL2, ':') WITHIN GROUP (ORDER BY COL3 DESC, COL4 DESC) FROM TEST_T; | ||||||
======= | ||||||
D:B:C:A | ||||||
|
||||||
SELECT LISTAGG (COL2, ':') WITHIN GROUP (ORDER BY COL3 ASC, COL4 DESC) FROM TEST_T; | ||||||
======= | ||||||
C:A:D:B | ||||||
|
||||||
SELECT LISTAGG (ALL COL6, ':')FROM TEST_T; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
======= | ||||||
П:Д:Ж:Й | ||||||
|
||||||
SELECT LISTAGG (ALL COL6, ':') WITHIN GROUP (ORDER BY COL2 DESC) FROM TEST_T; | ||||||
======= | ||||||
Й:Ж:Д:П | ||||||
|
||||||
SELECT LISTAGG (ALL COL2, ':') WITHIN GROUP (ORDER BY COL6) FROM TEST_T; | ||||||
======= | ||||||
B:C:D:A | ||||||
|
||||||
INSERT INTO TEST_T values(5, 'E', NULL, NULL, NULL, NULL); | ||||||
INSERT INTO TEST_T values(6, 'F', 'C', 'N', true, 'К'); | ||||||
|
||||||
SELECT LISTAGG (ALL COL2, ':') WITHIN GROUP (ORDER BY COL3) FROM TEST_T; | ||||||
=========== | ||||||
E:A:C:B:D:F | ||||||
|
||||||
SELECT LISTAGG (ALL COL2, ':') WITHIN GROUP (ORDER BY COL3 NULLS LAST) FROM TEST_T; | ||||||
=========== | ||||||
A:C:B:D:F:E | ||||||
|
||||||
SELECT LISTAGG (ALL COL2, ':') WITHIN GROUP (ORDER BY COL6 NULLS FIRST) FROM TEST_T; | ||||||
=========== | ||||||
E:B:C:D:F:A | ||||||
|
||||||
SELECT LISTAGG (DISTINCT COL3, ':') WITHIN GROUP (ORDER BY COL2) FROM TEST_T; | ||||||
======== | ||||||
Statement failed, SQLSTATE = 42000 | ||||||
SQL error code = -104 | ||||||
-Invalid command | ||||||
-Sort-key of the ORDER BY specification must match the argument list | ||||||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -124,6 +124,7 @@ string AggNode::internalPrint(NodePrinter& printer) const | |||||||||||||||||
NODE_PRINT(printer, dialect1); | ||||||||||||||||||
NODE_PRINT(printer, arg); | ||||||||||||||||||
NODE_PRINT(printer, asb); | ||||||||||||||||||
NODE_PRINT(printer, sort); | ||||||||||||||||||
NODE_PRINT(printer, indexed); | ||||||||||||||||||
|
||||||||||||||||||
return aggInfo.name; | ||||||||||||||||||
|
@@ -307,7 +308,7 @@ bool AggNode::dsqlMatch(DsqlCompilerScratch* dsqlScratch, const ExprNode* other, | |||||||||||||||||
// ASF: We compare name address. That should be ok, as we have only one AggInfo instance | ||||||||||||||||||
// per function. | ||||||||||||||||||
return aggInfo.blr == o->aggInfo.blr && aggInfo.name == o->aggInfo.name && | ||||||||||||||||||
distinct == o->distinct && dialect1 == o->dialect1; | ||||||||||||||||||
distinct == o->distinct && dialect1 == o->dialect1 && sort == o->sort;; | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does not look correct the comparation of pointer address here for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ChudaykinAlex IMO, the sort node should be added to the list by |
||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
void AggNode::setParameterName(dsql_par* parameter) const | ||||||||||||||||||
|
@@ -352,6 +353,8 @@ AggNode* AggNode::pass2(thread_db* tdbb, CompilerScratch* csb) | |||||||||||||||||
dsc desc; | ||||||||||||||||||
getDesc(tdbb, csb, &desc); | ||||||||||||||||||
impureOffset = csb->allocImpure<impure_value_ex>(); | ||||||||||||||||||
if (sort) | ||||||||||||||||||
doPass2(tdbb, csb, sort.getAddress()); | ||||||||||||||||||
|
||||||||||||||||||
return this; | ||||||||||||||||||
} | ||||||||||||||||||
|
@@ -361,7 +364,7 @@ void AggNode::aggInit(thread_db* tdbb, Request* request) const | |||||||||||||||||
impure_value_ex* impure = request->getImpure<impure_value_ex>(impureOffset); | ||||||||||||||||||
impure->vlux_count = 0; | ||||||||||||||||||
|
||||||||||||||||||
if (distinct) | ||||||||||||||||||
if (distinct || sort) | ||||||||||||||||||
{ | ||||||||||||||||||
// Initialize a sort to reject duplicate values. | ||||||||||||||||||
|
||||||||||||||||||
|
@@ -373,8 +376,8 @@ void AggNode::aggInit(thread_db* tdbb, Request* request) const | |||||||||||||||||
|
||||||||||||||||||
asbImpure->iasb_sort = FB_NEW_POOL(request->req_sorts.getPool()) Sort( | ||||||||||||||||||
tdbb->getDatabase(), &request->req_sorts, asb->length, | ||||||||||||||||||
asb->keyItems.getCount(), 1, asb->keyItems.begin(), | ||||||||||||||||||
RecordSource::rejectDuplicate, 0); | ||||||||||||||||||
asb->keyItems.getCount(), (distinct ? 1 : asb->keyItems.getCount()), | ||||||||||||||||||
asb->keyItems.begin(), (distinct ? RecordSource::rejectDuplicate : nullptr), 0); | ||||||||||||||||||
} | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
|
@@ -427,6 +430,44 @@ bool AggNode::aggPass(thread_db* tdbb, Request* request) const | |||||||||||||||||
ULONG* const pDummy = reinterpret_cast<ULONG*>(data + asb->length - sizeof(ULONG)); | ||||||||||||||||||
*pDummy = asbImpure->iasb_dummy++; | ||||||||||||||||||
|
||||||||||||||||||
return true; | ||||||||||||||||||
} | ||||||||||||||||||
else if (sort) | ||||||||||||||||||
{ | ||||||||||||||||||
fb_assert(asb); | ||||||||||||||||||
// "Put" the value to sort. | ||||||||||||||||||
impure_agg_sort* asbImpure = request->getImpure<impure_agg_sort>(asb->impure); | ||||||||||||||||||
UCHAR* data; | ||||||||||||||||||
asbImpure->iasb_sort->put(tdbb, reinterpret_cast<ULONG**>(&data)); | ||||||||||||||||||
|
||||||||||||||||||
MOVE_CLEAR(data, asb->length); | ||||||||||||||||||
|
||||||||||||||||||
auto descOrder = asb->descOrder.begin(); | ||||||||||||||||||
auto keyItem = asb->keyItems.begin(); | ||||||||||||||||||
|
||||||||||||||||||
for (auto& nodeOrder : sort->expressions) | ||||||||||||||||||
{ | ||||||||||||||||||
dsc toDesc = *(descOrder++); | ||||||||||||||||||
toDesc.dsc_address = data + (IPTR)toDesc.dsc_address; | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||
if (const auto fromDsc = EVL_expr(tdbb, request, nodeOrder)) | ||||||||||||||||||
{ | ||||||||||||||||||
if (IS_INTL_DATA(fromDsc)) | ||||||||||||||||||
INTL_string_to_key(tdbb, INTL_TEXT_TO_INDEX(fromDsc->getTextType()), | ||||||||||||||||||
fromDsc, &toDesc, INTL_KEY_UNIQUE); | ||||||||||||||||||
Comment on lines
+454
to
+456
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||
else | ||||||||||||||||||
MOV_move(tdbb, fromDsc, &toDesc); | ||||||||||||||||||
} | ||||||||||||||||||
else | ||||||||||||||||||
*(data + keyItem->getSkdOffset()) = TRUE; | ||||||||||||||||||
|
||||||||||||||||||
// The first key for NULLS FIRST/LAST, the second key for the sorter | ||||||||||||||||||
keyItem += 2; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
dsc toDesc = asb->desc; | ||||||||||||||||||
toDesc.dsc_address = data + (IPTR)toDesc.dsc_address; | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||
MOV_move(tdbb, desc, &toDesc); | ||||||||||||||||||
|
||||||||||||||||||
return true; | ||||||||||||||||||
} | ||||||||||||||||||
} | ||||||||||||||||||
|
@@ -455,7 +496,7 @@ dsc* AggNode::execute(thread_db* tdbb, Request* request) const | |||||||||||||||||
impure->vlu_blob = NULL; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
if (distinct) | ||||||||||||||||||
if (distinct || sort) | ||||||||||||||||||
{ | ||||||||||||||||||
impure_agg_sort* asbImpure = request->getImpure<impure_agg_sort>(asb->impure); | ||||||||||||||||||
dsc desc = asb->desc; | ||||||||||||||||||
|
@@ -478,7 +519,10 @@ dsc* AggNode::execute(thread_db* tdbb, Request* request) const | |||||||||||||||||
break; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
desc.dsc_address = data + (asb->intl ? asb->keyItems[1].getSkdOffset() : 0); | ||||||||||||||||||
if (distinct) | ||||||||||||||||||
desc.dsc_address = data + (asb->intl ? asb->keyItems[1].getSkdOffset() : 0); | ||||||||||||||||||
else | ||||||||||||||||||
desc.dsc_address = data + (IPTR)asb->desc.dsc_address; | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||
|
||||||||||||||||||
aggPass(tdbb, request, &desc); | ||||||||||||||||||
} | ||||||||||||||||||
|
@@ -877,18 +921,19 @@ AggNode* AvgAggNode::dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/ | |||||||||||||||||
static AggNode::Register<ListAggNode> listAggInfo("LIST", blr_agg_list, blr_agg_list_distinct); | ||||||||||||||||||
|
||||||||||||||||||
ListAggNode::ListAggNode(MemoryPool& pool, bool aDistinct, ValueExprNode* aArg, | ||||||||||||||||||
ValueExprNode* aDelimiter) | ||||||||||||||||||
ValueExprNode* aDelimiter, ValueListNode* aOrderClause) | ||||||||||||||||||
: AggNode(pool, listAggInfo, aDistinct, false, aArg), | ||||||||||||||||||
delimiter(aDelimiter) | ||||||||||||||||||
delimiter(aDelimiter), | ||||||||||||||||||
dsqlOrderClause(aOrderClause) | ||||||||||||||||||
{ | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
DmlNode* ListAggNode::parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR blrOp) | ||||||||||||||||||
{ | ||||||||||||||||||
ListAggNode* node = FB_NEW_POOL(pool) ListAggNode(pool, | ||||||||||||||||||
(blrOp == blr_agg_list_distinct)); | ||||||||||||||||||
ListAggNode* node = FB_NEW_POOL(pool) ListAggNode(pool, (blrOp == blr_agg_list_distinct)); | ||||||||||||||||||
node->arg = PAR_parse_value(tdbb, csb); | ||||||||||||||||||
node->delimiter = PAR_parse_value(tdbb, csb); | ||||||||||||||||||
node->sort = PAR_sort(tdbb, csb, blr_sort, true); | ||||||||||||||||||
return node; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
|
@@ -899,6 +944,26 @@ void ListAggNode::make(DsqlCompilerScratch* dsqlScratch, dsc* desc) | |||||||||||||||||
desc->setNullable(true); | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
void ListAggNode::genBlr(DsqlCompilerScratch* dsqlScratch) | ||||||||||||||||||
{ | ||||||||||||||||||
AggNode::genBlr(dsqlScratch); | ||||||||||||||||||
GEN_sort(dsqlScratch, blr_sort, dsqlOrderClause); | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
AggNode* ListAggNode::pass1(thread_db* tdbb, CompilerScratch* csb) | ||||||||||||||||||
{ | ||||||||||||||||||
if (sort && distinct) | ||||||||||||||||||
{ | ||||||||||||||||||
ValueExprNode* const sortNode = *sort->expressions.begin(); | ||||||||||||||||||
if (!arg->sameAs(sortNode, false) || sort->expressions.getCount() > 1) | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should they be identical? Why? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point. Currently, we sort only once and this is a good bonus. If we follow your suggestion and allow slightly different expressions, then we should either sort twice or ignore the user-specified ordering after There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why direction and null placement are important for DISTINCT? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nulls placement is not important, I agree, as NULLs are skipped by all aggregate functions. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. By standard, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It shouldn't but I don't quite understand why you said
BTW, IMHO, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, sorry, my bad. Surely, direction for the combined sort should be taken from There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
With my suggestion to use same existing rules, the DISTINCT LISTAGG expression may be something like |
||||||||||||||||||
{ | ||||||||||||||||||
ERR_post(Arg::Gds(isc_sqlerr) << Arg::Num(-104) << Arg::Gds(isc_dsql_command_err) | ||||||||||||||||||
<< Arg::Gds(isc_distinct_order_by_err)); | ||||||||||||||||||
} | ||||||||||||||||||
} | ||||||||||||||||||
return AggNode::pass1(tdbb, csb); | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
bool ListAggNode::setParameterType(DsqlCompilerScratch* dsqlScratch, | ||||||||||||||||||
std::function<void (dsc*)> makeDesc, bool forceVarChar) | ||||||||||||||||||
{ | ||||||||||||||||||
|
@@ -920,6 +985,7 @@ ValueExprNode* ListAggNode::copy(thread_db* tdbb, NodeCopier& copier) const | |||||||||||||||||
node->nodScale = nodScale; | ||||||||||||||||||
node->arg = copier.copy(tdbb, arg); | ||||||||||||||||||
node->delimiter = copier.copy(tdbb, delimiter); | ||||||||||||||||||
node->sort = sort->copy(tdbb, copier); | ||||||||||||||||||
return node; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
|
@@ -985,7 +1051,7 @@ dsc* ListAggNode::aggExecute(thread_db* tdbb, Request* request) const | |||||||||||||||||
{ | ||||||||||||||||||
impure_value_ex* impure = request->getImpure<impure_value_ex>(impureOffset); | ||||||||||||||||||
|
||||||||||||||||||
if (distinct) | ||||||||||||||||||
if (distinct || sort) | ||||||||||||||||||
{ | ||||||||||||||||||
if (impure->vlu_blob) | ||||||||||||||||||
{ | ||||||||||||||||||
|
@@ -1005,7 +1071,8 @@ AggNode* ListAggNode::dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/ | |||||||||||||||||
thread_db* tdbb = JRD_get_thread_data(); | ||||||||||||||||||
|
||||||||||||||||||
AggNode* node = FB_NEW_POOL(dsqlScratch->getPool()) ListAggNode(dsqlScratch->getPool(), distinct, | ||||||||||||||||||
doDsqlPass(dsqlScratch, arg), doDsqlPass(dsqlScratch, delimiter)); | ||||||||||||||||||
doDsqlPass(dsqlScratch, arg), doDsqlPass(dsqlScratch, delimiter), | ||||||||||||||||||
doDsqlPass(dsqlScratch, dsqlOrderClause)); | ||||||||||||||||||
|
||||||||||||||||||
dsc argDesc; | ||||||||||||||||||
node->arg->make(dsqlScratch, &argDesc); | ||||||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the syntax format section,
<within group specification>
is mandatory but does not exist in some examples.