Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions doc/sql.extensions/README.listagg
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
SQL Language Extension: LISTAGG

Function:
The current implementation has an aggregate function LIST which concatenates multiple row
fields into a blob. The SQL standard has a similar function called LISTAGG. The major
difference is that it also supports the ordered concatenation.

Authors:
Chudaykin Alex <[email protected]>

Format:
<listagg set function> ::=
LISTAGG <left paren> [ <set quantifier> ] <character value expression> <comma> <listagg separator> [ <listagg overflow clause> ] <right paren> <within group specification>

<listagg separator> ::=
<character string literal>

<listagg overflow clause> ::=
ON OVERFLOW <overflow behavior>

<overflow behavior> ::=
ERROR | TRUNCATE [ <listagg truncation filler> ] <listagg count indication>

<listagg truncation filler> ::=
<character string literal>

<listagg count indication> ::=
WITH COUNT | WITHOUT COUNT

<within group specification> ::=
WITHIN GROUP <left paren> ORDER BY <sort specification list> <right paren>

Syntax Rules:
The legacy LIST syntax is preserved for backward compatibility, LISTAGG is added to cover the
standard features.

There is a <listagg overflow clause> rule in the standard, which is intended to output an error
when the output value overflows. Since the LIST function always returns a BLOB, it was decided
that this rule would be meaningless. It was not implemented and silently ignored if specified.

If DISTINCT is specified for LISTAGG, then ORDER BY <sort specification list> must fully match
<character value expression>

Notes:
If DISTINCT is specified, the presence of WITHIN GROUP must obey the restriction and will not
affect the subsequent code execution.

Examples:
CREATE TABLE TEST_T
(COL1 INT, COL2 VARCHAR(2), COL3 VARCHAR(2), COL4 VARCHAR(2), COL5 BOOLEAN, COL6 VARCHAR(2)
CHARACTER SET WIN1251);
COMMIT;
INSERT INTO TEST_T values(1, 'A', 'A', 'J', false, 'П');
INSERT INTO TEST_T values(2, 'B', 'B', 'I', false, 'Д');
INSERT INTO TEST_T values(3, 'C', 'A', 'L', true, 'Ж');
INSERT INTO TEST_T values(4, 'D', 'B', 'K', true, 'Й');
COMMIT;

SELECT LISTAGG (ALL COL4, ':') AS FROM TEST_T;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the syntax format section, <within group specification> is mandatory but does not exist in some examples.

=======
J:I:L:K

SELECT LISTAGG (DISTINCT COL4, ':') FROM TEST_T;
========
I:J:K:L

SELECT LISTAGG (DISTINCT COL3, ':') FROM TEST_T;
====
A:B

SELECT LISTAGG (DISTINCT COL3, ':') WITHIN GROUP (ORDER BY COL2) FROM TEST_T;
====
A:B

SELECT LISTAGG (DISTINCT COL3, ':') WITHIN GROUP (ORDER BY COL2 DESCENDING) FROM TEST_T;
====
A:B

SELECT LISTAGG (COL2, ':') WITHIN GROUP (ORDER BY COL2 DESCENDING) FROM TEST_T;
=======
D:C:B:A

SELECT LISTAGG (COL4, ':') WITHIN GROUP (ORDER BY COL3 DESC) FROM TEST_T;
=======
I:K:J:L

SELECT LISTAGG (COL3, ':') WITHIN GROUP (ORDER BY COL5 ASCENDING) FROM TEST_T;
=======
A:B:A:B

SELECT LISTAGG (COL4, ':') WITHIN GROUP (ORDER BY COL3 ASC) FROM TEST_T;
=======
J:L:I:K

SELECT LISTAGG (ALL COL2) WITHIN GROUP (ORDER BY COL4) FROM TEST_T;
=======
B,A,D,C

SELECT LISTAGG (COL2, ':') WITHIN GROUP (ORDER BY COL3 DESC, COL4 ASC) FROM TEST_T;
=======
B:D:A:C

SELECT LISTAGG (COL2, ':') WITHIN GROUP (ORDER BY COL3 DESC, COL4 DESC) FROM TEST_T;
=======
D:B:C:A

SELECT LISTAGG (COL2, ':') WITHIN GROUP (ORDER BY COL3 ASC, COL4 DESC) FROM TEST_T;
=======
C:A:D:B

SELECT LISTAGG (ALL COL6, ':')FROM TEST_T;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
SELECT LISTAGG (ALL COL6, ':')FROM TEST_T;
SELECT LISTAGG (ALL COL6, ':') FROM TEST_T;

=======
П:Д:Ж:Й

SELECT LISTAGG (ALL COL6, ':') WITHIN GROUP (ORDER BY COL2 DESC) FROM TEST_T;
=======
Й:Ж:Д:П

SELECT LISTAGG (ALL COL2, ':') WITHIN GROUP (ORDER BY COL6) FROM TEST_T;
=======
B:C:D:A

INSERT INTO TEST_T values(5, 'E', NULL, NULL, NULL, NULL);
INSERT INTO TEST_T values(6, 'F', 'C', 'N', true, 'К');

SELECT LISTAGG (ALL COL2, ':') WITHIN GROUP (ORDER BY COL3) FROM TEST_T;
===========
E:A:C:B:D:F

SELECT LISTAGG (ALL COL2, ':') WITHIN GROUP (ORDER BY COL3 NULLS LAST) FROM TEST_T;
===========
A:C:B:D:F:E

SELECT LISTAGG (ALL COL2, ':') WITHIN GROUP (ORDER BY COL6 NULLS FIRST) FROM TEST_T;
===========
E:B:C:D:F:A

SELECT LISTAGG (DISTINCT COL3, ':') WITHIN GROUP (ORDER BY COL2) FROM TEST_T;
========
Statement failed, SQLSTATE = 42000
SQL error code = -104
-Invalid command
-Sort-key of the ORDER BY specification must match the argument list

2 changes: 2 additions & 0 deletions src/common/ParserTokens.h
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ PARSER_TOKEN(TOK_LIKE, "LIKE", false)
PARSER_TOKEN(TOK_LIMBO, "LIMBO", true)
PARSER_TOKEN(TOK_LINGER, "LINGER", true)
PARSER_TOKEN(TOK_LIST, "LIST", true)
PARSER_TOKEN(TOK_LISTAGG, "LISTAGG", true)
PARSER_TOKEN(TOK_LN, "LN", true)
PARSER_TOKEN(TOK_LATERAL, "LATERAL", false)
PARSER_TOKEN(TOK_LOCAL, "LOCAL", false)
Expand Down Expand Up @@ -555,6 +556,7 @@ PARSER_TOKEN(TOK_WHERE, "WHERE", false)
PARSER_TOKEN(TOK_WHILE, "WHILE", false)
PARSER_TOKEN(TOK_WINDOW, "WINDOW", false)
PARSER_TOKEN(TOK_WITH, "WITH", false)
PARSER_TOKEN(TOK_WITHIN, "WITHIN", false)
PARSER_TOKEN(TOK_WITHOUT, "WITHOUT", false)
PARSER_TOKEN(TOK_WORK, "WORK", true)
PARSER_TOKEN(TOK_WRITE, "WRITE", true)
Expand Down
91 changes: 79 additions & 12 deletions src/dsql/AggNodes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ string AggNode::internalPrint(NodePrinter& printer) const
NODE_PRINT(printer, dialect1);
NODE_PRINT(printer, arg);
NODE_PRINT(printer, asb);
NODE_PRINT(printer, sort);
NODE_PRINT(printer, indexed);

return aggInfo.name;
Expand Down Expand Up @@ -307,7 +308,7 @@ bool AggNode::dsqlMatch(DsqlCompilerScratch* dsqlScratch, const ExprNode* other,
// ASF: We compare name address. That should be ok, as we have only one AggInfo instance
// per function.
return aggInfo.blr == o->aggInfo.blr && aggInfo.name == o->aggInfo.name &&
distinct == o->distinct && dialect1 == o->dialect1;
distinct == o->distinct && dialect1 == o->dialect1 && sort == o->sort;;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does not look correct the comparation of pointer address here for sort.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ChudaykinAlex IMO, the sort node should be added to the list by ListAggNode::getChildren() -- this way you may remove the sort comparison in dsqlMatch() and also remove doPass2(sort) in pass2(), as the sort node will be processed by the inherited methods automagically.

}

void AggNode::setParameterName(dsql_par* parameter) const
Expand Down Expand Up @@ -352,6 +353,8 @@ AggNode* AggNode::pass2(thread_db* tdbb, CompilerScratch* csb)
dsc desc;
getDesc(tdbb, csb, &desc);
impureOffset = csb->allocImpure<impure_value_ex>();
if (sort)
doPass2(tdbb, csb, sort.getAddress());

return this;
}
Expand All @@ -361,7 +364,7 @@ void AggNode::aggInit(thread_db* tdbb, Request* request) const
impure_value_ex* impure = request->getImpure<impure_value_ex>(impureOffset);
impure->vlux_count = 0;

if (distinct)
if (distinct || sort)
{
// Initialize a sort to reject duplicate values.

Expand All @@ -373,8 +376,8 @@ void AggNode::aggInit(thread_db* tdbb, Request* request) const

asbImpure->iasb_sort = FB_NEW_POOL(request->req_sorts.getPool()) Sort(
tdbb->getDatabase(), &request->req_sorts, asb->length,
asb->keyItems.getCount(), 1, asb->keyItems.begin(),
RecordSource::rejectDuplicate, 0);
asb->keyItems.getCount(), (distinct ? 1 : asb->keyItems.getCount()),
asb->keyItems.begin(), (distinct ? RecordSource::rejectDuplicate : nullptr), 0);
}
}

Expand Down Expand Up @@ -427,6 +430,44 @@ bool AggNode::aggPass(thread_db* tdbb, Request* request) const
ULONG* const pDummy = reinterpret_cast<ULONG*>(data + asb->length - sizeof(ULONG));
*pDummy = asbImpure->iasb_dummy++;

return true;
}
else if (sort)
{
fb_assert(asb);
// "Put" the value to sort.
impure_agg_sort* asbImpure = request->getImpure<impure_agg_sort>(asb->impure);
UCHAR* data;
asbImpure->iasb_sort->put(tdbb, reinterpret_cast<ULONG**>(&data));

MOVE_CLEAR(data, asb->length);

auto descOrder = asb->descOrder.begin();
auto keyItem = asb->keyItems.begin();

for (auto& nodeOrder : sort->expressions)
{
dsc toDesc = *(descOrder++);
toDesc.dsc_address = data + (IPTR)toDesc.dsc_address;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
toDesc.dsc_address = data + (IPTR)toDesc.dsc_address;
toDesc.dsc_address = data + (IPTR) toDesc.dsc_address;

if (const auto fromDsc = EVL_expr(tdbb, request, nodeOrder))
{
if (IS_INTL_DATA(fromDsc))
INTL_string_to_key(tdbb, INTL_TEXT_TO_INDEX(fromDsc->getTextType()),
fromDsc, &toDesc, INTL_KEY_UNIQUE);
Comment on lines +454 to +456
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (IS_INTL_DATA(fromDsc))
INTL_string_to_key(tdbb, INTL_TEXT_TO_INDEX(fromDsc->getTextType()),
fromDsc, &toDesc, INTL_KEY_UNIQUE);
if (IS_INTL_DATA(fromDsc))
{
INTL_string_to_key(tdbb, INTL_TEXT_TO_INDEX(fromDsc->getTextType()),
fromDsc, &toDesc, INTL_KEY_UNIQUE);
}

else
MOV_move(tdbb, fromDsc, &toDesc);
}
else
*(data + keyItem->getSkdOffset()) = TRUE;

// The first key for NULLS FIRST/LAST, the second key for the sorter
keyItem += 2;
}

dsc toDesc = asb->desc;
toDesc.dsc_address = data + (IPTR)toDesc.dsc_address;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
toDesc.dsc_address = data + (IPTR)toDesc.dsc_address;
toDesc.dsc_address = data + (IPTR) toDesc.dsc_address;

MOV_move(tdbb, desc, &toDesc);

return true;
}
}
Expand Down Expand Up @@ -455,7 +496,7 @@ dsc* AggNode::execute(thread_db* tdbb, Request* request) const
impure->vlu_blob = NULL;
}

if (distinct)
if (distinct || sort)
{
impure_agg_sort* asbImpure = request->getImpure<impure_agg_sort>(asb->impure);
dsc desc = asb->desc;
Expand All @@ -478,7 +519,10 @@ dsc* AggNode::execute(thread_db* tdbb, Request* request) const
break;
}

desc.dsc_address = data + (asb->intl ? asb->keyItems[1].getSkdOffset() : 0);
if (distinct)
desc.dsc_address = data + (asb->intl ? asb->keyItems[1].getSkdOffset() : 0);
else
desc.dsc_address = data + (IPTR)asb->desc.dsc_address;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
desc.dsc_address = data + (IPTR)asb->desc.dsc_address;
desc.dsc_address = data + (IPTR) asb->desc.dsc_address;


aggPass(tdbb, request, &desc);
}
Expand Down Expand Up @@ -877,18 +921,19 @@ AggNode* AvgAggNode::dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/
static AggNode::Register<ListAggNode> listAggInfo("LIST", blr_agg_list, blr_agg_list_distinct);

ListAggNode::ListAggNode(MemoryPool& pool, bool aDistinct, ValueExprNode* aArg,
ValueExprNode* aDelimiter)
ValueExprNode* aDelimiter, ValueListNode* aOrderClause)
: AggNode(pool, listAggInfo, aDistinct, false, aArg),
delimiter(aDelimiter)
delimiter(aDelimiter),
dsqlOrderClause(aOrderClause)
{
}

DmlNode* ListAggNode::parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR blrOp)
{
ListAggNode* node = FB_NEW_POOL(pool) ListAggNode(pool,
(blrOp == blr_agg_list_distinct));
ListAggNode* node = FB_NEW_POOL(pool) ListAggNode(pool, (blrOp == blr_agg_list_distinct));
node->arg = PAR_parse_value(tdbb, csb);
node->delimiter = PAR_parse_value(tdbb, csb);
node->sort = PAR_sort(tdbb, csb, blr_sort, true);
return node;
}

Expand All @@ -899,6 +944,26 @@ void ListAggNode::make(DsqlCompilerScratch* dsqlScratch, dsc* desc)
desc->setNullable(true);
}

void ListAggNode::genBlr(DsqlCompilerScratch* dsqlScratch)
{
AggNode::genBlr(dsqlScratch);
GEN_sort(dsqlScratch, blr_sort, dsqlOrderClause);
}

AggNode* ListAggNode::pass1(thread_db* tdbb, CompilerScratch* csb)
{
if (sort && distinct)
{
ValueExprNode* const sortNode = *sort->expressions.begin();
if (!arg->sameAs(sortNode, false) || sort->expressions.getCount() > 1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should they be identical? Why?
I think it should have like GROUP BY rules.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. Currently, we sort only once and this is a good bonus. If we follow your suggestion and allow slightly different expressions, then we should either sort twice or ignore the user-specified ordering after DISTINCT. BTW, in this PR it also seems to be ignored -- LISTAGG(DISTINCT COL) WITHIN GROUP (ORDER BY COL DESC) would produce ASC-ordered output. But I suppose it should work the same way as for plain SELECT DISTINCT(COL) FROM T ORDER BY COL DESC, i.e. respect the ORDER BY ordering and optimize the sorts (merge two sorts into one) only if they fully (expressions / directions / NULLs placement) match each other.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why direction and null placement are important for DISTINCT?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nulls placement is not important, I agree, as NULLs are skipped by all aggregate functions.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By standard, DISTINCT eliminates duplicates in the ordered (if specified) result set, it should not change the user-defined ordering. Why do you think LISTAGG should behave differently?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It shouldn't but I don't quite understand why you said

i.e. respect the ORDER BY ordering and optimize the sorts (merge two sorts into one) only if they fully (expressions / directions / NULLs placement) match each other.

BTW, IMHO, sort->expressions.getCount() > 1 condition here is not needed as it is fine for distinct to use only first sorting segment.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, sorry, my bad. Surely, direction for the combined sort should be taken from ORDER BY -- like we already do in Optimizer::checkSorts().

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nulls placement is not important, I agree, as NULLs are skipped by all aggregate functions.

With my suggestion to use same existing rules, the DISTINCT LISTAGG expression may be something like COALESCE(field, 'z') with a ORDER BY Z`.

{
ERR_post(Arg::Gds(isc_sqlerr) << Arg::Num(-104) << Arg::Gds(isc_dsql_command_err)
<< Arg::Gds(isc_distinct_order_by_err));
}
}
return AggNode::pass1(tdbb, csb);
}

bool ListAggNode::setParameterType(DsqlCompilerScratch* dsqlScratch,
std::function<void (dsc*)> makeDesc, bool forceVarChar)
{
Expand All @@ -920,6 +985,7 @@ ValueExprNode* ListAggNode::copy(thread_db* tdbb, NodeCopier& copier) const
node->nodScale = nodScale;
node->arg = copier.copy(tdbb, arg);
node->delimiter = copier.copy(tdbb, delimiter);
node->sort = sort->copy(tdbb, copier);
return node;
}

Expand Down Expand Up @@ -985,7 +1051,7 @@ dsc* ListAggNode::aggExecute(thread_db* tdbb, Request* request) const
{
impure_value_ex* impure = request->getImpure<impure_value_ex>(impureOffset);

if (distinct)
if (distinct || sort)
{
if (impure->vlu_blob)
{
Expand All @@ -1005,7 +1071,8 @@ AggNode* ListAggNode::dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/
thread_db* tdbb = JRD_get_thread_data();

AggNode* node = FB_NEW_POOL(dsqlScratch->getPool()) ListAggNode(dsqlScratch->getPool(), distinct,
doDsqlPass(dsqlScratch, arg), doDsqlPass(dsqlScratch, delimiter));
doDsqlPass(dsqlScratch, arg), doDsqlPass(dsqlScratch, delimiter),
doDsqlPass(dsqlScratch, dsqlOrderClause));

dsc argDesc;
node->arg->make(dsqlScratch, &argDesc);
Expand Down
7 changes: 5 additions & 2 deletions src/dsql/AggNodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@ class AvgAggNode final : public AggNode
class ListAggNode final : public AggNode
{
public:
explicit ListAggNode(MemoryPool& pool, bool aDistinct, ValueExprNode* aArg = NULL,
ValueExprNode* aDelimiter = NULL);
explicit ListAggNode(MemoryPool& pool, bool aDistinct, ValueExprNode* aArg = nullptr,
ValueExprNode* aDelimiter = nullptr, ValueListNode* aOrderClause = nullptr);

static DmlNode* parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR blrOp);

Expand All @@ -113,6 +113,8 @@ class ListAggNode final : public AggNode

virtual Firebird::string internalPrint(NodePrinter& printer) const;
virtual void make(DsqlCompilerScratch* dsqlScratch, dsc* desc);
virtual void genBlr(DsqlCompilerScratch* dsqlScratch) final;
virtual AggNode* pass1(thread_db* tdbb, CompilerScratch* csb) final;
virtual bool setParameterType(DsqlCompilerScratch* dsqlScratch,
std::function<void (dsc*)> makeDesc, bool forceVarChar);
virtual void getDesc(thread_db* tdbb, CompilerScratch* csb, dsc* desc);
Expand All @@ -127,6 +129,7 @@ class ListAggNode final : public AggNode

private:
NestConst<ValueExprNode> delimiter;
NestConst<ValueListNode> dsqlOrderClause;
};

class CountAggNode final : public AggNode
Expand Down
2 changes: 2 additions & 0 deletions src/dsql/Nodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class RseNode;
class SlidingWindow;
class TypeClause;
class ValueExprNode;
class SortNode;


// Must be less then MAX_SSHORT. Not used for static arrays.
Expand Down Expand Up @@ -1113,6 +1114,7 @@ class AggNode : public TypedNode<ValueExprNode, ExprNode::TYPE_AGGREGATE>
const AggInfo& aggInfo;
NestConst<ValueExprNode> arg;
const AggregateSort* asb;
NestConst<SortNode> sort;
bool distinct;
bool dialect1;
bool indexed;
Expand Down
Loading
Loading