diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index d9c0772845f..e82a17e90c1 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -1002,12 +1002,60 @@ private Pair, List> aggregateWithTrimming( Pair, List> reResolved = resolveAttributesForAggregation(groupExprList, aggExprList, context); + List intendedGroupKeyAliases = getGroupKeyNamesAfterAggregation(reResolved.getLeft()); context.relBuilder.aggregate( context.relBuilder.groupKey(reResolved.getLeft()), reResolved.getRight()); + // During aggregation, Calcite projects both input dependencies and output group-by fields. + // When names conflict, Calcite adds numeric suffixes (e.g., "value0"). + // Apply explicit renaming to restore the intended aliases. + context.relBuilder.rename(intendedGroupKeyAliases); return Pair.of(reResolved.getLeft(), reResolved.getRight()); } + /** + * Imitates {@code Registrar.registerExpression} of {@link RelBuilder} to derive the output order + * of group-by keys after aggregation. + * + *

The projected input reference comes first, while any other computed expression follows. + */ + private List getGroupKeyNamesAfterAggregation(List nodes) { + List reordered = new ArrayList<>(); + List left = new ArrayList<>(); + for (RexNode n : nodes) { + // The same group-key won't be added twice + if (reordered.contains(n) || left.contains(n)) { + continue; + } + if (isInputRef(n)) { + reordered.add(n); + } else { + left.add(n); + } + } + reordered.addAll(left); + return reordered.stream() + .map(this::extractAliasLiteral) + .flatMap(Optional::stream) + .map(RexLiteral::stringValue) + .collect(Collectors.toList()); + } + + /** Whether a rex node is an aliased input reference */ + private boolean isInputRef(RexNode node) { + switch (node.getKind()) { + case AS: + case DESCENDING: + case NULLS_FIRST: + case NULLS_LAST: { + final List operands = ((RexCall) node).operands; + return isInputRef(operands.get(0)); + } + default: + return node instanceof RexInputRef; + } + } + /** * Resolve attributes for aggregation. * @@ -1106,7 +1154,7 @@ public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { aggregationAttributes.getLeft().stream() .map(this::extractAliasLiteral) .flatMap(Optional::stream) - .map(ref -> ((RexLiteral) ref).getValueAs(String.class)) + .map(ref -> ref.getValueAs(String.class)) .map(context.relBuilder::field) .map(f -> (RexNode) f) .collect(Collectors.toList()); diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4580.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4580.yml new file mode 100644 index 00000000000..e3ebfe249e8 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4580.yml @@ -0,0 +1,98 @@ +setup: + - do: + indices.create: + index: time_test + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : true + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "time_test"}}' + - '{"category":"A","value":1000,"@timestamp":"2024-01-01T00:00:00Z"}' + - '{"index": {"_index": "time_test"}}' + - '{"category":"B","value":2000,"@timestamp":"2024-01-01T00:05:00Z"}' + - '{"index": {"_index": "time_test"}}' + - '{"category":"A","value":1500,"@timestamp":"2024-01-01T00:10:00Z"}' + - '{"index": {"_index": "time_test"}}' + - '{"category":"C","value":3000,"@timestamp":"2024-01-01T00:20:00Z"}' + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : false + +--- +"Test span aggregation with field name collision - basic case": + - skip: + features: + - headers + - allowed_warnings + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=time_test | stats count() by span(value, 1000) as value + + - match: { total: 3 } + - match: { schema: [{"name": "count()", "type": "bigint"}, {"name": "value", "type": "bigint"}] } + - match: { datarows: [[2, 1000], [1, 2000], [1, 3000]] } + +--- +"Test span aggregation with field name collision - multiple aggregations": + - skip: + features: + - headers + - allowed_warnings + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=time_test | stats count(), avg(value) by span(value, 1000) as value + + - match: { total: 3 } + - match: { schema: [{"name": "count()", "type": "bigint"}, {"name": "avg(value)", "type": "double"}, {"name": "value", "type": "bigint"}] } + - match: { datarows: [[2, 1250.0, 1000], [1, 2000.0, 2000], [1, 3000.0, 3000]] } + +--- +"Test span aggregation without name collision - multiple group-by": + - skip: + features: + - headers + - allowed_warnings + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=time_test | stats count() by span(@timestamp, 10min) as @timestamp, category, value + + - match: { total: 4 } + - match: { schema: [{"name": "count()", "type": "bigint"}, {"name": "@timestamp", "type": "timestamp"}, {"name": "category", "type": "string"}, {"name": "value", "type": "bigint"}] } + - match: { datarows: [[1, "2024-01-01 00:00:00", "A", 1000], [1, "2024-01-01 00:10:00", "A", 1500], [1, "2024-01-01 00:00:00", "B", 2000], [1, "2024-01-01 00:20:00", "C", 3000]] } + +--- +"Test span aggregation with duplicated group keys": + - skip: + features: + - headers + - allowed_warnings + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=time_test | stats count() by value, value, span(@timestamp, 10min) as @timestamp + + - match: { total: 4 } + - match: { schema: [{"name": "count()", "type": "bigint"}, {"name": "@timestamp", "type": "timestamp"}, {"name": "value", "type": "bigint"}, {"name": "value0", "type": "bigint"}] } + - match: { datarows: [[1, "2024-01-01 00:00:00", 1000, 1000], [1, "2024-01-01 00:10:00", 1500, 1500], [1, "2024-01-01 00:00:00", 2000, 2000], [1, "2024-01-01 00:20:00", 3000, 3000]] }