Skip to content

Commit 26701d2

Browse files
authored
[Backport 2.19-dev] Support 'usenull' option in PPL top and rare commands (#4696) (#4702)
* Support 'usenull' option in PPL `top` and `rare` commands (#4696) * Support 'usenull' option in PPL top and rare commands Signed-off-by: Lantao Jin <[email protected]> * fix the incorrect naming Signed-off-by: Lantao Jin <[email protected]> --------- Signed-off-by: Lantao Jin <[email protected]> (cherry picked from commit c6a5fb9) * fix compile error Signed-off-by: Lantao Jin <[email protected]> * fix compile error Signed-off-by: Lantao Jin <[email protected]> --------- Signed-off-by: Lantao Jin <[email protected]>
1 parent a531b6d commit 26701d2

File tree

28 files changed

+769
-146
lines changed

28 files changed

+769
-146
lines changed

core/src/main/java/org/opensearch/sql/analysis/Analyzer.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -381,8 +381,7 @@ public LogicalPlan visitRareTopN(RareTopN node, AnalysisContext context) {
381381
fields.forEach(
382382
field -> newEnv.define(new Symbol(Namespace.FIELD_NAME, field.toString()), field.type()));
383383

384-
List<Argument> options = node.getArguments();
385-
Integer noOfResults = (Integer) options.get(0).getValue().getValue();
384+
Integer noOfResults = node.getNoOfResults();
386385

387386
return new LogicalRareTopN(child, node.getCommandType(), noOfResults, fields, groupBys);
388387
}

core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -540,8 +540,16 @@ public static RareTopN rareTopN(
540540
List<Argument> noOfResults,
541541
List<UnresolvedExpression> groupList,
542542
Field... fields) {
543-
return new RareTopN(input, commandType, noOfResults, Arrays.asList(fields), groupList)
544-
.attach(input);
543+
Integer N =
544+
(Integer)
545+
Argument.ArgumentMap.of(noOfResults)
546+
.getOrDefault("noOfResults", new Literal(10, DataType.INTEGER))
547+
.getValue();
548+
List<Argument> removed =
549+
noOfResults.stream()
550+
.filter(argument -> !argument.getArgName().equals("noOfResults"))
551+
.collect(Collectors.toList());
552+
return new RareTopN(commandType, N, removed, Arrays.asList(fields), groupList).attach(input);
545553
}
546554

547555
public static Limit limit(UnresolvedPlan input, Integer limit, Integer offset) {

core/src/main/java/org/opensearch/sql/ast/expression/Argument.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ public <R, C> R accept(AbstractNodeVisitor<R, C> nodeVisitor, C context) {
3737
}
3838

3939
/** ArgumentMap is a helper class to get argument value by name. */
40+
@EqualsAndHashCode
41+
@ToString
4042
public static class ArgumentMap {
4143
private final Map<String, Literal> map;
4244

core/src/main/java/org/opensearch/sql/ast/tree/RareTopN.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
import com.google.common.collect.ImmutableList;
99
import java.util.List;
10-
import lombok.AllArgsConstructor;
1110
import lombok.EqualsAndHashCode;
1211
import lombok.Getter;
1312
import lombok.RequiredArgsConstructor;
@@ -24,12 +23,11 @@
2423
@ToString
2524
@EqualsAndHashCode(callSuper = false)
2625
@RequiredArgsConstructor
27-
@AllArgsConstructor
2826
public class RareTopN extends UnresolvedPlan {
2927

3028
private UnresolvedPlan child;
3129
private final CommandType commandType;
32-
// arguments: noOfResults: Integer, countField: String, showCount: Boolean
30+
private final Integer noOfResults;
3331
private final List<Argument> arguments;
3432
private final List<Field> fields;
3533
private final List<UnresolvedExpression> groupExprList;
@@ -54,4 +52,10 @@ public enum CommandType {
5452
TOP,
5553
RARE
5654
}
55+
56+
public enum Option {
57+
countField,
58+
showCount,
59+
useNull,
60+
}
5761
}

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 51 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@
1515
import static org.opensearch.sql.ast.tree.Sort.SortOrder.ASC;
1616
import static org.opensearch.sql.ast.tree.Sort.SortOrder.DESC;
1717
import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_DEDUP;
18-
import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_NAME;
1918
import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_NAME_MAIN;
2019
import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_NAME_SUBSEARCH;
20+
import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_NAME_TOP_RARE;
2121
import static org.opensearch.sql.calcite.utils.PlanUtils.getRelation;
2222
import static org.opensearch.sql.calcite.utils.PlanUtils.getRexCall;
2323
import static org.opensearch.sql.calcite.utils.PlanUtils.transformPlanToAttachChild;
@@ -1134,22 +1134,7 @@ public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) {
11341134
Pair<List<RexNode>, List<AggCall>> aggregationAttributes =
11351135
aggregateWithTrimming(groupExprList, aggExprList, context);
11361136
if (toAddHintsOnAggregate) {
1137-
final RelHint statHits =
1138-
RelHint.builder("stats_args").hintOption(Argument.BUCKET_NULLABLE, "false").build();
1139-
assert context.relBuilder.peek() instanceof LogicalAggregate
1140-
: "Stats hits should be added to LogicalAggregate";
1141-
context.relBuilder.hints(statHits);
1142-
context
1143-
.relBuilder
1144-
.getCluster()
1145-
.setHintStrategies(
1146-
HintStrategyTable.builder()
1147-
.hintStrategy(
1148-
"stats_args",
1149-
(hint, rel) -> {
1150-
return rel instanceof LogicalAggregate;
1151-
})
1152-
.build());
1137+
addIgnoreNullBucketHintToAggregate(context);
11531138
}
11541139

11551140
// schema reordering
@@ -1869,9 +1854,8 @@ public RelNode visitKmeans(Kmeans node, CalcitePlanContext context) {
18691854
@Override
18701855
public RelNode visitRareTopN(RareTopN node, CalcitePlanContext context) {
18711856
visitChildren(node, context);
1872-
1873-
ArgumentMap arguments = ArgumentMap.of(node.getArguments());
1874-
String countFieldName = (String) arguments.get("countField").getValue();
1857+
ArgumentMap argumentMap = ArgumentMap.of(node.getArguments());
1858+
String countFieldName = (String) argumentMap.get(RareTopN.Option.countField.name()).getValue();
18751859
if (context.relBuilder.peek().getRowType().getFieldNames().contains(countFieldName)) {
18761860
throw new IllegalArgumentException(
18771861
"Field `"
@@ -1886,8 +1870,29 @@ public RelNode visitRareTopN(RareTopN node, CalcitePlanContext context) {
18861870
groupExprList.addAll(fieldList);
18871871
List<UnresolvedExpression> aggExprList =
18881872
List.of(AstDSL.alias(countFieldName, AstDSL.aggregate("count", null)));
1873+
1874+
// if usenull=false, add a isNotNull before Aggregate and the hint to this Aggregate
1875+
Boolean bucketNullable = (Boolean) argumentMap.get(RareTopN.Option.useNull.name()).getValue();
1876+
boolean toAddHintsOnAggregate = false;
1877+
if (!bucketNullable && !groupExprList.isEmpty()) {
1878+
toAddHintsOnAggregate = true;
1879+
// add isNotNull filter before aggregation to filter out null bucket
1880+
List<RexNode> groupByList =
1881+
groupExprList.stream()
1882+
.map(expr -> rexVisitor.analyze(expr, context))
1883+
.collect(Collectors.toList());
1884+
context.relBuilder.filter(
1885+
PlanUtils.getSelectColumns(groupByList).stream()
1886+
.map(context.relBuilder::field)
1887+
.map(context.relBuilder::isNotNull)
1888+
.collect(Collectors.toList()));
1889+
}
18891890
aggregateWithTrimming(groupExprList, aggExprList, context);
18901891

1892+
if (toAddHintsOnAggregate) {
1893+
addIgnoreNullBucketHintToAggregate(context);
1894+
}
1895+
18911896
// 2. add a window column
18921897
List<RexNode> partitionKeys = rexVisitor.analyze(node.getGroupExprList(), context);
18931898
RexNode countField;
@@ -1906,26 +1911,46 @@ public RelNode visitRareTopN(RareTopN node, CalcitePlanContext context) {
19061911
List.of(countField),
19071912
WindowFrame.toCurrentRow());
19081913
context.relBuilder.projectPlus(
1909-
context.relBuilder.alias(rowNumberWindowOver, ROW_NUMBER_COLUMN_NAME));
1914+
context.relBuilder.alias(rowNumberWindowOver, ROW_NUMBER_COLUMN_NAME_TOP_RARE));
19101915

19111916
// 3. filter row_number() <= k in each partition
1912-
Integer N = (Integer) arguments.get("noOfResults").getValue();
1917+
int k = node.getNoOfResults();
19131918
context.relBuilder.filter(
19141919
context.relBuilder.lessThanOrEqual(
1915-
context.relBuilder.field(ROW_NUMBER_COLUMN_NAME), context.relBuilder.literal(N)));
1920+
context.relBuilder.field(ROW_NUMBER_COLUMN_NAME_TOP_RARE),
1921+
context.relBuilder.literal(k)));
19161922

19171923
// 4. project final output. the default output is group by list + field list
1918-
Boolean showCount = (Boolean) arguments.get("showCount").getValue();
1924+
Boolean showCount = (Boolean) argumentMap.get(RareTopN.Option.showCount.name()).getValue();
19191925
if (showCount) {
1920-
context.relBuilder.projectExcept(context.relBuilder.field(ROW_NUMBER_COLUMN_NAME));
1926+
context.relBuilder.projectExcept(context.relBuilder.field(ROW_NUMBER_COLUMN_NAME_TOP_RARE));
19211927
} else {
19221928
context.relBuilder.projectExcept(
1923-
context.relBuilder.field(ROW_NUMBER_COLUMN_NAME),
1929+
context.relBuilder.field(ROW_NUMBER_COLUMN_NAME_TOP_RARE),
19241930
context.relBuilder.field(countFieldName));
19251931
}
19261932
return context.relBuilder.peek();
19271933
}
19281934

1935+
private static void addIgnoreNullBucketHintToAggregate(CalcitePlanContext context) {
1936+
final RelHint statHits =
1937+
RelHint.builder("stats_args").hintOption(Argument.BUCKET_NULLABLE, "false").build();
1938+
assert context.relBuilder.peek() instanceof LogicalAggregate
1939+
: "Stats hits should be added to LogicalAggregate";
1940+
context.relBuilder.hints(statHits);
1941+
context
1942+
.relBuilder
1943+
.getCluster()
1944+
.setHintStrategies(
1945+
HintStrategyTable.builder()
1946+
.hintStrategy(
1947+
"stats_args",
1948+
(hint, rel) -> {
1949+
return rel instanceof LogicalAggregate;
1950+
})
1951+
.build());
1952+
}
1953+
19291954
@Override
19301955
public RelNode visitTableFunction(TableFunction node, CalcitePlanContext context) {
19311956
throw new CalciteUnsupportedException("Table function is unsupported in Calcite");

core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ public interface PlanUtils {
6262
/** this is only for dedup command, do not reuse it in other command */
6363
String ROW_NUMBER_COLUMN_FOR_DEDUP = "_row_number_dedup_";
6464

65-
String ROW_NUMBER_COLUMN_NAME = "_row_number_";
65+
String ROW_NUMBER_COLUMN_NAME_TOP_RARE = "_row_number_top_rare_";
6666
String ROW_NUMBER_COLUMN_NAME_MAIN = "_row_number_main_";
6767
String ROW_NUMBER_COLUMN_NAME_SUBSEARCH = "_row_number_subsearch_";
6868

docs/user/ppl/admin/settings.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ The behaviours it controlled includes:
211211

212212
- The default value of argument ``bucket_nullable`` in ``stats`` command. Check `stats command <../cmd/stats.rst>`_ for details.
213213
- The return value of ``divide`` and ``/`` operator. Check `expressions <../functions/expressions.rst>`_ for details.
214+
- The default value of argument ``usenull`` in ``top`` and ``rare`` commands. Check `top command <../cmd/top.rst>`_ and `rare command <../cmd/rare.rst>`_ for details.
214215

215216
Example 1
216217
-------

docs/user/ppl/cmd/rare.rst

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
=============
1+
====
22
rare
3-
=============
3+
====
44

55
.. rubric:: Table of contents
66

@@ -10,13 +10,13 @@ rare
1010

1111

1212
Description
13-
============
13+
===========
1414
| Using ``rare`` command to find the least common tuple of values of all fields in the field list.
1515
1616
**Note**: A maximum of 10 results is returned for each distinct tuple of values of the group-by fields.
1717

1818
Syntax
19-
============
19+
======
2020
rare <field-list> [by-clause]
2121

2222
rare [rare-options] <field-list> [by-clause] ``(available from 3.1.0+)``
@@ -26,10 +26,13 @@ rare [rare-options] <field-list> [by-clause] ``(available from 3.1.0+)``
2626
* rare-options: optional. options for the rare command. Supported syntax is [countfield=<string>] [showcount=<bool>].
2727
* showcount=<bool>: optional. whether to create a field in output that represent a count of the tuple of values. Default value is ``true``.
2828
* countfield=<string>: optional. the name of the field that contains count. Default value is ``'count'``.
29+
* usenull=<bool>: optional (since 3.4.0). whether to output the null value. The default value of ``usenull`` is determined by ``plugins.ppl.syntax.legacy.preferred``:
2930

31+
* When ``plugins.ppl.syntax.legacy.preferred=true``, ``usenull`` defaults to ``true``
32+
* When ``plugins.ppl.syntax.legacy.preferred=false``, ``usenull`` defaults to ``false``
3033

3134
Example 1: Find the least common values in a field
32-
===========================================
35+
==================================================
3336

3437
The example finds least common gender of all the accounts.
3538

@@ -46,7 +49,7 @@ PPL query::
4649

4750

4851
Example 2: Find the least common values organized by gender
49-
====================================================
52+
===========================================================
5053

5154
The example finds least common age of all the accounts group by gender.
5255

@@ -66,12 +69,10 @@ PPL query::
6669
Example 3: Rare command with Calcite enabled
6770
============================================
6871

69-
The example finds least common gender of all the accounts when ``plugins.calcite.enabled`` is true.
70-
7172
PPL query::
7273

73-
PPL> source=accounts | rare gender;
74-
fetched row
74+
os> source=accounts | rare gender;
75+
fetched rows / total rows = 2/2
7576
+--------+-------+
7677
| gender | count |
7778
|--------+-------|
@@ -83,19 +84,47 @@ PPL query::
8384
Example 4: Specify the count field option
8485
=========================================
8586

86-
The example specifies the count field when ``plugins.calcite.enabled`` is true.
87-
8887
PPL query::
8988

90-
PPL> source=accounts | rare countfield='cnt' gender;
91-
fetched row
89+
os> source=accounts | rare countfield='cnt' gender;
90+
fetched rows / total rows = 2/2
9291
+--------+-----+
9392
| gender | cnt |
9493
|--------+-----|
9594
| F | 1 |
9695
| M | 3 |
9796
+--------+-----+
9897

98+
99+
Example 5: Specify the usenull field option
100+
===========================================
101+
102+
PPL query::
103+
104+
os> source=accounts | rare usenull=false email;
105+
fetched rows / total rows = 3/3
106+
+-----------------------+-------+
107+
| email | count |
108+
|-----------------------+-------|
109+
110+
111+
112+
+-----------------------+-------+
113+
114+
PPL query::
115+
116+
os> source=accounts | rare usenull=true email;
117+
fetched rows / total rows = 4/4
118+
+-----------------------+-------+
119+
| email | count |
120+
|-----------------------+-------|
121+
| null | 1 |
122+
123+
124+
125+
+-----------------------+-------+
126+
127+
99128
Limitations
100129
===========
101130
The ``rare`` command is not rewritten to OpenSearch DSL, it is only executed on the coordination node.

0 commit comments

Comments
 (0)