Skip to content

Commit 6c9fd77

Browse files
committed
feat: introduce SubstraitSqlToCalcite and SubstraitSelectStatementParser
1 parent 0532c18 commit 6c9fd77

File tree

7 files changed

+157
-96
lines changed

7 files changed

+157
-96
lines changed
Lines changed: 18 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,12 @@
11
package io.substrait.isthmus;
22

3-
import com.google.common.annotations.VisibleForTesting;
4-
import io.substrait.isthmus.sql.SubstraitSqlValidator;
3+
import io.substrait.isthmus.sql.SubstraitSqlToCalcite;
54
import io.substrait.plan.ImmutablePlan.Builder;
65
import io.substrait.plan.Plan.Version;
76
import io.substrait.plan.PlanProtoConverter;
87
import io.substrait.proto.Plan;
9-
import java.util.List;
10-
import org.apache.calcite.plan.hep.HepPlanner;
11-
import org.apache.calcite.plan.hep.HepProgram;
128
import org.apache.calcite.prepare.Prepare;
13-
import org.apache.calcite.rel.RelRoot;
14-
import org.apache.calcite.rel.rules.CoreRules;
15-
import org.apache.calcite.sql.SqlNode;
16-
import org.apache.calcite.sql.SqlNodeList;
179
import org.apache.calcite.sql.parser.SqlParseException;
18-
import org.apache.calcite.sql.parser.SqlParser;
19-
import org.apache.calcite.sql.validate.SqlValidator;
20-
import org.apache.calcite.sql2rel.SqlToRelConverter;
21-
import org.apache.calcite.sql2rel.StandardConvertletTable;
2210

2311
/** Take a SQL statement and a set of table definitions and return a substrait plan. */
2412
public class SqlToSubstrait extends SqlConverterBase {
@@ -32,69 +20,35 @@ public SqlToSubstrait(FeatureBoard features) {
3220
}
3321

3422
public Plan execute(String sql, Prepare.CatalogReader catalogReader) throws SqlParseException {
35-
SqlValidator validator = new SubstraitSqlValidator(catalogReader);
36-
return executeInner(sql, validator, catalogReader);
23+
return executeInner(sql, catalogReader);
3724
}
3825

39-
List<RelRoot> sqlToRelNode(String sql, Prepare.CatalogReader catalogReader)
40-
throws SqlParseException {
41-
SqlValidator validator = new SubstraitSqlValidator(catalogReader);
42-
return sqlToRelNode(sql, validator, catalogReader);
43-
}
44-
45-
private Plan executeInner(String sql, SqlValidator validator, Prepare.CatalogReader catalogReader)
26+
private Plan executeInner(String sql, Prepare.CatalogReader catalogReader)
4627
throws SqlParseException {
4728
Builder builder = io.substrait.plan.Plan.builder();
4829
builder.version(Version.builder().from(Version.DEFAULT_VERSION).producer("isthmus").build());
4930

5031
// TODO: consider case in which one sql passes conversion while others don't
51-
sqlToRelNode(sql, validator, catalogReader).stream()
32+
SubstraitSqlToCalcite.convertSelects(sql, catalogReader).stream()
5233
.map(root -> SubstraitRelVisitor.convert(root, EXTENSION_COLLECTION, featureBoard))
5334
.forEach(root -> builder.addRoots(root));
5435

5536
PlanProtoConverter planToProto = new PlanProtoConverter();
56-
5737
return planToProto.toProto(builder.build());
5838
}
5939

60-
private List<RelRoot> sqlToRelNode(
61-
String sql, SqlValidator validator, Prepare.CatalogReader catalogReader)
62-
throws SqlParseException {
63-
SqlParser parser = SqlParser.create(sql, parserConfig);
64-
SqlNodeList parsedList = parser.parseStmtList();
65-
SqlToRelConverter converter = createSqlToRelConverter(validator, catalogReader);
66-
List<RelRoot> roots =
67-
parsedList.stream()
68-
.map(parsed -> getBestExpRelRoot(converter, parsed))
69-
.collect(java.util.stream.Collectors.toList());
70-
return roots;
71-
}
72-
73-
@VisibleForTesting
74-
SqlToRelConverter createSqlToRelConverter(
75-
SqlValidator validator, Prepare.CatalogReader catalogReader) {
76-
SqlToRelConverter converter =
77-
new SqlToRelConverter(
78-
null,
79-
validator,
80-
catalogReader,
81-
relOptCluster,
82-
StandardConvertletTable.INSTANCE,
83-
converterConfig);
84-
return converter;
85-
}
86-
87-
@VisibleForTesting
88-
static RelRoot getBestExpRelRoot(SqlToRelConverter converter, SqlNode parsed) {
89-
RelRoot root = converter.convertQuery(parsed, true, true);
90-
{
91-
// RelBuilder seems to implicitly use the rule below,
92-
// need to add to avoid discrepancies in assertFullRoundTrip
93-
HepProgram program = HepProgram.builder().addRuleInstance(CoreRules.PROJECT_REMOVE).build();
94-
HepPlanner hepPlanner = new HepPlanner(program);
95-
hepPlanner.setRoot(root.rel);
96-
root = root.withRel(hepPlanner.findBestExp());
97-
}
98-
return root;
99-
}
40+
// @VisibleForTesting
41+
// static RelRoot getBestExpRelRoot(SqlToRelConverter converter, SqlNode parsed) {
42+
// RelRoot root = converter.convertQuery(parsed, true, true);
43+
// {
44+
// // RelBuilder seems to implicitly use the rule below,
45+
// // need to add to avoid discrepancies in assertFullRoundTrip
46+
// HepProgram program =
47+
// HepProgram.builder().addRuleInstance(CoreRules.PROJECT_REMOVE).build();
48+
// HepPlanner hepPlanner = new HepPlanner(program);
49+
// hepPlanner.setRoot(root.rel);
50+
// root = root.withRel(hepPlanner.findBestExp());
51+
// }
52+
// return root;
53+
// }
10054
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package io.substrait.isthmus.sql;
2+
3+
import java.util.List;
4+
import org.apache.calcite.avatica.util.Casing;
5+
import org.apache.calcite.sql.SqlNode;
6+
import org.apache.calcite.sql.parser.SqlParseException;
7+
import org.apache.calcite.sql.parser.SqlParser;
8+
import org.apache.calcite.sql.validate.SqlConformanceEnum;
9+
10+
/** Utility class for parsing SELECT statements to {@link org.apache.calcite.rel.RelRoot}s */
11+
public class SubstraitSelectStatementParser {
12+
13+
private static final SqlParser.Config PARSER_CONFIG =
14+
SqlParser.config()
15+
// TODO: switch to Casing.UNCHANGED
16+
.withUnquotedCasing(Casing.TO_UPPER)
17+
// use LENIENT conformance to allow for parsing a wide variety of dialects
18+
.withConformance(SqlConformanceEnum.LENIENT);
19+
20+
/** Parse one or more SELECT statements */
21+
public static List<SqlNode> parseSelectStatements(String selectStatements)
22+
throws SqlParseException {
23+
SqlParser parser = SqlParser.create(selectStatements, PARSER_CONFIG);
24+
return parser.parseStmtList();
25+
}
26+
}
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
package io.substrait.isthmus.sql;
2+
3+
import io.substrait.isthmus.SubstraitTypeSystem;
4+
import java.util.List;
5+
import java.util.stream.Collectors;
6+
import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
7+
import org.apache.calcite.plan.RelOptCluster;
8+
import org.apache.calcite.plan.RelOptPlanner;
9+
import org.apache.calcite.plan.RelOptTable;
10+
import org.apache.calcite.plan.hep.HepPlanner;
11+
import org.apache.calcite.plan.hep.HepProgram;
12+
import org.apache.calcite.prepare.Prepare;
13+
import org.apache.calcite.rel.RelNode;
14+
import org.apache.calcite.rel.RelRoot;
15+
import org.apache.calcite.rel.rules.CoreRules;
16+
import org.apache.calcite.rex.RexBuilder;
17+
import org.apache.calcite.sql.SqlNode;
18+
import org.apache.calcite.sql.parser.SqlParseException;
19+
import org.apache.calcite.sql2rel.SqlToRelConverter;
20+
import org.apache.calcite.sql2rel.StandardConvertletTable;
21+
22+
public class SubstraitSqlToCalcite {
23+
24+
public static RelRoot convertSelect(String selectStatement, Prepare.CatalogReader catalogReader)
25+
throws SqlParseException {
26+
return convertSelect(selectStatement, catalogReader, createRelOptCluster());
27+
}
28+
29+
public static RelRoot convertSelect(
30+
String selectStatement, Prepare.CatalogReader catalogReader, RelOptCluster cluster)
31+
throws SqlParseException {
32+
List<SqlNode> sqlNodes = SubstraitSelectStatementParser.parseSelectStatements(selectStatement);
33+
if (sqlNodes.size() != 1) {
34+
throw new IllegalArgumentException(
35+
String.format("Expected one SELECT statement, found: %d", sqlNodes.size()));
36+
}
37+
List<RelRoot> relRoots = convert(sqlNodes, catalogReader, cluster);
38+
// as there was only 1 select statement, there should only be 1 root
39+
return relRoots.get(0);
40+
}
41+
42+
public static List<RelRoot> convertSelects(
43+
String selectStatements, Prepare.CatalogReader catalogReader) throws SqlParseException {
44+
return convertSelects(selectStatements, catalogReader, createRelOptCluster());
45+
}
46+
47+
public static List<RelRoot> convertSelects(
48+
String selectStatements, Prepare.CatalogReader catalogReader, RelOptCluster cluster)
49+
throws SqlParseException {
50+
List<SqlNode> sqlNodes = SubstraitSelectStatementParser.parseSelectStatements(selectStatements);
51+
return convert(sqlNodes, catalogReader, cluster);
52+
}
53+
54+
static List<RelRoot> convert(
55+
List<SqlNode> selectStatements, Prepare.CatalogReader catalogReader, RelOptCluster cluster) {
56+
RelOptTable.ViewExpander viewExpander = null;
57+
SqlToRelConverter converter =
58+
new SqlToRelConverter(
59+
viewExpander,
60+
new SubstraitSqlValidator(catalogReader),
61+
catalogReader,
62+
cluster,
63+
StandardConvertletTable.INSTANCE,
64+
SqlToRelConverter.CONFIG);
65+
// apply validation
66+
boolean needsValidation = true;
67+
// query is the root of the tree
68+
boolean top = true;
69+
return selectStatements.stream()
70+
.map(
71+
sqlNode -> removeUnnecessaryProjects(converter.convertQuery(sqlNode, needsValidation, top)))
72+
.collect(Collectors.toList());
73+
}
74+
75+
static RelOptCluster createRelOptCluster() {
76+
RexBuilder rexBuilder =
77+
new RexBuilder(new JavaTypeFactoryImpl(SubstraitTypeSystem.TYPE_SYSTEM));
78+
HepProgram program = HepProgram.builder().build();
79+
RelOptPlanner emptyPlanner = new HepPlanner(program);
80+
return RelOptCluster.create(emptyPlanner, rexBuilder);
81+
}
82+
83+
static RelRoot removeUnnecessaryProjects(RelRoot root) {
84+
return root.withRel(removeUnnecessaryProjects(root.rel));
85+
}
86+
87+
static RelNode removeUnnecessaryProjects(RelNode root) {
88+
HepProgram program = HepProgram.builder().addRuleInstance(CoreRules.PROJECT_REMOVE).build();
89+
HepPlanner planner = new HepPlanner(program);
90+
planner.setRoot(root);
91+
return planner.findBestExp();
92+
}
93+
}

isthmus/src/test/java/io/substrait/isthmus/ApplyJoinPlanTest.java

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package io.substrait.isthmus;
22

3+
import io.substrait.isthmus.sql.SubstraitSqlToCalcite;
34
import java.util.Map;
45
import java.util.Map.Entry;
56
import java.util.Optional;
@@ -11,10 +12,7 @@
1112

1213
public class ApplyJoinPlanTest extends PlanTestBase {
1314

14-
private static RelRoot getCalcitePlan(String sql) throws SqlParseException {
15-
SqlToSubstrait s = new SqlToSubstrait();
16-
return s.sqlToRelNode(sql, TPCDS_CATALOG).get(0);
17-
}
15+
static SqlToSubstrait s = new SqlToSubstrait();
1816

1917
private static void validateOuterRef(
2018
Map<RexFieldAccess, Integer> fieldAccessDepthMap, String refName, String colName, int depth) {
@@ -53,16 +51,15 @@ public void lateralJoinQuery() throws SqlParseException {
5351
*/
5452

5553
// validate outer reference map
56-
RelRoot root = getCalcitePlan(sql);
54+
RelRoot root = SubstraitSqlToCalcite.convertSelect(sql, TPCDS_CATALOG);
5755
Map<RexFieldAccess, Integer> fieldAccessDepthMap = buildOuterFieldRefMap(root);
5856
Assertions.assertEquals(1, fieldAccessDepthMap.size());
5957
validateOuterRef(fieldAccessDepthMap, "$cor0", "SS_ITEM_SK", 1);
6058

6159
// TODO validate end to end conversion
62-
SqlToSubstrait sE2E = new SqlToSubstrait();
6360
Assertions.assertThrows(
6461
UnsupportedOperationException.class,
65-
() -> sE2E.execute(sql, TPCDS_CATALOG),
62+
() -> s.execute(sql, TPCDS_CATALOG),
6663
"Lateral join is not supported");
6764
}
6865

@@ -74,7 +71,7 @@ public void outerApplyQuery() throws SqlParseException {
7471
+ "FROM store_sales OUTER APPLY\n"
7572
+ " (select i_item_sk from item where item.i_item_sk = store_sales.ss_item_sk)";
7673

77-
RelRoot root = getCalcitePlan(sql);
74+
RelRoot root = SubstraitSqlToCalcite.convertSelect(sql, TPCDS_CATALOG);
7875

7976
Map<RexFieldAccess, Integer> fieldAccessDepthMap = buildOuterFieldRefMap(root);
8077
Assertions.assertEquals(1, fieldAccessDepthMap.size());
@@ -83,7 +80,7 @@ public void outerApplyQuery() throws SqlParseException {
8380
// TODO validate end to end conversion
8481
Assertions.assertThrows(
8582
UnsupportedOperationException.class,
86-
() -> new SqlToSubstrait().execute(sql, TPCDS_CATALOG),
83+
() -> s.execute(sql, TPCDS_CATALOG),
8784
"APPLY is not supported");
8885
}
8986

@@ -112,7 +109,7 @@ public void nestedApplyJoinQuery() throws SqlParseException {
112109
LogicalFilter(condition=[AND(=($4, $cor0.I_ITEM_SK), =($4, $cor2.SS_ITEM_SK))])
113110
LogicalTableScan(table=[[tpcds, PROMOTION]])
114111
*/
115-
RelRoot root = getCalcitePlan(sql);
112+
RelRoot root = SubstraitSqlToCalcite.convertSelect(sql, TPCDS_CATALOG);
116113

117114
Map<RexFieldAccess, Integer> fieldAccessDepthMap = buildOuterFieldRefMap(root);
118115
Assertions.assertEquals(3, fieldAccessDepthMap.size());
@@ -123,7 +120,7 @@ public void nestedApplyJoinQuery() throws SqlParseException {
123120
// TODO validate end to end conversion
124121
Assertions.assertThrows(
125122
UnsupportedOperationException.class,
126-
() -> new SqlToSubstrait().execute(sql, TPCDS_CATALOG),
123+
() -> s.execute(sql, TPCDS_CATALOG),
127124
"APPLY is not supported");
128125
}
129126

@@ -138,7 +135,7 @@ public void crossApplyQuery() throws SqlParseException {
138135
// TODO validate end to end conversion
139136
Assertions.assertThrows(
140137
UnsupportedOperationException.class,
141-
() -> new SqlToSubstrait().execute(sql, TPCDS_CATALOG),
138+
() -> s.execute(sql, TPCDS_CATALOG),
142139
"APPLY is not supported");
143140
}
144141
}

isthmus/src/test/java/io/substrait/isthmus/NameRoundtripTest.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import static org.junit.jupiter.api.Assertions.assertEquals;
55

66
import io.substrait.isthmus.sql.SubstraitCreateStatementParser;
7+
import io.substrait.isthmus.sql.SubstraitSqlToCalcite;
78
import io.substrait.plan.Plan;
89
import io.substrait.relation.NamedScan;
910
import java.util.List;
@@ -25,10 +26,8 @@ void preserveNamesFromSql() throws Exception {
2526
String query = "SELECT \"a\", \"B\" FROM foo GROUP BY a, b";
2627
List<String> expectedNames = List.of("a", "B");
2728

28-
List<org.apache.calcite.rel.RelRoot> calciteRelRoots = s.sqlToRelNode(query, catalogReader);
29-
assertEquals(1, calciteRelRoots.size());
30-
31-
org.apache.calcite.rel.RelRoot calciteRelRoot1 = calciteRelRoots.get(0);
29+
org.apache.calcite.rel.RelRoot calciteRelRoot1 =
30+
SubstraitSqlToCalcite.convertSelect(query, catalogReader);
3231
assertEquals(expectedNames, calciteRelRoot1.validatedRowType.getFieldNames());
3332

3433
io.substrait.plan.Plan.Root substraitRelRoot =

isthmus/src/test/java/io/substrait/isthmus/OptimizerIntegrationTest.java

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,9 @@
22

33
import static io.substrait.isthmus.SqlConverterBase.EXTENSION_COLLECTION;
44
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
5-
import static org.junit.jupiter.api.Assertions.assertEquals;
65

6+
import io.substrait.isthmus.sql.SubstraitSqlToCalcite;
77
import java.io.IOException;
8-
import java.util.List;
98
import org.apache.calcite.plan.hep.HepPlanner;
109
import org.apache.calcite.plan.hep.HepProgram;
1110
import org.apache.calcite.plan.hep.HepProgramBuilder;
@@ -24,11 +23,8 @@ void conversionHandlesBuiltInSum0CallAddedByRule() throws SqlParseException, IOE
2423
// verify that the query works generally
2524
assertFullRoundTrip(query);
2625

27-
SqlToSubstrait sqlConverter = new SqlToSubstrait();
28-
List<RelRoot> relRoots = sqlConverter.sqlToRelNode(query, TPCH_CATALOG);
29-
assertEquals(1, relRoots.size());
30-
RelRoot planRoot = relRoots.get(0);
31-
RelNode originalPlan = planRoot.rel;
26+
RelRoot relRoot = SubstraitSqlToCalcite.convertSelect(query, TPCH_CATALOG);
27+
RelNode originalPlan = relRoot.rel;
3228

3329
// Create a program to apply the AGGREGATE_EXPAND_DISTINCT_AGGREGATES_TO_JOIN rule.
3430
// This will introduce a SqlSumEmptyIsZeroAggFunction to the plan.
@@ -46,6 +42,6 @@ void conversionHandlesBuiltInSum0CallAddedByRule() throws SqlParseException, IOE
4642
assertDoesNotThrow(
4743
() ->
4844
// Conversion of the new plan should succeed
49-
SubstraitRelVisitor.convert(RelRoot.of(newPlan, planRoot.kind), EXTENSION_COLLECTION));
45+
SubstraitRelVisitor.convert(RelRoot.of(newPlan, relRoot.kind), EXTENSION_COLLECTION));
5046
}
5147
}

0 commit comments

Comments
 (0)