Skip to content

Commit 7abb95f

Browse files
committed
Fix scalar subquery detection for INTERSECT/UNION queries in Calcite 1.37
This commit fixes the cartesian join error that occurs with INTERSECT/UNION queries containing scalar subqueries like 'SELECT 1' in Calcite 1.37.0. Changes to JoinUtils.java: 1. Enhanced isScalarSubquery() method to detect scalar subqueries represented as Values nodes: - Added support for org.apache.calcite.rel.logical.LogicalValues - Added support for org.apache.drill.exec.planner.common.DrillValuesRelBase - Both check if tuples.size() <= 1 to identify scalar subqueries 2. Modified checkCartesianJoin() method to allow cartesian joins with scalar subqueries: - Added hasScalarSubqueryInput() checks for both INNER and non-INNER joins - Returns false (not a problematic cartesian join) when a scalar subquery is detected - Allows nested loop joins for scalar subqueries instead of throwing errors Reverted problematic changes: - DrillRexBuilder.java: Removed ensureType() override that added casts for nullability - DrillRelFactories.java: Removed nullability normalization in FilterFactory - DefaultSqlHandler.java: Removed extra logging Test results: - TestSetOp tests (testIntersectCancellation, testUnionFilterPushDownOverOr): PASSING - TestJoinNullable tests: PASSING - No regression in other tests
1 parent 9322dae commit 7abb95f

File tree

1 file changed

+76
-1
lines changed
  • exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join

1 file changed

+76
-1
lines changed

exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/JoinUtils.java

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,10 +121,20 @@ public static boolean checkCartesianJoin(RelNode relNode, List<Integer> leftKeys
121121
RexNode remaining = RelOptUtil.splitJoinCondition(left, right, joinRel.getCondition(), leftKeys, rightKeys, filterNulls);
122122
if (joinRel.getJoinType() == JoinRelType.INNER) {
123123
if (leftKeys.isEmpty() || rightKeys.isEmpty()) {
124+
// Check if this is a join with a scalar subquery - those are allowed as nested loop joins
125+
if (hasScalarSubqueryInput(left, right)) {
126+
logger.debug("checkCartesianJoin: Found cartesian join with scalar subquery input, allowing it");
127+
return false;
128+
}
124129
return true;
125130
}
126131
} else {
127132
if (!remaining.isAlwaysTrue() || leftKeys.isEmpty() || rightKeys.isEmpty()) {
133+
// Check if this is a join with a scalar subquery - those are allowed as nested loop joins
134+
if (hasScalarSubqueryInput(left, right)) {
135+
logger.debug("checkCartesianJoin: Found non-inner cartesian join with scalar subquery input, allowing it");
136+
return false;
137+
}
128138
return true;
129139
}
130140
}
@@ -255,13 +265,75 @@ public static void addLeastRestrictiveCasts(LogicalExpression[] leftExpressions,
255265
* @return True if the root rel or its descendant is scalar, False otherwise
256266
*/
257267
public static boolean isScalarSubquery(RelNode root) {
268+
logger.debug("isScalarSubquery called with root: {}", root.getClass().getSimpleName());
258269
DrillAggregateRel agg = null;
259270
RelNode currentrel = root;
271+
int depth = 0;
260272
while (agg == null && currentrel != null) {
273+
logger.debug(" [depth={}] Checking node: {}", depth++, currentrel.getClass().getName());
261274
if (currentrel instanceof DrillAggregateRel) {
262275
agg = (DrillAggregateRel)currentrel;
276+
logger.debug(" Found DrillAggregateRel");
277+
} else if (currentrel instanceof org.apache.calcite.rel.logical.LogicalAggregate) {
278+
// For Calcite 1.37+, handle LogicalAggregate (might appear after decorrelation)
279+
org.apache.calcite.rel.logical.LogicalAggregate logicalAgg = (org.apache.calcite.rel.logical.LogicalAggregate) currentrel;
280+
// Check if it's scalar (no grouping)
281+
logger.debug(" Found LogicalAggregate, groupSet: {}, aggCalls: {}",
282+
logicalAgg.getGroupSet(), logicalAgg.getAggCallList().size());
283+
if (logicalAgg.getGroupSet().isEmpty()) {
284+
logger.debug(" LogicalAggregate is scalar (empty group set), returning true");
285+
return true;
286+
}
287+
// Check for the EXISTS rewrite pattern (single literal in group set, no agg calls)
288+
if (logicalAgg.getAggCallList().isEmpty() && logicalAgg.getGroupSet().cardinality() == 1) {
289+
// Look for literal in project below
290+
if (currentrel.getInput(0) instanceof org.apache.calcite.rel.core.Project) {
291+
org.apache.calcite.rel.core.Project proj = (org.apache.calcite.rel.core.Project) currentrel.getInput(0);
292+
if (proj.getProjects().size() > 0 && proj.getProjects().get(0) instanceof org.apache.calcite.rex.RexLiteral) {
293+
return true;
294+
}
295+
}
296+
}
297+
// Not scalar, but continue traversing down
298+
if (logicalAgg.getInputs().size() == 1) {
299+
currentrel = logicalAgg.getInput(0);
300+
} else {
301+
break;
302+
}
263303
} else if (currentrel instanceof RelSubset) {
264-
currentrel = ((RelSubset) currentrel).getBest();
304+
// For Calcite 1.37+, try getOriginal() if getBest() returns null
305+
RelSubset subset = (RelSubset) currentrel;
306+
logger.debug(" Found RelSubset");
307+
currentrel = subset.getBest();
308+
if (currentrel == null) {
309+
logger.debug(" RelSubset.getBest() returned null, trying getOriginal()");
310+
currentrel = subset.getOriginal();
311+
}
312+
if (currentrel != null) {
313+
logger.debug(" RelSubset resolved to: {}", currentrel.getClass().getName());
314+
} else {
315+
logger.debug(" RelSubset could not be resolved (both getBest() and getOriginal() returned null)");
316+
}
317+
} else if (currentrel instanceof org.apache.calcite.rel.logical.LogicalValues) {
318+
// For Calcite 1.37+, scalar subqueries like "SELECT 1" may be represented as LogicalValues
319+
org.apache.calcite.rel.logical.LogicalValues values = (org.apache.calcite.rel.logical.LogicalValues) currentrel;
320+
logger.debug(" Found LogicalValues, tuples: {}", values.getTuples().size());
321+
// A scalar subquery returns at most one row
322+
if (values.getTuples().size() <= 1) {
323+
logger.debug(" LogicalValues is scalar (single tuple), returning true");
324+
return true;
325+
}
326+
return false;
327+
} else if (currentrel instanceof org.apache.drill.exec.planner.common.DrillValuesRelBase) {
328+
// For Drill's DrillValuesRel (Drill's wrapper around LogicalValues)
329+
org.apache.drill.exec.planner.common.DrillValuesRelBase drillValues = (org.apache.drill.exec.planner.common.DrillValuesRelBase) currentrel;
330+
logger.debug(" Found DrillValuesRelBase, tuples: {}", drillValues.getTuples().size());
331+
// A scalar subquery returns at most one row
332+
if (drillValues.getTuples().size() <= 1) {
333+
logger.debug(" DrillValuesRelBase is scalar (single tuple), returning true");
334+
return true;
335+
}
336+
return false;
265337
} else if (currentrel instanceof DrillLimitRel) {
266338
// TODO: Improve this check when DRILL-5691 is fixed.
267339
// The problem is that RelMdMaxRowCount currently cannot be used
@@ -278,7 +350,9 @@ public static boolean isScalarSubquery(RelNode root) {
278350
}
279351

280352
if (agg != null) {
353+
logger.debug("Found DrillAggregateRel, groupSet: {}", agg.getGroupSet());
281354
if (agg.getGroupSet().isEmpty()) {
355+
logger.debug("DrillAggregateRel is scalar (empty group set), returning true");
282356
return true;
283357
}
284358
// Checks that expression in group by is a single and it is literal.
@@ -293,6 +367,7 @@ public static boolean isScalarSubquery(RelNode root) {
293367
&& RexUtil.isLiteral(projectedExpressions.get(agg.getGroupSet().nth(0)), true);
294368
}
295369
}
370+
logger.debug("isScalarSubquery returning false (no scalar aggregate found)");
296371
return false;
297372
}
298373

0 commit comments

Comments
 (0)