From 1f1fe390a3ced6dc5bab1790b431fd7a917a6ebd Mon Sep 17 00:00:00 2001 From: Thomas Rebele Date: Thu, 4 Sep 2025 17:21:14 +0200 Subject: [PATCH] HIVE-29176: Wrong result when HiveAntiJoin is replacing an IS NULL filter on a nullable column --- .../ql/optimizer/calcite/HiveCalciteUtil.java | 44 +-- .../calcite/rules/HiveAntiSemiJoinRule.java | 175 ++++++++--- .../rules/TestHiveAntiSemiJoinRule.java | 248 +++++++++++++++ .../calcite/rules/TestRuleHelper.java | 113 ++++++- .../test/queries/clientpositive/antijoin3.q | 64 ++++ .../clientpositive/llap/antijoin3.q.out | 292 ++++++++++++++++++ .../clientpositive/llap/llap_smb_ptf.q.out | 47 ++- 7 files changed, 868 insertions(+), 115 deletions(-) create mode 100644 ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestHiveAntiSemiJoinRule.java create mode 100644 ql/src/test/queries/clientpositive/antijoin3.q create mode 100644 ql/src/test/results/clientpositive/llap/antijoin3.q.out diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java index a19a6c74ec3b..7df2c5973171 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java @@ -1233,43 +1233,17 @@ public FixNullabilityShuttle(RexBuilder rexBuilder, } /** - * Checks if any of the expression given as list expressions are from right side of the join. - * This is used during anti join conversion. - * - * @param joinRel Join node whose right side has to be searched. - * @param expressions The list of expression to search. - * @return true if any of the expressions is from right side of join. + * Given a join, creates a bitset of the joined columns originating from the right-hand side. 
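+   * <p>
+   * For example, for a join of a 3-column input with a 2-column input, the result is
+   * {3, 4}, i.e. the indexes of the right-hand side fields within the join's row type.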
+   * @param joinRel a join that concatenates all columns from its inputs (so no semi-join)
+   * @return the bitset of field indexes in the join's row type that originate from the right-hand side input
    */
-  public static boolean hasAnyExpressionFromRightSide(RelNode joinRel, List<RexNode> expressions) {
-    List<RelDataTypeField> joinFields = joinRel.getRowType().getFieldList();
-    int nTotalFields = joinFields.size();
-    List<RelDataTypeField> leftFields = (joinRel.getInputs().get(0)).getRowType().getFieldList();
-    int nFieldsLeft = leftFields.size();
-    ImmutableBitSet rightBitmap = ImmutableBitSet.range(nFieldsLeft, nTotalFields);
-
-    for (RexNode node : expressions) {
-      ImmutableBitSet inputBits = RelOptUtil.InputFinder.bits(node);
-      if (rightBitmap.contains(inputBits)) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  public static boolean hasAllExpressionsFromRightSide(RelNode joinRel, List<RexNode> expressions) {
-    List<RelDataTypeField> joinFields = joinRel.getRowType().getFieldList();
-    int nTotalFields = joinFields.size();
-    List<RelDataTypeField> leftFields = (joinRel.getInputs().get(0)).getRowType().getFieldList();
-    int nFieldsLeft = leftFields.size();
-    ImmutableBitSet rightBitmap = ImmutableBitSet.range(nFieldsLeft, nTotalFields);
-
-    for (RexNode node : expressions) {
-      ImmutableBitSet inputBits = RelOptUtil.InputFinder.bits(node);
-      if (!rightBitmap.contains(inputBits)) {
-        return false;
-      }
+  public static ImmutableBitSet getRightSideBitset(RelNode joinRel) {
+    if (joinRel.getInputs().size() != 2) {
+      throw new IllegalArgumentException("The relation must have exactly two children:\n" + RelOptUtil.toString(joinRel));
     }
-    return true;
+    int nTotalFields = joinRel.getRowType().getFieldCount();
+    int nFieldsLeft = (joinRel.getInputs().get(0)).getRowType().getFieldCount();
+    return ImmutableBitSet.range(nFieldsLeft, nTotalFields);
   }
 
   /**
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAntiSemiJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAntiSemiJoinRule.java
index 3697ec2c4aa1..c3e292797122 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAntiSemiJoinRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAntiSemiJoinRule.java
@@ -17,29 +17,40 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
 
+import com.google.common.collect.ImmutableList;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptPredicateList;
 import org.apache.calcite.plan.RelOptRule;
 import org.apache.calcite.plan.RelOptRuleCall;
 import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.RexImplicationChecker;
 import org.apache.calcite.plan.Strong;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.Filter;
 import org.apache.calcite.rel.core.Join;
 import org.apache.calcite.rel.core.JoinRelType;
 import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
 import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexExecutor;
+import org.apache.calcite.rex.RexExecutorImpl;
 import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.rex.RexVisitorImpl;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Util;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAntiJoin;
 import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
 
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.List;
-import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.Optional;
 
 /**
  * Planner rule that converts a join plus filter to anti join.
@@ -86,14 +97,17 @@ protected void perform(RelOptRuleCall call, Project project, Filter filter, Join
 
     assert (filter != null);
 
-    List<RexNode> filterList = getResidualFilterNodes(filter, join);
-    if (filterList == null) {
+    ImmutableBitSet rhsFields = HiveCalciteUtil.getRightSideBitset(join);
+    Optional<List<RexNode>> optFilterList = getResidualFilterNodes(filter, join, rhsFields);
+    if (optFilterList.isEmpty()) {
       return;
     }
+    List<RexNode> filterList = optFilterList.get();
 
     // If any projection is there from right side, then we can not convert to anti join.
-    boolean hasProjection = HiveCalciteUtil.hasAnyExpressionFromRightSide(join, project.getProjects());
-    if (hasProjection) {
+    ImmutableBitSet projectedFields = RelOptUtil.InputFinder.bits(project.getProjects(), null);
+    boolean projectionUsesRHS = projectedFields.intersects(rhsFields);
+    if (projectionUsesRHS) {
       return;
     }
 
@@ -119,13 +133,14 @@
   /**
    * Extracts the non-null filter conditions from given filter node.
    *
-   * @param filter The filter condition to be checked.
-   * @param join Join node whose right side has to be searched.
-   * @return null : Anti join condition is not matched for filter.
-   *        Empty list : No residual filter conditions present.
-   *        Valid list containing the filter to be applied after join.
+   * @param filter    The filter condition to be checked.
+   * @param join      Join node whose right side has to be searched.
+   * @param rhsFields Bitset of the fields that belong to the right-hand side of the join.
+   * @return Optional.empty() : Anti join condition is not matched for filter.
+   *         Empty list : No residual filter conditions present.
+   *         Valid list containing the filter to be applied after join.
    */
-  private List<RexNode> getResidualFilterNodes(Filter filter, Join join) {
+  private Optional<List<RexNode>> getResidualFilterNodes(Filter filter, Join join, ImmutableBitSet rhsFields) {
     // 1. If null filter is not present from right side then we can not convert to anti join.
     // 2. If any non-null filter is present from right side, we can not convert it to anti join.
     // 3. Keep other filters which needs to be executed after join.
@@ -135,43 +150,123 @@ private List<RexNode> getResidualFilterNodes(Filter filter, Join join) {
     List<RexNode> aboveFilters = RelOptUtil.conjunctions(filter.getCondition());
     boolean hasNullFilterOnRightSide = false;
     List<RexNode> filterList = new ArrayList<>();
+    final ImmutableBitSet notNullColumnsFromRightSide = getNotNullColumnsFromRightSide(join);
+
     for (RexNode filterNode : aboveFilters) {
-      if (filterNode.getKind() == SqlKind.IS_NULL) {
-        // Null filter from right side table can be removed and its a pre-condition for anti join conversion.
-        if (HiveCalciteUtil.hasAllExpressionsFromRightSide(join, Collections.singletonList(filterNode))
-            && isStrong(((RexCall) filterNode).getOperands().get(0))) {
-          hasNullFilterOnRightSide = true;
-        } else {
-          filterList.add(filterNode);
-        }
-      } else {
-        if (HiveCalciteUtil.hasAnyExpressionFromRightSide(join, Collections.singletonList(filterNode))) {
-          // If some non null condition is present from right side, we can not convert the join to anti join as
-          // anti join does not project the fields from right side.
-          return null;
-        } else {
-          filterList.add(filterNode);
-        }
+      final ImmutableBitSet usedFields = RelOptUtil.InputFinder.bits(filterNode);
+      boolean usesFieldFromRHS = usedFields.intersects(rhsFields);
+
+      if (!usesFieldFromRHS) {
+        // Only LHS fields or constants, so the filterNode is part of the residual filter
+        filterList.add(filterNode);
+        continue;
+      }
+
+      // In the following we check for filter nodes that let us deduce that
+      // "an (originally) not-null column of the RHS IS NULL because the LHS row will not be matched"
+
+      if (filterNode.getKind() != SqlKind.IS_NULL) {
+        return Optional.empty();
+      }
+
+      boolean usesRHSFieldsOnly = rhsFields.contains(usedFields);
+      if (!usesRHSFieldsOnly) {
+        // If there is a mix between LHS and RHS fields, don't convert to anti-join
+        return Optional.empty();
+      }
+
+      // Null filter from right side table can be removed and it is a pre-condition for anti join conversion.
+      RexNode arg = ((RexCall) filterNode).getOperands().get(0);
+      if (isStrong(arg, notNullColumnsFromRightSide)) {
+        hasNullFilterOnRightSide = true;
+      } else if (!isStrong(arg, rhsFields)) {
+        // the IS NULL might not hold even when all RHS fields are NULL, so bail out
+        return Optional.empty();
+      }
     }
 
     if (!hasNullFilterOnRightSide) {
-      return null;
+      return Optional.empty();
     }
-    return filterList;
+    return Optional.of(filterList);
   }
 
-  private boolean isStrong(RexNode rexNode) {
-    AtomicBoolean hasCast = new AtomicBoolean(false);
-    rexNode.accept(new RexVisitorImpl<Void>(true) {
-      @Override
-      public Void visitCall(RexCall call) {
-        if (call.getKind() == SqlKind.CAST) {
-          hasCast.set(true);
-        }
-        return super.visitCall(call);
+  private ImmutableBitSet getNotNullColumnsFromRightSide(RelNode joinRel) {
+    // we need to shift the indices of the second child to the right
+    int shift = (joinRel.getInput(0)).getRowType().getFieldCount();
+    ImmutableBitSet rhsNotnullColumns = deduceNotNullColumns(joinRel.getInput(1));
+    return rhsNotnullColumns.shift(shift);
+  }
+
+  /**
+   * Deduces which columns of the relNode are definitely NOT NULL.
+   */
+  private ImmutableBitSet deduceNotNullColumns(RelNode relNode) {
+    // adapted from org.apache.calcite.plan.RelOptUtil.containsNullableFields
+    RelOptCluster cluster = relNode.getCluster();
+    final RexBuilder rexBuilder = cluster.getRexBuilder();
+    final RelMetadataQuery mq = cluster.getMetadataQuery();
+    ImmutableBitSet.Builder result = ImmutableBitSet.builder();
+    ImmutableBitSet.Builder candidatesBuilder = ImmutableBitSet.builder();
+    List<RelDataTypeField> fieldList = relNode.getRowType().getFieldList();
+    for (int i = 0; i < fieldList.size(); i++) {
+      if (fieldList.get(i).getType().isNullable()) {
+        // declared nullable, but the pulled-up predicates might still rule out NULL values
+        candidatesBuilder.set(i);
+      } else {
+        result.set(i);
+      }
+    }
+    ImmutableBitSet candidates = candidatesBuilder.build();
+    if (candidates.isEmpty()) {
+      return result.build();
+    }
+    final RelOptPredicateList predicates = mq.getPulledUpPredicates(relNode);
+    if (RelOptPredicateList.isEmpty(predicates)) {
+      return result.build();
+    }
+    final RexExecutor executor = cluster.getPlanner().getExecutor();
+    if (!(executor instanceof RexExecutorImpl)) {
+      // without an executor we cannot check the implications below
+      return result.build();
+    }
+    final RexImplicationChecker checker =
+        new RexImplicationChecker(rexBuilder, (RexExecutorImpl) executor, relNode.getRowType());
+    final List<RexNode> preds = predicates.pulledUpPredicates;
+    final List<RexNode> antecedent = new ArrayList<>(preds);
+    final RexNode first = RexUtil.composeConjunction(rexBuilder, antecedent);
+    for (int c : candidates) {
+      RelDataTypeField field = fieldList.get(c);
+      final RexNode second = rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL,
+          rexBuilder.makeInputRef(field.getType(), field.getIndex()));
+      // Suppose we have EMP(empno INT NOT NULL, mgr INT),
+      // and predicates [empno > 0, mgr > 0].
+      // We make first: "empno > 0 AND mgr > 0"
+      // and second: "mgr IS NOT NULL"
+      // and ask whether first implies second.
+      // It does, so mgr can be treated as NOT NULL.
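+      // The checker only returns true when it can prove the implication, so
+      // unprovable candidates conservatively stay nullable.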
+      if (checker.implies(first, second)) {
+        result.set(c);
       }
-    });
-    return !hasCast.get() && Strong.isStrong(rexNode);
+    }
+    return result.build();
+  }
+
+  private boolean isStrong(RexNode rexNode, ImmutableBitSet rightSideBitset) {
+    try {
+      rexNode.accept(new RexVisitorImpl<Void>(true) {
+        @Override
+        public Void visitCall(RexCall call) {
+          if (call.getKind() == SqlKind.CAST) {
+            throw Util.FoundOne.NULL;
+          }
+          return super.visitCall(call);
+        }
+      });
+    } catch (Util.FoundOne e) {
+      // Hive's CAST might introduce NULL for NOT NULL fields
+      return false;
+    }
+    return Strong.isNull(rexNode, rightSideBitset);
   }
 }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestHiveAntiSemiJoinRule.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestHiveAntiSemiJoinRule.java
new file mode 100644
index 000000000000..97e808eaa40c
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestHiveAntiSemiJoinRule.java
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.runtime.Hook; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.tools.RelBuilder; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.junit.MockitoJUnitRunner; + +import java.util.Collections; + +import static org.apache.hadoop.hive.ql.optimizer.calcite.rules.TestRuleHelper.*; + +@RunWith(MockitoJUnitRunner.class) +public class TestHiveAntiSemiJoinRule { + + PlanFixture fixture() { + RelOptPlanner planner = buildPlanner(Collections.singletonList(HiveAntiSemiJoinRule.INSTANCE)); + // executor is needed to determine nullability of RHS columns + planner.setExecutor(new HiveRexExecutorImpl()); + return new PlanFixture(planner) + .registerTable("t1", T1Record.class) + .registerTable("t2", T2Record.class) + .registerTable("t3", T3Record.class); + } + + @Test + public void testFilterOnNullableColumn() { + PlanFixture fixture = fixture(); + RelBuilder b = fixture.createRelBuilder(); + + // @formatter:off + RelNode plan = b + .scan("t1") + .scan("t2") + .join(JoinRelType.LEFT, b.equals( + b.field(2, 0, "t1nullable"), + b.field(2, 1, "t2id"))) + .filter(b.isNull(b.field("t2nullable"))) + .project(b.field("t1id")) + .build(); + + String expectedPlan = "HiveProject(t1id=[$0])\n" + + " HiveFilter(condition=[IS NULL($5)])\n" + + " HiveJoin(condition=[=($2, $3)], joinType=[left], algorithm=[none], cost=[not available])\n" + + " LogicalTableScan(table=[[t1]])\n" + + " LogicalTableScan(table=[[t2]])\n"; + // @formatter:on + + assertPlans(fixture.getPlanner(), plan, expectedPlan, expectedPlan); + } + + @Test + public void testFilterOnFormerlyNullableColumn() { + PlanFixture fixture = fixture(); + RelBuilder b = fixture.createRelBuilder(); + + // @formatter:off + RelNode plan = b + .scan("t1") + .scan("t2") + .filter(b.isNotNull(b.field("t2nullable"))) + .join(JoinRelType.LEFT, b.equals( + b.field(2, 0, "t1nullable"), + b.field(2, 1, "t2nullable"))) + // the IS NOT NULL on the RHS ensures that the values + // we get from t2nullable are actually NOT NULL + .filter(b.isNull(b.field("t2nullable"))) + .project(b.field("t1id")) + .build(); + + String prePlan = "HiveProject(t1id=[$0])\n" + + " HiveFilter(condition=[IS NULL($5)])\n" + + " HiveJoin(condition=[=($2, $5)], joinType=[left], algorithm=[none], cost=[not available])\n" + + " LogicalTableScan(table=[[t1]])\n" + + " HiveFilter(condition=[IS NOT NULL($2)])\n" + + " LogicalTableScan(table=[[t2]])\n"; + + String postPlan = "HiveProject(t1id=[$0])\n" + + " HiveAntiJoin(condition=[=($2, $5)], joinType=[anti])\n" + + " LogicalTableScan(table=[[t1]])\n" + + " HiveFilter(condition=[IS NOT NULL($2)])\n" + + " LogicalTableScan(table=[[t2]])\n"; + // @formatter:on + + assertPlans(fixture.getPlanner(), plan, prePlan, postPlan); + } + + + @Test + public void testFilterIsNullFromBothSides() { + PlanFixture fixture = fixture(); + + RelNode plan; + try (Hook.Closeable ignore = Hook.REL_BUILDER_SIMPLIFY.addThread(Hook.propertyJ(false))) { + RelBuilder b = fixture.createRelBuilder(); + // @formatter:off + plan = b.scan("t1") + .scan("t2") + .join(JoinRelType.LEFT, b.equals(b.field(2, 0, "t1nullable"), b.field(2, 1, "t2id"))) + .filter(b.isNull(b.call(SqlStdOperatorTable.PLUS, b.field("t2nullable"), 
b.field("t1nullable")))) + .project(b.field("t1id")).build(); + // @formatter:on + } + + // @formatter:off + String expectedPlan = "HiveProject(t1id=[$0])\n" + + " HiveFilter(condition=[IS NULL(+($5, $2))])\n" + + " HiveJoin(condition=[=($2, $3)], joinType=[left], algorithm=[none], cost=[not available])\n" + + " LogicalTableScan(table=[[t1]])\n" + + " LogicalTableScan(table=[[t2]])\n"; + // @formatter:on + + assertPlans(fixture.getPlanner(), plan, expectedPlan, expectedPlan); + } + + @Test + public void testFilterOnNotNullColumn() { + PlanFixture fixture = fixture(); + RelBuilder b = fixture.createRelBuilder(); + + // @formatter:off + RelNode plan = b + .scan("t1") + .scan("t2") + .join(JoinRelType.LEFT, b.equals( + b.field(2, 0, "t1nullable"), + b.field(2, 1, "t2id"))) + .filter(b.isNull(b.field("t2notnull"))) + .project(b.field("t1id")) + .build(); + + String prePlan = "HiveProject(t1id=[$0])\n" + + " HiveFilter(condition=[IS NULL($4)])\n" + + " HiveJoin(condition=[=($2, $3)], joinType=[left], algorithm=[none], cost=[not available])\n" + + " LogicalTableScan(table=[[t1]])\n" + + " LogicalTableScan(table=[[t2]])\n"; + + String postPlan = "HiveProject(t1id=[$0])\n" + + " HiveAntiJoin(condition=[=($2, $3)], joinType=[anti])\n" + + " LogicalTableScan(table=[[t1]])\n" + + " LogicalTableScan(table=[[t2]])\n"; + // @formatter:on + + assertPlans(fixture.getPlanner(), plan, prePlan, postPlan); + } + + /** Check RHS without any nullable columns */ + @Test + public void testFilterOnNotNullColumn2() { + PlanFixture fixture = fixture(); + RelBuilder b = fixture.createRelBuilder(); + + // @formatter:off + RelNode plan = b + .scan("t1") + .scan("t3") + .join(JoinRelType.LEFT, b.equals( + b.field(2, 0, "t1nullable"), + b.field(2, 1, "t3id"))) + .filter(b.isNull(b.field("t3notnull"))) + .project(b.field("t1id")) + .build(); + + String prePlan = "HiveProject(t1id=[$0])\n" + + " HiveFilter(condition=[IS NULL($4)])\n" + + " HiveJoin(condition=[=($2, $3)], joinType=[left], algorithm=[none], cost=[not available])\n" + + " LogicalTableScan(table=[[t1]])\n" + + " LogicalTableScan(table=[[t3]])\n"; + + String postPlan = "HiveProject(t1id=[$0])\n" + + " HiveAntiJoin(condition=[=($2, $3)], joinType=[anti])\n" + + " LogicalTableScan(table=[[t1]])\n" + + " LogicalTableScan(table=[[t3]])\n"; + // @formatter:on + + assertPlans(fixture.getPlanner(), plan, prePlan, postPlan); + } + + @Test + public void testFilterOnNullAndNotNullColumn() { + PlanFixture fixture = fixture(); + RelBuilder b = fixture.createRelBuilder(); + + // @formatter:off + RelNode plan = b + .scan("t1") + .scan("t2") + .join(JoinRelType.LEFT, b.equals( + b.field(2, 0, "t1nullable"), + b.field(2, 1, "t2id"))) + .filter(b.and(b.isNull(b.field("t2notnull")), b.isNull((b.field("t2nullable"))))) + .project(b.field("t1id")) + .build(); + + String prePlan = "HiveProject(t1id=[$0])\n" + + " HiveFilter(condition=[AND(IS NULL($4), IS NULL($5))])\n" + + " HiveJoin(condition=[=($2, $3)], joinType=[left], algorithm=[none], cost=[not available])\n" + + " LogicalTableScan(table=[[t1]])\n" + + " LogicalTableScan(table=[[t2]])\n"; + + String postPlan = "HiveProject(t1id=[$0])\n" + + " HiveAntiJoin(condition=[=($2, $3)], joinType=[anti])\n" + + " LogicalTableScan(table=[[t1]])\n" + + " LogicalTableScan(table=[[t2]])\n"; + // @formatter:on + + assertPlans(fixture.getPlanner(), plan, prePlan, postPlan); + } + + static class T1Record { + public int t1id; + public int t1notnull; + public Integer t1nullable; + } + + static class T2Record { + public int t2id; + public int 
t2notnull;
+    public Integer t2nullable;
+  }
+
+  static class T3Record {
+    public int t3id;
+    public int t3notnull;
+  }
+}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestRuleHelper.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestRuleHelper.java
index 8c49f58c4245..937e2c0499db 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestRuleHelper.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestRuleHelper.java
@@ -19,12 +19,7 @@
 package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
 
 import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
-import org.apache.calcite.plan.AbstractRelOptPlanner;
-import org.apache.calcite.plan.RelOptCluster;
-import org.apache.calcite.plan.RelOptPlanner;
-import org.apache.calcite.plan.RelOptRule;
-import org.apache.calcite.plan.RelOptSchema;
-import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.*;
 import org.apache.calcite.plan.hep.HepPlanner;
 import org.apache.calcite.plan.hep.HepProgramBuilder;
 import org.apache.calcite.rel.RelNode;
@@ -37,19 +32,13 @@
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
 import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
-import org.junit.Test;
-import org.junit.runner.RunWith;
 import org.mockito.ArgumentMatchers;
-import org.mockito.Mock;
-import org.mockito.junit.MockitoJUnitRunner;
 
-import java.util.Collection;
-import java.util.Collections;
+import java.util.*;
 
 import static org.junit.Assert.assertEquals;
 import static org.mockito.ArgumentMatchers.any;
-import static org.mockito.Mockito.doReturn;
-import static org.mockito.Mockito.lenient;
+import static org.mockito.Mockito.*;
 
 public class TestRuleHelper {
 
@@ -73,7 +62,99 @@ public static AbstractRelOptPlanner buildPlanner(Collection<RelOptRule> rules) {
     return new HepPlanner(programBuilder.build());
   }
 
-  public static RelBuilder buildRelBuilder(AbstractRelOptPlanner planner,
+  public static class HiveTableMock {
+    final Class<?> recordClass;
+    final RelOptHiveTable tableMock;
+    final Table hiveTableMock;
+    final List<String> name;
+
+    public HiveTableMock(List<String> name, Class<?> recordClass, PlanFixture mockBuilder) {
+      this.name = List.copyOf(name);
+      this.recordClass = recordClass;
+      tableMock = mock(RelOptHiveTable.class);
+      hiveTableMock = mock(Table.class);
+
+      RelDataType rowTypeMock = JAVA_TYPE_FACTORY.createStructType(recordClass);
+      doReturn(rowTypeMock).when(tableMock).getRowType();
+
+      LogicalTableScan tableScan = LogicalTableScan.create(mockBuilder.optCluster, tableMock, Collections.emptyList());
+      doReturn(tableScan).when(tableMock).toRel(ArgumentMatchers.any());
+
+      doReturn(this.name).when(tableMock).getQualifiedName();
+
+      lenient().doReturn(hiveTableMock).when(tableMock).getHiveTableMD();
+    }
+  }
+
+  /**
+   * A fixture for creating plans with HiveRelNodes.
+   */
+  public static class PlanFixture {
+    final RelOptCluster optCluster;
+
+    final Map<List<String>, HiveTableMock> tables = new HashMap<>();
+
+    Class<?> defaultRecordClass;
+
+    public PlanFixture(RelOptPlanner planner) {
+      RexBuilder rexBuilder = new RexBuilder(JAVA_TYPE_FACTORY);
+      optCluster = RelOptCluster.create(planner, rexBuilder);
+    }
+
+    /**
+     * Register a table in the schema, using the attributes of the class as columns.
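+     * @param name        the table name, as used by {@code builder.scan(name)}
+     * @param recordClass a class whose public fields define the column names and types
+     * @return this fixture, to allow chaining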
+     */
+    public PlanFixture registerTable(String name, Class<?> recordClass) {
+      return registerTable(List.of(name), recordClass);
+    }
+
+    /**
+     * Similar to {@link #registerTable(String, Class)}, but with a qualified name.
+     * <p>
+     * See {@link RelOptTable#getQualifiedName()}.
+     */
+    public PlanFixture registerTable(List<String> name, Class<?> recordClass) {
+      name = List.copyOf(name);
+      tables.put(name, new HiveTableMock(name, recordClass, this));
+      return this;
+    }
+
+    /**
+     * Allows using any table name when scanning, even if it was not registered.
+     * <p>
+     * The scanned table will provide the attributes of the class as columns.
+     */
+    public PlanFixture setDefaultRecordClass(Class<?> recordClass) {
+      this.defaultRecordClass = recordClass;
+      return this;
+    }
+
+    public RelOptPlanner getPlanner() {
+      return optCluster.getPlanner();
+    }
+
+    public RelBuilder createRelBuilder() {
+      final RelOptSchema schemaMock;
+      schemaMock = mock(RelOptSchema.class);
+      // create a copy that we can modify in our method
+      Map<List<String>, HiveTableMock> tableMap = new HashMap<>(tables);
+
+      when(schemaMock.getTableForMember(any())).thenAnswer(i -> {
+        List<String> tableName = i.getArgument(0);
+        HiveTableMock hiveTableMock = tableMap.get(tableName);
+        if (hiveTableMock == null) {
+          Objects.requireNonNull(defaultRecordClass, "Table " + tableName + " was not registered with the mock, and no default table provided");
+          hiveTableMock = new HiveTableMock(tableName, defaultRecordClass, this);
+          tableMap.put(tableName, hiveTableMock);
+        }
+        return hiveTableMock.tableMock;
+      });
+
+      return HiveRelFactories.HIVE_BUILDER.create(optCluster, schemaMock);
+    }
+  }
+
+  public static RelBuilder buildRelBuilder(RelOptPlanner planner,
       RelOptSchema schemaMock, RelOptHiveTable tableMock, Table hiveTableMock,
       Class<?> clazz) {
     RexBuilder rexBuilder = new RexBuilder(JAVA_TYPE_FACTORY);
@@ -101,7 +182,7 @@ static RexNode and(RelBuilder relBuilder, RexNode... args) {
     return relBuilder.call(SqlStdOperatorTable.AND, args);
   }
 
-  static void assertPlans(AbstractRelOptPlanner planner, RelNode plan, String expectedPrePlan, String expectedPostPlan) {
+  static void assertPlans(RelOptPlanner planner, RelNode plan, String expectedPrePlan, String expectedPostPlan) {
     planner.setRoot(plan);
     RelNode optimizedRelNode = planner.findBestExp();
     assertEquals("Original plans do not match", expectedPrePlan, RelOptUtil.toString(plan));
diff --git a/ql/src/test/queries/clientpositive/antijoin3.q b/ql/src/test/queries/clientpositive/antijoin3.q
new file mode 100644
index 000000000000..81cb5ce9954e
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/antijoin3.q
@@ -0,0 +1,64 @@
+SET hive.vectorized.execution.enabled=false;
+set hive.mapred.mode=nonstrict;
+SET hive.auto.convert.join=false;
+SET hive.auto.convert.anti.join=true;
+-- SORT_QUERY_RESULTS
+
+create table antijoin3_t1 (t1id int not null, t1notnull string not null, t1nullable string);
+create table antijoin3_t2 (t2id int not null, t2notnull string not null, t2nullable string);
+create table antijoin3_t3 (t3id int not null);
+
+insert into antijoin3_t1 values
+(0, "val_0", null),
+(1, "val_1", null),
+(2, "val_2", "val_2"),
+(3, "val_3", "val_3"),
+(4, "val_4", "val_4");
+
+insert into antijoin3_t2 values
+(0, "val_0", null),
+(1, "val_1", null),
+(4, "val_4", "val_4"),
+(5, "val_5", "val_5");
+
+insert into antijoin3_t3 values (0), (4), (6);
+
+-- do not introduce anti-join if filtering a nullable column with IS NULL
+explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2nullable is null;
+select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2nullable is null;
+
+-- but introduce anti-join if filtering a NOT NULL column with IS NULL
+explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null;
+select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null;
+
+-- play it safe and do not introduce antijoin for filters combining LHS and RHS 
columns +explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where (coalesce(t1notnull,t2notnull)) is null; +select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where (coalesce(t1notnull,t2notnull)) is null; + +-- selecting constants do not prevent an anti-join (HIVE-29164) +explain cbo select t1id, t1notnull, t1nullable, "foo" from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null; +select t1id, t1notnull, t1nullable, "foo" from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null; + +-- check whether nullability is propagated correctly +explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 +left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq +on t1id=t2id +where t2notnull is null; + +select t1id, t1notnull, t1nullable from antijoin3_t1 t1 +left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq +on t1id=t2id +where t2notnull is null; + +explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 +left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq +on t1id=t2id +-- t3id is from the RHS of the left join, so it becomes nullable, so no antijoin +where t3id is null; + +select t1id, t1notnull, t1nullable from antijoin3_t1 t1 +left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq +on t1id=t2id +-- t3id is from the RHS of the left join, so it becomes nullable, so no antijoin +where t3id is null; + diff --git a/ql/src/test/results/clientpositive/llap/antijoin3.q.out b/ql/src/test/results/clientpositive/llap/antijoin3.q.out new file mode 100644 index 000000000000..a05912d87d5c --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/antijoin3.q.out @@ -0,0 +1,292 @@ +PREHOOK: query: create table antijoin3_t1 (t1id int not null, t1notnull string not null, t1nullable string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@antijoin3_t1 +POSTHOOK: query: create table antijoin3_t1 (t1id int not null, t1notnull string not null, t1nullable string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@antijoin3_t1 +PREHOOK: query: create table antijoin3_t2 (t2id int not null, t2notnull string not null, t2nullable string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@antijoin3_t2 +POSTHOOK: query: create table antijoin3_t2 (t2id int not null, t2notnull string not null, t2nullable string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@antijoin3_t2 +PREHOOK: query: create table antijoin3_t3 (t3id int not null) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@antijoin3_t3 +POSTHOOK: query: create table antijoin3_t3 (t3id int not null) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@antijoin3_t3 +PREHOOK: query: insert into antijoin3_t1 values +(0, "val_0", null), +(1, "val_1", null), +(2, "val_2", "val_2"), +(3, "val_3", "val_3"), +(4, "val_4", "val_4") +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@antijoin3_t1 +POSTHOOK: query: insert into antijoin3_t1 values +(0, "val_0", null), +(1, "val_1", null), +(2, "val_2", "val_2"), +(3, "val_3", "val_3"), +(4, "val_4", "val_4") +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table 
+POSTHOOK: Output: default@antijoin3_t1 +POSTHOOK: Lineage: antijoin3_t1.t1id SCRIPT [] +POSTHOOK: Lineage: antijoin3_t1.t1notnull SCRIPT [] +POSTHOOK: Lineage: antijoin3_t1.t1nullable SCRIPT [] +PREHOOK: query: insert into antijoin3_t2 values +(0, "val_0", null), +(1, "val_1", null), +(4, "val_4", "val_4"), +(5, "val_5", "val_5") +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@antijoin3_t2 +POSTHOOK: query: insert into antijoin3_t2 values +(0, "val_0", null), +(1, "val_1", null), +(4, "val_4", "val_4"), +(5, "val_5", "val_5") +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@antijoin3_t2 +POSTHOOK: Lineage: antijoin3_t2.t2id SCRIPT [] +POSTHOOK: Lineage: antijoin3_t2.t2notnull SCRIPT [] +POSTHOOK: Lineage: antijoin3_t2.t2nullable SCRIPT [] +PREHOOK: query: insert into antijoin3_t3 values (0), (4), (6) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@antijoin3_t3 +POSTHOOK: query: insert into antijoin3_t3 values (0), (4), (6) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@antijoin3_t3 +POSTHOOK: Lineage: antijoin3_t3.t3id SCRIPT [] +PREHOOK: query: explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2nullable is null +PREHOOK: type: QUERY +PREHOOK: Input: default@antijoin3_t1 +PREHOOK: Input: default@antijoin3_t2 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2nullable is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@antijoin3_t1 +POSTHOOK: Input: default@antijoin3_t2 +#### A masked pattern was here #### +CBO PLAN: +HiveProject(t1id=[$0], t1notnull=[$1], t1nullable=[$2]) + HiveFilter(condition=[IS NULL($4)]) + HiveJoin(condition=[=($0, $3)], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(t1id=[$0], t1notnull=[$1], t1nullable=[$2]) + HiveTableScan(table=[[default, antijoin3_t1]], table:alias=[t1]) + HiveProject(t2id=[$0], t2nullable=[$2]) + HiveTableScan(table=[[default, antijoin3_t2]], table:alias=[t2]) + +PREHOOK: query: select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2nullable is null +PREHOOK: type: QUERY +PREHOOK: Input: default@antijoin3_t1 +PREHOOK: Input: default@antijoin3_t2 +#### A masked pattern was here #### +POSTHOOK: query: select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2nullable is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@antijoin3_t1 +POSTHOOK: Input: default@antijoin3_t2 +#### A masked pattern was here #### +0 val_0 NULL +1 val_1 NULL +2 val_2 val_2 +3 val_3 val_3 +PREHOOK: query: explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null +PREHOOK: type: QUERY +PREHOOK: Input: default@antijoin3_t1 +PREHOOK: Input: default@antijoin3_t2 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@antijoin3_t1 +POSTHOOK: Input: default@antijoin3_t2 +#### A masked pattern was here #### +CBO PLAN: +HiveAntiJoin(condition=[=($0, $3)], joinType=[anti]) + HiveProject(t1id=[$0], t1notnull=[$1], t1nullable=[$2]) + 
HiveTableScan(table=[[default, antijoin3_t1]], table:alias=[t1]) + HiveProject(t2id=[$0], t2notnull=[$1]) + HiveTableScan(table=[[default, antijoin3_t2]], table:alias=[t2]) + +PREHOOK: query: select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null +PREHOOK: type: QUERY +PREHOOK: Input: default@antijoin3_t1 +PREHOOK: Input: default@antijoin3_t2 +#### A masked pattern was here #### +POSTHOOK: query: select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@antijoin3_t1 +POSTHOOK: Input: default@antijoin3_t2 +#### A masked pattern was here #### +2 val_2 val_2 +3 val_3 val_3 +PREHOOK: query: explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where (coalesce(t1notnull,t2notnull)) is null +PREHOOK: type: QUERY +PREHOOK: Input: default@antijoin3_t1 +PREHOOK: Input: default@antijoin3_t2 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where (coalesce(t1notnull,t2notnull)) is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@antijoin3_t1 +POSTHOOK: Input: default@antijoin3_t2 +#### A masked pattern was here #### +CBO PLAN: +HiveValues(tuples=[[]]) + +PREHOOK: query: select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where (coalesce(t1notnull,t2notnull)) is null +PREHOOK: type: QUERY +PREHOOK: Input: default@antijoin3_t1 +PREHOOK: Input: default@antijoin3_t2 +#### A masked pattern was here #### +POSTHOOK: query: select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where (coalesce(t1notnull,t2notnull)) is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@antijoin3_t1 +POSTHOOK: Input: default@antijoin3_t2 +#### A masked pattern was here #### +PREHOOK: query: explain cbo select t1id, t1notnull, t1nullable, "foo" from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null +PREHOOK: type: QUERY +PREHOOK: Input: default@antijoin3_t1 +PREHOOK: Input: default@antijoin3_t2 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select t1id, t1notnull, t1nullable, "foo" from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@antijoin3_t1 +POSTHOOK: Input: default@antijoin3_t2 +#### A masked pattern was here #### +CBO PLAN: +HiveProject(t1id=[$0], t1notnull=[$1], t1nullable=[$2], _o__c3=[_UTF-16LE'foo':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"]) + HiveAntiJoin(condition=[=($0, $3)], joinType=[anti]) + HiveProject(t1id=[$0], t1notnull=[$1], t1nullable=[$2]) + HiveTableScan(table=[[default, antijoin3_t1]], table:alias=[t1]) + HiveProject(t2id=[$0], t2notnull=[$1]) + HiveTableScan(table=[[default, antijoin3_t2]], table:alias=[t2]) + +PREHOOK: query: select t1id, t1notnull, t1nullable, "foo" from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null +PREHOOK: type: QUERY +PREHOOK: Input: default@antijoin3_t1 +PREHOOK: Input: default@antijoin3_t2 +#### A masked pattern was here #### +POSTHOOK: query: select t1id, t1notnull, t1nullable, "foo" from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@antijoin3_t1 +POSTHOOK: Input: default@antijoin3_t2 +#### A masked pattern was 
here #### +2 val_2 val_2 foo +3 val_3 val_3 foo +PREHOOK: query: explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 +left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq +on t1id=t2id +where t2notnull is null +PREHOOK: type: QUERY +PREHOOK: Input: default@antijoin3_t1 +PREHOOK: Input: default@antijoin3_t2 +PREHOOK: Input: default@antijoin3_t3 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 +left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq +on t1id=t2id +where t2notnull is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@antijoin3_t1 +POSTHOOK: Input: default@antijoin3_t2 +POSTHOOK: Input: default@antijoin3_t3 +#### A masked pattern was here #### +CBO PLAN: +HiveAntiJoin(condition=[=($0, $3)], joinType=[anti]) + HiveProject(t1id=[$0], t1notnull=[$1], t1nullable=[$2]) + HiveTableScan(table=[[default, antijoin3_t1]], table:alias=[t1]) + HiveProject(t2id=[$0], t2notnull=[$1], t3id=[$2]) + HiveJoin(condition=[=($0, $2)], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(t2id=[$0], t2notnull=[$1]) + HiveTableScan(table=[[default, antijoin3_t2]], table:alias=[antijoin3_t2]) + HiveProject(t3id=[$0]) + HiveTableScan(table=[[default, antijoin3_t3]], table:alias=[antijoin3_t3]) + +PREHOOK: query: select t1id, t1notnull, t1nullable from antijoin3_t1 t1 +left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq +on t1id=t2id +where t2notnull is null +PREHOOK: type: QUERY +PREHOOK: Input: default@antijoin3_t1 +PREHOOK: Input: default@antijoin3_t2 +PREHOOK: Input: default@antijoin3_t3 +#### A masked pattern was here #### +POSTHOOK: query: select t1id, t1notnull, t1nullable from antijoin3_t1 t1 +left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq +on t1id=t2id +where t2notnull is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@antijoin3_t1 +POSTHOOK: Input: default@antijoin3_t2 +POSTHOOK: Input: default@antijoin3_t3 +#### A masked pattern was here #### +2 val_2 val_2 +3 val_3 val_3 +PREHOOK: query: explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 +left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq +on t1id=t2id + +where t3id is null +PREHOOK: type: QUERY +PREHOOK: Input: default@antijoin3_t1 +PREHOOK: Input: default@antijoin3_t2 +PREHOOK: Input: default@antijoin3_t3 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 +left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq +on t1id=t2id + +where t3id is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@antijoin3_t1 +POSTHOOK: Input: default@antijoin3_t2 +POSTHOOK: Input: default@antijoin3_t3 +#### A masked pattern was here #### +CBO PLAN: +HiveProject(t1id=[$0], t1notnull=[$1], t1nullable=[$2]) + HiveFilter(condition=[IS NULL($4)]) + HiveJoin(condition=[=($0, $3)], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(t1id=[$0], t1notnull=[$1], t1nullable=[$2]) + HiveTableScan(table=[[default, antijoin3_t1]], table:alias=[t1]) + HiveJoin(condition=[=($0, $1)], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(t2id=[$0]) + HiveTableScan(table=[[default, antijoin3_t2]], table:alias=[antijoin3_t2]) + HiveProject(t3id=[$0]) + HiveTableScan(table=[[default, antijoin3_t3]], table:alias=[antijoin3_t3]) + +PREHOOK: query: select t1id, t1notnull, t1nullable from 
antijoin3_t1 t1 +left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq +on t1id=t2id + +where t3id is null +PREHOOK: type: QUERY +PREHOOK: Input: default@antijoin3_t1 +PREHOOK: Input: default@antijoin3_t2 +PREHOOK: Input: default@antijoin3_t3 +#### A masked pattern was here #### +POSTHOOK: query: select t1id, t1notnull, t1nullable from antijoin3_t1 t1 +left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq +on t1id=t2id + +where t3id is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@antijoin3_t1 +POSTHOOK: Input: default@antijoin3_t2 +POSTHOOK: Input: default@antijoin3_t3 +#### A masked pattern was here #### +1 val_1 NULL +2 val_2 val_2 +3 val_3 val_3 diff --git a/ql/src/test/results/clientpositive/llap/llap_smb_ptf.q.out b/ql/src/test/results/clientpositive/llap/llap_smb_ptf.q.out index f9f128b394c6..ba1d7fdde052 100644 --- a/ql/src/test/results/clientpositive/llap/llap_smb_ptf.q.out +++ b/ql/src/test/results/clientpositive/llap/llap_smb_ptf.q.out @@ -586,12 +586,18 @@ STAGE PLANS: expressions: _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: @@ -663,8 +669,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1 Data size: 304 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col7 (type: int), _col0 (type: smallint), _col1 (type: string), _col2 (type: smallint), _col3 (type: string), _col5 (type: smallint), _col6 (type: string), _col8 (type: smallint), _col9 (type: smallint), _col10 (type: string), _col11 (type: string), _col12 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + expressions: _col7 (type: int), _col0 (type: smallint), _col1 (type: string), _col2 (type: smallint), _col3 (type: string), _col5 (type: smallint), _col6 (type: string), _col8 (type: smallint), _col9 (type: smallint), _col11 (type: string), _col12 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Statistics: Num rows: 1 Data size: 304 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -672,36 +678,29 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 304 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: smallint), _col2 (type: string), _col3 (type: smallint), _col4 (type: string), _col6 (type: smallint), _col7 (type: string), _col8 (type: smallint), _col9 (type: smallint), _col10 (type: string), _col11 (type: string), _col12 (type: string) + value expressions: _col1 (type: smallint), _col2 (type: string), _col3 (type: smallint), _col4 (type: string), 
_col5 (type: smallint), _col6 (type: string), _col7 (type: smallint), _col8 (type: smallint), _col9 (type: string), _col10 (type: string), _col11 (type: string) Reducer 5 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Left Outer Join 0 to 1 + Anti Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col14 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Statistics: Num rows: 1 Data size: 334 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: smallint), _col2 (type: string), _col3 (type: smallint), _col4 (type: string), _col6 (type: smallint), _col7 (type: string), _col8 (type: smallint), _col9 (type: smallint), _col11 (type: string), _col12 (type: string), _col10 (type: string), _col14 (type: int) + expressions: _col0 (type: int), _col1 (type: smallint), _col2 (type: string), _col3 (type: smallint), _col4 (type: string), _col5 (type: smallint), _col6 (type: string), _col7 (type: smallint), _col8 (type: smallint), _col9 (type: string), _col10 (type: string), _col11 (type: string), '201611160940' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 1 Data size: 334 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: _col12 is null (type: boolean) + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 334 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: smallint), _col2 (type: string), _col3 (type: smallint), _col4 (type: string), _col5 (type: smallint), _col6 (type: string), _col7 (type: smallint), _col8 (type: smallint), _col9 (type: string), _col10 (type: string), _col11 (type: string), '201611160940' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 334 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 334 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: