Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1233,43 +1233,17 @@ public FixNullabilityShuttle(RexBuilder rexBuilder,
}

/**
* Checks if any of the expression given as list expressions are from right side of the join.
* This is used during anti join conversion.
*
* @param joinRel Join node whose right side has to be searched.
* @param expressions The list of expression to search.
* @return true if any of the expressions is from right side of join.
* Given a join, creates a bitset of the joined columns originating from the right-hand side.
* @param joinRel a join that concatenates all columns from its inputs (so no semi-join)
* @return a bitset
*/
public static boolean hasAnyExpressionFromRightSide(RelNode joinRel, List<RexNode> expressions) {
List<RelDataTypeField> joinFields = joinRel.getRowType().getFieldList();
int nTotalFields = joinFields.size();
List<RelDataTypeField> leftFields = (joinRel.getInputs().get(0)).getRowType().getFieldList();
int nFieldsLeft = leftFields.size();
ImmutableBitSet rightBitmap = ImmutableBitSet.range(nFieldsLeft, nTotalFields);

for (RexNode node : expressions) {
ImmutableBitSet inputBits = RelOptUtil.InputFinder.bits(node);
if (rightBitmap.contains(inputBits)) {
return true;
}
}
return false;
}

public static boolean hasAllExpressionsFromRightSide(RelNode joinRel, List<RexNode> expressions) {
List<RelDataTypeField> joinFields = joinRel.getRowType().getFieldList();
int nTotalFields = joinFields.size();
List<RelDataTypeField> leftFields = (joinRel.getInputs().get(0)).getRowType().getFieldList();
int nFieldsLeft = leftFields.size();
ImmutableBitSet rightBitmap = ImmutableBitSet.range(nFieldsLeft, nTotalFields);

for (RexNode node : expressions) {
ImmutableBitSet inputBits = RelOptUtil.InputFinder.bits(node);
if (!rightBitmap.contains(inputBits)) {
return false;
}
public static ImmutableBitSet getRightSideBitset(RelNode joinRel) {
if(joinRel.getInputs().size() != 2) {
throw new IllegalArgumentException("The relation must have exactly two children:\n" + RelOptUtil.toString(joinRel));
}
return true;
int nTotalFields = joinRel.getRowType().getFieldCount();
int nFieldsLeft = (joinRel.getInputs().get(0)).getRowType().getFieldCount();
return ImmutableBitSet.range(nFieldsLeft, nTotalFields);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,29 +17,40 @@
*/
package org.apache.hadoop.hive.ql.optimizer.calcite.rules;

import com.google.common.collect.ImmutableList;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptPredicateList;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.plan.RexImplicationChecker;
import org.apache.calcite.plan.Strong;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexExecutor;
import org.apache.calcite.rex.RexExecutorImpl;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.rex.RexVisitorImpl;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.Util;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAntiJoin;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.Optional;

/**
* Planner rule that converts a join plus filter to anti join.
Expand Down Expand Up @@ -86,14 +97,17 @@ protected void perform(RelOptRuleCall call, Project project, Filter filter, Join

assert (filter != null);

List<RexNode> filterList = getResidualFilterNodes(filter, join);
if (filterList == null) {
ImmutableBitSet rhsFields = HiveCalciteUtil.getRightSideBitset(join);
Optional<List<RexNode>> optFilterList = getResidualFilterNodes(filter, join, rhsFields);
if (optFilterList.isEmpty()) {
return;
}
List<RexNode> filterList = optFilterList.get();

// If any projection is there from right side, then we can not convert to anti join.
boolean hasProjection = HiveCalciteUtil.hasAnyExpressionFromRightSide(join, project.getProjects());
if (hasProjection) {
ImmutableBitSet projectedFields = RelOptUtil.InputFinder.bits(project.getProjects(), null);
boolean projectionUsesRHS = projectedFields.intersects(rhsFields);
if (projectionUsesRHS) {
return;
}

Expand All @@ -119,13 +133,14 @@ protected void perform(RelOptRuleCall call, Project project, Filter filter, Join
/**
* Extracts the non-null filter conditions from given filter node.
*
* @param filter The filter condition to be checked.
* @param join Join node whose right side has to be searched.
* @param filter The filter condition to be checked.
* @param join Join node whose right side has to be searched.
* @param rhsFields
* @return null : Anti join condition is not matched for filter.
* Empty list : No residual filter conditions present.
* Valid list containing the filter to be applied after join.
* Empty list : No residual filter conditions present.
* Valid list containing the filter to be applied after join.
*/
private List<RexNode> getResidualFilterNodes(Filter filter, Join join) {
private Optional<List<RexNode>> getResidualFilterNodes(Filter filter, Join join, ImmutableBitSet rhsFields) {
// 1. If null filter is not present from right side then we can not convert to anti join.
// 2. If any non-null filter is present from right side, we can not convert it to anti join.
// 3. Keep other filters which needs to be executed after join.
Expand All @@ -135,43 +150,123 @@ private List<RexNode> getResidualFilterNodes(Filter filter, Join join) {
List<RexNode> aboveFilters = RelOptUtil.conjunctions(filter.getCondition());
boolean hasNullFilterOnRightSide = false;
List<RexNode> filterList = new ArrayList<>();
final ImmutableBitSet notNullColumnsFromRightSide = getNotNullColumnsFromRightSide(join);

for (RexNode filterNode : aboveFilters) {
if (filterNode.getKind() == SqlKind.IS_NULL) {
// Null filter from right side table can be removed and its a pre-condition for anti join conversion.
if (HiveCalciteUtil.hasAllExpressionsFromRightSide(join, Collections.singletonList(filterNode))
&& isStrong(((RexCall) filterNode).getOperands().get(0))) {
hasNullFilterOnRightSide = true;
} else {
filterList.add(filterNode);
}
} else {
if (HiveCalciteUtil.hasAnyExpressionFromRightSide(join, Collections.singletonList(filterNode))) {
// If some non null condition is present from right side, we can not convert the join to anti join as
// anti join does not project the fields from right side.
return null;
} else {
filterList.add(filterNode);
}
final ImmutableBitSet usedFields = RelOptUtil.InputFinder.bits(filterNode);
boolean usesFieldFromRHS = usedFields.intersects(rhsFields);

if(!usesFieldFromRHS) {
// Only LHS fields or constants, so the filterNode is part of the residual filter
filterList.add(filterNode);
continue;
}

// In the following we check for filter nodes that let us deduce that
// "an (originally) not-null column of RHS IS NULL because the LHS row will not be matched"

if(filterNode.getKind() != SqlKind.IS_NULL) {
return Optional.empty();
}

boolean usesRHSFieldsOnly = rhsFields.contains(usedFields);
if (!usesRHSFieldsOnly) {
// If there is a mix between LHS and RHS fields, don't convert to anti-join
return Optional.empty();
}

// Null filter from right side table can be removed and it is a pre-condition for anti join conversion.
RexNode arg = ((RexCall) filterNode).getOperands().get(0);
if (isStrong(arg, notNullColumnsFromRightSide)) {
hasNullFilterOnRightSide = true;
} else if(!isStrong(arg, rhsFields)) {
// if all RHS fields are null and the IS NULL is still not fulfilled, bail out
return Optional.empty();
}
}

if (!hasNullFilterOnRightSide) {
return null;
return Optional.empty();
}
return filterList;
return Optional.of(filterList);
}

private boolean isStrong(RexNode rexNode) {
AtomicBoolean hasCast = new AtomicBoolean(false);
rexNode.accept(new RexVisitorImpl<Void>(true) {
@Override
public Void visitCall(RexCall call) {
if (call.getKind() == SqlKind.CAST) {
hasCast.set(true);
}
return super.visitCall(call);
private ImmutableBitSet getNotNullColumnsFromRightSide(RelNode joinRel) {
// we need to shift the indices of the second child to the right
int shift = (joinRel.getInput(0)).getRowType().getFieldCount();
ImmutableBitSet rhsNotnullColumns = deduceNotNullColumns(joinRel.getInput(1));
return rhsNotnullColumns.shift(shift);
}

/**
* Deduce which columns of the <code>relNode</code> are definitively NOT NULL.
*/
private ImmutableBitSet deduceNotNullColumns(RelNode relNode) {
// adapted from org.apache.calcite.plan.RelOptUtil.containsNullableFields
RelOptCluster cluster = relNode.getCluster();
final RexBuilder rexBuilder = cluster.getRexBuilder();
final RelMetadataQuery mq = cluster.getMetadataQuery();
ImmutableBitSet.Builder result = ImmutableBitSet.builder();
ImmutableBitSet.Builder candidatesBuilder = ImmutableBitSet.builder();
List<RelDataTypeField> fieldList = relNode.getRowType().getFieldList();
for (int i=0; i<fieldList.size(); i++) {
if (fieldList.get(i).getType().isNullable()) {
candidatesBuilder.set(i);
}
else {
result.set(i);
}
}
ImmutableBitSet candidates = candidatesBuilder.build();
if (candidates.isEmpty()) {
// All columns are declared NOT NULL, no need to change
return result.build();
}
final RexExecutor executor = cluster.getPlanner().getExecutor();
if (!(executor instanceof RexExecutorImpl)) {
// Cannot proceed without an executor.
return result.build();
}

final RexImplicationChecker checker =
new RexImplicationChecker(rexBuilder, executor, relNode.getRowType());
final RelOptPredicateList predicates = mq.getPulledUpPredicates(relNode);

ImmutableList<RexNode> preds = predicates.pulledUpPredicates;
final List<RexNode> antecedent = new ArrayList<>(preds);
final RexNode first = RexUtil.composeConjunction(rexBuilder, antecedent);
for (int c : candidates) {
RelDataTypeField field = fieldList.get(c);
final RexNode second = rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL,
rexBuilder.makeInputRef(field.getType(), field.getIndex()));
// Suppose we have EMP(empno INT NOT NULL, mgr INT),
// and predicates [empno > 0, mgr > 0].
// We make first: "empno > 0 AND mgr > 0"
// and second: "mgr IS NOT NULL"
// and ask whether first implies second.
// It does, so we have no nullable columns.
if(checker.implies(first, second)) {
result.set(c);
}
});
return !hasCast.get() && Strong.isStrong(rexNode);
}
return result.build();
}

private boolean isStrong(RexNode rexNode, ImmutableBitSet rightSideBitset) {
try {
rexNode.accept(new RexVisitorImpl<Void>(true) {
@Override
public Void visitCall(RexCall call) {
if (call.getKind() == SqlKind.CAST) {
throw Util.FoundOne.NULL;
}
return super.visitCall(call);
}
});
} catch (Util.FoundOne e) {
// Hive's CAST might introduce NULL for NOT NULL fields
return false;
}
return Strong.isNull(rexNode, rightSideBitset);
}
}
Loading
Loading