1717 */
1818package org .apache .hadoop .hive .ql .optimizer .calcite .rules ;
1919
20+ import com .google .common .collect .ImmutableList ;
21+ import org .apache .calcite .plan .RelOptCluster ;
22+ import org .apache .calcite .plan .RelOptPredicateList ;
2023import org .apache .calcite .plan .RelOptRule ;
2124import org .apache .calcite .plan .RelOptRuleCall ;
2225import org .apache .calcite .plan .RelOptUtil ;
26+ import org .apache .calcite .plan .RexImplicationChecker ;
2327import org .apache .calcite .plan .Strong ;
2428import org .apache .calcite .rel .RelNode ;
2529import org .apache .calcite .rel .core .Filter ;
2630import org .apache .calcite .rel .core .Join ;
2731import org .apache .calcite .rel .core .JoinRelType ;
2832import org .apache .calcite .rel .core .Project ;
33+ import org .apache .calcite .rel .metadata .RelMetadataQuery ;
34+ import org .apache .calcite .rel .type .RelDataTypeField ;
35+ import org .apache .calcite .rex .RexBuilder ;
2936import org .apache .calcite .rex .RexCall ;
37+ import org .apache .calcite .rex .RexExecutor ;
38+ import org .apache .calcite .rex .RexExecutorImpl ;
3039import org .apache .calcite .rex .RexNode ;
40+ import org .apache .calcite .rex .RexUtil ;
3141import org .apache .calcite .rex .RexVisitorImpl ;
3242import org .apache .calcite .sql .SqlKind ;
3343import org .apache .calcite .sql .fun .SqlStdOperatorTable ;
44+ import org .apache .calcite .util .ImmutableBitSet ;
45+ import org .apache .calcite .util .Util ;
3446import org .apache .hadoop .hive .ql .optimizer .calcite .HiveCalciteUtil ;
3547import org .apache .hadoop .hive .ql .optimizer .calcite .reloperators .HiveAntiJoin ;
3648import org .slf4j .Logger ;
3749import org .slf4j .LoggerFactory ;
3850
3951import java .util .ArrayList ;
40- import java .util .Collections ;
4152import java .util .List ;
42- import java .util .concurrent . atomic . AtomicBoolean ;
53+ import java .util .Optional ;
4354
4455/**
4556 * Planner rule that converts a join plus filter to anti join.
@@ -86,14 +97,17 @@ protected void perform(RelOptRuleCall call, Project project, Filter filter, Join
8697
8798 assert (filter != null );
8899
89- List <RexNode > filterList = getResidualFilterNodes (filter , join );
90- if (filterList == null ) {
100+ ImmutableBitSet rhsFields = HiveCalciteUtil .getRightSideBitset (join );
101+ Optional <List <RexNode >> optFilterList = getResidualFilterNodes (filter , join , rhsFields );
102+ if (optFilterList .isEmpty ()) {
91103 return ;
92104 }
105+ List <RexNode > filterList = optFilterList .get ();
93106
94107 // If any projection is there from right side, then we can not convert to anti join.
95- boolean hasProjection = HiveCalciteUtil .hasAnyExpressionFromRightSide (join , project .getProjects ());
96- if (hasProjection ) {
108+ ImmutableBitSet projectedFields = RelOptUtil .InputFinder .bits (project .getProjects (), null );
109+ boolean projectionUsesRHS = projectedFields .intersects (rhsFields );
110+ if (projectionUsesRHS ) {
97111 return ;
98112 }
99113
@@ -119,13 +133,14 @@ protected void perform(RelOptRuleCall call, Project project, Filter filter, Join
119133 /**
120134 * Extracts the non-null filter conditions from given filter node.
121135 *
122- * @param filter The filter condition to be checked.
123- * @param join Join node whose right side has to be searched.
136+ * @param filter The filter condition to be checked.
137+ * @param join Join node whose right side has to be searched.
138+ * @param rhsFields
124139 * @return null : Anti join condition is not matched for filter.
125- * Empty list : No residual filter conditions present.
126- * Valid list containing the filter to be applied after join.
140+ * Empty list : No residual filter conditions present.
141+ * Valid list containing the filter to be applied after join.
127142 */
128- private List <RexNode > getResidualFilterNodes (Filter filter , Join join ) {
143+ private Optional < List <RexNode >> getResidualFilterNodes (Filter filter , Join join , ImmutableBitSet rhsFields ) {
129144 // 1. If null filter is not present from right side then we can not convert to anti join.
130145 // 2. If any non-null filter is present from right side, we can not convert it to anti join.
131146 // 3. Keep other filters which needs to be executed after join.
@@ -135,43 +150,123 @@ private List<RexNode> getResidualFilterNodes(Filter filter, Join join) {
135150 List <RexNode > aboveFilters = RelOptUtil .conjunctions (filter .getCondition ());
136151 boolean hasNullFilterOnRightSide = false ;
137152 List <RexNode > filterList = new ArrayList <>();
153+ final ImmutableBitSet notNullColumnsFromRightSide = getNotNullColumnsFromRightSide (join );
154+
138155 for (RexNode filterNode : aboveFilters ) {
139- if (filterNode .getKind () == SqlKind .IS_NULL ) {
140- // Null filter from right side table can be removed and its a pre-condition for anti join conversion.
141- if (HiveCalciteUtil .hasAllExpressionsFromRightSide (join , Collections .singletonList (filterNode ))
142- && isStrong (((RexCall ) filterNode ).getOperands ().get (0 ))) {
143- hasNullFilterOnRightSide = true ;
144- } else {
145- filterList .add (filterNode );
146- }
147- } else {
148- if (HiveCalciteUtil .hasAnyExpressionFromRightSide (join , Collections .singletonList (filterNode ))) {
149- // If some non null condition is present from right side, we can not convert the join to anti join as
150- // anti join does not project the fields from right side.
151- return null ;
152- } else {
153- filterList .add (filterNode );
154- }
156+ final ImmutableBitSet usedFields = RelOptUtil .InputFinder .bits (filterNode );
157+ boolean usesFieldFromRHS = usedFields .intersects (rhsFields );
158+
159+ if (!usesFieldFromRHS ) {
160+ // Only LHS fields or constants, so the filterNode is part of the residual filter
161+ filterList .add (filterNode );
162+ continue ;
163+ }
164+
165+ // In the following we check for filter nodes that let us deduce that
166+ // "an (originally) not-null column of RHS IS NULL because the LHS row will not be matched"
167+
168+ if (filterNode .getKind () != SqlKind .IS_NULL ) {
169+ return Optional .empty ();
170+ }
171+
172+ boolean usesRHSFieldsOnly = rhsFields .contains (usedFields );
173+ if (!usesRHSFieldsOnly ) {
174+ // If there is a mix between LHS and RHS fields, don't convert to anti-join
175+ return Optional .empty ();
176+ }
177+
178+ // Null filter from right side table can be removed and it is a pre-condition for anti join conversion.
179+ RexNode arg = ((RexCall ) filterNode ).getOperands ().get (0 );
180+ if (isStrong (arg , notNullColumnsFromRightSide )) {
181+ hasNullFilterOnRightSide = true ;
182+ } else if (!isStrong (arg , rhsFields )) {
183+ // if all RHS fields are null and the IS NULL is still not fulfilled, bail out
184+ return Optional .empty ();
155185 }
156186 }
157187
158188 if (!hasNullFilterOnRightSide ) {
159- return null ;
189+ return Optional . empty () ;
160190 }
161- return filterList ;
191+ return Optional . of ( filterList ) ;
162192 }
163193
164- private boolean isStrong (RexNode rexNode ) {
165- AtomicBoolean hasCast = new AtomicBoolean (false );
166- rexNode .accept (new RexVisitorImpl <Void >(true ) {
167- @ Override
168- public Void visitCall (RexCall call ) {
169- if (call .getKind () == SqlKind .CAST ) {
170- hasCast .set (true );
171- }
172- return super .visitCall (call );
194+ private ImmutableBitSet getNotNullColumnsFromRightSide (RelNode joinRel ) {
195+ // we need to shift the indices of the second child to the right
196+ int shift = (joinRel .getInput (0 )).getRowType ().getFieldCount ();
197+ ImmutableBitSet rhsNotnullColumns = deduceNotNullColumns (joinRel .getInput (1 ));
198+ return rhsNotnullColumns .shift (shift );
199+ }
200+
201+ /**
202+ * Deduce which columns of the <code>relNode</code> are definitively NOT NULL.
203+ */
204+ private ImmutableBitSet deduceNotNullColumns (RelNode relNode ) {
205+ // adapted from org.apache.calcite.plan.RelOptUtil.containsNullableFields
206+ RelOptCluster cluster = relNode .getCluster ();
207+ final RexBuilder rexBuilder = cluster .getRexBuilder ();
208+ final RelMetadataQuery mq = cluster .getMetadataQuery ();
209+ ImmutableBitSet .Builder result = ImmutableBitSet .builder ();
210+ ImmutableBitSet .Builder candidatesBuilder = ImmutableBitSet .builder ();
211+ List <RelDataTypeField > fieldList = relNode .getRowType ().getFieldList ();
212+ for (int i =0 ; i <fieldList .size (); i ++) {
213+ if (fieldList .get (i ).getType ().isNullable ()) {
214+ candidatesBuilder .set (i );
215+ }
216+ else {
217+ result .set (i );
218+ }
219+ }
220+ ImmutableBitSet candidates = candidatesBuilder .build ();
221+ if (candidates .isEmpty ()) {
222+ // All columns are declared NOT NULL, no need to change
223+ return result .build ();
224+ }
225+ final RexExecutor executor = cluster .getPlanner ().getExecutor ();
226+ if (!(executor instanceof RexExecutorImpl )) {
227+ // Cannot proceed without an executor.
228+ return result .build ();
229+ }
230+
231+ final RexImplicationChecker checker =
232+ new RexImplicationChecker (rexBuilder , executor , relNode .getRowType ());
233+ final RelOptPredicateList predicates = mq .getPulledUpPredicates (relNode );
234+
235+ ImmutableList <RexNode > preds = predicates .pulledUpPredicates ;
236+ final List <RexNode > antecedent = new ArrayList <>(preds );
237+ final RexNode first = RexUtil .composeConjunction (rexBuilder , antecedent );
238+ for (int c : candidates ) {
239+ RelDataTypeField field = fieldList .get (c );
240+ final RexNode second = rexBuilder .makeCall (SqlStdOperatorTable .IS_NOT_NULL ,
241+ rexBuilder .makeInputRef (field .getType (), field .getIndex ()));
242+ // Suppose we have EMP(empno INT NOT NULL, mgr INT),
243+ // and predicates [empno > 0, mgr > 0].
244+ // We make first: "empno > 0 AND mgr > 0"
245+ // and second: "mgr IS NOT NULL"
246+ // and ask whether first implies second.
247+ // It does, so we have no nullable columns.
248+ if (checker .implies (first , second )) {
249+ result .set (c );
173250 }
174- });
175- return !hasCast .get () && Strong .isStrong (rexNode );
251+ }
252+ return result .build ();
253+ }
254+
255+ private boolean isStrong (RexNode rexNode , ImmutableBitSet rightSideBitset ) {
256+ try {
257+ rexNode .accept (new RexVisitorImpl <Void >(true ) {
258+ @ Override
259+ public Void visitCall (RexCall call ) {
260+ if (call .getKind () == SqlKind .CAST ) {
261+ throw Util .FoundOne .NULL ;
262+ }
263+ return super .visitCall (call );
264+ }
265+ });
266+ } catch (Util .FoundOne e ) {
267+ // Hive's CAST might introduce NULL for NOT NULL fields
268+ return false ;
269+ }
270+ return Strong .isNull (rexNode , rightSideBitset );
176271 }
177272}
0 commit comments