From 09fd96ec24f9cb9e2d37a794ec5b2725db7c75fb Mon Sep 17 00:00:00 2001 From: Gian Merlino <gianmerlino@gmail.com> Date: Thu, 30 Jan 2025 08:56:46 -0800 Subject: [PATCH] Expr: Collect BindingAnalysis in bulk rather than one at a time. (#17613) Speeds up analysis of functions with large numbers of arguments, such as CASE statements with many branches. The prior code would call "with" for each argument on the accumulated analysis so far, which needlessly re-created the sets of variables over and over. --- .../druid/math/expr/BinaryEvalOpExprBase.java | 4 +- .../java/org/apache/druid/math/expr/Expr.java | 68 +++++++++++++------ .../org/apache/druid/math/expr/Exprs.java | 6 +- .../druid/math/expr/FunctionalExpr.java | 28 +++----- 4 files changed, 60 insertions(+), 46 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/math/expr/BinaryEvalOpExprBase.java b/processing/src/main/java/org/apache/druid/math/expr/BinaryEvalOpExprBase.java index ebd90a88bfda..1ec567f97b0b 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/BinaryEvalOpExprBase.java +++ b/processing/src/main/java/org/apache/druid/math/expr/BinaryEvalOpExprBase.java @@ -24,6 +24,7 @@ import org.apache.druid.java.util.common.StringUtils; import javax.annotation.Nullable; +import java.util.Arrays; import java.util.Objects; /** @@ -77,7 +78,8 @@ public String stringify() public BindingAnalysis analyzeInputs() { // currently all binary operators operate on scalar inputs - return left.analyzeInputs().with(right).withScalarArguments(ImmutableSet.of(left, right)); + return BindingAnalysis.collectExprs(Arrays.asList(left, right)) + .withScalarArguments(ImmutableSet.of(left, right)); } @Nullable diff --git a/processing/src/main/java/org/apache/druid/math/expr/Expr.java b/processing/src/main/java/org/apache/druid/math/expr/Expr.java index 9355447340ad..b287d8f12892 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/Expr.java +++ b/processing/src/main/java/org/apache/druid/math/expr/Expr.java @@ -39,10 +39,12 @@ import javax.annotation.Nullable; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.HashSet; import java.util.List; import java.util.Objects; import java.util.Set; +import java.util.stream.Collectors; /** * Base interface of Druid expression language abstract syntax tree nodes. All {@link Expr} implementations are @@ -576,6 +578,50 @@ private BindingAnalysis( this.isOutputArray = isOutputArray; } + /** + * Create an instance by combining a collection of other instances. + */ + public static BindingAnalysis collect(final Collection<BindingAnalysis> others) + { + if (others.isEmpty()) { + return EMTPY; + } else if (others.size() == 1) { + return Iterables.getOnlyElement(others); + } else { + final ImmutableSet.Builder<IdentifierExpr> freeVariables = ImmutableSet.builder(); + final ImmutableSet.Builder<IdentifierExpr> scalarVariables = ImmutableSet.builder(); + final ImmutableSet.Builder<IdentifierExpr> arrayVariables = ImmutableSet.builder(); + + boolean hasInputArrays = false; + boolean isOutputArray = false; + + for (final BindingAnalysis other : others) { + hasInputArrays = hasInputArrays || other.hasInputArrays; + isOutputArray = isOutputArray || other.isOutputArray; + + freeVariables.addAll(other.freeVariables); + scalarVariables.addAll(other.scalarVariables); + arrayVariables.addAll(other.arrayVariables); + } + + return new BindingAnalysis( + freeVariables.build(), + scalarVariables.build(), + arrayVariables.build(), + hasInputArrays, + isOutputArray + ); + } + } + + /** + * Create an instance by combining a collection of analyses from {@link Expr#analyzeInputs()}. + */ + public static BindingAnalysis collectExprs(final Collection<Expr> exprs) + { + return collect(exprs.stream().map(Expr::analyzeInputs).collect(Collectors.toList())); + } + /** * Get the list of required column inputs to evaluate an expression ({@link IdentifierExpr#binding}) */ @@ -658,28 +704,6 @@ public boolean isOutputArray() return isOutputArray; } - /** - * Combine with {@link BindingAnalysis} from {@link Expr#analyzeInputs()} - */ - public BindingAnalysis with(Expr other) - { - return with(other.analyzeInputs()); - } - - /** - * Combine (union) another {@link BindingAnalysis} - */ - public BindingAnalysis with(BindingAnalysis other) - { - return new BindingAnalysis( - ImmutableSet.copyOf(Sets.union(freeVariables, other.freeVariables)), - ImmutableSet.copyOf(Sets.union(scalarVariables, other.scalarVariables)), - ImmutableSet.copyOf(Sets.union(arrayVariables, other.arrayVariables)), - hasInputArrays || other.hasInputArrays, - isOutputArray || other.isOutputArray - ); - } - /** * Add set of arguments as {@link BindingAnalysis#scalarVariables} that are *directly* {@link IdentifierExpr}, * else they are ignored. diff --git a/processing/src/main/java/org/apache/druid/math/expr/Exprs.java b/processing/src/main/java/org/apache/druid/math/expr/Exprs.java index e3d2844ba033..c012cc833270 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/Exprs.java +++ b/processing/src/main/java/org/apache/druid/math/expr/Exprs.java @@ -58,11 +58,7 @@ public static UnsupportedOperationException cannotVectorize(String msg) */ public static Expr.BindingAnalysis analyzeBindings(final List<Expr> args) { - Expr.BindingAnalysis accumulator = new Expr.BindingAnalysis(); - for (final Expr arg : args) { - accumulator = accumulator.with(arg); - } - return accumulator; + return Expr.BindingAnalysis.collectExprs(args); } /** diff --git a/processing/src/main/java/org/apache/druid/math/expr/FunctionalExpr.java b/processing/src/main/java/org/apache/druid/math/expr/FunctionalExpr.java index e95140011a78..5eadbf96ba12 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/FunctionalExpr.java +++ b/processing/src/main/java/org/apache/druid/math/expr/FunctionalExpr.java @@ -128,15 +128,11 @@ public Expr visit(Shuttle shuttle) @Override public BindingAnalysis analyzeInputs() { - BindingAnalysis accumulator = new BindingAnalysis(); - - for (Expr arg : args) { - accumulator = accumulator.with(arg); - } - return accumulator.withScalarArguments(function.getScalarInputs(args)) - .withArrayArguments(function.getArrayInputs(args)) - .withArrayInputs(function.hasArrayInputs()) - .withArrayOutput(function.hasArrayOutput()); + return BindingAnalysis.collectExprs(args) + .withScalarArguments(function.getScalarInputs(args)) + .withArrayArguments(function.getArrayInputs(args)) + .withArrayInputs(function.hasArrayInputs()) + .withArrayOutput(function.hasArrayOutput()); } @Override @@ -194,20 +190,16 @@ class ApplyFunctionExpr implements Expr // apply function expressions are examined during expression selector creation, so precompute and cache the // binding details of children ImmutableList.Builder<BindingAnalysis> argBindingDetailsBuilder = ImmutableList.builder(); - BindingAnalysis accumulator = new BindingAnalysis(); for (Expr arg : argsExpr) { - BindingAnalysis argDetails = arg.analyzeInputs(); - argBindingDetailsBuilder.add(argDetails); - accumulator = accumulator.with(argDetails); + argBindingDetailsBuilder.add(arg.analyzeInputs()); } lambdaBindingAnalysis = lambdaExpr.analyzeInputs(); - - bindingAnalysis = accumulator.with(lambdaBindingAnalysis) - .withArrayArguments(function.getArrayInputs(argsExpr)) - .withArrayInputs(true) - .withArrayOutput(function.hasArrayOutput(lambdaExpr)); argsBindingAnalyses = argBindingDetailsBuilder.build(); + bindingAnalysis = BindingAnalysis.collect(argBindingDetailsBuilder.add(lambdaBindingAnalysis).build()) + .withArrayArguments(function.getArrayInputs(argsExpr)) + .withArrayInputs(true) + .withArrayOutput(function.hasArrayOutput(lambdaExpr)); } @Override