Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-50992][SQL] OOMs and performance issues with AQE in large plans #49724

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4443,15 +4443,16 @@ object SQLConf {
.createWithDefault(false)

// Explain mode used when rendering query plans in the Spark SQL UI. The configured value is
// upper-cased with Locale.ROOT before validation, so it is accepted case-insensitively.
// The default stated in the doc text must match the value passed to createWithDefault.
val UI_EXPLAIN_MODE = buildConf("spark.sql.ui.explainMode")
.doc("Configures the query explain mode used in the Spark SQL UI. The value can be 'simple', " +
"'extended', 'codegen', 'cost', or 'formatted'. The default value is 'formatted'.")
.doc("Configures the query explain mode used in the Spark SQL UI. The value can be 'off', " +
"'simple', 'extended', 'codegen', 'cost', or 'formatted'. The default value is 'off'.")
.version("3.1.0")
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.checkValue(mode => Set("SIMPLE", "EXTENDED", "CODEGEN", "COST", "FORMATTED").contains(mode),
"Invalid value for 'spark.sql.ui.explainMode'. Valid values are 'simple', 'extended', " +
"'codegen', 'cost' and 'formatted'.")
.createWithDefault("formatted")
.checkValue(
mode => Set("OFF", "SIMPLE", "EXTENDED", "CODEGEN", "COST", "FORMATTED").contains(mode),
"Invalid value for 'spark.sql.ui.explainMode'. Valid values are 'off', 'simple', " +
"'extended', 'codegen', 'cost' and 'formatted'.")
.createWithDefault("off")

val SOURCES_BINARY_FILE_MAX_LENGTH = buildConf("spark.sql.sources.binaryFile.maxLength")
.doc("The max length of a file that can be read by the binary file data source. " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ sealed trait ExplainMode {
def name: String
}

/**
 * Off mode suppresses plan output: when explain is requested for a DataFrame in this mode,
 * an empty string is expected to be printed to the console.
 */
case object OffMode extends ExplainMode {
  val name: String = "off"
}

/**
* Simple mode means that when printing explain for a DataFrame, only a physical plan is
* expected to be printed to the console.
Expand Down Expand Up @@ -61,12 +67,13 @@ object ExplainMode {
* Returns the explain mode from the given string.
*/
def fromString(mode: String): ExplainMode = mode.toLowerCase(Locale.ROOT) match {
// The input is lower-cased with Locale.ROOT above, so each mode name matches case-insensitively.
case OffMode.name => OffMode
case SimpleMode.name => SimpleMode
case ExtendedMode.name => ExtendedMode
case CodegenMode.name => CodegenMode
case CostMode.name => CostMode
case FormattedMode.name => FormattedMode
// Anything else is rejected; the message reports the caller's original, un-lowered input.
case _ => throw new IllegalArgumentException(s"Unknown explain mode: $mode. Accepted " +
"explain modes are 'simple', 'extended', 'codegen', 'cost', 'formatted'.")
"explain modes are 'off', 'simple', 'extended', 'codegen', 'cost', 'formatted'.")
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,7 @@ class QueryExecution(
}

mode match {
case OffMode =>
case SimpleMode =>
queryExecution.simpleString(false, maxFields, append)
case ExtendedMode =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@

package org.apache.spark.sql.execution.columnar

import org.apache.commons.lang3.StringUtils

import org.apache.spark.{SparkException, TaskContext}
import org.apache.spark.network.util.JavaUtils
import org.apache.spark.rdd.RDD
Expand Down Expand Up @@ -218,7 +216,7 @@ case class CachedRDDBuilder(
private val materializedPartitions = cachedPlan.session.sparkContext.longAccumulator

// Name for this cache: "In-memory table <name>" when a table name is set, otherwise a string
// derived from the cached plan.
// NOTE(review): the previous fallback capped `cachedPlan.toString` at 1024 characters via
// StringUtils.abbreviate. `simpleStringWithNodeId()` presumably describes only the root node
// rather than a truncated full plan, so the two are likely not equivalent - confirm.
val cachedName = tableName.map(n => s"In-memory table $n")
.getOrElse(StringUtils.abbreviate(cachedPlan.toString, 1024))
.getOrElse(cachedPlan.simpleStringWithNodeId())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this equivalent logically?


val supportsColumnarInput: Boolean = {
cachedPlan.supportsColumnar &&
Expand Down