[SPARK-51119][SQL] Readers on executors resolving EXISTS_DEFAULT should not call catalogs #49840
@@ -40,9 +40,11 @@ import scala.util.Try
 import org.apache.commons.codec.binary.{Hex => ApacheHex}
 import org.json4s.JsonAST._

-import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow, ScalaReflection}
+import org.apache.spark.sql.catalyst.{CatalystTypeConverters, FunctionIdentifier, InternalRow, ScalaReflection}
+import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, UnresolvedFunction}
 import org.apache.spark.sql.catalyst.expressions.codegen._
 import org.apache.spark.sql.catalyst.expressions.variant.VariantExpressionEvalUtils
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
 import org.apache.spark.sql.catalyst.trees.TreePattern
 import org.apache.spark.sql.catalyst.trees.TreePattern.{LITERAL, NULL_LITERAL, TRUE_OR_FALSE_LITERAL}
 import org.apache.spark.sql.catalyst.types._
@@ -265,6 +267,23 @@ object Literal {
       s"Literal must have a corresponding value to ${dataType.catalogString}, " +
         s"but class ${Utils.getSimpleName(value.getClass)} found.")
   }

+  def fromSQL(sql: String): Expression = {
+    CatalystSqlParser.parseExpression(sql).transformUp {
+      case u: UnresolvedFunction =>
+        assert(u.nameParts.length == 1)
+        assert(!u.isDistinct)
+        assert(u.filter.isEmpty)
+        assert(!u.ignoreNulls)
+        assert(u.orderingWithinGroup.isEmpty)
+        assert(!u.isInternal)
+        FunctionRegistry.builtin.lookupFunction(FunctionIdentifier(u.nameParts.head), u.arguments)
+    } match {
+      case c: Cast if c.needsTimeZone =>
Review thread on the line above:

the CAST can be nested inside array/map/struct, we should put this case match inside the transformUp. @szehon-ho can you make a followup PR for it?

@cloud-fan sure, let me do that. BTW, I looked a little bit and couldn't reproduce a failure with the current implementation using the following unit test with a nested cast:

Unlike the failing case of a top-level cast:

EXISTS_DEFAULT is saved without a cast in the first case. So I think in this particular scenario it doesn't matter, but I agree that it is better to have it, as we are making a generic method.

I'm looking at the previous test failure.

synced offline, see the other comment.
+        c.withTimeZone(SQLConf.get.sessionLocalTimeZone)
+      case e: Expression => e
+    }
+  }
+
   /**
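To make the thread above concrete, here is a sketch (an assumption about the follow-up's shape, not the actual follow-up PR) of how time-zone resolution could move inside the transformUp so that casts nested in array/map/struct literals are covered as well; the name fromSQLWithNestedCasts is made up for illustration.

import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, UnresolvedFunction}
import org.apache.spark.sql.catalyst.expressions.{Cast, Expression}
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.internal.SQLConf

// Hypothetical follow-up shape: same as Literal.fromSQL, but the Cast case lives inside
// transformUp, so casts nested in array/map/struct constructors also get a time zone.
def fromSQLWithNestedCasts(sql: String): Expression = {
  CatalystSqlParser.parseExpression(sql).transformUp {
    case u: UnresolvedFunction =>
      // Only simple built-in function calls are expected in EXISTS_DEFAULT strings.
      FunctionRegistry.builtin.lookupFunction(FunctionIdentifier(u.nameParts.head), u.arguments)
    case c: Cast if c.needsTimeZone =>
      c.withTimeZone(SQLConf.get.sessionLocalTimeZone)
  }
}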
@@ -19,15 +19,14 @@ package org.apache.spark.sql.catalyst.util

 import scala.collection.mutable.ArrayBuffer

-import org.apache.spark.{SparkException, SparkThrowable, SparkUnsupportedOperationException}
+import org.apache.spark.{SparkThrowable, SparkUnsupportedOperationException}
 import org.apache.spark.internal.{Logging, MDC}
 import org.apache.spark.internal.LogKeys._
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.{InternalRow, SQLConfHelper}
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog, SessionCatalog}
 import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.expressions.{Literal => ExprLiteral}
 import org.apache.spark.sql.catalyst.optimizer.{ConstantFolding, Optimizer}
 import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException}
 import org.apache.spark.sql.catalyst.plans.logical._
@@ -320,6 +319,29 @@ object ResolveDefaultColumns extends QueryErrorsBase
     coerceDefaultValue(analyzed, dataType, statementType, colName, defaultSQL)
Review thread on the line above:

I think the CAST is added here, but it should be constant-folded before we generate the existing default string. We need to debug it.

synced with @cloud-fan offline: this is not constant-folded after this line, when analyzing to create EXISTS_DEFAULT. So in the input of analyzeExistsDefault(), EXISTS_DEFAULT sometimes has a top-level CAST.
   }

+  /**
+   * Analyze EXISTS_DEFAULT value. This skips some steps of analyze as most of the
+   * analysis has been done before.
+   */
+  private def analyzeExistenceDefaultValue(field: StructField): Expression = {
+    val defaultSQL = field.metadata.getString(EXISTS_DEFAULT_COLUMN_METADATA_KEY)
+
+    // Parse the expression.
+    val expr = Literal.fromSQL(defaultSQL)
+
+    // Check invariants
+    if (expr.containsPattern(PLAN_EXPRESSION)) {
+      throw QueryCompilationErrors.defaultValuesMayNotContainSubQueryExpressions(
+        "", field.name, defaultSQL)
+    }
+    if (!expr.resolved) {
+      throw QueryCompilationErrors.defaultValuesUnresolvedExprError(
+        "", field.name, defaultSQL, null)
+    }
+
+    coerceDefaultValue(expr, field.dataType, "", field.name, defaultSQL)
+  }
+
   /**
    * If the provided default value is a literal of a wider type than the target column,
    * but the literal value fits within the narrower type, just coerce it for convenience.
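As a concrete illustration of the thread above (the SQL string is an assumed example, not taken from the PR's tests): when the stored EXISTS_DEFAULT was not constant-folded, it can carry a top-level CAST, and the new path still parses and evaluates it locally without an analyzer or catalog.

import org.apache.spark.sql.catalyst.expressions.{Cast, Literal}

// Assumed EXISTS_DEFAULT metadata value with a top-level CAST that was never folded.
val existsDefaultSQL = "CAST(1 AS DOUBLE)"
val resolved = Literal.fromSQL(existsDefaultSQL)

assert(resolved.isInstanceOf[Cast])  // the cast survives parsing
assert(resolved.foldable)            // but it is still a constant expression
assert(resolved.eval() == 1.0d)      // so executors can evaluate it directly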
@@ -405,19 +427,9 @@ object ResolveDefaultColumns extends QueryErrorsBase
   def getExistenceDefaultValues(schema: StructType): Array[Any] = {
     schema.fields.map { field: StructField =>
       val defaultValue: Option[String] = field.getExistenceDefaultValue()
-      defaultValue.map { text: String =>
-        val expr = try {
-          val expr = analyze(field, "", EXISTS_DEFAULT_COLUMN_METADATA_KEY)
-          expr match {
-            case _: ExprLiteral | _: Cast => expr
-          }
-        } catch {
-          // AnalysisException thrown from analyze is already formatted, throw it directly.
-          case ae: AnalysisException => throw ae
-          case _: MatchError =>
-            throw SparkException.internalError(s"parse existence default as literal err," +
-              s" field name: ${field.name}, value: $text")
-        }
+      defaultValue.map { _: String =>
+        val expr = analyzeExistenceDefaultValue(field)
+
         // The expression should be a literal value by this point, possibly wrapped in a cast
         // function. This is enforced by the execution of commands that assign default values.
         expr.eval()
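Finally, a rough end-to-end sketch of the call path that readers on executors use; the schema, column names, and the literal metadata keys "EXISTS_DEFAULT" and "CURRENT_DEFAULT" (mirroring EXISTS_DEFAULT_COLUMN_METADATA_KEY and CURRENT_DEFAULT_COLUMN_METADATA_KEY) are assumptions for illustration. With this change the defaults are resolved purely from column metadata and built-in functions, with no catalog lookups.

import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns
import org.apache.spark.sql.types._

// Assumed schema: one column without a default and one boolean column with defaults.
val schema = StructType(Seq(
  StructField("id", LongType),
  StructField("flag", BooleanType, nullable = true,
    metadata = new MetadataBuilder()
      .putString("CURRENT_DEFAULT", "true")
      .putString("EXISTS_DEFAULT", "true")
      .build())))

// One evaluated default per column; columns without an existence default yield null.
val defaults: Array[Any] = ResolveDefaultColumns.getExistenceDefaultValues(schema)
assert(defaults(0) == null)
assert(defaults(1) == true)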