diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc index 3c0505263159..786c00098cd4 100644 --- a/cpp/velox/compute/WholeStageResultIterator.cc +++ b/cpp/velox/compute/WholeStageResultIterator.cc @@ -677,6 +677,9 @@ std::unordered_map WholeStageResultIterator::getQueryC configs[velox::core::QueryConfig::kSparkPartitionId] = std::to_string(taskInfo_.partitionId); + configs[velox::core::QueryConfig::kSparkAnsiModeEnabled] = + std::to_string(veloxCfg_->get(kVeloxSparkAnsiModeEnabled, false)); + // Enable Spark legacy date formatter if spark.sql.legacy.timeParserPolicy is set to 'LEGACY' // or 'legacy' if (veloxCfg_->get(kSparkLegacyTimeParserPolicy, "") == "LEGACY") { diff --git a/cpp/velox/config/VeloxConfig.h b/cpp/velox/config/VeloxConfig.h index 54db73303184..a80f4adfe781 100644 --- a/cpp/velox/config/VeloxConfig.h +++ b/cpp/velox/config/VeloxConfig.h @@ -192,6 +192,8 @@ const std::string kQueryTraceTaskRegExp = "spark.gluten.sql.columnar.backend.vel const std::string kOpTraceDirectoryCreateConfig = "spark.gluten.sql.columnar.backend.velox.opTraceDirectoryCreateConfig"; +const std::string kVeloxSparkAnsiModeEnabled = "spark.sql.ansi.enabled"; + // Cudf config. // GPU RMM memory resource const std::string kCudfMemoryResource = "spark.gluten.sql.columnar.backend.velox.cudf.memoryResource"; diff --git a/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/CastNode.java b/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/CastNode.java index 1984c44d7404..3a0f4b143e14 100644 --- a/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/CastNode.java +++ b/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/CastNode.java @@ -26,12 +26,16 @@ public class CastNode implements ExpressionNode, Serializable { private final TypeNode typeNode; private final ExpressionNode expressionNode; - public final boolean isTryCast; + // Substrait Cast FailureBehavior: + // 0 = UNSPECIFIED (Spark LEGACY: allow overflow/truncation) + // 1 = RETURN_NULL (Spark TRY: return null on failure) + // 2 = THROW_EXCEPTION (Spark ANSI: throw on overflow) + public final int failureBehavior; - CastNode(TypeNode typeNode, ExpressionNode expressionNode, boolean isTryCast) { + CastNode(TypeNode typeNode, ExpressionNode expressionNode, int failureBehavior) { this.typeNode = typeNode; this.expressionNode = expressionNode; - this.isTryCast = isTryCast; + this.failureBehavior = failureBehavior; } @Override @@ -39,13 +43,7 @@ public Expression toProtobuf() { Expression.Cast.Builder castBuilder = Expression.Cast.newBuilder(); castBuilder.setType(typeNode.toProtobuf()); castBuilder.setInput(expressionNode.toProtobuf()); - if (!isTryCast) { - // Throw exception on failure. - castBuilder.setFailureBehaviorValue(2); - } else { - // Return null on failure. - castBuilder.setFailureBehaviorValue(1); - } + castBuilder.setFailureBehaviorValue(failureBehavior); Expression.Builder builder = Expression.newBuilder(); builder.setCast(castBuilder.build()); return builder.build(); diff --git a/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/ExpressionBuilder.java b/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/ExpressionBuilder.java index 4bdef37878c2..5bf5097f25a7 100644 --- a/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/ExpressionBuilder.java +++ b/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/ExpressionBuilder.java @@ -239,7 +239,21 @@ public static AggregateFunctionNode makeAggregateFunction( public static CastNode makeCast( TypeNode typeNode, ExpressionNode expressionNode, boolean isTryCast) { - return new CastNode(typeNode, expressionNode, isTryCast); + // Backward-compatible: isTryCast=true → RETURN_NULL(1), false → THROW_EXCEPTION(2) + return new CastNode(typeNode, expressionNode, isTryCast ? 1 : 2); + } + + public static CastNode makeCast( + TypeNode typeNode, ExpressionNode expressionNode, boolean isTryCast, boolean isAnsiCast) { + int failureBehavior; + if (isTryCast) { + failureBehavior = 1; // RETURN_NULL + } else if (isAnsiCast) { + failureBehavior = 2; // THROW_EXCEPTION + } else { + failureBehavior = 0; // UNSPECIFIED (legacy) + } + return new CastNode(typeNode, expressionNode, failureBehavior); } public static StringMapNode makeStringMap(Map values) { diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala index b762ec95b645..02c752bf0315 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala @@ -45,10 +45,12 @@ case class CastTransformer(substraitExprName: String, child: ExpressionTransform extends UnaryExpressionTransformer { override def doTransform(context: SubstraitContext): ExpressionNode = { val typeNode = ConverterUtils.getTypeNode(dataType, original.nullable) + val shims = SparkShimLoader.getSparkShims ExpressionBuilder.makeCast( typeNode, child.doTransform(context), - SparkShimLoader.getSparkShims.withTryEvalMode(original)) + shims.withTryEvalMode(original), + shims.withAnsiEvalMode(original)) } } diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index f7c4ba1a3b42..0cd2eeca92d8 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -105,10 +105,6 @@ class VeloxTestSettings extends BackendTestSettings { "Process Infinity, -Infinity, NaN in case insensitive manner" // +inf not supported in folly. ) .exclude("cast from timestamp II") // Rewrite test for Gluten not supported with ANSI mode - .exclude("ANSI mode: Throw exception on casting out-of-range value to byte type") - .exclude("ANSI mode: Throw exception on casting out-of-range value to short type") - .exclude("ANSI mode: Throw exception on casting out-of-range value to int type") - .exclude("ANSI mode: Throw exception on casting out-of-range value to long type") .exclude("cast from invalid string to numeric should throw NumberFormatException") .exclude("SPARK-26218: Fix the corner case of codegen when casting float to Integer") // Set timezone through config. diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index a075ea98f49e..3389cbb6dcb5 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -104,11 +104,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude( "Process Infinity, -Infinity, NaN in case insensitive manner" // +inf not supported in folly. ) - .exclude("cast from timestamp II") // Rewrite test for Gluten not supported with ANSI mode - .exclude("ANSI mode: Throw exception on casting out-of-range value to byte type") - .exclude("ANSI mode: Throw exception on casting out-of-range value to short type") - .exclude("ANSI mode: Throw exception on casting out-of-range value to int type") - .exclude("ANSI mode: Throw exception on casting out-of-range value to long type") .exclude("cast from invalid string to numeric should throw NumberFormatException") .exclude("SPARK-26218: Fix the corner case of codegen when casting float to Integer") // Set timezone through config. diff --git a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index ac547c57cd75..39d6fd977e9d 100644 --- a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -117,10 +117,6 @@ class VeloxTestSettings extends BackendTestSettings { "Process Infinity, -Infinity, NaN in case insensitive manner" // +inf not supported in folly. ) .exclude("cast from timestamp II") // Rewrite test for Gluten not supported with ANSI mode - .exclude("ANSI mode: Throw exception on casting out-of-range value to byte type") - .exclude("ANSI mode: Throw exception on casting out-of-range value to short type") - .exclude("ANSI mode: Throw exception on casting out-of-range value to int type") - .exclude("ANSI mode: Throw exception on casting out-of-range value to long type") .exclude("cast from invalid string to numeric should throw NumberFormatException") .exclude("SPARK-26218: Fix the corner case of codegen when casting float to Integer") // Set timezone through config. diff --git a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenTryCastSuite.scala b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenTryCastSuite.scala index fc15ebfeef8b..d85f9e5270b5 100644 --- a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenTryCastSuite.scala +++ b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenTryCastSuite.scala @@ -17,8 +17,8 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.GlutenTestsTrait -import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{withDefaultTimeZone, ALL_TIMEZONES, UTC, UTC_OPT} -import org.apache.spark.sql.catalyst.util.DateTimeUtils.{fromJavaTimestamp, millisToMicros, TimeZoneUTC} +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{ALL_TIMEZONES, UTC, UTC_OPT, withDefaultTimeZone} +import org.apache.spark.sql.catalyst.util.DateTimeUtils.{TimeZoneUTC, fromJavaTimestamp, millisToMicros} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.util.DebuggableThreadUtils diff --git a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala index 61bc3bc94568..c0fe9213fd5b 100644 --- a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala +++ b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala @@ -446,6 +446,7 @@ class Spark34Shims extends SparkShims { case d: Divide => d.evalMode == EvalMode.ANSI case m: Multiply => m.evalMode == EvalMode.ANSI case i: IntegralDivide => i.evalMode == EvalMode.ANSI + case c: Cast => c.evalMode == EvalMode.ANSI case _ => false } } diff --git a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala index 7e31af9b672c..853f2abe4441 100644 --- a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala +++ b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala @@ -483,6 +483,7 @@ class Spark35Shims extends SparkShims { case d: Divide => d.evalMode == EvalMode.ANSI case m: Multiply => m.evalMode == EvalMode.ANSI case i: IntegralDivide => i.evalMode == EvalMode.ANSI + case c: Cast => c.evalMode == EvalMode.ANSI case _ => false } }