From e89b8021ee84a12c147231433f4aaf8de59b5961 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sun, 12 Jan 2025 15:05:17 -0700 Subject: [PATCH 1/4] Add explicit test for null and empty arrays with array_remove --- .../apache/comet/CometExpressionSuite.scala | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index 8c2759a384..8d5e607fe1 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -20,22 +20,19 @@ package org.apache.comet import java.time.{Duration, Period} - import scala.reflect.ClassTag import scala.reflect.runtime.universe.TypeTag import scala.util.Random - import org.apache.hadoop.fs.Path import org.apache.spark.sql.{CometTestBase, DataFrame, Row} import org.apache.spark.sql.catalyst.optimizer.SimplifyExtractValueOps import org.apache.spark.sql.comet.{CometColumnarToRowExec, CometProjectExec} -import org.apache.spark.sql.execution.{InputAdapter, ProjectExec, WholeStageCodegenExec} +import org.apache.spark.sql.execution.{InputAdapter, LocalTableScanExec, ProjectExec, WholeStageCodegenExec} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.SESSION_LOCAL_TIMEZONE import org.apache.spark.sql.types.{Decimal, DecimalType} - import org.apache.comet.CometSparkSessionExtensions.{isSpark33Plus, isSpark34Plus, isSpark35Plus, isSpark40Plus} class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { @@ -2544,5 +2541,19 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { "SELECT array_remove(case when _2 = _3 THEN array(_2, _3,_4) ELSE null END, _3) from t1")) } } + val values: Seq[Option[Array[Option[Int]]]] = Seq( + Some(Array(Some(1), Some(2), Some(3))), + Some(Array(Some(1), Some(2), Some(2))), + Some(Array(None, Some(2), Some(2))), + None, + Some(Array()), + Some(Array(None, None)), + ) + values.toDF("a").createTempView("int_array") + withSQLConf(CometConf.COMET_SPARK_TO_ARROW_ENABLED.key -> "true", + CometConf.COMET_SPARK_TO_ARROW_SUPPORTED_OPERATOR_LIST.key -> "LocalTableScan") { + checkSparkAnswerAndOperator(sql("select a, array_remove(a, 2) from int_array"), classOf[LocalTableScanExec]) + checkSparkAnswerAndOperator(sql("select a, array_remove(a, null) from int_array"), classOf[LocalTableScanExec]) + } } } From a3505648d21840330668f8c5f1d47eeed97362b9 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 14 Jan 2025 18:36:48 -0700 Subject: [PATCH 2/4] format --- .../org/apache/comet/CometExpressionSuite.scala | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index 8d5e607fe1..2ddb1d10bf 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -20,9 +20,11 @@ package org.apache.comet import java.time.{Duration, Period} + import scala.reflect.ClassTag import scala.reflect.runtime.universe.TypeTag import scala.util.Random + import org.apache.hadoop.fs.Path import org.apache.spark.sql.{CometTestBase, DataFrame, Row} import org.apache.spark.sql.catalyst.optimizer.SimplifyExtractValueOps @@ -33,6 +35,7 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.SESSION_LOCAL_TIMEZONE import org.apache.spark.sql.types.{Decimal, DecimalType} + import org.apache.comet.CometSparkSessionExtensions.{isSpark33Plus, isSpark34Plus, isSpark35Plus, isSpark40Plus} class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { @@ -2547,13 +2550,17 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { Some(Array(None, Some(2), Some(2))), None, Some(Array()), - Some(Array(None, None)), - ) + Some(Array(None, None))) values.toDF("a").createTempView("int_array") - withSQLConf(CometConf.COMET_SPARK_TO_ARROW_ENABLED.key -> "true", + withSQLConf( + CometConf.COMET_SPARK_TO_ARROW_ENABLED.key -> "true", CometConf.COMET_SPARK_TO_ARROW_SUPPORTED_OPERATOR_LIST.key -> "LocalTableScan") { - checkSparkAnswerAndOperator(sql("select a, array_remove(a, 2) from int_array"), classOf[LocalTableScanExec]) - checkSparkAnswerAndOperator(sql("select a, array_remove(a, null) from int_array"), classOf[LocalTableScanExec]) + checkSparkAnswerAndOperator( + sql("select a, array_remove(a, 2) from int_array"), + classOf[LocalTableScanExec]) + checkSparkAnswerAndOperator( + sql("select a, array_remove(a, null) from int_array"), + classOf[LocalTableScanExec]) } } } From ea0e04a9b65d0b09b9ac79ad254ce61cdaf1041c Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 14 Jan 2025 18:47:20 -0700 Subject: [PATCH 3/4] test for strings --- .../apache/comet/CometExpressionSuite.scala | 56 +++++++++++++++---- 1 file changed, 45 insertions(+), 11 deletions(-) diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index 2ddb1d10bf..6270e5ec73 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -2544,6 +2544,38 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { "SELECT array_remove(case when _2 = _3 THEN array(_2, _3,_4) ELSE null END, _3) from t1")) } } + } + + test("array_remove - ints") { + registerIntArray() + withSQLConf( + CometConf.COMET_SPARK_TO_ARROW_ENABLED.key -> "true", + CometConf.COMET_SPARK_TO_ARROW_SUPPORTED_OPERATOR_LIST.key -> "LocalTableScan") { + for (query <- Seq( + "select a, array_remove(a, 2) from int_array", + "select a, array_remove(a, -2) from int_array", + "select a, array_remove(a, null) from int_array")) { + checkSparkAnswerAndOperator(sql(query), classOf[LocalTableScanExec]) + } + } + } + + test("array_remove - strings") { + registerStringArray() + withSQLConf( + CometConf.COMET_SPARK_TO_ARROW_ENABLED.key -> "true", + CometConf.COMET_SPARK_TO_ARROW_SUPPORTED_OPERATOR_LIST.key -> "LocalTableScan") { + for (query <- Seq( + "select a, array_remove(a, 'two') from string_array", + "select a, array_remove(a, '') from string_array", + "select a, array_remove(a, 'four') from string_array", + "select a, array_remove(a, null) from string_array")) { + checkSparkAnswerAndOperator(sql(query), classOf[LocalTableScanExec]) + } + } + } + + private def registerIntArray(): Unit = { val values: Seq[Option[Array[Option[Int]]]] = Seq( Some(Array(Some(1), Some(2), Some(3))), Some(Array(Some(1), Some(2), Some(2))), @@ -2551,16 +2583,18 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { None, Some(Array()), Some(Array(None, None))) - values.toDF("a").createTempView("int_array") - withSQLConf( - CometConf.COMET_SPARK_TO_ARROW_ENABLED.key -> "true", - CometConf.COMET_SPARK_TO_ARROW_SUPPORTED_OPERATOR_LIST.key -> "LocalTableScan") { - checkSparkAnswerAndOperator( - sql("select a, array_remove(a, 2) from int_array"), - classOf[LocalTableScanExec]) - checkSparkAnswerAndOperator( - sql("select a, array_remove(a, null) from int_array"), - classOf[LocalTableScanExec]) - } + values.toDF("a").createOrReplaceTempView("int_array") } + + private def registerStringArray(): Unit = { + val values: Seq[Option[Array[Option[String]]]] = Seq( + Some(Array(Some("one"), Some("two"), Some("three"))), + Some(Array(Some("one"), Some("two"), Some("two"))), + Some(Array(None, Some("two"), Some("two"))), + None, + Some(Array()), + Some(Array(None, None))) + values.toDF("a").createOrReplaceTempView("string_array") + } + } From f45b31e25cdd035f7a55bb6f38b4bd0986a85225 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 15 Jan 2025 08:59:36 -0700 Subject: [PATCH 4/4] format --- .../scala/org/apache/comet/CometExpressionSuite.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index 6270e5ec73..cdcfb42807 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -2566,10 +2566,10 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { CometConf.COMET_SPARK_TO_ARROW_ENABLED.key -> "true", CometConf.COMET_SPARK_TO_ARROW_SUPPORTED_OPERATOR_LIST.key -> "LocalTableScan") { for (query <- Seq( - "select a, array_remove(a, 'two') from string_array", - "select a, array_remove(a, '') from string_array", - "select a, array_remove(a, 'four') from string_array", - "select a, array_remove(a, null) from string_array")) { + "select a, array_remove(a, 'two') from string_array", + "select a, array_remove(a, '') from string_array", + "select a, array_remove(a, 'four') from string_array", + "select a, array_remove(a, null) from string_array")) { checkSparkAnswerAndOperator(sql(query), classOf[LocalTableScanExec]) } }