From 7fd48171a5aa5422261ad8ef4fe04ef90537ba8a Mon Sep 17 00:00:00 2001 From: Jon Malkin <786705+jmalkin@users.noreply.github.com> Date: Tue, 28 Jan 2025 16:05:21 -0800 Subject: [PATCH 1/3] Rename packages to use org.apache.spark.sql.datasketches and update imports as needed --- .../DatasketchesFunctionRegistry.scala | 2 +- .../DatasketchesScalaFunctionsBase.scala | 3 ++- .../sql/{registrar => kll}/KllFunctionRegistry.scala | 8 +++++--- .../sql/kll/aggregate/KllDoublesSketchAggBuild.scala | 6 ++++-- .../sql/kll/aggregate/KllDoublesSketchAggMerge.scala | 5 +++-- .../kll/expressions/KllDoublesSketchExpressions.scala | 4 ++-- .../functions.scala} | 10 ++++++---- .../spark/sql/kll/types/KllDoublesSketchType.scala | 3 ++- .../{registrar => theta}/ThetaFunctionRegistry.scala | 4 +++- .../functions.scala} | 7 +++++-- .../org/apache/spark/sql/SparkSessionManager.scala | 3 ++- .../scala/org/apache/spark/sql/{ => kll}/KllTest.scala | 9 +++++---- .../org/apache/spark/sql/{ => theta}/ThetaTest.scala | 8 +++++--- 13 files changed, 45 insertions(+), 27 deletions(-) rename src/main/scala/org/apache/spark/sql/{registrar => common}/DatasketchesFunctionRegistry.scala (98%) rename src/main/scala/org/apache/spark/sql/{registrar => common}/DatasketchesScalaFunctionsBase.scala (94%) rename src/main/scala/org/apache/spark/sql/{registrar => kll}/KllFunctionRegistry.scala (76%) rename src/main/scala/org/apache/spark/sql/{registrar/functions_datasketches_kll.scala => kll/functions.scala} (92%) rename src/main/scala/org/apache/spark/sql/{registrar => theta}/ThetaFunctionRegistry.scala (87%) rename src/main/scala/org/apache/spark/sql/{registrar/functions_datasketches_theta.scala => theta/functions.scala} (88%) rename src/test/scala/org/apache/spark/sql/{ => kll}/KllTest.scala (97%) rename src/test/scala/org/apache/spark/sql/{ => theta}/ThetaTest.scala (94%) diff --git a/src/main/scala/org/apache/spark/sql/registrar/DatasketchesFunctionRegistry.scala b/src/main/scala/org/apache/spark/sql/common/DatasketchesFunctionRegistry.scala similarity index 98% rename from src/main/scala/org/apache/spark/sql/registrar/DatasketchesFunctionRegistry.scala rename to src/main/scala/org/apache/spark/sql/common/DatasketchesFunctionRegistry.scala index 86095d8..bdc8235 100644 --- a/src/main/scala/org/apache/spark/sql/registrar/DatasketchesFunctionRegistry.scala +++ b/src/main/scala/org/apache/spark/sql/common/DatasketchesFunctionRegistry.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark.sql.registrar +package org.apache.spark.sql.datasketches.common import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.FunctionIdentifier diff --git a/src/main/scala/org/apache/spark/sql/registrar/DatasketchesScalaFunctionsBase.scala b/src/main/scala/org/apache/spark/sql/common/DatasketchesScalaFunctionsBase.scala similarity index 94% rename from src/main/scala/org/apache/spark/sql/registrar/DatasketchesScalaFunctionsBase.scala rename to src/main/scala/org/apache/spark/sql/common/DatasketchesScalaFunctionsBase.scala index f0ee23a..b060dad 100644 --- a/src/main/scala/org/apache/spark/sql/registrar/DatasketchesScalaFunctionsBase.scala +++ b/src/main/scala/org/apache/spark/sql/common/DatasketchesScalaFunctionsBase.scala @@ -15,10 +15,11 @@ * limitations under the License. */ -package org.apache.spark.sql +package org.apache.spark.sql.datasketches.common import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction +import org.apache.spark.sql.Column // this interfact provides a few helper methods defines and maps all the variants of each function invocation, analagous // to the functions object in core Spark's org.apache.spark.sql.functions diff --git a/src/main/scala/org/apache/spark/sql/registrar/KllFunctionRegistry.scala b/src/main/scala/org/apache/spark/sql/kll/KllFunctionRegistry.scala similarity index 76% rename from src/main/scala/org/apache/spark/sql/registrar/KllFunctionRegistry.scala rename to src/main/scala/org/apache/spark/sql/kll/KllFunctionRegistry.scala index a1fe6a1..550e6c3 100644 --- a/src/main/scala/org/apache/spark/sql/registrar/KllFunctionRegistry.scala +++ b/src/main/scala/org/apache/spark/sql/kll/KllFunctionRegistry.scala @@ -15,13 +15,15 @@ * limitations under the License. */ -package org.apache.spark.sql.registrar +package org.apache.spark.sql.datasketches.kll import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder import org.apache.spark.sql.catalyst.expressions.{ExpressionInfo} -import org.apache.spark.sql.aggregate.{KllDoublesSketchAggBuild, KllDoublesSketchAggMerge} -import org.apache.spark.sql.expressions.{KllDoublesSketchGetMin, KllDoublesSketchGetMax, KllDoublesSketchGetPmf, KllDoublesSketchGetCdf} +import org.apache.spark.sql.datasketches.common.DatasketchesFunctionRegistry +import org.apache.spark.sql.datasketches.kll.aggregate.{KllDoublesSketchAggBuild, KllDoublesSketchAggMerge} +import org.apache.spark.sql.datasketches.kll.expressions.{KllDoublesSketchGetMin, KllDoublesSketchGetMax, KllDoublesSketchGetPmf, KllDoublesSketchGetCdf} +import org.apache.spark.sql.datasketches.common.DatasketchesFunctionRegistry object KllFunctionRegistry extends DatasketchesFunctionRegistry { override val expressions: Map[String, (ExpressionInfo, FunctionBuilder)] = Map( diff --git a/src/main/scala/org/apache/spark/sql/kll/aggregate/KllDoublesSketchAggBuild.scala b/src/main/scala/org/apache/spark/sql/kll/aggregate/KllDoublesSketchAggBuild.scala index 53225ea..7e02240 100644 --- a/src/main/scala/org/apache/spark/sql/kll/aggregate/KllDoublesSketchAggBuild.scala +++ b/src/main/scala/org/apache/spark/sql/kll/aggregate/KllDoublesSketchAggBuild.scala @@ -15,14 +15,16 @@ * limitations under the License. */ -package org.apache.spark.sql.aggregate +package org.apache.spark.sql.datasketches.kll.aggregate import org.apache.datasketches.kll.{KllSketch, KllDoublesSketch} +import org.apache.spark.sql.datasketches.kll.types.KllDoublesSketchType + import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, ExpressionDescription, Literal} import org.apache.spark.sql.catalyst.expressions.aggregate.TypedImperativeAggregate import org.apache.spark.sql.catalyst.trees.BinaryLike -import org.apache.spark.sql.types.{AbstractDataType, DataType, IntegerType, LongType, NumericType, FloatType, DoubleType, KllDoublesSketchType} +import org.apache.spark.sql.types.{AbstractDataType, DataType, IntegerType, LongType, NumericType, FloatType, DoubleType} import org.apache.spark.sql.catalyst.analysis.TypeCheckResult /** diff --git a/src/main/scala/org/apache/spark/sql/kll/aggregate/KllDoublesSketchAggMerge.scala b/src/main/scala/org/apache/spark/sql/kll/aggregate/KllDoublesSketchAggMerge.scala index bf4c51c..cf0dc7e 100644 --- a/src/main/scala/org/apache/spark/sql/kll/aggregate/KllDoublesSketchAggMerge.scala +++ b/src/main/scala/org/apache/spark/sql/kll/aggregate/KllDoublesSketchAggMerge.scala @@ -15,16 +15,17 @@ * limitations under the License. */ -package org.apache.spark.sql.aggregate +package org.apache.spark.sql.datasketches.kll.aggregate import org.apache.datasketches.memory.Memory import org.apache.datasketches.kll.{KllSketch, KllDoublesSketch} +import org.apache.spark.sql.datasketches.kll.types.KllDoublesSketchType import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, ExpressionDescription, Literal} import org.apache.spark.sql.catalyst.expressions.aggregate.TypedImperativeAggregate import org.apache.spark.sql.catalyst.trees.BinaryLike -import org.apache.spark.sql.types.{AbstractDataType, DataType, IntegerType, KllDoublesSketchType} +import org.apache.spark.sql.types.{AbstractDataType, DataType, IntegerType} import org.apache.spark.sql.catalyst.analysis.TypeCheckResult /** diff --git a/src/main/scala/org/apache/spark/sql/kll/expressions/KllDoublesSketchExpressions.scala b/src/main/scala/org/apache/spark/sql/kll/expressions/KllDoublesSketchExpressions.scala index 0ff03e6..f59b0dd 100644 --- a/src/main/scala/org/apache/spark/sql/kll/expressions/KllDoublesSketchExpressions.scala +++ b/src/main/scala/org/apache/spark/sql/kll/expressions/KllDoublesSketchExpressions.scala @@ -15,12 +15,12 @@ * limitations under the License. */ -package org.apache.spark.sql.expressions +package org.apache.spark.sql.datasketches.kll.expressions import org.apache.datasketches.memory.Memory import org.apache.datasketches.kll.KllDoublesSketch import org.apache.datasketches.quantilescommon.QuantileSearchCriteria -import org.apache.spark.sql.types.KllDoublesSketchType +import org.apache.spark.sql.datasketches.kll.types.KllDoublesSketchType import org.apache.spark.sql.types.{AbstractDataType, ArrayType, BooleanType, DataType, DoubleType} import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription, ExpectsInputTypes, ImplicitCastInputTypes} diff --git a/src/main/scala/org/apache/spark/sql/registrar/functions_datasketches_kll.scala b/src/main/scala/org/apache/spark/sql/kll/functions.scala similarity index 92% rename from src/main/scala/org/apache/spark/sql/registrar/functions_datasketches_kll.scala rename to src/main/scala/org/apache/spark/sql/kll/functions.scala index 734110c..fe48c1c 100644 --- a/src/main/scala/org/apache/spark/sql/registrar/functions_datasketches_kll.scala +++ b/src/main/scala/org/apache/spark/sql/kll/functions.scala @@ -15,16 +15,18 @@ * limitations under the License. */ -package org.apache.spark.sql +package org.apache.spark.sql.datasketches.kll +import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.functions.lit import org.apache.spark.sql.types.{ArrayType, BooleanType, DoubleType} -import org.apache.spark.sql.aggregate.{KllDoublesSketchAggMerge, KllDoublesSketchAggBuild} -import org.apache.spark.sql.expressions.{KllDoublesSketchGetMin, KllDoublesSketchGetMax, KllDoublesSketchGetPmfCdf} +import org.apache.spark.sql.datasketches.common.DatasketchesScalaFunctionBase +import org.apache.spark.sql.datasketches.kll.aggregate.{KllDoublesSketchAggMerge, KllDoublesSketchAggBuild} +import org.apache.spark.sql.datasketches.kll.expressions.{KllDoublesSketchGetMin, KllDoublesSketchGetMax, KllDoublesSketchGetPmfCdf} -object functions_datasketches_kll extends DatasketchesScalaFunctionBase { +object functions extends DatasketchesScalaFunctionBase { // build sketch def kll_sketch_double_agg_build(expr: Column, k: Column): Column = withAggregateFunction { diff --git a/src/main/scala/org/apache/spark/sql/kll/types/KllDoublesSketchType.scala b/src/main/scala/org/apache/spark/sql/kll/types/KllDoublesSketchType.scala index 2fb01e9..a058190 100644 --- a/src/main/scala/org/apache/spark/sql/kll/types/KllDoublesSketchType.scala +++ b/src/main/scala/org/apache/spark/sql/kll/types/KllDoublesSketchType.scala @@ -15,11 +15,12 @@ * limitations under the License. */ -package org.apache.spark.sql.types +package org.apache.spark.sql.datasketches.kll.types import org.apache.spark.sql.functions.udf import org.apache.datasketches.kll.KllDoublesSketch import org.apache.datasketches.memory.Memory +import org.apache.spark.sql.types.{DataType, DataTypes, UDTRegistration, UserDefinedType} class KllDoublesSketchType extends UserDefinedType[KllDoublesSketch] with Serializable { override def sqlType: DataType = DataTypes.BinaryType diff --git a/src/main/scala/org/apache/spark/sql/registrar/ThetaFunctionRegistry.scala b/src/main/scala/org/apache/spark/sql/theta/ThetaFunctionRegistry.scala similarity index 87% rename from src/main/scala/org/apache/spark/sql/registrar/ThetaFunctionRegistry.scala rename to src/main/scala/org/apache/spark/sql/theta/ThetaFunctionRegistry.scala index 0062154..02c00ee 100644 --- a/src/main/scala/org/apache/spark/sql/registrar/ThetaFunctionRegistry.scala +++ b/src/main/scala/org/apache/spark/sql/theta/ThetaFunctionRegistry.scala @@ -15,13 +15,15 @@ * limitations under the License. */ -package org.apache.spark.sql.registrar +package org.apache.spark.sql.datasketches.theta import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder import org.apache.spark.sql.catalyst.expressions.{ExpressionInfo} +import org.apache.spark.sql.datasketches.common.DatasketchesFunctionRegistry import org.apache.spark.sql.aggregate.{ThetaSketchAggBuild, ThetaSketchAggUnion} import org.apache.spark.sql.expressions.ThetaSketchGetEstimate +import org.apache.spark.sql.datasketches.common.DatasketchesFunctionRegistry object ThetaFunctionRegistry extends DatasketchesFunctionRegistry { override val expressions: Map[String, (ExpressionInfo, FunctionBuilder)] = Map( diff --git a/src/main/scala/org/apache/spark/sql/registrar/functions_datasketches_theta.scala b/src/main/scala/org/apache/spark/sql/theta/functions.scala similarity index 88% rename from src/main/scala/org/apache/spark/sql/registrar/functions_datasketches_theta.scala rename to src/main/scala/org/apache/spark/sql/theta/functions.scala index 3c33054..2cdd222 100644 --- a/src/main/scala/org/apache/spark/sql/registrar/functions_datasketches_theta.scala +++ b/src/main/scala/org/apache/spark/sql/theta/functions.scala @@ -15,14 +15,17 @@ * limitations under the License. */ -package org.apache.spark.sql +package org.apache.spark.sql.datasketches.theta +import org.apache.spark.sql.Column import org.apache.spark.sql.functions.lit +import org.apache.spark.sql.datasketches.common.DatasketchesScalaFunctionBase import org.apache.spark.sql.aggregate.{ThetaSketchAggBuild, ThetaSketchAggUnion} import org.apache.spark.sql.expressions.ThetaSketchGetEstimate +import org.apache.spark.sql.datasketches.common.DatasketchesScalaFunctionBase -object functions_datasketches_theta extends DatasketchesScalaFunctionBase { +object functions extends DatasketchesScalaFunctionBase { def theta_sketch_agg_build(column: Column, lgk: Int): Column = withAggregateFunction { new ThetaSketchAggBuild(column.expr, lgk) } diff --git a/src/test/scala/org/apache/spark/sql/SparkSessionManager.scala b/src/test/scala/org/apache/spark/sql/SparkSessionManager.scala index 4c96cb4..8aa20bf 100644 --- a/src/test/scala/org/apache/spark/sql/SparkSessionManager.scala +++ b/src/test/scala/org/apache/spark/sql/SparkSessionManager.scala @@ -15,11 +15,12 @@ * limitations under the License. */ -package org.apache.spark.sql +package org.apache.spark.sql.datasketches.common import org.apache.log4j.{Level, Logger} import org.scalatest.BeforeAndAfterAll import org.scalatest.funsuite.AnyFunSuite +import org.apache.spark.sql.SparkSession /** * This class provides a common base for tests. It can perhaps diff --git a/src/test/scala/org/apache/spark/sql/KllTest.scala b/src/test/scala/org/apache/spark/sql/kll/KllTest.scala similarity index 97% rename from src/test/scala/org/apache/spark/sql/KllTest.scala rename to src/test/scala/org/apache/spark/sql/kll/KllTest.scala index 5123774..0a17a9d 100644 --- a/src/test/scala/org/apache/spark/sql/KllTest.scala +++ b/src/test/scala/org/apache/spark/sql/kll/KllTest.scala @@ -15,16 +15,17 @@ * limitations under the License. */ -package org.apache.spark.sql +package org.apache.spark.sql.datasketches.kll import scala.util.Random +import org.apache.spark.sql.Row import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{StructType, StructField, IntegerType, BinaryType} -import org.apache.spark.sql.functions_datasketches_kll._ import org.apache.datasketches.kll.KllDoublesSketch -import org.apache.spark.sql.types.KllDoublesSketchType -import org.apache.spark.sql.registrar.KllFunctionRegistry +import org.apache.spark.sql.datasketches.kll.functions._ +import org.apache.spark.sql.datasketches.kll.types.KllDoublesSketchType +import org.apache.spark.sql.datasketches.common.SparkSessionManager class KllTest extends SparkSessionManager { import spark.implicits._ diff --git a/src/test/scala/org/apache/spark/sql/ThetaTest.scala b/src/test/scala/org/apache/spark/sql/theta/ThetaTest.scala similarity index 94% rename from src/test/scala/org/apache/spark/sql/ThetaTest.scala rename to src/test/scala/org/apache/spark/sql/theta/ThetaTest.scala index 6c93d25..53bd0aa 100644 --- a/src/test/scala/org/apache/spark/sql/ThetaTest.scala +++ b/src/test/scala/org/apache/spark/sql/theta/ThetaTest.scala @@ -15,10 +15,12 @@ * limitations under the License. */ -package org.apache.spark.sql +package org.apache.spark.sql.theta -import org.apache.spark.sql.functions_datasketches_theta._ -import org.apache.spark.sql.registrar.ThetaFunctionRegistry +import org.apache.spark.sql.Row +import org.apache.spark.sql.datasketches.common.SparkSessionManager +import org.apache.spark.sql.datasketches.theta.functions._ +import org.apache.spark.sql.datasketches.theta.ThetaFunctionRegistry class ThetaTest extends SparkSessionManager { import spark.implicits._ From c44364acf593e9754e04cfe90984f28f396dbfca Mon Sep 17 00:00:00 2001 From: Jon Malkin <786705+jmalkin@users.noreply.github.com> Date: Tue, 28 Jan 2025 16:06:52 -0800 Subject: [PATCH 2/3] Add datasketches directory to hierarchy --- .../{ => datasketches}/common/DatasketchesFunctionRegistry.scala | 0 .../common/DatasketchesScalaFunctionsBase.scala | 0 .../spark/sql/{ => datasketches}/kll/KllFunctionRegistry.scala | 0 .../kll/aggregate/KllDoublesSketchAggBuild.scala | 0 .../kll/aggregate/KllDoublesSketchAggMerge.scala | 0 .../kll/expressions/KllDoublesSketchExpressions.scala | 0 .../org/apache/spark/sql/{ => datasketches}/kll/functions.scala | 0 .../sql/{ => datasketches}/kll/types/KllDoublesSketchType.scala | 0 .../sql/{ => datasketches}/theta/ThetaFunctionRegistry.scala | 0 .../{ => datasketches}/theta/aggregate/ThetaSketchAggBuild.scala | 0 .../{ => datasketches}/theta/aggregate/ThetaSketchAggUnion.scala | 0 .../{ => datasketches}/theta/expressions/ThetaExpressions.scala | 0 .../org/apache/spark/sql/{ => datasketches}/theta/functions.scala | 0 .../sql/{ => datasketches}/theta/types/ThetaSketchType.scala | 0 .../sql/{ => datasketches}/theta/types/ThetaSketchWrapper.scala | 0 .../apache/spark/sql/{ => datasketches}/SparkSessionManager.scala | 0 .../org/apache/spark/sql/{ => datasketches}/kll/KllTest.scala | 0 .../org/apache/spark/sql/{ => datasketches}/theta/ThetaTest.scala | 0 18 files changed, 0 insertions(+), 0 deletions(-) rename src/main/scala/org/apache/spark/sql/{ => datasketches}/common/DatasketchesFunctionRegistry.scala (100%) rename src/main/scala/org/apache/spark/sql/{ => datasketches}/common/DatasketchesScalaFunctionsBase.scala (100%) rename src/main/scala/org/apache/spark/sql/{ => datasketches}/kll/KllFunctionRegistry.scala (100%) rename src/main/scala/org/apache/spark/sql/{ => datasketches}/kll/aggregate/KllDoublesSketchAggBuild.scala (100%) rename src/main/scala/org/apache/spark/sql/{ => datasketches}/kll/aggregate/KllDoublesSketchAggMerge.scala (100%) rename src/main/scala/org/apache/spark/sql/{ => datasketches}/kll/expressions/KllDoublesSketchExpressions.scala (100%) rename src/main/scala/org/apache/spark/sql/{ => datasketches}/kll/functions.scala (100%) rename src/main/scala/org/apache/spark/sql/{ => datasketches}/kll/types/KllDoublesSketchType.scala (100%) rename src/main/scala/org/apache/spark/sql/{ => datasketches}/theta/ThetaFunctionRegistry.scala (100%) rename src/main/scala/org/apache/spark/sql/{ => datasketches}/theta/aggregate/ThetaSketchAggBuild.scala (100%) rename src/main/scala/org/apache/spark/sql/{ => datasketches}/theta/aggregate/ThetaSketchAggUnion.scala (100%) rename src/main/scala/org/apache/spark/sql/{ => datasketches}/theta/expressions/ThetaExpressions.scala (100%) rename src/main/scala/org/apache/spark/sql/{ => datasketches}/theta/functions.scala (100%) rename src/main/scala/org/apache/spark/sql/{ => datasketches}/theta/types/ThetaSketchType.scala (100%) rename src/main/scala/org/apache/spark/sql/{ => datasketches}/theta/types/ThetaSketchWrapper.scala (100%) rename src/test/scala/org/apache/spark/sql/{ => datasketches}/SparkSessionManager.scala (100%) rename src/test/scala/org/apache/spark/sql/{ => datasketches}/kll/KllTest.scala (100%) rename src/test/scala/org/apache/spark/sql/{ => datasketches}/theta/ThetaTest.scala (100%) diff --git a/src/main/scala/org/apache/spark/sql/common/DatasketchesFunctionRegistry.scala b/src/main/scala/org/apache/spark/sql/datasketches/common/DatasketchesFunctionRegistry.scala similarity index 100% rename from src/main/scala/org/apache/spark/sql/common/DatasketchesFunctionRegistry.scala rename to src/main/scala/org/apache/spark/sql/datasketches/common/DatasketchesFunctionRegistry.scala diff --git a/src/main/scala/org/apache/spark/sql/common/DatasketchesScalaFunctionsBase.scala b/src/main/scala/org/apache/spark/sql/datasketches/common/DatasketchesScalaFunctionsBase.scala similarity index 100% rename from src/main/scala/org/apache/spark/sql/common/DatasketchesScalaFunctionsBase.scala rename to src/main/scala/org/apache/spark/sql/datasketches/common/DatasketchesScalaFunctionsBase.scala diff --git a/src/main/scala/org/apache/spark/sql/kll/KllFunctionRegistry.scala b/src/main/scala/org/apache/spark/sql/datasketches/kll/KllFunctionRegistry.scala similarity index 100% rename from src/main/scala/org/apache/spark/sql/kll/KllFunctionRegistry.scala rename to src/main/scala/org/apache/spark/sql/datasketches/kll/KllFunctionRegistry.scala diff --git a/src/main/scala/org/apache/spark/sql/kll/aggregate/KllDoublesSketchAggBuild.scala b/src/main/scala/org/apache/spark/sql/datasketches/kll/aggregate/KllDoublesSketchAggBuild.scala similarity index 100% rename from src/main/scala/org/apache/spark/sql/kll/aggregate/KllDoublesSketchAggBuild.scala rename to src/main/scala/org/apache/spark/sql/datasketches/kll/aggregate/KllDoublesSketchAggBuild.scala diff --git a/src/main/scala/org/apache/spark/sql/kll/aggregate/KllDoublesSketchAggMerge.scala b/src/main/scala/org/apache/spark/sql/datasketches/kll/aggregate/KllDoublesSketchAggMerge.scala similarity index 100% rename from src/main/scala/org/apache/spark/sql/kll/aggregate/KllDoublesSketchAggMerge.scala rename to src/main/scala/org/apache/spark/sql/datasketches/kll/aggregate/KllDoublesSketchAggMerge.scala diff --git a/src/main/scala/org/apache/spark/sql/kll/expressions/KllDoublesSketchExpressions.scala b/src/main/scala/org/apache/spark/sql/datasketches/kll/expressions/KllDoublesSketchExpressions.scala similarity index 100% rename from src/main/scala/org/apache/spark/sql/kll/expressions/KllDoublesSketchExpressions.scala rename to src/main/scala/org/apache/spark/sql/datasketches/kll/expressions/KllDoublesSketchExpressions.scala diff --git a/src/main/scala/org/apache/spark/sql/kll/functions.scala b/src/main/scala/org/apache/spark/sql/datasketches/kll/functions.scala similarity index 100% rename from src/main/scala/org/apache/spark/sql/kll/functions.scala rename to src/main/scala/org/apache/spark/sql/datasketches/kll/functions.scala diff --git a/src/main/scala/org/apache/spark/sql/kll/types/KllDoublesSketchType.scala b/src/main/scala/org/apache/spark/sql/datasketches/kll/types/KllDoublesSketchType.scala similarity index 100% rename from src/main/scala/org/apache/spark/sql/kll/types/KllDoublesSketchType.scala rename to src/main/scala/org/apache/spark/sql/datasketches/kll/types/KllDoublesSketchType.scala diff --git a/src/main/scala/org/apache/spark/sql/theta/ThetaFunctionRegistry.scala b/src/main/scala/org/apache/spark/sql/datasketches/theta/ThetaFunctionRegistry.scala similarity index 100% rename from src/main/scala/org/apache/spark/sql/theta/ThetaFunctionRegistry.scala rename to src/main/scala/org/apache/spark/sql/datasketches/theta/ThetaFunctionRegistry.scala diff --git a/src/main/scala/org/apache/spark/sql/theta/aggregate/ThetaSketchAggBuild.scala b/src/main/scala/org/apache/spark/sql/datasketches/theta/aggregate/ThetaSketchAggBuild.scala similarity index 100% rename from src/main/scala/org/apache/spark/sql/theta/aggregate/ThetaSketchAggBuild.scala rename to src/main/scala/org/apache/spark/sql/datasketches/theta/aggregate/ThetaSketchAggBuild.scala diff --git a/src/main/scala/org/apache/spark/sql/theta/aggregate/ThetaSketchAggUnion.scala b/src/main/scala/org/apache/spark/sql/datasketches/theta/aggregate/ThetaSketchAggUnion.scala similarity index 100% rename from src/main/scala/org/apache/spark/sql/theta/aggregate/ThetaSketchAggUnion.scala rename to src/main/scala/org/apache/spark/sql/datasketches/theta/aggregate/ThetaSketchAggUnion.scala diff --git a/src/main/scala/org/apache/spark/sql/theta/expressions/ThetaExpressions.scala b/src/main/scala/org/apache/spark/sql/datasketches/theta/expressions/ThetaExpressions.scala similarity index 100% rename from src/main/scala/org/apache/spark/sql/theta/expressions/ThetaExpressions.scala rename to src/main/scala/org/apache/spark/sql/datasketches/theta/expressions/ThetaExpressions.scala diff --git a/src/main/scala/org/apache/spark/sql/theta/functions.scala b/src/main/scala/org/apache/spark/sql/datasketches/theta/functions.scala similarity index 100% rename from src/main/scala/org/apache/spark/sql/theta/functions.scala rename to src/main/scala/org/apache/spark/sql/datasketches/theta/functions.scala diff --git a/src/main/scala/org/apache/spark/sql/theta/types/ThetaSketchType.scala b/src/main/scala/org/apache/spark/sql/datasketches/theta/types/ThetaSketchType.scala similarity index 100% rename from src/main/scala/org/apache/spark/sql/theta/types/ThetaSketchType.scala rename to src/main/scala/org/apache/spark/sql/datasketches/theta/types/ThetaSketchType.scala diff --git a/src/main/scala/org/apache/spark/sql/theta/types/ThetaSketchWrapper.scala b/src/main/scala/org/apache/spark/sql/datasketches/theta/types/ThetaSketchWrapper.scala similarity index 100% rename from src/main/scala/org/apache/spark/sql/theta/types/ThetaSketchWrapper.scala rename to src/main/scala/org/apache/spark/sql/datasketches/theta/types/ThetaSketchWrapper.scala diff --git a/src/test/scala/org/apache/spark/sql/SparkSessionManager.scala b/src/test/scala/org/apache/spark/sql/datasketches/SparkSessionManager.scala similarity index 100% rename from src/test/scala/org/apache/spark/sql/SparkSessionManager.scala rename to src/test/scala/org/apache/spark/sql/datasketches/SparkSessionManager.scala diff --git a/src/test/scala/org/apache/spark/sql/kll/KllTest.scala b/src/test/scala/org/apache/spark/sql/datasketches/kll/KllTest.scala similarity index 100% rename from src/test/scala/org/apache/spark/sql/kll/KllTest.scala rename to src/test/scala/org/apache/spark/sql/datasketches/kll/KllTest.scala diff --git a/src/test/scala/org/apache/spark/sql/theta/ThetaTest.scala b/src/test/scala/org/apache/spark/sql/datasketches/theta/ThetaTest.scala similarity index 100% rename from src/test/scala/org/apache/spark/sql/theta/ThetaTest.scala rename to src/test/scala/org/apache/spark/sql/datasketches/theta/ThetaTest.scala From 050710fcf4dcf70dd23f29122657be2e6b1be6ac Mon Sep 17 00:00:00 2001 From: Jon Malkin <786705+jmalkin@users.noreply.github.com> Date: Tue, 28 Jan 2025 17:57:55 -0800 Subject: [PATCH 3/3] Rename theta packages, too --- .../sql/datasketches/theta/ThetaFunctionRegistry.scala | 4 ++-- .../datasketches/theta/aggregate/ThetaSketchAggBuild.scala | 6 ++++-- .../datasketches/theta/aggregate/ThetaSketchAggUnion.scala | 5 +++-- .../datasketches/theta/expressions/ThetaExpressions.scala | 5 +++-- .../org/apache/spark/sql/datasketches/theta/functions.scala | 4 ++-- .../sql/datasketches/theta/types/ThetaSketchType.scala | 4 +++- .../sql/datasketches/theta/types/ThetaSketchWrapper.scala | 3 ++- 7 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/main/scala/org/apache/spark/sql/datasketches/theta/ThetaFunctionRegistry.scala b/src/main/scala/org/apache/spark/sql/datasketches/theta/ThetaFunctionRegistry.scala index 02c00ee..ddbe615 100644 --- a/src/main/scala/org/apache/spark/sql/datasketches/theta/ThetaFunctionRegistry.scala +++ b/src/main/scala/org/apache/spark/sql/datasketches/theta/ThetaFunctionRegistry.scala @@ -21,8 +21,8 @@ import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder import org.apache.spark.sql.catalyst.expressions.{ExpressionInfo} import org.apache.spark.sql.datasketches.common.DatasketchesFunctionRegistry -import org.apache.spark.sql.aggregate.{ThetaSketchAggBuild, ThetaSketchAggUnion} -import org.apache.spark.sql.expressions.ThetaSketchGetEstimate +import org.apache.spark.sql.datasketches.theta.aggregate.{ThetaSketchAggBuild, ThetaSketchAggUnion} +import org.apache.spark.sql.datasketches.theta.expressions.ThetaSketchGetEstimate import org.apache.spark.sql.datasketches.common.DatasketchesFunctionRegistry object ThetaFunctionRegistry extends DatasketchesFunctionRegistry { diff --git a/src/main/scala/org/apache/spark/sql/datasketches/theta/aggregate/ThetaSketchAggBuild.scala b/src/main/scala/org/apache/spark/sql/datasketches/theta/aggregate/ThetaSketchAggBuild.scala index d88af6e..95dbd61 100644 --- a/src/main/scala/org/apache/spark/sql/datasketches/theta/aggregate/ThetaSketchAggBuild.scala +++ b/src/main/scala/org/apache/spark/sql/datasketches/theta/aggregate/ThetaSketchAggBuild.scala @@ -15,14 +15,16 @@ * limitations under the License. */ -package org.apache.spark.sql.aggregate +package org.apache.spark.sql.datasketches.theta.aggregate import org.apache.datasketches.theta.{UpdateSketch, SetOperation} +import org.apache.spark.sql.datasketches.theta.types.{ThetaSketchType, ThetaSketchWrapper} + import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, ExpressionDescription, Literal} import org.apache.spark.sql.catalyst.expressions.aggregate.TypedImperativeAggregate import org.apache.spark.sql.catalyst.trees.BinaryLike -import org.apache.spark.sql.types.{AbstractDataType, DataType, IntegerType, LongType, NumericType, FloatType, DoubleType, ThetaSketchWrapper, ThetaSketchType} +import org.apache.spark.sql.types.{AbstractDataType, DataType, IntegerType, LongType, NumericType, FloatType, DoubleType} /** * The ThetaSketchBuild function creates a Theta sketch from a column of values diff --git a/src/main/scala/org/apache/spark/sql/datasketches/theta/aggregate/ThetaSketchAggUnion.scala b/src/main/scala/org/apache/spark/sql/datasketches/theta/aggregate/ThetaSketchAggUnion.scala index 107382a..6e067c9 100644 --- a/src/main/scala/org/apache/spark/sql/datasketches/theta/aggregate/ThetaSketchAggUnion.scala +++ b/src/main/scala/org/apache/spark/sql/datasketches/theta/aggregate/ThetaSketchAggUnion.scala @@ -15,16 +15,17 @@ * limitations under the License. */ -package org.apache.spark.sql.aggregate +package org.apache.spark.sql.datasketches.theta.aggregate import org.apache.datasketches.memory.Memory import org.apache.datasketches.theta.{Sketch, SetOperation} +import org.apache.spark.sql.datasketches.theta.types.{ThetaSketchType, ThetaSketchWrapper} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, ExpressionDescription, Literal} import org.apache.spark.sql.catalyst.expressions.aggregate.TypedImperativeAggregate import org.apache.spark.sql.catalyst.trees.BinaryLike -import org.apache.spark.sql.types.{AbstractDataType, DataType, IntegerType, ThetaSketchWrapper, ThetaSketchType} +import org.apache.spark.sql.types.{AbstractDataType, DataType, IntegerType} /** * Theta Union operation. diff --git a/src/main/scala/org/apache/spark/sql/datasketches/theta/expressions/ThetaExpressions.scala b/src/main/scala/org/apache/spark/sql/datasketches/theta/expressions/ThetaExpressions.scala index f30735c..c426ee4 100644 --- a/src/main/scala/org/apache/spark/sql/datasketches/theta/expressions/ThetaExpressions.scala +++ b/src/main/scala/org/apache/spark/sql/datasketches/theta/expressions/ThetaExpressions.scala @@ -15,16 +15,17 @@ * limitations under the License. */ -package org.apache.spark.sql.expressions +package org.apache.spark.sql.datasketches.theta.expressions import org.apache.datasketches.memory.Memory import org.apache.datasketches.theta.Sketch +import org.apache.spark.sql.datasketches.theta.types.ThetaSketchType import org.apache.spark.sql.catalyst.expressions.{Expression, ExpectsInputTypes, UnaryExpression} import org.apache.spark.sql.catalyst.expressions.NullIntolerant import org.apache.spark.sql.catalyst.expressions.ExpressionDescription import org.apache.spark.sql.catalyst.expressions.codegen.{CodeBlock, CodegenContext, ExprCode} -import org.apache.spark.sql.types.{AbstractDataType, DataType, DoubleType, ThetaSketchType} +import org.apache.spark.sql.types.{AbstractDataType, DataType, DoubleType} @ExpressionDescription( usage = """ diff --git a/src/main/scala/org/apache/spark/sql/datasketches/theta/functions.scala b/src/main/scala/org/apache/spark/sql/datasketches/theta/functions.scala index 2cdd222..2a788a7 100644 --- a/src/main/scala/org/apache/spark/sql/datasketches/theta/functions.scala +++ b/src/main/scala/org/apache/spark/sql/datasketches/theta/functions.scala @@ -21,8 +21,8 @@ import org.apache.spark.sql.Column import org.apache.spark.sql.functions.lit import org.apache.spark.sql.datasketches.common.DatasketchesScalaFunctionBase -import org.apache.spark.sql.aggregate.{ThetaSketchAggBuild, ThetaSketchAggUnion} -import org.apache.spark.sql.expressions.ThetaSketchGetEstimate +import org.apache.spark.sql.datasketches.theta.aggregate.{ThetaSketchAggBuild, ThetaSketchAggUnion} +import org.apache.spark.sql.datasketches.theta.expressions.ThetaSketchGetEstimate import org.apache.spark.sql.datasketches.common.DatasketchesScalaFunctionBase object functions extends DatasketchesScalaFunctionBase { diff --git a/src/main/scala/org/apache/spark/sql/datasketches/theta/types/ThetaSketchType.scala b/src/main/scala/org/apache/spark/sql/datasketches/theta/types/ThetaSketchType.scala index e5a5e2c..cfb27ad 100644 --- a/src/main/scala/org/apache/spark/sql/datasketches/theta/types/ThetaSketchType.scala +++ b/src/main/scala/org/apache/spark/sql/datasketches/theta/types/ThetaSketchType.scala @@ -15,7 +15,9 @@ * limitations under the License. */ -package org.apache.spark.sql.types +package org.apache.spark.sql.datasketches.theta.types + +import org.apache.spark.sql.types.{DataType, DataTypes, UserDefinedType} class ThetaSketchType extends UserDefinedType[ThetaSketchWrapper] { override def sqlType: DataType = DataTypes.BinaryType diff --git a/src/main/scala/org/apache/spark/sql/datasketches/theta/types/ThetaSketchWrapper.scala b/src/main/scala/org/apache/spark/sql/datasketches/theta/types/ThetaSketchWrapper.scala index 86e3b89..a699017 100644 --- a/src/main/scala/org/apache/spark/sql/datasketches/theta/types/ThetaSketchWrapper.scala +++ b/src/main/scala/org/apache/spark/sql/datasketches/theta/types/ThetaSketchWrapper.scala @@ -15,10 +15,11 @@ * limitations under the License. */ -package org.apache.spark.sql.types +package org.apache.spark.sql.datasketches.theta.types import org.apache.datasketches.theta.{UpdateSketch, CompactSketch, Union} import org.apache.datasketches.memory.Memory +import org.apache.spark.sql.types.SQLUserDefinedType @SQLUserDefinedType(udt = classOf[ThetaSketchType]) class ThetaSketchWrapper(var updateSketch: Option[UpdateSketch] = None, var compactSketch: Option[CompactSketch] = None, var union: Option[Union] = None) {