From d5a9505dd162e514bd6ebc8b45421904ec5e2d15 Mon Sep 17 00:00:00 2001 From: Jamison Date: Mon, 3 Feb 2025 16:39:52 -0800 Subject: [PATCH 1/2] SNOW-1887104: Update OBJECT semantics --- .../snowpark/_internal/type_utils.py | 8 ++++++ src/snowflake/snowpark/_internal/udf_utils.py | 6 ++++- src/snowflake/snowpark/types.py | 16 +++--------- src/snowflake/snowpark/udaf.py | 4 +-- tests/integ/scala/test_datatype_suite.py | 25 ++++++------------- 5 files changed, 26 insertions(+), 33 deletions(-) diff --git a/src/snowflake/snowpark/_internal/type_utils.py b/src/snowflake/snowpark/_internal/type_utils.py index c42771f5e8..f7f9f872d6 100644 --- a/src/snowflake/snowpark/_internal/type_utils.py +++ b/src/snowflake/snowpark/_internal/type_utils.py @@ -198,6 +198,8 @@ def convert_sf_to_sp_type( return ArrayType(semi_structured_fill) if column_type_name == "VARIANT": return VariantType() + if context._should_use_structured_type_semantics() and column_type_name == "OBJECT": + return StructType() if column_type_name in {"OBJECT", "MAP"}: return MapType(semi_structured_fill, semi_structured_fill) if column_type_name == "GEOGRAPHY": @@ -678,6 +680,12 @@ def python_type_to_snow_type( if tp_args else None ) + if ( + key_type is None + or value_type is None + and context._should_use_structured_type_semantics() + ): + return StructType(), False return MapType(key_type, value_type), False if installed_pandas: diff --git a/src/snowflake/snowpark/_internal/udf_utils.py b/src/snowflake/snowpark/_internal/udf_utils.py index 0f9951270a..df4aef2613 100644 --- a/src/snowflake/snowpark/_internal/udf_utils.py +++ b/src/snowflake/snowpark/_internal/udf_utils.py @@ -1261,7 +1261,11 @@ def create_python_udf_or_sp( if replace and if_not_exists: raise ValueError("options replace and if_not_exists are incompatible") - if isinstance(return_type, StructType) and not return_type.structured: + if ( + isinstance(return_type, StructType) + and not return_type.structured + and object_type != TempObjectType.AGGREGATE_FUNCTION + ): return_sql = f'RETURNS TABLE ({",".join(f"{field.name} {convert_sp_to_sf_type(field.datatype)}" for field in return_type.fields)})' elif installed_pandas and isinstance(return_type, PandasDataFrameType): return_sql = f'RETURNS TABLE ({",".join(f"{name} {convert_sp_to_sf_type(datatype)}" for name, datatype in zip(return_type.col_names, return_type.col_types))})' diff --git a/src/snowflake/snowpark/types.py b/src/snowflake/snowpark/types.py index 95335e56fe..b64d4c9bb5 100644 --- a/src/snowflake/snowpark/types.py +++ b/src/snowflake/snowpark/types.py @@ -403,15 +403,9 @@ def __init__( value_contains_null: bool = True, ) -> None: if context._should_use_structured_type_semantics(): - if (key_type is None and value_type is not None) or ( - key_type is not None and value_type is None - ): - raise ValueError( - "Must either set both key_type and value_type or leave both unset." - ) - self.structured = ( - structured if structured is not None else key_type is not None - ) + if key_type is None or value_type is None: + raise ValueError("MapType requires key and value type be set.") + self.structured = True self.key_type = key_type self.value_type = value_type else: @@ -476,10 +470,6 @@ def valueType(self): def _fill_ast(self, ast: proto.SpDataType) -> None: ast.sp_map_type.structured = self.structured - if self.key_type is None or self.value_type is None: - raise NotImplementedError( - "SNOW-1862700: AST does not support empty key or value type." - ) self.key_type._fill_ast(ast.sp_map_type.key_ty) self.value_type._fill_ast(ast.sp_map_type.value_ty) diff --git a/src/snowflake/snowpark/udaf.py b/src/snowflake/snowpark/udaf.py index 10290eec9b..9432deea65 100644 --- a/src/snowflake/snowpark/udaf.py +++ b/src/snowflake/snowpark/udaf.py @@ -40,7 +40,7 @@ warning, ) from snowflake.snowpark.column import Column -from snowflake.snowpark.types import DataType, MapType +from snowflake.snowpark.types import DataType, MapType, StructType # Python 3.8 needs to use typing.Iterable because collections.abc.Iterable is not subscriptable # Python 3.9 can use both @@ -717,7 +717,7 @@ def _do_register_udaf( "_do_register_udaf", "Snowflake does not support structured maps as return type for UDAFs. Downcasting to semi-structured object.", ) - return_type = MapType() + return_type = StructType() # Capture original parameters. if _emit_ast: diff --git a/tests/integ/scala/test_datatype_suite.py b/tests/integ/scala/test_datatype_suite.py index f411c64d3c..0877e1bc83 100644 --- a/tests/integ/scala/test_datatype_suite.py +++ b/tests/integ/scala/test_datatype_suite.py @@ -6,7 +6,6 @@ # Many of the tests have been moved to unit/scala/test_datattype_suite.py from decimal import Decimal -from unittest import mock import logging import pytest @@ -533,18 +532,10 @@ def test_structured_dtypes_negative(structured_type_session, structured_type_sup if not structured_type_support: pytest.skip("Test requires structured type support.") - # SNOW-1862700: Map Type missing element or value fails to generate AST. - with pytest.raises( - NotImplementedError, match="AST does not support empty key or value type." - ): - x = MapType() - x._fill_ast(mock.Mock()) + with pytest.raises(ValueError, match="MapType requires key and value type be set."): + MapType() - # Maptype requires both key and value type be set if either is set - with pytest.raises( - ValueError, - match="Must either set both key_type and value_type or leave both unset.", - ): + with pytest.raises(ValueError, match="MapType requires key and value type be set."): MapType(StringType()) @@ -586,7 +577,7 @@ def finish(self) -> dict: "Snowflake does not support structured maps as return type for UDAFs. Downcasting to semi-structured object." in caplog.text ) - assert MapCollector._return_type == MapType() + assert MapCollector._return_type == StructType() @pytest.mark.skipif( @@ -978,8 +969,8 @@ def test_structured_dtypes_cast(structured_type_session, structured_type_support expected_semi_schema = StructType( [ StructField("ARR", ArrayType(), nullable=True), - StructField("MAP", MapType(), nullable=True), - StructField("OBJ", MapType(), nullable=True), + StructField("MAP", StructType(), nullable=True), + StructField("OBJ", StructType(), nullable=True), ] ) expected_structured_schema = StructType( @@ -1008,8 +999,8 @@ def test_structured_dtypes_cast(structured_type_session, structured_type_support schema=StructType( [ StructField("arr", ArrayType()), - StructField("map", MapType()), - StructField("obj", MapType()), + StructField("map", StructType()), + StructField("obj", StructType()), ] ), ) From c83ebacb8646f78724e19417427d1971f297699a Mon Sep 17 00:00:00 2001 From: Jamison Date: Mon, 3 Feb 2025 17:00:59 -0800 Subject: [PATCH 2/2] test fix --- src/snowflake/snowpark/_internal/type_utils.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/snowflake/snowpark/_internal/type_utils.py b/src/snowflake/snowpark/_internal/type_utils.py index f7f9f872d6..6680865a1b 100644 --- a/src/snowflake/snowpark/_internal/type_utils.py +++ b/src/snowflake/snowpark/_internal/type_utils.py @@ -681,10 +681,8 @@ def python_type_to_snow_type( else None ) if ( - key_type is None - or value_type is None - and context._should_use_structured_type_semantics() - ): + key_type is None or value_type is None + ) and context._should_use_structured_type_semantics(): return StructType(), False return MapType(key_type, value_type), False