diff --git a/singer_sdk/helpers/_typing.py b/singer_sdk/helpers/_typing.py index 1c8f9f461..fc55a9679 100644 --- a/singer_sdk/helpers/_typing.py +++ b/singer_sdk/helpers/_typing.py @@ -5,6 +5,7 @@ import copy import datetime import decimal +import json import logging import math import typing as t @@ -542,9 +543,32 @@ def _conform_primitive_property( # noqa: PLR0911 # for BIT value, treat 0 as False and anything else as True return elem != b"\x00" if is_boolean_type(property_schema) else elem.hex() if isinstance(elem, (float, decimal.Decimal)): - if math.isnan(elem) or math.isinf(elem): - return None - return elem + return elem if math.isfinite(elem) else None + if isinstance(elem, str) and not is_string_type(property_schema): + return _transform_string_property(elem, property_schema) if _is_exclusive_boolean_type(property_schema): return None if elem is None else elem != 0 return elem + + +def _transform_string_property( # noqa: PLR0911 + elem: str, + property_schema: dict, +) -> t.Any: # noqa: ANN401 + if not elem and is_null_type(property_schema): + return None # if nullable, None for empty string + + if is_boolean_type(property_schema): + return ( + elem.lower() == "true" + ) # false for any non-"true" string (case-insensitive), including empty string + if is_integer_type(property_schema): + return int(elem or 0) # 0 for empty string + if is_number_type(property_schema): + d = decimal.Decimal(elem or 0) # 0 for empty string + return d if d.is_finite() else None + if is_array_type(property_schema): + return json.loads(elem) if elem else [] # empty array for empty string + if is_object_type(property_schema): + return json.loads(elem) if elem else {} # empty object for empty string + return elem diff --git a/tests/core/test_typing.py b/tests/core/test_typing.py index 44f344b97..b3991900e 100644 --- a/tests/core/test_typing.py +++ b/tests/core/test_typing.py @@ -14,6 +14,7 @@ from singer_sdk.helpers._typing import ( TypeConformanceLevel, _conform_primitive_property, + _transform_string_property, conform_record_data_types, ) from singer_sdk.typing import ( @@ -357,12 +358,68 @@ def test_conform_object_additional_properties(): pytest.param( decimal.Decimal("nan"), {"type": "number"}, None, id="decimal_nan_to_number" ), + pytest.param("", {"type": "string"}, "", id="string_empty_to_string"), + pytest.param( + "", + {"type": ["boolean", "null"]}, + None, + id="string_empty_to_any_nullable_non_string", + ), + pytest.param("true", {"type": "boolean"}, True, id="string_true_to_boolean"), + pytest.param( + "TRUE", + {"type": "boolean"}, + True, + id="string_true_uppercase_to_boolean", + ), + pytest.param("false", {"type": "boolean"}, False, id="string_false_to_boolean"), + pytest.param( + "something else", + {"type": "boolean"}, + False, + id="string_not_true_to_boolean", + ), + pytest.param("", {"type": "boolean"}, False, id="string_empty_to_boolean"), + pytest.param("3", {"type": "integer"}, 3, id="string_integer_to_integer"), + pytest.param("", {"type": "integer"}, 0, id="string_empty_to_integer"), + pytest.param( + "3.14", + {"type": "number"}, + decimal.Decimal("3.14"), + id="string_float_to_number", + ), + pytest.param("inf", {"type": "number"}, None, id="string_inf_to_number"), + pytest.param("nan", {"type": "number"}, None, id="string_nan_to_number"), + pytest.param( + "", + {"type": "number"}, + decimal.Decimal(0), + id="string_empty_to_number", + ), + pytest.param( + "[1, 2, 3]", + {"type": "array"}, + [1, 2, 3], + id="string_json_array_to_array", + ), + pytest.param("", {"type": "array"}, [], id="string_empty_to_array"), + pytest.param( + '{"a": 1, "b": true, "c": 3.14}', + {"type": "object"}, + {"a": 1, "b": True, "c": 3.14}, + id="string_json_object_to_object", + ), + pytest.param("", {"type": "object"}, {}, id="string_empty_to_object"), ], ) def test_conform_primitives(value: t.Any, type_dict: dict, expected: t.Any): assert _conform_primitive_property(value, type_dict) == expected +def test_transform_string_to_string(): + assert _transform_string_property("test", {"type": "string"}) == "test" + + @pytest.mark.filterwarnings("ignore:Use `JSONSchemaToSQL` instead.:DeprecationWarning") @pytest.mark.parametrize( "jsonschema_type,expected",