dbt-labs · colin-rogers-dbt · Feb 4, 2025 · Dec 17, 2024 · Dec 17, 2024 · Dec 17, 2024
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Added a new equals macro that handles null value checks in SQL
+time: 2024-12-17T11:05:36.363421+02:00
+custom:
+    Author: adrianburusdbt,versusfacit
+    Issue: "159"
@@ -104,7 +104,12 @@
         "name": "require_batched_execution_for_custom_microbatch_strategy",
         "default": False,
         "docs_url": "https://docs.getdbt.com/docs/build/incremental-microbatch",
-    }
+    },
+    {
+        "name": "enable_truthy_nulls_equals_macro",
+        "default": False,
+        "docs_url": "",
+    },
 ]
 
 

@@ -21,9 +21,9 @@
                 {% do predicates.append(this_key_match) %}
             {% endfor %}
         {% else %}
-            {% set unique_key_match %}
-                DBT_INTERNAL_SOURCE.{{ unique_key }} = DBT_INTERNAL_DEST.{{ unique_key }}
-            {% endset %}
+            {% set source_unique_key = ("DBT_INTERNAL_SOURCE." ~ unique_key) | trim %}
+	    {% set target_unique_key = ("DBT_INTERNAL_DEST." ~ unique_key) | trim %}
+	    {% set unique_key_match = equals(source_unique_key, target_unique_key) | trim %}
             {% do predicates.append(unique_key_match) %}
         {% endif %}
     {% else %}
@@ -62,11 +62,13 @@
 
     {% if unique_key %}
         {% if unique_key is sequence and unique_key is not string %}
-            delete from {{target }}
+            delete from {{ target }}
             using {{ source }}
             where (
                 {% for key in unique_key %}
-                    {{ source }}.{{ key }} = {{ target }}.{{ key }}
+		    {% set source_unique_key = (source ~ "." ~ key) | trim %}
+		    {% set target_unique_key = (target ~ "." ~ key) | trim %}
+                    {{ equals(source_unique_key, target_unique_key) }}
                     {{ "and " if not loop.last}}
                 {% endfor %}
                 {% if incremental_predicates %}

@@ -55,8 +55,11 @@
         from {{ target_relation }}
         where
             {% if config.get('dbt_valid_to_current') %}
-               {# Check for either dbt_valid_to_current OR null, in order to correctly update records with nulls #}
-               ( {{ columns.dbt_valid_to }} = {{ config.get('dbt_valid_to_current') }} or {{ columns.dbt_valid_to }} is null)
+		{% set source_unique_key = columns.dbt_valid_to | trim %}
+		{% set target_unique_key = config.get('dbt_valid_to_current') | trim %}
+
+		{# How equals() treats two NULL values depends on the enable_truthy_nulls_equals_macro behavior flag, so also match rows whose dbt_valid_to column is null to make sure those records are still updated #}
+                ( {{ equals(source_unique_key, target_unique_key) }} or {{ source_unique_key }} is null )
             {% else %}
                 {{ columns.dbt_valid_to }} is null
             {% endif %}
@@ -279,7 +282,9 @@
 {% macro unique_key_join_on(unique_key, identifier, from_identifier) %}
     {% if unique_key | is_list %}
         {% for key in unique_key %}
-            {{ identifier }}.dbt_unique_key_{{ loop.index }} = {{ from_identifier }}.dbt_unique_key_{{ loop.index }}
+	    {% set source_unique_key = (identifier ~ ".dbt_unique_key_" ~ loop.index) | trim %}
+	    {% set target_unique_key = (from_identifier ~ ".dbt_unique_key_" ~ loop.index) | trim %}
+	    {{ equals(source_unique_key, target_unique_key) }}
             {%- if not loop.last %} and {%- endif %}
         {% endfor %}
     {% else %}

@@ -15,8 +15,10 @@
 
     when matched
      {% if config.get("dbt_valid_to_current") %}
-       and (DBT_INTERNAL_DEST.{{ columns.dbt_valid_to }} = {{ config.get('dbt_valid_to_current') }} or
-            DBT_INTERNAL_DEST.{{ columns.dbt_valid_to }} is null)
+	{% set source_unique_key = ("DBT_INTERNAL_DEST." ~ columns.dbt_valid_to) | trim %}
+	{% set target_unique_key = config.get('dbt_valid_to_current') | trim %}
+	and ({{ equals(source_unique_key, target_unique_key) }} or {{ source_unique_key }} is null)
+
      {% else %}
        and DBT_INTERNAL_DEST.{{ columns.dbt_valid_to }} is null
      {% endif %}

@@ -0,0 +1,14 @@
+{#
+    equals(expr1, expr2): entry point for comparing two SQL expressions.
+
+    Resolves the `equals` implementation through adapter.dispatch in the
+    'dbt' namespace, calls it with both arguments unchanged, and returns
+    whatever that implementation produces to the caller.
+
+    Args:
+        expr1: left-hand SQL expression (rendered into the comparison as-is).
+        expr2: right-hand SQL expression (rendered into the comparison as-is).
+#}
+{% macro equals(expr1, expr2) %}
+    {{ return(adapter.dispatch('equals', 'dbt') (expr1, expr2)) }}
+{%- endmacro %}
+
+{#
+    default__equals(expr1, expr2): default SQL rendering behind equals().
+
+    When the enable_truthy_nulls_equals_macro behavior flag is truthy, renders
+    a CASE expression that yields 0 if the two expressions are equal OR both
+    are NULL (1 otherwise) and compares that result to 0, so two NULLs count
+    as a match. When the flag is falsy, renders the plain comparison
+    (expr1 = expr2).
+
+    NOTE(review): the flag is read through `.no_warn`, presumably so checking
+    it does not emit the behavior-change warning; confirm against the dbt
+    behavior-flag documentation.
+
+    Args:
+        expr1: left-hand SQL expression, interpolated verbatim.
+        expr2: right-hand SQL expression, interpolated verbatim.
+#}
+{% macro default__equals(expr1, expr2) -%}
+{%- if adapter.behavior.enable_truthy_nulls_equals_macro.no_warn %}
+    case when (({{ expr1 }} = {{ expr2 }}) or ({{ expr1 }} is null and {{ expr2 }} is null))
+        then 0
+        else 1
+    end = 0
+{%- else -%}
+    ({{ expr1 }} = {{ expr2 }})
+{%- endif %}
+{% endmacro %}
@@ -1,5 +1,150 @@
-from dbt.tests.adapter.incremental.test_incremental_unique_id import BaseIncrementalUniqueKey
+import pytest
 
+from dbt.contracts.results import RunStatus
 
-class TestUniqueKeyBigQuery(BaseIncrementalUniqueKey):
+from dbt.tests.adapter.incremental.test_incremental_unique_id import SubBaseIncrementalUniqueKey
+
+
+class IncrementalUniqueKeyFalseyNullsEquals(SubBaseIncrementalUniqueKey):
+    def test__bad_unique_key(self, project):
+        """expect compilation error from unique key not being a column"""
+
+        (status, exc) = self.fail_to_build_inc_missing_unique_key_column(
+            incremental_model_name="not_found_unique_key"
+        )
+
+        assert status == RunStatus.Error
+        assert "thisisnotacolumn" in exc.lower()
+
+    # test unique_key as list
+    def test__empty_unique_key_list(self, project):
+        """with no unique keys, seed and model should match"""
+
+        expected_fields = self.get_expected_fields(relation="seed", seed_rows=9)
+        test_case_fields = self.get_test_fields(
+            project,
+            seed="seed",
+            incremental_model="empty_unique_key_list",
+            update_sql_file="add_new_rows",
+        )
+        self.check_scenario_correctness(expected_fields, test_case_fields, project)
+
+    def test__one_unique_key(self, project):
+        """with one unique key, model will overwrite existing row"""
+
+        expected_fields = self.get_expected_fields(
+            relation="one_str__overwrite", seed_rows=8, opt_model_count=1
+        )
+        test_case_fields = self.get_test_fields(
+            project,
+            seed="seed",
+            incremental_model="str_unique_key",
+            update_sql_file="duplicate_insert",
+            opt_model_count=self.update_incremental_model("one_str__overwrite"),
+        )
+        self.check_scenario_correctness(expected_fields, test_case_fields, project)
+
+    def test__bad_unique_key_list(self, project):
+        """expect compilation error from unique key not being a column"""
+
+        (status, exc) = self.fail_to_build_inc_missing_unique_key_column(
+            incremental_model_name="not_found_unique_key_list"
+        )
+
+        assert status == RunStatus.Error
+        assert "thisisnotacolumn" in exc.lower()
+
+
+class IncrementalUniqueKeyTruthyNullsEquals(SubBaseIncrementalUniqueKey):
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {"flags": {"enable_truthy_nulls_equals_macro": True}}
+
+    # no unique_key test
+    def test__no_unique_keys(self, project):
+        """with no unique keys, seed and model should match"""
+
+        expected_fields = self.get_expected_fields(relation="seed", seed_rows=9)
+        test_case_fields = self.get_test_fields(
+            project, seed="seed", incremental_model="no_unique_key", update_sql_file="add_new_rows"
+        )
+        self.check_scenario_correctness(expected_fields, test_case_fields, project)
+
+    # unique_key as str tests
+    def test__empty_str_unique_key(self, project):
+        """with empty string for unique key, seed and model should match"""
+
+        expected_fields = self.get_expected_fields(relation="seed", seed_rows=9)
+        test_case_fields = self.get_test_fields(
+            project,
+            seed="seed",
+            incremental_model="empty_str_unique_key",
+            update_sql_file="add_new_rows",
+        )
+        self.check_scenario_correctness(expected_fields, test_case_fields, project)
+
+    def test__unary_unique_key_list(self, project):
+        """with one unique key, model will overwrite existing row"""
+
+        expected_fields = self.get_expected_fields(
+            relation="unique_key_list__inplace_overwrite", seed_rows=8, opt_model_count=1
+        )
+        test_case_fields = self.get_test_fields(
+            project,
+            seed="seed",
+            incremental_model="unary_unique_key_list",
+            update_sql_file="duplicate_insert",
+            opt_model_count=self.update_incremental_model("unique_key_list__inplace_overwrite"),
+        )
+        self.check_scenario_correctness(expected_fields, test_case_fields, project)
+
+    def test__duplicated_unary_unique_key_list(self, project):
+        """with two of the same unique key, model will overwrite existing row"""
+
+        expected_fields = self.get_expected_fields(
+            relation="unique_key_list__inplace_overwrite", seed_rows=8, opt_model_count=1
+        )
+        test_case_fields = self.get_test_fields(
+            project,
+            seed="seed",
+            incremental_model="duplicated_unary_unique_key_list",
+            update_sql_file="duplicate_insert",
+            opt_model_count=self.update_incremental_model("unique_key_list__inplace_overwrite"),
+        )
+        self.check_scenario_correctness(expected_fields, test_case_fields, project)
+
+    def test__trinary_unique_key_list(self, project):
+        """with three unique keys, model will overwrite existing row"""
+
+        expected_fields = self.get_expected_fields(
+            relation="unique_key_list__inplace_overwrite", seed_rows=8, opt_model_count=1
+        )
+        test_case_fields = self.get_test_fields(
+            project,
+            seed="seed",
+            incremental_model="trinary_unique_key_list",
+            update_sql_file="duplicate_insert",
+            opt_model_count=self.update_incremental_model("unique_key_list__inplace_overwrite"),
+        )
+        self.check_scenario_correctness(expected_fields, test_case_fields, project)
+
+    def test__trinary_unique_key_list_no_update(self, project):
+        """even with three unique keys, adding distinct rows to seed does not
+        cause seed and model to diverge"""
+
+        expected_fields = self.get_expected_fields(relation="seed", seed_rows=9)
+        test_case_fields = self.get_test_fields(
+            project,
+            seed="seed",
+            incremental_model="nontyped_trinary_unique_key_list",
+            update_sql_file="add_new_rows",
+        )
+        self.check_scenario_correctness(expected_fields, test_case_fields, project)
+
+
+class TestIncrementalUniqueKeyFalseyNullsEquals(IncrementalUniqueKeyFalseyNullsEquals):
+    pass
+
+
+class TestIncrementalUniqueKeyTruthyNullsEquals(IncrementalUniqueKeyTruthyNullsEquals):
+    # Concrete test class: adds nothing of its own and inherits every test and
+    # fixture from IncrementalUniqueKeyTruthyNullsEquals.
     pass