From 4b50f33c9c9c76908f538b93499930f765003ce7 Mon Sep 17 00:00:00 2001 From: Andre Furlan Date: Tue, 2 May 2023 12:27:29 -0700 Subject: [PATCH] [Release] Update to 1.5 (#315) (#316) Resolves #314 ### Description Update to dbt 1.5.0 with release `1.5.0rc1`. * We fixed all tests * Added a nice error message for model contracts while we are working on supporting it --------- Signed-off-by: Andre Furlan Signed-off-by: Jesse Whitehouse Signed-off-by: Raymond Cypher Co-authored-by: Jesse Co-authored-by: Raymond Cypher --- CHANGELOG.md | 3 + dbt/adapters/databricks/__version__.py | 2 +- dbt/adapters/databricks/impl.py | 2 +- dbt/include/databricks/macros/adapters.sql | 6 ++ dev-requirements.txt | 9 +- ...icks-dbt-constraints-vs-model-contracts.md | 98 +++++++++++++++++++ requirements.txt | 4 +- setup.py | 3 +- tests/integration/base.py | 23 +++-- tests/integration/fail_fast/test_fail_fast.py | 10 +- .../persist_constraints/models/schema.yml | 12 +++ .../models/table_model_contract.sql | 3 + .../test_persist_constraints.py | 25 +++++ tests/unit/utils.py | 11 +++ tox.ini | 10 +- 15 files changed, 199 insertions(+), 22 deletions(-) create mode 100644 docs/databricks-dbt-constraints-vs-model-contracts.md create mode 100644 tests/integration/persist_constraints/models/table_model_contract.sql diff --git a/CHANGELOG.md b/CHANGELOG.md index 7934a30be..855c8246c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ ## dbt-databricks 1.5.0 (Release TBD) +### Under the hood +Throw an error if a model has an enforced contract. 
([#322](https://github.com/databricks/dbt-databricks/pull/322)) + ## dbt-databricks 1.4.2 (February 17, 2023) ### Fixes diff --git a/dbt/adapters/databricks/__version__.py b/dbt/adapters/databricks/__version__.py index c91f06f44..45101cf4b 100644 --- a/dbt/adapters/databricks/__version__.py +++ b/dbt/adapters/databricks/__version__.py @@ -1 +1 @@ -version: str = "1.4.1" +version: str = "1.5.0rc1" diff --git a/dbt/adapters/databricks/impl.py b/dbt/adapters/databricks/impl.py index 870244d35..a611b8186 100644 --- a/dbt/adapters/databricks/impl.py +++ b/dbt/adapters/databricks/impl.py @@ -404,7 +404,7 @@ def _get_one_catalog( quote_policy=self.config.quoting, ) for relation, information in self._list_relations_with_information(schema_relation): - logger.debug("Getting table schema for relation {}", relation) + logger.debug("Getting table schema for relation {}", str(relation)) columns.extend(self._get_columns_for_catalog(relation, information)) return Table.from_object(columns, column_types=DEFAULT_TYPE_TESTER) diff --git a/dbt/include/databricks/macros/adapters.sql b/dbt/include/databricks/macros/adapters.sql index a3f812f74..d8d6b7ec9 100644 --- a/dbt/include/databricks/macros/adapters.sql +++ b/dbt/include/databricks/macros/adapters.sql @@ -105,6 +105,12 @@ {% endmacro %} {% macro databricks__persist_constraints(relation, model) %} + {# Model contracts are not currently supported. 
#} + {%- set contract_config = config.get('contract') -%} + {% if contract_config and contract_config.enforced %} + {{ exceptions.raise_compiler_error('Model contracts are not currently supported.') }} + {% endif %} + {% if config.get('persist_constraints', False) and config.get('file_format', 'delta') == 'delta' %} {% do alter_table_add_constraints(relation, model.meta.constraints) %} {% do alter_column_set_constraints(relation, model.columns) %} diff --git a/dev-requirements.txt b/dev-requirements.txt index b13d124fb..9290dae9d 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -22,6 +22,9 @@ pytz tox>=3.2.0 types-requests -dbt-spark==1.4.* -# dbt-core==1.4.* -dbt-tests-adapter>=1.4.0 \ No newline at end of file +dbt-spark>=1.5.0 +dbt-core>=1.5.0 +dbt-tests-adapter>=1.5.0 +# git+https://github.com/dbt-labs/dbt-spark.git@1.5.latest#egg=dbt-spark +# git+https://github.com/dbt-labs/dbt-core.git@1.5.latest#egg=dbt-core&subdirectory=core +# git+https://github.com/dbt-labs/dbt-core.git@1.5.latest#egg=dbt-tests-adapter&subdirectory=tests/adapter diff --git a/docs/databricks-dbt-constraints-vs-model-contracts.md b/docs/databricks-dbt-constraints-vs-model-contracts.md new file mode 100644 index 000000000..df50bf526 --- /dev/null +++ b/docs/databricks-dbt-constraints-vs-model-contracts.md @@ -0,0 +1,98 @@ +# dbt-databricks constraints vs DBT 1.5 model contracts + +dbt-databricks constraints are enabled for a model by setting `persist_constraints: true` in the model configuration. Model contracts are enabled by setting `enforced: true` under the contract configuration. + +``` +models: + - name: table_model + config: + contract: + enforced: true +``` + +DBT model contracts enforce column names and datatypes. This means that **all** columns must be explicitly listed and have name and data_type properties. + +dbt-databricks constraints list model level constraints under `meta: constraints:` while in DBT `constraints` is a property of the model. 
+``` +dbt-databricks + +models: + - name: incremental_model + meta: + constraints: + - name: id_greater_than_zero + condition: id > 0 +``` + +``` +model contract + +models: + - name: table_model + constraints: + - type: check + columns: [id] + name: id_greater_than_zero + expression: "id > 0" +``` + +dbt-databricks constraints have a single column level constraint (currently limited to not_null) defined by the `meta: constraint:` property. +Model contracts have multiple column constraints listed under a column's `constraints` property. +``` +dbt-databricks + + columns: + - name: name + meta: + constraint: not_null +``` + +``` +model contract + + columns: + - name: name + data_type: string + constraints: + - type: not_null +``` + +Model contract constraint structure: +- **type** (required): dbt-databricks constraints do not have this property. DBT has not_null, check, primary_key, foreign_key, and custom types. dbt-databricks constraints currently support the equivalents of not_null and check. +- **expression**: Free text input to qualify the constraint. In dbt-databricks constraints this is the condition property. Note: in model contracts the expression text is contained by double quotes; the condition text in dbt-databricks constraints is not double quoted. +- **name** (optional in model contracts, required for check constraints in dbt-databricks constraints): Human-friendly name for this constraint. +- **columns** (model-level only): List of column names to apply the constraint over. dbt-databricks constraints do not have this property. + + + In a model contract a check constraint over a single column can be defined at either the model or the column level, but it is recommended that it be defined at the column level. Check constraints over multiple columns must be defined at the model level. +dbt-databricks check constraints are defined only at the model level. 
+ +``` +dbt-databricks + +models: + - name: my_model + meta: + constraints: + - name: id_greater_than_zero + condition: id > 0 + columns: + - name: name + meta: + constraint: not_null +``` + +``` +model contract + +models: + - name: my_model + columns: + - name: name + data_type: integer + constraints: + - type: not_null + - type: check + name: id_greater_than_zero + expression: "id > 0" +``` \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 478bd90b7..f646d1a80 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ databricks-sql-connector>=2.5.0 -dbt-spark>=1.4.0 +dbt-spark>=1.5.0 databricks-sdk>=0.1.1 -keyring>=23.13.* \ No newline at end of file +keyring>=23.13.0 diff --git a/setup.py b/setup.py index e4fa51dc9..4981d27ba 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,6 @@ def _get_plugin_version(): package_name = "dbt-databricks" package_version = _get_plugin_version() -dbt_spark_version = "1.4.1" description = """The Databricks adapter plugin for dbt""" setup( @@ -55,7 +54,7 @@ def _get_plugin_version(): packages=find_namespace_packages(include=["dbt", "dbt.*"]), include_package_data=True, install_requires=[ - "dbt-spark~={}".format(dbt_spark_version), + "dbt-spark>=1.5.0", "databricks-sql-connector>=2.5.0", "databricks-sdk>=0.1.1", "keyring>=23.13.0" diff --git a/tests/integration/base.py b/tests/integration/base.py index 12ac70c92..fb9e68613 100644 --- a/tests/integration/base.py +++ b/tests/integration/base.py @@ -13,8 +13,9 @@ import yaml from unittest.mock import patch -import dbt.main as dbt -from dbt import flags +from argparse import Namespace +from dbt.cli.main import dbtRunner +import dbt.flags as flags from dbt.deprecations import reset_deprecations from dbt.adapters.factory import get_adapter, reset_adapters, register_adapter from dbt.clients.jinja import template_cache @@ -87,6 +88,7 @@ def __init__(self, kwargs): self.profiles_dir = None self.project_dir = None self.__dict__.update(kwargs) + 
self.threads = None def _profile_from_test_name(test_name): @@ -218,14 +220,21 @@ def _generate_test_root_dir(self): return normalize(tempfile.mkdtemp(prefix="dbt-int-test-")) def setUp(self): - self.dbt_core_install_root = os.path.dirname(dbt.__file__) log_manager.reset_handlers() self.initial_dir = INITIAL_ROOT os.chdir(self.initial_dir) # before we go anywhere, collect the initial path info self._logs_dir = os.path.join(self.initial_dir, "logs", self.prefix) - setup_event_logger(self._logs_dir) - _really_makedirs(self._logs_dir) + args = Namespace( + profiles_dir=".", + project_dir=".", + target=None, + profile=None, + threads=None, + ) + flags.set_from_args(args, {}) + flags.LOG_PATH = self._logs_dir + setup_event_logger(flags.get_flags()) self.test_original_source_path = _pytest_get_test_root() self.test_root_dir = self._generate_test_root_dir() @@ -427,10 +436,10 @@ def run_dbt_and_check(self, args=None, profiles_dir=True): if profiles_dir: final_args.extend(["--profiles-dir", self.test_root_dir]) - final_args.append("--log-cache-events") logger.info("Invoking dbt with {}".format(final_args)) - return dbt.handle_and_check(final_args) + res = dbtRunner().invoke(args, log_cache_events=True, log_path=self._logs_dir) + return res.result, res.success def run_sql_file(self, path, kwargs=None): with open(path, "r") as f: diff --git a/tests/integration/fail_fast/test_fail_fast.py b/tests/integration/fail_fast/test_fail_fast.py index 856a254d6..ce2639d99 100644 --- a/tests/integration/fail_fast/test_fail_fast.py +++ b/tests/integration/fail_fast/test_fail_fast.py @@ -15,8 +15,16 @@ def models(self): def test_fail_fast(self): self.run_dbt(["run"]) + # PECO-738 Original except message we tested for was: + # + # 'Failing early due to test failure or runtime error' + # + # This is supposed to raise a FailFastException but that + # is being swallowed by the test runner and only the DBT + # test failure error message is raised instead. 
+ _ = FailFastError with self.assertRaisesRegex( - FailFastError, "Failing early due to test failure or runtime error" + Exception, "False != True : dbt exit state did not match expected" ): self.run_dbt(["test", "--fail-fast"]) diff --git a/tests/integration/persist_constraints/models/schema.yml b/tests/integration/persist_constraints/models/schema.yml index 8c22905a1..8a3de2ede 100644 --- a/tests/integration/persist_constraints/models/schema.yml +++ b/tests/integration/persist_constraints/models/schema.yml @@ -48,3 +48,15 @@ models: meta: constraint: not_null - name: date + + - name: table_model_contract + config: + contract: + enforced: true + columns: + - name: id + data_type: int + - name: name + data_type: string + - name: date + data_type: date \ No newline at end of file diff --git a/tests/integration/persist_constraints/models/table_model_contract.sql b/tests/integration/persist_constraints/models/table_model_contract.sql new file mode 100644 index 000000000..5f8e91419 --- /dev/null +++ b/tests/integration/persist_constraints/models/table_model_contract.sql @@ -0,0 +1,3 @@ +{{config(materialized='table')}} + +select * from {{ ref('seed') }} diff --git a/tests/integration/persist_constraints/test_persist_constraints.py b/tests/integration/persist_constraints/test_persist_constraints.py index 69399f5f2..5bdabc6e1 100644 --- a/tests/integration/persist_constraints/test_persist_constraints.py +++ b/tests/integration/persist_constraints/test_persist_constraints.py @@ -259,3 +259,28 @@ def test_databricks_sql_endpoint(self): @use_profile("databricks_uc_sql_endpoint") def test_databricks_uc_sql_endpoint(self): self.test_delta_constraints_disabled() + + +class TestModelContractNotSupported(TestConstraints): + def test_model_contract_not_supported(self): + model_name = "table_model_contract" + self.run_dbt(["seed"]) + self.run_and_check_failure( + model_name, err_msg="Model contracts are not currently supported." 
+ ) + + @use_profile("databricks_cluster") + def test_databricks_cluster(self): + self.test_model_contract_not_supported() + + @use_profile("databricks_uc_cluster") + def test_databricks_uc_cluster(self): + self.test_model_contract_not_supported() + + @use_profile("databricks_sql_endpoint") + def test_databricks_sql_endpoint(self): + self.test_model_contract_not_supported() + + @use_profile("databricks_uc_sql_endpoint") + def test_databricks_uc_sql_endpoint(self): + self.test_model_contract_not_supported() diff --git a/tests/unit/utils.py b/tests/unit/utils.py index 4b3f59aef..44ef9f8f4 100644 --- a/tests/unit/utils.py +++ b/tests/unit/utils.py @@ -46,6 +46,13 @@ def profile_from_dict(profile, profile_name, cli_vars="{}"): cli_vars = parse_cli_vars(cli_vars) renderer = ProfileRenderer(cli_vars) + + # in order to call dbt's internal profile rendering, we need to set the + # flags global. This is a bit of a hack, but it's the best way to do it. + from dbt.flags import set_from_args + from argparse import Namespace + + set_from_args(Namespace(), None) return Profile.from_raw_profile_info( profile, profile_name, @@ -75,8 +82,12 @@ def project_from_dict(project, profile, packages=None, selectors=None, cli_vars= def config_from_parts_or_dicts(project, profile, packages=None, selectors=None, cli_vars="{}"): from dbt.config import Project, Profile, RuntimeConfig + from dbt.config.utils import parse_cli_vars from copy import deepcopy + if not isinstance(cli_vars, dict): + cli_vars = parse_cli_vars(cli_vars) + if isinstance(project, Project): profile_name = project.profile_name else: diff --git a/tox.ini b/tox.ini index 17e8d083d..a13588b84 100644 --- a/tox.ini +++ b/tox.ini @@ -37,8 +37,8 @@ deps = [testenv:integration-databricks-cluster] basepython = python3 commands = - /bin/bash -c '{envpython} -m pytest -v --profile databricks_cluster -n4 tests/functional/adapter/* {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' /bin/bash -c '{envpython} -m pytest -v -m 
profile_databricks_cluster -n4 tests/integration/* {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' + /bin/bash -c '{envpython} -m pytest -v --profile databricks_cluster -n4 tests/functional/adapter/* {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' passenv = DBT_* PYTEST_ADDOPTS @@ -50,8 +50,8 @@ allowlist_externals = /bin/bash [testenv:integration-databricks-uc-cluster] basepython = python3 commands = - /bin/bash -c '{envpython} -m pytest -v --profile databricks_uc_cluster -n4 tests/functional/adapter/* {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_uc_cluster -n4 tests/integration/* {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' + /bin/bash -c '{envpython} -m pytest -v --profile databricks_uc_cluster -n4 tests/functional/adapter/* {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' passenv = DBT_* PYTEST_ADDOPTS @@ -63,8 +63,8 @@ allowlist_externals = /bin/bash [testenv:integration-databricks-sql-endpoint] basepython = python3 commands = - /bin/bash -c '{envpython} -m pytest -v --profile databricks_sql_endpoint -n4 tests/functional/adapter/* {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_sql_endpoint -n4 tests/integration/* {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' + /bin/bash -c '{envpython} -m pytest -v --profile databricks_sql_endpoint -n4 tests/functional/adapter/* {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' passenv = DBT_* PYTEST_ADDOPTS @@ -76,12 +76,12 @@ allowlist_externals = /bin/bash [testenv:integration-databricks-uc-sql-endpoint] basepython = python3 commands = - /bin/bash -c '{envpython} -m pytest -v --profile databricks_uc_sql_endpoint -n4 tests/functional/adapter/* {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_uc_sql_endpoint -n4 tests/integration/* {posargs}; ret=$?; [ $ret = 5 ] && exit 
0 || exit $ret' + /bin/bash -c '{envpython} -m pytest -v --profile databricks_uc_sql_endpoint -n4 tests/functional/adapter/* {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' passenv = DBT_* PYTEST_ADDOPTS deps = -r{toxinidir}/dev-requirements.txt -r{toxinidir}/requirements.txt -allowlist_externals = /bin/bash +allowlist_externals = /bin/bash \ No newline at end of file