diff --git a/core/setup.py b/core/setup.py index 911109a65..345fb19a2 100644 --- a/core/setup.py +++ b/core/setup.py @@ -4,27 +4,27 @@ from setuptools import setup, find_namespace_packages if sys.version_info < (3, 7): - print('Error: Soda SQL requires at least Python 3.7') - print('Error: Please upgrade your Python version to 3.7 or later') + print("Error: Soda SQL requires at least Python 3.7") + print("Error: Please upgrade your Python version to 3.7 or later") sys.exit(1) package_name = "soda-sql-core" # Managed by tbump - don't change manually # And we can't have nice semver (..--) # like "-alpha-1" as long as this is open >> https://github.com/pypa/setuptools/issues/2181 -package_version = '2.1.0' +package_version = "2.1.0" description = "Soda SQL Core" long_description = (pathlib.Path(__file__).parent / "README.md").read_text() requires = [ - 'Jinja2>=2.11.3, <3.0', - 'click>=7.1.2, <8.0', - 'cryptography==3.3.2', - 'pyyaml>=5.4.1, <6.0', - 'requests>=2.23.0, <3.0', - 'Deprecated>=1.2.13, <1.3', - 'opentelemetry-exporter-otlp-proto-http>=1.6.2,<1.7', + "Jinja2>=2.11.3, <3.0", + "click>=8.0, <9.0", + "cryptography==3.3.2", + "pyyaml>=5.4.1, <6.0", + "requests>=2.23.0, <3.0", + "Deprecated>=1.2.13, <1.3", + "opentelemetry-exporter-otlp-proto-http>=1.6.2,<1.7", ] # TODO Fix the params # TODO Add a warning that installing core doesn't give any warehouse functionality @@ -38,24 +38,16 @@ long_description_content_type="text/markdown", packages=find_namespace_packages(include=["sodasql*"]), install_requires=requires, - entry_points={ - "console_scripts": - [ - "soda=sodasql.cli.cli:main" - ] - }, + entry_points={"console_scripts": ["soda=sodasql.cli.cli:main"]}, classifiers=[ - 'Development Status :: 4 - Beta', - - 'License :: OSI Approved :: Apache Software License', - - 'Operating System :: Microsoft :: Windows', - 'Operating System :: MacOS :: MacOS X', - 'Operating System :: POSIX :: Linux', - - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', + "Development Status :: 4 - Beta", + "License :: OSI Approved :: Apache Software License", + "Operating System :: Microsoft :: Windows", + "Operating System :: MacOS :: MacOS X", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", ], python_requires=">=3.7", ) diff --git a/core/sodasql/cli/ingest.py b/core/sodasql/cli/ingest.py index bbaeb7f71..691a2805f 100644 --- a/core/sodasql/cli/ingest.py +++ b/core/sodasql/cli/ingest.py @@ -25,6 +25,7 @@ @dataclasses.dataclass(frozen=True) class Table: """Represents a table.""" + name: str schema: str database: str @@ -54,10 +55,11 @@ def map_dbt_run_result_to_test_result( dbt_tests_with_soda_test = { test_node.unique_id: Test( id=test_node.unique_id, - title=f"Number of failures for {test_node.unique_id}", + title=f"{test_node.name}", expression=test_node.raw_sql, metrics=None, column=test_node.column_name, + source="dbt", ) for test_node in test_nodes.values() } @@ -107,9 +109,7 @@ def map_dbt_test_results_iterator( model_nodes, seed_nodes, test_nodes = soda_dbt.parse_manifest(manifest) parsed_run_results = soda_dbt.parse_run_results(run_results) - tests_with_test_result = map_dbt_run_result_to_test_result( - test_nodes, parsed_run_results - ) + tests_with_test_result = map_dbt_run_result_to_test_result(test_nodes, parsed_run_results) model_and_seed_nodes = {**model_nodes, **seed_nodes} models_with_tests = soda_dbt.create_nodes_to_tests_mapping( model_and_seed_nodes, test_nodes, parsed_run_results @@ -151,9 +151,7 @@ def flush_test_results( """ for table, test_results in test_results_iterator: test_results_jsons = [ - test_result.to_dict() - for test_result in test_results - if not test_result.skipped + test_result.to_dict() for test_result in test_results if not test_result.skipped ] if len(test_results_jsons) == 0: continue @@ -214,9 +212,7 @@ def ingest( raise ValueError(f"Dbt manifest is required: {dbt_manifest}") if dbt_run_results is None: raise ValueError(f"Dbt run results is required: {dbt_run_results}") - test_results_iterator = map_dbt_test_results_iterator( - dbt_manifest, dbt_run_results - ) + test_results_iterator = map_dbt_test_results_iterator(dbt_manifest, dbt_run_results) else: raise ValueError(f"Unknown tool: {tool}") diff --git a/core/sodasql/scan/test.py b/core/sodasql/scan/test.py index b242a49a1..12935a505 100644 --- a/core/sodasql/scan/test.py +++ b/core/sodasql/scan/test.py @@ -9,12 +9,13 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Optional, List from jinja2 import Template logger = logging.getLogger(__name__) + @dataclass class Test: id: str @@ -22,19 +23,28 @@ class Test: expression: str metrics: List[str] column: Optional[str] - expression_delimiters = ['<=', '>=', '<', '>', '=='] + expression_delimiters = ["<=", ">=", "<", ">", "=="] + source: str = field(default="") - def evaluate(self, test_variables: dict, group_values: Optional[dict] = None, - template_variables: Optional[dict] = None): + def evaluate( + self, + test_variables: dict, + group_values: Optional[dict] = None, + template_variables: Optional[dict] = None, + ): from sodasql.scan.test_result import TestResult try: values = {key: test_variables[key] for key in test_variables if key in self.metrics} if template_variables is not None: self.expression = Template(self.expression).render(template_variables) - if 'None' not in self.expression and any(v is None for v in values.values()): - logger.warning(f'Skipping test {self.expression} since corresponding metrics are None ({values}) ') - return TestResult(test=self, skipped=True, passed=True, values=values, group_values=group_values) + if "None" not in self.expression and any(v is None for v in values.values()): + logger.warning( + f"Skipping test {self.expression} since corresponding metrics are None ({values}) " + ) + return TestResult( + test=self, skipped=True, passed=True, values=values, group_values=group_values + ) else: passed = bool(eval(self.expression, test_variables)) @@ -43,16 +53,23 @@ def evaluate(self, test_variables: dict, group_values: Optional[dict] = None, if delimiter in self.expression: left, _, _ = self.expression.partition(delimiter) # Make sure the expression result is the first key in the resulting dict. - expression_result = {'expression_result': eval(left, test_variables)} + expression_result = {"expression_result": eval(left, test_variables)} expression_result.update(values) values = expression_result break - test_result = TestResult(test=self, passed=passed, skipped=False, values=values, - group_values=group_values) + test_result = TestResult( + test=self, + passed=passed, + skipped=False, + values=values, + group_values=group_values, + ) logger.debug(str(test_result)) return test_result except Exception as e: logger.error(f'Test error for "{self.expression}": {e}') - return TestResult(test=self, passed=False, skipped=False, error=e, group_values=group_values) + return TestResult( + test=self, passed=False, skipped=False, error=e, group_values=group_values + ) diff --git a/core/sodasql/scan/test_result.py b/core/sodasql/scan/test_result.py index 8b3c97f08..721843d12 100644 --- a/core/sodasql/scan/test_result.py +++ b/core/sodasql/scan/test_result.py @@ -28,43 +28,49 @@ class TestResult: def __str__(self): if self.passed: - status_str = 'passed' + status_str = "passed" elif self.skipped: - status_str = 'skipped' + status_str = "skipped" else: - status_str = 'failed' - return (f'Test {self.test.title} {status_str}' + - (f" with group values {self.group_values}" if self.group_values else '') + - f' with measurements {json.dumps(JsonHelper.to_jsonnable(self.values))}') + status_str = "failed" + return ( + f"Test {self.test.title} {status_str}" + + (f" with group values {self.group_values}" if self.group_values else "") + + f" with measurements {json.dumps(JsonHelper.to_jsonnable(self.values))}" + ) def to_dict(self) -> dict: if not self.test or not self.test.expression: - return { - 'error': 'Invalid test result' - } + return {"error": "Invalid test result"} test_result_json = { - 'id': self.test.id, - 'title': self.test.title, - 'description': self.test.title, # for backwards compatibility - 'expression': self.test.expression + "id": self.test.id, + "title": self.test.title, + "description": self.test.title, # for backwards compatibility + "expression": self.test.expression, } if self.test.column: - test_result_json['columnName'] = self.test.column + test_result_json["columnName"] = self.test.column + + if self.test.source: + test_result_json["source"] = self.test.source + else: + test_result_json["source"] = "soda-sql" if self.error: - test_result_json['error'] = str(self.error) + test_result_json["error"] = str(self.error) + else: - test_result_json['passed'] = self.passed - test_result_json['skipped'] = self.skipped - test_result_json['values'] = JsonHelper.to_jsonnable(self.values) + test_result_json["passed"] = self.passed + test_result_json["skipped"] = self.skipped + test_result_json["values"] = JsonHelper.to_jsonnable(self.values) if self.group_values: - test_result_json['groupValues'] = JsonHelper.to_jsonnable(self.group_values) + test_result_json["groupValues"] = JsonHelper.to_jsonnable(self.group_values) return test_result_json - @deprecated(version='2.1.0b19', reason='This function is deprecated, please use to_dict') + @deprecated(version="2.1.0b19", reason="This function is deprecated, please use to_dict") def to_json(self): return self.to_dict() diff --git a/dev-requirements.in b/dev-requirements.in index 64cc7f853..920b5382b 100644 --- a/dev-requirements.in +++ b/dev-requirements.in @@ -1,4 +1,4 @@ -pip-tools==5.5.0 +pip-tools==6.4.0 pytest~=6.0.1 python-dotenv~=0.13.0 tox~=3.24.0 @@ -9,6 +9,7 @@ requests==2.26.0 twine~=3.4.2 Faker==8.1.2 tbump==6.3.1 +click>=8.0,<9.0 urllib3==1.26.5 pygments==2.10.0 diff --git a/dev-requirements.txt b/dev-requirements.txt index 7235c814b..0bfb433b7 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -18,8 +18,10 @@ charset-normalizer==2.0.4 # via requests cli-ui==0.10.3 # via tbump -click==7.1.2 - # via pip-tools +click==8.0.3 + # via + # -r dev-requirements.in + # pip-tools colorama==0.4.4 # via # cli-ui @@ -57,7 +59,9 @@ packaging==20.9 # bleach # pytest # tox -pip-tools==5.5.0 +pep517==0.12.0 + # via pip-tools +pip-tools==6.4.0 # via -r dev-requirements.in pkginfo==1.7.0 # via twine @@ -125,6 +129,8 @@ toml==0.10.2 # via # pytest # tox +tomli==2.0.0 + # via pep517 tomlkit==0.5.11 # via tbump tox-docker==2.0.0 @@ -149,8 +155,11 @@ webencodings==0.5.1 # via bleach websocket-client==0.57.0 # via docker +wheel==0.37.0 + # via pip-tools zipp==3.5.0 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: # pip +# setuptools diff --git a/packages/dbt/setup.py b/packages/dbt/setup.py index 3864da6bd..0f57ac7b4 100644 --- a/packages/dbt/setup.py +++ b/packages/dbt/setup.py @@ -3,18 +3,18 @@ from setuptools import setup, find_namespace_packages if sys.version_info < (3, 7): - print('Error: Soda SQL requires at least Python 3.7') - print('Error: Please upgrade your Python version to 3.7 or later') + print("Error: Soda SQL requires at least Python 3.7") + print("Error: Please upgrade your Python version to 3.7 or later") sys.exit(1) package_name = "soda-sql-dbt" -package_version = '2.1.0' +package_version = "2.1.0" # TODO Add proper description description = "Soda SQL DBT" requires = [ - f'soda-sql-core=={package_version}', - "dbt-core~=0.21.0", + f"soda-sql-core=={package_version}", + "dbt-core~=1.0.0", ] # TODO Fix the params # TODO Add a warning that installing core doesn't give any warehouse functionality @@ -22,5 +22,5 @@ name=package_name, version=package_version, install_requires=requires, - packages=find_namespace_packages(include=["sodasql*"]) + packages=find_namespace_packages(include=["sodasql*"]), ) diff --git a/packages/dbt/sodasql/dbt.py b/packages/dbt/sodasql/dbt.py index c726c338d..8e10d90c6 100644 --- a/packages/dbt/sodasql/dbt.py +++ b/packages/dbt/sodasql/dbt.py @@ -9,12 +9,12 @@ from dbt.contracts.graph.compiled import ( CompiledModelNode, - CompiledSchemaTestNode, + CompiledGenericTestNode, CompiledSeedNode, ) from dbt.contracts.graph.parsed import ( ParsedModelNode, - ParsedSchemaTestNode, + ParsedGenericTestNode, ParsedSeedNode, ) from dbt.contracts.results import RunResultOutput @@ -26,7 +26,7 @@ def parse_manifest( ) -> tuple[ dict[str, ParsedModelNode | CompileModelNode], dict[str, ParsedSeedNode | CompiledSeedNode], - dict[str, ParsedSchemaTestNode | CompiledSchemaTestNode], + dict[str, ParsedGenericTestNode | CompiledGenericTestNode], ]: """ Parse the manifest. @@ -43,7 +43,7 @@ def parse_manifest( out : tuple[ dict[str, ParsedModelNode | CompileModelNode], dict[str, ParsedSeedNode | CompiledSeedNode], - dict[str, ParsedSchemaTestNode | CompiledSchemaTestNode], + dict[str, ParsedGenericTestNode | CompiledGenericTestNode], ] The parsed manifest. @@ -56,9 +56,9 @@ def parse_manifest( ------ https://docs.getdbt.com/reference/artifacts/manifest-json """ - dbt_v3_schema = "https://schemas.getdbt.com/dbt/manifest/v3.json" - if manifest["metadata"]["dbt_schema_version"] != dbt_v3_schema: - raise NotImplementedError("Dbt manifest parsing only supported for V3 schema.") + dbt_v4_schema = "https://schemas.getdbt.com/dbt/manifest/v4.json" + if manifest["metadata"]["dbt_schema_version"] != dbt_v4_schema: + raise NotImplementedError("Dbt manifest parsing only supported for V4 schema.") model_nodes = { node_name: CompiledModelNode(**node) @@ -68,16 +68,14 @@ def parse_manifest( if node["resource_type"] == NodeType.Model } seed_nodes = { - node_name: CompiledSeedNode(**node) - if "compiled" in node.keys() - else ParsedSeedNode(**node) + node_name: CompiledSeedNode(**node) if "compiled" in node.keys() else ParsedSeedNode(**node) for node_name, node in manifest["nodes"].items() if node["resource_type"] == NodeType.Seed } test_nodes = { - node_name: CompiledSchemaTestNode(**node) + node_name: CompiledGenericTestNode(**node) if "compiled" in node.keys() - else ParsedSchemaTestNode(**node) + else ParsedGenericTestNode(**node) for node_name, node in manifest["nodes"].items() if node["resource_type"] == NodeType.Test } @@ -109,21 +107,17 @@ def parse_run_results(run_results: dict[str, Any]) -> list[RunResultOutput]: ------ https://docs.getdbt.com/reference/artifacts/run-results-json """ - dbt_v3_schema = "https://schemas.getdbt.com/dbt/run-results/v3.json" - if run_results["metadata"]["dbt_schema_version"] != dbt_v3_schema: - raise NotImplementedError( - "Dbt run results parsing only supported for V3 schema." - ) - - parsed_run_results = [ - RunResultOutput(**result) for result in run_results["results"] - ] + dbt_v4_schema = "https://schemas.getdbt.com/dbt/run-results/v4.json" + if run_results["metadata"]["dbt_schema_version"] != dbt_v4_schema: + raise NotImplementedError("Dbt run results parsing only supported for v4 schema.") + + parsed_run_results = [RunResultOutput(**result) for result in run_results["results"]] return parsed_run_results def create_nodes_to_tests_mapping( model_nodes: dict[str, ParsedModelNode], - test_nodes: dict[str, CompiledSchemaTestNode], + test_nodes: dict[str, CompiledGenericTestNode], run_results: list[RunResultOutput], ) -> dict[str, set[ParsedModelNode]]: """ @@ -133,7 +127,7 @@ def create_nodes_to_tests_mapping( ---------- model_nodes : Dict[str: ParsedModelNode] The parsed model nodes. - test_nodes : Dict[str: CompiledSchemaTestNode] + test_nodes : Dict[str: CompiledGenericTestNode] The compiled schema test nodes. run_results : List[RunResultOutput] The run results. @@ -156,10 +150,7 @@ def create_nodes_to_tests_mapping( model_unique_ids = reduce( or_, - [ - model_unique_ids - for model_unique_ids in models_that_tests_depends_on.values() - ], + [model_unique_ids for model_unique_ids in models_that_tests_depends_on.values()], ) models_with_tests = defaultdict(set) diff --git a/tests/dbt/data/manifest.json b/tests/dbt/data/manifest.json index e78bff8d1..542d429d9 100644 --- a/tests/dbt/data/manifest.json +++ b/tests/dbt/data/manifest.json @@ -1,6 +1,6 @@ { "metadata": { - "dbt_schema_version": "https://schemas.getdbt.com/dbt/manifest/v3.json" + "dbt_schema_version": "https://schemas.getdbt.com/dbt/manifest/v4.json" }, "nodes": { "seed.soda.soda_warehouses": { diff --git a/tests/dbt/data/run_results.json b/tests/dbt/data/run_results.json index 31e7c0830..2e2c1cce5 100644 --- a/tests/dbt/data/run_results.json +++ b/tests/dbt/data/run_results.json @@ -1,6 +1,6 @@ { "metadata": { - "dbt_schema_version": "https://schemas.getdbt.com/dbt/run-results/v3.json" + "dbt_schema_version": "https://schemas.getdbt.com/dbt/run-results/v4.json" }, "results": [ {