Skip to content

Commit

Permalink
feat: add dbt as source, fix test names, use dbt v1 schemas (sodadata#598)
Browse files Browse the repository at this point in the history

* fix test name display, add source, bump dbt v1

Co-authored-by: Vijay Kiran <[email protected]>
  • Loading branch information
bastienboutonnet and vijaykiran authored Dec 14, 2021
1 parent 45bb3fa commit ddb23ed
Show file tree
Hide file tree
Showing 10 changed files with 119 additions and 107 deletions.
46 changes: 19 additions & 27 deletions core/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,27 @@
from setuptools import setup, find_namespace_packages

if sys.version_info < (3, 7):
print('Error: Soda SQL requires at least Python 3.7')
print('Error: Please upgrade your Python version to 3.7 or later')
print("Error: Soda SQL requires at least Python 3.7")
print("Error: Please upgrade your Python version to 3.7 or later")
sys.exit(1)

package_name = "soda-sql-core"
# Managed by tbump - don't change manually
# And we can't have nice semver (<major>.<minor>.<patch>-<pre-release>-<build>)
# like "-alpha-1" as long as this is open >> https://github.com/pypa/setuptools/issues/2181
package_version = '2.1.0'
package_version = "2.1.0"
description = "Soda SQL Core"

long_description = (pathlib.Path(__file__).parent / "README.md").read_text()

requires = [
'Jinja2>=2.11.3, <3.0',
'click>=7.1.2, <8.0',
'cryptography==3.3.2',
'pyyaml>=5.4.1, <6.0',
'requests>=2.23.0, <3.0',
'Deprecated>=1.2.13, <1.3',
'opentelemetry-exporter-otlp-proto-http>=1.6.2,<1.7',
"Jinja2>=2.11.3, <3.0",
"click>=8.0, <9.0",
"cryptography==3.3.2",
"pyyaml>=5.4.1, <6.0",
"requests>=2.23.0, <3.0",
"Deprecated>=1.2.13, <1.3",
"opentelemetry-exporter-otlp-proto-http>=1.6.2,<1.7",
]
# TODO Fix the params
# TODO Add a warning that installing core doesn't give any warehouse functionality
Expand All @@ -38,24 +38,16 @@
long_description_content_type="text/markdown",
packages=find_namespace_packages(include=["sodasql*"]),
install_requires=requires,
entry_points={
"console_scripts":
[
"soda=sodasql.cli.cli:main"
]
},
entry_points={"console_scripts": ["soda=sodasql.cli.cli:main"]},
classifiers=[
'Development Status :: 4 - Beta',

'License :: OSI Approved :: Apache Software License',

'Operating System :: Microsoft :: Windows',
'Operating System :: MacOS :: MacOS X',
'Operating System :: POSIX :: Linux',

'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
"Development Status :: 4 - Beta",
"License :: OSI Approved :: Apache Software License",
"Operating System :: Microsoft :: Windows",
"Operating System :: MacOS :: MacOS X",
"Operating System :: POSIX :: Linux",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
],
python_requires=">=3.7",
)
16 changes: 6 additions & 10 deletions core/sodasql/cli/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
@dataclasses.dataclass(frozen=True)
class Table:
"""Represents a table."""

name: str
schema: str
database: str
Expand Down Expand Up @@ -54,10 +55,11 @@ def map_dbt_run_result_to_test_result(
dbt_tests_with_soda_test = {
test_node.unique_id: Test(
id=test_node.unique_id,
title=f"Number of failures for {test_node.unique_id}",
title=f"{test_node.name}",
expression=test_node.raw_sql,
metrics=None,
column=test_node.column_name,
source="dbt",
)
for test_node in test_nodes.values()
}
Expand Down Expand Up @@ -107,9 +109,7 @@ def map_dbt_test_results_iterator(

model_nodes, seed_nodes, test_nodes = soda_dbt.parse_manifest(manifest)
parsed_run_results = soda_dbt.parse_run_results(run_results)
tests_with_test_result = map_dbt_run_result_to_test_result(
test_nodes, parsed_run_results
)
tests_with_test_result = map_dbt_run_result_to_test_result(test_nodes, parsed_run_results)
model_and_seed_nodes = {**model_nodes, **seed_nodes}
models_with_tests = soda_dbt.create_nodes_to_tests_mapping(
model_and_seed_nodes, test_nodes, parsed_run_results
Expand Down Expand Up @@ -151,9 +151,7 @@ def flush_test_results(
"""
for table, test_results in test_results_iterator:
test_results_jsons = [
test_result.to_dict()
for test_result in test_results
if not test_result.skipped
test_result.to_dict() for test_result in test_results if not test_result.skipped
]
if len(test_results_jsons) == 0:
continue
Expand Down Expand Up @@ -214,9 +212,7 @@ def ingest(
raise ValueError(f"Dbt manifest is required: {dbt_manifest}")
if dbt_run_results is None:
raise ValueError(f"Dbt run results is required: {dbt_run_results}")
test_results_iterator = map_dbt_test_results_iterator(
dbt_manifest, dbt_run_results
)
test_results_iterator = map_dbt_test_results_iterator(dbt_manifest, dbt_run_results)
else:
raise ValueError(f"Unknown tool: {tool}")

Expand Down
39 changes: 28 additions & 11 deletions core/sodasql/scan/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,32 +9,42 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from dataclasses import dataclass
from dataclasses import dataclass, field
from typing import Optional, List
from jinja2 import Template

logger = logging.getLogger(__name__)


@dataclass
class Test:
id: str
title: str
expression: str
metrics: List[str]
column: Optional[str]
expression_delimiters = ['<=', '>=', '<', '>', '==']
expression_delimiters = ["<=", ">=", "<", ">", "=="]
source: str = field(default="")

def evaluate(self, test_variables: dict, group_values: Optional[dict] = None,
template_variables: Optional[dict] = None):
def evaluate(
self,
test_variables: dict,
group_values: Optional[dict] = None,
template_variables: Optional[dict] = None,
):
from sodasql.scan.test_result import TestResult

try:
values = {key: test_variables[key] for key in test_variables if key in self.metrics}
if template_variables is not None:
self.expression = Template(self.expression).render(template_variables)
if 'None' not in self.expression and any(v is None for v in values.values()):
logger.warning(f'Skipping test {self.expression} since corresponding metrics are None ({values}) ')
return TestResult(test=self, skipped=True, passed=True, values=values, group_values=group_values)
if "None" not in self.expression and any(v is None for v in values.values()):
logger.warning(
f"Skipping test {self.expression} since corresponding metrics are None ({values}) "
)
return TestResult(
test=self, skipped=True, passed=True, values=values, group_values=group_values
)
else:
passed = bool(eval(self.expression, test_variables))

Expand All @@ -43,16 +53,23 @@ def evaluate(self, test_variables: dict, group_values: Optional[dict] = None,
if delimiter in self.expression:
left, _, _ = self.expression.partition(delimiter)
# Make sure the expression result is the first key in the resulting dict.
expression_result = {'expression_result': eval(left, test_variables)}
expression_result = {"expression_result": eval(left, test_variables)}
expression_result.update(values)
values = expression_result

break

test_result = TestResult(test=self, passed=passed, skipped=False, values=values,
group_values=group_values)
test_result = TestResult(
test=self,
passed=passed,
skipped=False,
values=values,
group_values=group_values,
)
logger.debug(str(test_result))
return test_result
except Exception as e:
logger.error(f'Test error for "{self.expression}": {e}')
return TestResult(test=self, passed=False, skipped=False, error=e, group_values=group_values)
return TestResult(
test=self, passed=False, skipped=False, error=e, group_values=group_values
)
46 changes: 26 additions & 20 deletions core/sodasql/scan/test_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,43 +28,49 @@ class TestResult:

def __str__(self):
if self.passed:
status_str = 'passed'
status_str = "passed"
elif self.skipped:
status_str = 'skipped'
status_str = "skipped"
else:
status_str = 'failed'
return (f'Test {self.test.title} {status_str}' +
(f" with group values {self.group_values}" if self.group_values else '') +
f' with measurements {json.dumps(JsonHelper.to_jsonnable(self.values))}')
status_str = "failed"
return (
f"Test {self.test.title} {status_str}"
+ (f" with group values {self.group_values}" if self.group_values else "")
+ f" with measurements {json.dumps(JsonHelper.to_jsonnable(self.values))}"
)

def to_dict(self) -> dict:
if not self.test or not self.test.expression:
return {
'error': 'Invalid test result'
}
return {"error": "Invalid test result"}

test_result_json = {
'id': self.test.id,
'title': self.test.title,
'description': self.test.title, # for backwards compatibility
'expression': self.test.expression
"id": self.test.id,
"title": self.test.title,
"description": self.test.title, # for backwards compatibility
"expression": self.test.expression,
}

if self.test.column:
test_result_json['columnName'] = self.test.column
test_result_json["columnName"] = self.test.column

if self.test.source:
test_result_json["source"] = self.test.source
else:
test_result_json["source"] = "soda-sql"

if self.error:
test_result_json['error'] = str(self.error)
test_result_json["error"] = str(self.error)

else:
test_result_json['passed'] = self.passed
test_result_json['skipped'] = self.skipped
test_result_json['values'] = JsonHelper.to_jsonnable(self.values)
test_result_json["passed"] = self.passed
test_result_json["skipped"] = self.skipped
test_result_json["values"] = JsonHelper.to_jsonnable(self.values)

if self.group_values:
test_result_json['groupValues'] = JsonHelper.to_jsonnable(self.group_values)
test_result_json["groupValues"] = JsonHelper.to_jsonnable(self.group_values)

return test_result_json

@deprecated(version='2.1.0b19', reason='This function is deprecated, please use to_dict')
@deprecated(version="2.1.0b19", reason="This function is deprecated, please use to_dict")
def to_json(self):
return self.to_dict()
3 changes: 2 additions & 1 deletion dev-requirements.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pip-tools==5.5.0
pip-tools==6.4.0
pytest~=6.0.1
python-dotenv~=0.13.0
tox~=3.24.0
Expand All @@ -9,6 +9,7 @@ requests==2.26.0
twine~=3.4.2
Faker==8.1.2
tbump==6.3.1
click>=8.0,<9.0

urllib3==1.26.5
pygments==2.10.0
Expand Down
15 changes: 12 additions & 3 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ charset-normalizer==2.0.4
# via requests
cli-ui==0.10.3
# via tbump
click==7.1.2
# via pip-tools
click==8.0.3
# via
# -r dev-requirements.in
# pip-tools
colorama==0.4.4
# via
# cli-ui
Expand Down Expand Up @@ -57,7 +59,9 @@ packaging==20.9
# bleach
# pytest
# tox
pip-tools==5.5.0
pep517==0.12.0
# via pip-tools
pip-tools==6.4.0
# via -r dev-requirements.in
pkginfo==1.7.0
# via twine
Expand Down Expand Up @@ -125,6 +129,8 @@ toml==0.10.2
# via
# pytest
# tox
tomli==2.0.0
# via pep517
tomlkit==0.5.11
# via tbump
tox-docker==2.0.0
Expand All @@ -149,8 +155,11 @@ webencodings==0.5.1
# via bleach
websocket-client==0.57.0
# via docker
wheel==0.37.0
# via pip-tools
zipp==3.5.0
# via importlib-metadata

# The following packages are considered to be unsafe in a requirements file:
# pip
# setuptools
12 changes: 6 additions & 6 deletions packages/dbt/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,24 @@
from setuptools import setup, find_namespace_packages

if sys.version_info < (3, 7):
print('Error: Soda SQL requires at least Python 3.7')
print('Error: Please upgrade your Python version to 3.7 or later')
print("Error: Soda SQL requires at least Python 3.7")
print("Error: Please upgrade your Python version to 3.7 or later")
sys.exit(1)

package_name = "soda-sql-dbt"
package_version = '2.1.0'
package_version = "2.1.0"
# TODO Add proper description
description = "Soda SQL DBT"

requires = [
f'soda-sql-core=={package_version}',
"dbt-core~=0.21.0",
f"soda-sql-core=={package_version}",
"dbt-core~=1.0.0",
]
# TODO Fix the params
# TODO Add a warning that installing core doesn't give any warehouse functionality
setup(
name=package_name,
version=package_version,
install_requires=requires,
packages=find_namespace_packages(include=["sodasql*"])
packages=find_namespace_packages(include=["sodasql*"]),
)
Loading

0 comments on commit ddb23ed

Please sign in to comment.