Skip to content

Commit

Permalink
Merge pull request elementary-data#587 from elementary-data/ele-1860-…
Browse files Browse the repository at this point in the history
…get-columns-descriptions-to-the-oss-and-to-the-cloud

Changed the columns models.
  • Loading branch information
ellakz authored Oct 23, 2023
2 parents 9aab795 + 948237a commit 017066c
Show file tree
Hide file tree
Showing 14 changed files with 144 additions and 41 deletions.
2 changes: 1 addition & 1 deletion dbt_project.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: "elementary"
version: "0.11.2"
version: "0.12.0"

require-dbt-version: [">=1.0.0", "<2.0.0"]

Expand Down
1 change: 1 addition & 0 deletions integration_tests/dbt_project/models/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ models:
description: This is a unique identifier for a customer

- name: name
data_type: string
description: Customer's name.

- name: orders
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{% macro get_information_schema_columns_materialized() %}
{% if var("sync", false) %}
{% do return("table") %}
{% endif %}
{% do return("view") %}
{% endmacro %}

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{% macro get_dbt_columns_query(is_model_build_context=true) %}
{% macro get_information_schema_columns_query(is_model_build_context=true) %}
{% if is_model_build_context %}
{% set get_relation = ref %}
{% else %}
Expand Down Expand Up @@ -51,7 +51,7 @@
{{ elementary.get_columns_in_project() }}
),

dbt_columns as (
information_schema_columns as (
select col_info.*
from tables_information tbl_info
join columns_information col_info
Expand All @@ -62,5 +62,5 @@
)

select *
from dbt_columns
from information_schema_columns
{% endmacro %}
7 changes: 0 additions & 7 deletions macros/edr/dbt_artifacts/dbt_columns/upload_dbt_columns.sql

This file was deleted.

8 changes: 5 additions & 3 deletions macros/edr/dbt_artifacts/upload_artifacts_to_table.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
{% set flatten_artifact_dicts = [] %}
{% do elementary.file_log("[{}] Flattening the artifacts.".format(table_relation.identifier)) %}
{% for artifact in artifacts %}
{% set flatten_artifact_dict = flatten_artifact_callback(artifact) %}
{% if flatten_artifact_dict is not none %}
{% do flatten_artifact_dicts.append(flatten_artifact_dict) %}
{% set flatten_artifact = flatten_artifact_callback(artifact) %}
{% if flatten_artifact is mapping %}
{% do flatten_artifact_dicts.append(flatten_artifact) %}
{% elif flatten_artifact is iterable %}
{% do flatten_artifact_dicts.extend(flatten_artifact) %}
{% endif %}
{% endfor %}
{% do elementary.file_log("[{}] Flattened {} artifacts.".format(table_relation.identifier, flatten_artifact_dicts | length)) %}
Expand Down
4 changes: 1 addition & 3 deletions macros/edr/dbt_artifacts/upload_dbt_artifacts.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"dbt_metrics": elementary.upload_dbt_metrics,
"dbt_exposures": elementary.upload_dbt_exposures,
"dbt_seeds": elementary.upload_dbt_seeds,
"dbt_columns": elementary.upload_dbt_columns,
}
%}

Expand All @@ -26,9 +27,6 @@
{% do elementary.file_log('[{}] Artifacts already ran.'.format(artifacts_model)) %}
{% endif %}
{% endfor %}
{% if elementary.get_dbt_columns_materialized() != "view" and elementary.get_config_var("upload_dbt_columns") %}
{% do elementary.upload_dbt_columns() %} {# dbt_columns upload must come after other artifacts, as it is dependant on them #}
{% endif %}
{% do elementary.file_log("Uploaded dbt artifacts.") %}
{% endif %}
{% endmacro %}
Expand Down
69 changes: 69 additions & 0 deletions macros/edr/dbt_artifacts/upload_dbt_columns.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
{%- macro upload_dbt_columns(should_commit=false, metadata_hashes=none) -%}
{% set relation = elementary.get_elementary_relation('dbt_columns') %}
{% if execute and relation %}
{% set tables = graph.nodes.values() | list + graph.sources.values() | list %}
{% do elementary.upload_artifacts_to_table(relation, tables, elementary.flatten_table_columns, should_commit=should_commit, metadata_hashes=metadata_hashes) %}
{%- endif -%}
{{- return('') -}}
{%- endmacro -%}

{% macro get_dbt_columns_empty_table_query() %}
{% set columns = [
('unique_id', 'string'),
('parent_unique_id', 'string'),
('name', 'string'),
('data_type', 'string'),
('tags', 'long_string'),
('meta', 'long_string'),
('database_name', 'string'),
('schema_name', 'string'),
('table_name', 'string'),
('description', 'long_string'),
('resource_type', 'string'),
('generated_at', 'string'),
('metadata_hash', 'string'),
] %}
{% set dbt_columns_empty_table_query = elementary.empty_table(columns) %}
{{ return(dbt_columns_empty_table_query) }}
{% endmacro %}

{% macro flatten_table_columns(table_node) %}
{% set column_nodes = table_node.get("columns") %}
{% if not column_nodes %}
{% do return(none) %}
{% endif %}

{% set flattened_columns = [] %}
{% for column_node in column_nodes.values() %}
{% do flattened_columns.append(elementary.flatten_column(table_node, column_node)) %}
{% endfor %}
{% do return(flattened_columns) %}
{% endmacro %}

{% macro flatten_column(table_node, column_node) %}
{% set config_dict = elementary.safe_get_with_default(column_node, 'config', {}) %}
{% set config_meta_dict = elementary.safe_get_with_default(config_dict, 'meta', {}) %}
{% set meta_dict = elementary.safe_get_with_default(column_node, 'meta', {}) %}
{% do meta_dict.update(config_meta_dict) %}
{% set config_tags = elementary.safe_get_with_default(config_dict, 'tags', []) %}
{% set global_tags = elementary.safe_get_with_default(column_node, 'tags', []) %}
{% set meta_tags = elementary.safe_get_with_default(meta_dict, 'tags', []) %}
{% set tags = elementary.union_lists(config_tags, global_tags) %}
{% set tags = elementary.union_lists(tags, meta_tags) %}
{% set flatten_column_metadata_dict = {
'parent_unique_id': table_node.get('unique_id'),
'unique_id': "column.{}.{}".format(table_node.get('unique_id'), column_node.get('name')),
'name': column_node.get('name'),
'data_type': column_node.get('data_type'),
'tags': elementary.filter_none_and_sort(tags),
'meta': meta_dict,
'description': column_node.get('description') or none,
'database_name': table_node.get('database'),
'schema_name': table_node.get('schema'),
'table_name': table_node.get('alias'),
'resource_type': table_node.get('resource_type'),
'generated_at': elementary.datetime_now_utc_as_string(),
} %}
{% do flatten_column_metadata_dict.update({"metadata_hash": elementary.get_artifact_metadata_hash(flatten_column_metadata_dict)}) %}
{% do return(flatten_column_metadata_dict) %}
{% endmacro %}
1 change: 0 additions & 1 deletion macros/edr/system/system_utils/get_config_var.sql
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@
'calculate_failed_count': true,
'tests_use_temp_tables': false,
'collect_metrics': true,
'upload_dbt_columns': false,
'clean_elementary_temp_tables': true,
'force_metrics_backfill': false,
'ignore_small_changes': {
Expand Down
1 change: 1 addition & 0 deletions models/edr/dbt_artifacts/dbt_artifacts_hashes.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"dbt_metrics",
"dbt_exposures",
"dbt_seeds",
"dbt_columns",
] %}

{% for artifact_model in artifact_models %}
Expand Down
16 changes: 9 additions & 7 deletions models/edr/dbt_artifacts/dbt_columns.sql
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
{{
config(
materialized=elementary.get_dbt_columns_materialized(),
)
materialized='incremental',
transient=False,
post_hook='{{ elementary.upload_dbt_columns() }}',
unique_key='unique_id',
on_schema_change='sync_all_columns',
full_refresh=elementary.get_config_var('elementary_full_refresh')
)
}}
{% if elementary.get_dbt_columns_materialized() == "view" %}
{{ elementary.get_dbt_columns_query() }}
{% else %}
{{ elementary.get_empty_columns_from_information_schema_table() }}
{% endif %}

{{ elementary.get_dbt_columns_empty_table_query() }}
37 changes: 37 additions & 0 deletions models/edr/dbt_artifacts/enriched_columns.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{{
config(
materialized = 'view',
bind=False
)
}}


with information_schema_columns as (
select
lower(database_name) as database_name,
lower(schema_name) as schema_name,
lower(table_name) as table_name,
lower(column_name) as name,
data_type
from {{ ref("information_schema_columns") }}
),

dbt_columns as (
select
lower(database_name) as database_name,
lower(schema_name) as schema_name,
lower(table_name) as table_name,
lower(name) as name,
description
from {{ ref("dbt_columns") }}
)

select
database_name,
schema_name,
table_name,
name,
data_type,
description
from information_schema_columns
left join dbt_columns using (database_name, schema_name, table_name, name)
10 changes: 10 additions & 0 deletions models/edr/dbt_artifacts/information_schema_columns.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{{
config(
materialized=elementary.get_information_schema_columns_materialized(),
)
}}
{% if elementary.get_information_schema_columns_materialized() == "view" %}
{{ elementary.get_information_schema_columns_query() }}
{% else %}
{{ elementary.get_empty_columns_from_information_schema_table() }}
{% endif %}

0 comments on commit 017066c

Please sign in to comment.