Skip to content

Commit

Permalink
Changed the columns models.
Browse files Browse the repository at this point in the history
'dbt_columns' now only contains columns that are documented in the dbt project.
'information_schema_columns' contains all the columns from all the schemas that your dbt project addresses.
  • Loading branch information
elongl committed Oct 9, 2023
1 parent ea690c4 commit c78d847
Show file tree
Hide file tree
Showing 12 changed files with 122 additions and 39 deletions.
1 change: 1 addition & 0 deletions integration_tests/dbt_project/models/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ models:
description: This is a unique identifier for a customer

- name: name
data_type: string
description: Customer's name.

- name: orders
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{% macro get_information_schema_columns_materialized() %}
{% if var("sync", false) %}
{% do return("table") %}
{% endif %}
{% do return(adapter.dispatch("get_information_schema_columns_materialized", "elementary")()) %}
{% endmacro %}


{% macro default__get_information_schema_columns_materialized() %}
{% do return("view") %}
{% endmacro %}


{% macro bigquery__get_information_schema_columns_materialized() %}
{% do return("incremental") %}
{% endmacro %}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{% macro get_dbt_columns_query(is_model_build_context=true) %}
{% macro get_information_schema_columns_query(is_model_build_context=true) %}
{% if is_model_build_context %}
{% set get_relation = ref %}
{% else %}
Expand Down Expand Up @@ -51,7 +51,7 @@
{{ elementary.get_columns_in_project() }}
),

dbt_columns as (
information_schema_columns as (
select col_info.*
from tables_information tbl_info
join columns_information col_info
Expand All @@ -62,5 +62,5 @@
)

select *
from dbt_columns
from information_schema_columns
{% endmacro %}
7 changes: 0 additions & 7 deletions macros/edr/dbt_artifacts/dbt_columns/upload_dbt_columns.sql

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{% macro upload_information_schema_columns() %}
{% set relation = elementary.get_elementary_relation("information_schema_columns") %}
{% if execute and relation %}
{% set information_schema_columns_query = elementary.get_information_schema_columns_query(is_model_build_context=false) %}
{% do elementary.create_or_replace(false, relation, information_schema_columns_query) %}
{% endif %}
{% endmacro %}
8 changes: 5 additions & 3 deletions macros/edr/dbt_artifacts/upload_artifacts_to_table.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
{% set flatten_artifact_dicts = [] %}
{% do elementary.file_log("[{}] Flattening the artifacts.".format(table_relation.identifier)) %}
{% for artifact in artifacts %}
{% set flatten_artifact_dict = flatten_artifact_callback(artifact) %}
{% if flatten_artifact_dict is not none %}
{% do flatten_artifact_dicts.append(flatten_artifact_dict) %}
{% set flatten_artifact = flatten_artifact_callback(artifact) %}
{% if flatten_artifact is mapping %}
{% do flatten_artifact_dicts.append(flatten_artifact) %}
{% elif flatten_artifact is iterable %}
{% do flatten_artifact_dicts.extend(flatten_artifact) %}
{% endif %}
{% endfor %}
{% do elementary.file_log("[{}] Flattened {} artifacts.".format(table_relation.identifier, flatten_artifact_dicts | length)) %}
Expand Down
5 changes: 3 additions & 2 deletions macros/edr/dbt_artifacts/upload_dbt_artifacts.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"dbt_metrics": elementary.upload_dbt_metrics,
"dbt_exposures": elementary.upload_dbt_exposures,
"dbt_seeds": elementary.upload_dbt_seeds,
"dbt_columns": elementary.upload_dbt_columns,
}
%}

Expand All @@ -26,8 +27,8 @@
{% do elementary.file_log('[{}] Artifacts already ran.'.format(artifacts_model)) %}
{% endif %}
{% endfor %}
{% if elementary.get_dbt_columns_materialized() != "view" and elementary.get_config_var("upload_dbt_columns") %}
{% do elementary.upload_dbt_columns() %} {# dbt_columns upload must come after other artifacts, as it is dependant on them #}
{% if elementary.get_information_schema_columns_materialized() != "view" and elementary.get_config_var("upload_information_schema_columns") %}
{% do elementary.upload_information_schema_columns() %} {# information_schema_columns upload must come after other artifacts, as it is dependant on them #}
{% endif %}
{% do elementary.file_log("Uploaded dbt artifacts.") %}
{% endif %}
Expand Down
67 changes: 67 additions & 0 deletions macros/edr/dbt_artifacts/upload_dbt_columns.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
{%- macro upload_dbt_columns(should_commit=false, metadata_hashes=none) -%}
{% set relation = elementary.get_elementary_relation('dbt_columns') %}
{% if execute and relation %}
{% set tables = graph.nodes.values() | list + graph.sources.values() | list %}
{% do elementary.upload_artifacts_to_table(relation, tables, elementary.flatten_table_columns, should_commit=should_commit, metadata_hashes=metadata_hashes) %}
{%- endif -%}
{{- return('') -}}
{%- endmacro -%}

{% macro get_dbt_columns_empty_table_query() %}
{% set columns = [
('unique_id', 'string'),
('parent_unique_id', 'string'),
('name', 'string'),
('data_type', 'string'),
('tags', 'long_string'),
('meta', 'long_string'),
('database_name', 'string'),
('schema_name', 'string'),
('table_name', 'string'),
('description', 'long_string'),
('generated_at', 'string'),
('metadata_hash', 'string'),
] %}
{% set dbt_columns_empty_table_query = elementary.empty_table(columns) %}
{{ return(dbt_columns_empty_table_query) }}
{% endmacro %}

{% macro flatten_table_columns(table_node) %}
{% set column_nodes = table_node.get("columns") %}
{% if not column_nodes %}
{% do return(none) %}
{% endif %}

{% set flattened_columns = [] %}
{% for column_node in column_nodes.values() %}
{% do flattened_columns.append(elementary.flatten_column(table_node, column_node)) %}
{% endfor %}
{% do return(flattened_columns) %}
{% endmacro %}

{% macro flatten_column(table_node, column_node) %}
{% set config_dict = elementary.safe_get_with_default(column_node, 'config', {}) %}
{% set config_meta_dict = elementary.safe_get_with_default(config_dict, 'meta', {}) %}
{% set meta_dict = elementary.safe_get_with_default(column_node, 'meta', {}) %}
{% do meta_dict.update(config_meta_dict) %}
{% set config_tags = elementary.safe_get_with_default(config_dict, 'tags', []) %}
{% set global_tags = elementary.safe_get_with_default(column_node, 'tags', []) %}
{% set meta_tags = elementary.safe_get_with_default(meta_dict, 'tags', []) %}
{% set tags = elementary.union_lists(config_tags, global_tags) %}
{% set tags = elementary.union_lists(tags, meta_tags) %}
{% set flatten_column_metadata_dict = {
'parent_unique_id': table_node.get('unique_id'),
'unique_id': "column.{}.{}".format(table_node.get('unique_id'), column_node.get('name')),
'name': column_node.get('name'),
'data_type': column_node.get('data_type'),
'tags': elementary.filter_none_and_sort(tags),
'meta': meta_dict,
'description': column_node.get('description'),
'database_name': table_node.get('database'),
'schema_name': table_node.get('schema'),
'table_name': table_node.get('alias'),
'generated_at': elementary.datetime_now_utc_as_string(),
} %}
{% do flatten_column_metadata_dict.update({"metadata_hash": elementary.get_artifact_metadata_hash(flatten_column_metadata_dict)}) %}
{% do return(flatten_column_metadata_dict) %}
{% endmacro %}
2 changes: 1 addition & 1 deletion macros/edr/system/system_utils/get_config_var.sql
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
'calculate_failed_count': true,
'tests_use_temp_tables': false,
'collect_metrics': true,
'upload_dbt_columns': false,
'upload_information_schema_columns': false,
'clean_elementary_temp_tables': true,
'force_metrics_backfill': false,
'ignore_small_changes': {
Expand Down
16 changes: 9 additions & 7 deletions models/edr/dbt_artifacts/dbt_columns.sql
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
{{
config(
materialized=elementary.get_dbt_columns_materialized(),
)
materialized='incremental',
transient=False,
post_hook='{{ elementary.upload_dbt_columns() }}',
unique_key='unique_id',
on_schema_change='sync_all_columns',
full_refresh=elementary.get_config_var('elementary_full_refresh')
)
}}
{% if elementary.get_dbt_columns_materialized() == "view" %}
{{ elementary.get_dbt_columns_query() }}
{% else %}
{{ elementary.get_empty_columns_from_information_schema_table() }}
{% endif %}

{{ elementary.get_dbt_columns_empty_table_query() }}
10 changes: 10 additions & 0 deletions models/edr/dbt_artifacts/information_schema_columns.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{{
config(
materialized=elementary.get_information_schema_columns_materialized(),
)
}}
{% if elementary.get_information_schema_columns_materialized() == "view" %}
{{ elementary.get_information_schema_columns_query() }}
{% else %}
{{ elementary.get_empty_columns_from_information_schema_table() }}
{% endif %}

0 comments on commit c78d847

Please sign in to comment.