Skip to content

Commit

Permalink
Added config tables
Browse files Browse the repository at this point in the history
  • Loading branch information
Maayan-s authored and oravi committed Mar 3, 2022
1 parent 0d17787 commit 07ff04c
Show file tree
Hide file tree
Showing 24 changed files with 254 additions and 191 deletions.
2 changes: 2 additions & 0 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ models:

vars:
monitoring_configuration: MONITORING_CONFIGURATION
tables_config: EDC_METABASE_EXPRIMENTS.SCHEMA_CHANGES.TABLES_CONFIG
columns_config: EDC_METABASE_EXPRIMENTS.SCHEMA_CHANGES.COLUMNS_CONFIG
table_to_monitor: snowflake.account_usage.query_history
timestamp_field: end_time
column_to_monitor: total_elapsed_time
Expand Down
55 changes: 43 additions & 12 deletions macros/data_monitoring/metrics collection/collect_metrics.sql
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,19 @@
{% endmacro %}


{% macro table_timeframe_metrics_query(monitored_table, timestamp_field, timeframe_start, timeframe_end, table_monitors, column_config) %}
{% macro table_one_timeframe_metrics_query(monitored_table, timestamp_field, timeframe_start, timeframe_end, timeframe_duration, table_monitors, column_config) %}

(
with timeframe_data as (

select *
from {{ monitored_table }}
where
{% if timestamp_field|length %}
{{ timestamp_field }} > {{ timeframe_start }} and {{ timestamp_field }} < {{ timeframe_end }}
{% else %}
true
{% endif %}

),

Expand All @@ -71,12 +75,14 @@
metrics_final as (

select
'{{ table_to_monitor }}' as table_name,
'{{ monitored_table }}' as table_name,
column_name,
metric_name,
metric_value,
{{ timeframe_start }} as timeframe_start,
{{ timeframe_end }} as timeframe_end
{{ timeframe_end }} as timeframe_end,
{{ timeframe_duration }} as timeframe_duration,
{{ run_start_column() }} as run_started_at
from
union_metrics

Expand All @@ -89,19 +95,44 @@



{% macro table_metrics_query(table_to_monitor, timestamp_field, days_back, timeframe_duration, table_monitors, column_config) %}
{% macro table_metrics_query(table_to_monitor, timestamp_field, days_back, timeframe_duration, table_monitors, column_config, should_backfill) %}

{%- set max_timeframe_end = "'" ~ max_timeframe_end(timeframe_duration) ~ "'" -%}

{%- for i in range(days_back) -%}
{%- set time_diff_end = -(i*timeframe_duration) -%}
{%- set time_diff_start = -((i+1)*timeframe_duration) -%}
{%- set timeframe_end = dbt_utils.dateadd('hour', time_diff_end , max_timeframe_end) -%}
{%- set timeframe_start = dbt_utils.dateadd('hour', time_diff_start , max_timeframe_end) -%}
{% if should_backfill and timestamp_field|length %}

{{- metrics_calc_query(table_to_monitor, timestamp_field, timeframe_start, timeframe_end) -}}
{%- if not loop.last %} union all {%- endif %}
{%- set timeframes = (days_back*24/timeframe_duration)|int %}
{%- for i in range(timeframes) -%}
{%- set time_diff_end = -(i*timeframe_duration) -%}
{%- set time_diff_start = -((i+1)*timeframe_duration) -%}
{%- set timeframe_end = dbt_utils.dateadd('hour', time_diff_end , max_timeframe_end) -%}
{%- set timeframe_start = dbt_utils.dateadd('hour', time_diff_start , max_timeframe_end) -%}

{%- endfor -%}
{{- table_one_timeframe_metrics_query(table_to_monitor, timestamp_field, timeframe_start, timeframe_end, timeframe_duration, table_monitors, column_config) -}}

{%- if not loop.last %} union all {%- endif %}

{%- endfor -%}

{% elif should_backfill is sameas false and timestamp_field|length %}

{%- set hours_back = timeframe_to_query(days_back) %}
{%- set timeframes = (hours_back/timeframe_duration)|int %}
{%- for i in range(timeframes) -%}
{%- set time_diff_end = -(i*timeframe_duration) -%}
{%- set time_diff_start = -((i+1)*timeframe_duration) -%}
{%- set timeframe_end = dbt_utils.dateadd('hour', time_diff_end , max_timeframe_end) -%}
{%- set timeframe_start = dbt_utils.dateadd('hour', time_diff_start , max_timeframe_end) -%}

{{- table_one_timeframe_metrics_query(table_to_monitor, timestamp_field, timeframe_start, timeframe_end, timeframe_duration, table_monitors, column_config) -}}
{%- if not loop.last %} union all {%- endif %}

{%- endfor -%}

{% else %}

{{- table_one_timeframe_metrics_query(table_to_monitor, timestamp_field, null, null, null, table_monitors, column_config) -}}

{% endif %}

{% endmacro %}
32 changes: 12 additions & 20 deletions macros/data_monitoring/metrics collection/timeframes.sql
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,7 @@
{% endmacro %}


-- for each metric+table or metric+column, i need to know when was the last time it was collected
-- so I could support configuration changes and gaps in runs.
-- do you have an idea for that?
-- could you pass me a 'new' flag for metric added to the config? maybe I can decide on what is new with the validated_config?

-- idea: let's re-run on table if something was added to it's config (easier than setting timeframe for each metric)

{% macro timeframe_for_query(days_back) %}
{% macro timeframe_to_query(days_back) %}

{%- set timeframe_end = "'"~ run_started_at.strftime("%Y-%m-%d %H:%M:%S") ~ "'" %}
{%- set days_subtract = '-' ~ days_back %}
Expand All @@ -41,21 +34,20 @@
with start_times as (
select run_started_at as last_run, {{ dbt_utils.dateadd('day', days_subtract, timeframe_end ) }} as start_limit
from {{ ref('elementary_runs')}}
order by run_started_at_timestamp desc
limit 2 offset 1
)
select case when start_limit > last_run then start_limit
order by run_started_at desc
limit 1 offset 1
),
start_time_limit as (
select case when start_limit > last_run then start_limit
else last_run end as start_time
from start_times
from start_times
)
select {{ dbt_utils.datediff('start_time', timeframe_end, 'hour') }} as timeframe_to_query
from start_time_limit
{%- endset -%}

{%- set timeframe_start_limit = result_column_to_list(query_start_time)[0] %}
{%- set timeframe_to_query = result_column_to_list(query_start_time)[0] %}

{% if timeframe_start_limit|length %}
{%- set timeframe_start = timeframe_start_limit %}
{% else %}
-- figure out this else
{%- set timeframe_start = 0%}
{% endif %}
{{ return(timeframe_to_query) }}

{% endmacro %}
12 changes: 12 additions & 0 deletions macros/system/configuration/delete_if_incremental.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{% macro delete_if_incremental(where_clause) %}

{% set query%}
delete from {{ this }}
where {{ where_clause }}
{% endset %}

{% if is_incremental() %}
{% do run_query(query) %}
{% endif %}

{% endmacro %}
3 changes: 1 addition & 2 deletions macros/system/configuration/get_configured_schemas.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
with monitoring_configuration as (

select {{ full_schema_name() }} as full_schema_name
from {{ get_configuration_path() }}
where alert_on_schema_changes = true
from {{ var('tables_config') }}
group by 1

)
Expand Down
5 changes: 3 additions & 2 deletions macros/utils/full_names.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{% macro full_table_name() -%}
upper(concat(database_name, '.', schema_name, '.', table_name))
{% macro full_table_name(alias) -%}
{% if alias is defined %}{%- set alias_dot = alias ~ '.' %}{% endif %}
upper(concat({{ alias_dot }}database_name, '.', {{ alias_dot }}schema_name, '.', {{ alias_dot }}table_name))
{%- endmacro %}

{% macro full_schema_name() -%}
Expand Down
9 changes: 9 additions & 0 deletions macros/utils/get_row_count.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{% macro get_row_count(full_table_name) %}
{% set query_row_count %}
select count(*) from {{ full_table_name }}
{% endset %}
{% if execute %}
{% set result = run_query(query_row_count).columns[0].values()[0] %}
{% endif %}
{{ return(result) }}
{% endmacro %}}
2 changes: 1 addition & 1 deletion models/data monitoring/table/recent_metrics.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@
from
{{ ref('table_metrics') }}
where
timeframe_end = {{ max_timeframe_end() }}
timeframe_end = '{{ max_timeframe_end(24) }}'
2 changes: 1 addition & 1 deletion models/data monitoring/table/table_metrics.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
{% set timestamp_field = var('timestamp_field') %}
{% set days_back = var('days_back') %}

{{ metrics_calc(table_to_monitor, timestamp_field, days_back, 24) }}
{{ table_metrics_query(table_to_monitor, timestamp_field, days_back, 24, ['row_count'], [{'column_name': 'total_elapsed_time', 'monitors': ['min','max']}], true) }}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
}}

-- TODO_CONFIG update get schemas
{% set configured_schemas = ['ELEMENTARY_DB.STRESSTEST'] %}
{% set configured_schemas = get_configured_schemas() %}

with filtered_information_schema_columns as (

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
}}

-- TODO_CONFIG: change configured_schemas
{% set configured_schemas = ['ELEMENTARY_DB.STRESSTEST', 'ELEMENTARY_DB.TEST'] %}
{% set configured_schemas = get_configured_schemas() %}

with filtered_information_schema_tables as (

Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

with columns_snapshot as (

select * from {{ ref('columns_snapshot') }}
select * from {{ ref('schema_columns_snapshot') }}
),

this_run_time as (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

with tables_snapshot as (

select * from {{ ref('tables_snapshot') }}
select * from {{ ref('schema_tables_snapshot') }}
),

this_run_time as (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

with columns_snapshot as (

select * from {{ ref('columns_snapshot') }}
select * from {{ ref('schema_columns_snapshot') }}
),

previous_run_time as (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

with tables_snapshot as (

select * from {{ ref('tables_snapshot') }}
select * from {{ ref('schema_tables_snapshot') }}
),

previous_run_time as (
Expand Down
81 changes: 81 additions & 0 deletions models/system/configuration/edr_columns_config.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
{{
config(
materialized = 'incremental',
unique_key = 'config_id'
)
}}


with columns_config as (

select * from {{ var('columns_config') }}

),

information_schema_columns as (

select * from {{ ref('information_schema_columns') }}

),

config_existing_columns as (

select
{{ dbt_utils.surrogate_key([
'config.database_name', 'config.schema_name', 'config.table_name', 'config.column_name', 'config.monitors',
]) }} as config_id,
{{ full_table_name('config') }} as full_table_name,
upper(config.database_name) as database_name,
upper(config.schema_name) as schema_name,
upper(config.table_name) as table_name,
upper(config.column_name) as column_name,
info_schema.data_type,
monitored,
monitors,
{{ run_start_column() }} as config_loaded_at
from
information_schema_columns as info_schema join columns_config as config
on (upper(info_schema.database_name) = upper(config.database_name)
and upper(info_schema.schema_name) = upper(config.schema_name)
and upper(info_schema.table_name) = upper(config.table_name)
and upper(info_schema.column_name) = upper(config.column_name))
),

final as (

select
config_id,
full_table_name,
database_name,
schema_name,
table_name,
column_name,
data_type,
monitored,
monitors,

{% if is_incremental() %}
{%- set active_configs_query %}
select config_id from {{ this }}
where config_loaded_at = (select max(config_loaded_at) from {{ this }})
and monitored = true
{% endset %}
{%- set active_configs = result_column_to_list(active_configs_query) %}

case when
config_id not in {{ strings_list_to_tuple(active_configs) }}
then true
else false end
as should_backfill,
{% else %}
true as should_backfill,
{% endif %}

max(config_loaded_at) as config_loaded_at

from config_existing_columns
group by 1,2,3,4,5,6,7,8,9,10
)

select *
from final
Loading

0 comments on commit 07ff04c

Please sign in to comment.