Skip to content

Commit b49a984

Browse files
committed
Add fenix derived tables and views for health scorecards
This adds the following Glean Health Scorecard related views and derived tables for fenix: - `telemetry_health_glean_errors` - `telemetry_health_ping_latency` - `telemetry_health_ping_volume_p80` - `telemetry_health_sequence_holes`
1 parent b3d2745 commit b49a984

File tree

20 files changed

+389
-0
lines changed

20 files changed

+389
-0
lines changed
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
friendly_name: Telemetry Health Glean Errors
2+
description: |-
3+
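Counts, per channel and day, the number of Glean metrics whose recording errors were reported by more than 1% of clients (measured on the sample_id = 0 client sample, excluding metric names starting with "glean" or "fog").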
4+
owners:
5+
6+
labels:
7+
owner: tlong
8+
require_column_descriptions: false
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
-- User-facing view: exposes every column of the versioned derived table
-- telemetry_health_glean_errors_v1 unchanged.
CREATE OR REPLACE VIEW `moz-fx-data-shared-prod.fenix.telemetry_health_glean_errors` AS
SELECT *
FROM `moz-fx-data-shared-prod.fenix_derived.telemetry_health_glean_errors_v1`
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
friendly_name: Telemetry Health Ping Latency
2+
description: |-
3+
Reports latency percentiles (p95, median) for collection-to-submission, submission-to-ingestion, and collection-to-ingestion for telemetry pings per day.
4+
owners:
5+
6+
labels:
7+
owner: tlong
8+
require_column_descriptions: false
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
-- User-facing view: exposes every column of the versioned derived table
-- telemetry_health_ping_latency_v1 unchanged.
CREATE OR REPLACE VIEW `moz-fx-data-shared-prod.fenix.telemetry_health_ping_latency` AS
SELECT *
FROM `moz-fx-data-shared-prod.fenix_derived.telemetry_health_ping_latency_v1`
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
friendly_name: Telemetry Health Ping Volume 80th Percentile
2+
description: |-
3+
Calculates the 80th percentile of ping volume per client per day.
4+
owners:
5+
6+
labels:
7+
owner: tlong
8+
require_column_descriptions: false
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
-- User-facing view: exposes every column of the versioned derived table
-- telemetry_health_ping_volume_p80_v1 unchanged.
CREATE OR REPLACE VIEW `moz-fx-data-shared-prod.fenix.telemetry_health_ping_volume_p80` AS
SELECT *
FROM `moz-fx-data-shared-prod.fenix_derived.telemetry_health_ping_volume_p80_v1`
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
friendly_name: Telemetry Health Sequence Holes
2+
description: |-
3+
Counts the number of clients experiencing sequence holes in their Glean pings per day.
4+
owners:
5+
6+
labels:
7+
owner: tlong
8+
require_column_descriptions: false
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
-- User-facing view: exposes every column of the versioned derived table
-- telemetry_health_sequence_holes_v1 unchanged.
CREATE OR REPLACE VIEW `moz-fx-data-shared-prod.fenix.telemetry_health_sequence_holes` AS
SELECT *
FROM `moz-fx-data-shared-prod.fenix_derived.telemetry_health_sequence_holes_v1`
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
friendly_name: Telemetry Health Glean Errors
2+
description: |-
3+
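Counts, per channel and day, the number of Glean metrics whose recording errors were reported by more than 1% of clients (measured on the sample_id = 0 client sample, excluding metric names starting with "glean" or "fog").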
4+
owners:
5+
6+
labels:
7+
incremental: true
8+
owner1: tlong
9+
scheduling:
10+
dag_name: bqetl_default
11+
bigquery:
12+
time_partitioning:
13+
type: day
14+
field: submission_date
15+
require_partition_filter: true
16+
expiration_days: 775
17+
range_partitioning: null
18+
require_column_descriptions: false
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
-- Query for telemetry health glean errors
--
-- Output: one row per (normalized_channel, submission_date) holding
-- num_metrics_over_1pct, the number of metric keys whose Glean recording errors
-- (invalid_value, invalid_label, invalid_state, invalid_overflow) were reported
-- by more than 1% of the sampled clients on that day.
--
-- The query is incremental: @submission_date selects exactly one calendar day,
-- matching the destination table's daily partition on submission_date. A
-- rolling "last 24 hours from now" window would straddle two partial days and
-- give different results depending on when the job runs or is backfilled.
WITH sample AS (
  SELECT
    client_info.client_id AS client_id,
    normalized_channel,
    DATE(submission_timestamp) AS submission_date,
    metrics.labeled_counter.glean_error_invalid_value AS ev,
    metrics.labeled_counter.glean_error_invalid_label AS el,
    metrics.labeled_counter.glean_error_invalid_state AS es,
    metrics.labeled_counter.glean_error_invalid_overflow AS eo
  FROM
    `moz-fx-data-shared-prod.fenix.metrics`
  WHERE
    -- Only the sample_id = 0 slice of clients is scanned.
    sample_id = 0
    AND DATE(submission_timestamp) = @submission_date
),
-- Denominator: distinct sampled clients per channel and day
app_day_totals AS (
  SELECT
    submission_date,
    normalized_channel,
    COUNT(DISTINCT client_id) AS total_clients
  FROM
    sample
  GROUP BY
    submission_date,
    normalized_channel
),
-- Numerator per metric key: distinct clients with any error for that key on that day
metric_clients_by_day AS (
  SELECT
    s.normalized_channel,
    s.submission_date,
    e.key AS metric_key,
    COUNT(DISTINCT s.client_id) AS clients_with_error
  FROM
    sample AS s
  -- Flatten all four error-type arrays into one stream of (key, value) entries;
  -- a NULL array is treated as empty so it does not null out the concatenation.
  CROSS JOIN
    UNNEST(ARRAY_CONCAT(IFNULL(ev, []), IFNULL(el, []), IFNULL(es, []), IFNULL(eo, []))) AS e
  WHERE
    -- Skip keys prefixed 'glean' or 'fog' (presumably SDK-internal metrics; confirm).
    NOT STARTS_WITH(e.key, 'glean')
    AND NOT STARTS_WITH(e.key, 'fog')
    AND e.value > 0
  GROUP BY
    s.submission_date,
    s.normalized_channel,
    metric_key
)
-- Channel/day combinations without any qualifying error entry have no row in
-- metric_clients_by_day and therefore produce no output row.
SELECT
  m.normalized_channel,
  m.submission_date,
  COUNTIF(SAFE_DIVIDE(m.clients_with_error, t.total_clients) > 0.01) AS num_metrics_over_1pct
FROM
  metric_clients_by_day AS m
INNER JOIN
  app_day_totals AS t
  USING (submission_date, normalized_channel)
GROUP BY
  m.submission_date,
  m.normalized_channel

0 commit comments

Comments
 (0)