Create shredder job stats tables and views (#6115)

This commit is contained in:
Ben Wu 2024-08-27 18:52:34 +01:00 коммит произвёл GitHub
Родитель 124f75b45f
Коммит 119b5f08d1
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
10 изменённых файлов: 174 добавлений и 0 удалений

Просмотреть файл

@ -145,6 +145,7 @@ dry_run:
- sql/moz-fx-data-shared-prod/search_derived/mobile_search_aggregates_v1/query.sql
- sql/moz-fx-data-shared-prod/monitoring_derived/telemetry_missing_columns_v1/view.sql
- sql/moz-fx-data-shared-prod/monitoring_derived/table_partition_expirations_v1/query.sql
- sql/moz-fx-data-shared-prod/monitoring_derived/shredder_per_job_stats_v1/query.sql
# No matching signature for function IF
- sql/moz-fx-data-shared-prod/static/fxa_amplitude_export_users_last_seen/query.sql
# Duplicate UDF

Просмотреть файл

@ -0,0 +1,4 @@
friendly_name: Shredder Per Job Stats
description: Runtime and compute stats for shredder jobs.
owners:
- bewu@mozilla.com

Просмотреть файл

@ -0,0 +1,7 @@
-- User-facing view: passes through every column of the derived per-job
-- shredder stats table unchanged.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.monitoring.shredder_per_job_stats`
AS
SELECT
  per_job_stats.*
FROM
  `moz-fx-data-shared-prod.monitoring_derived.shredder_per_job_stats_v1` AS per_job_stats

Просмотреть файл

@ -0,0 +1,4 @@
friendly_name: Shredder Per Table Stats
description: Runtime and compute stats for shredder jobs, aggregated per target table.
owners:
- bewu@mozilla.com

Просмотреть файл

@ -0,0 +1,23 @@
-- Shredder job stats aggregated to one row per (shredder_run_date, table_id).
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.monitoring.shredder_per_table_stats`
AS
SELECT
  shredder_run_date,
  -- task_id is split on '$'; the text before the first '$' is used as the table id
  SPLIT(task_id, '$')[OFFSET(0)] AS table_id,
  MIN(start_time) AS start_time,
  MAX(end_time) AS end_time,
  -- Sum of the individual job durations (not the wall-clock span). Durations are
  -- measured in seconds because TIMESTAMP_DIFF only counts whole units: at MINUTE
  -- granularity every job shorter than a minute contributed 0 to the total.
  SUM(TIMESTAMP_DIFF(end_time, start_time, SECOND)) / 3600 AS run_time_hours,
  -- Total slot-seconds over total job-seconds; the divisor is floored at 1 second
  -- so groups made up of sub-second jobs don't raise a division by zero
  SUM(total_slot_ms) / 1000 / GREATEST(
    SUM(TIMESTAMP_DIFF(end_time, start_time, SECOND)),
    1
  ) AS avg_slots,
  SUM(tib_processed) AS tib_processed,
  SUM(slot_hours) AS slot_hours,
  COUNT(*) AS num_jobs,
FROM
  `moz-fx-data-shared-prod.monitoring_derived.shredder_per_job_stats_v1`
GROUP BY
  table_id,
  shredder_run_date
ORDER BY
  slot_hours DESC

Просмотреть файл

@ -0,0 +1,4 @@
friendly_name: Shredder Run Stats
description: Runtime and compute stats for shredder jobs, aggregated per shredder run and job group.
owners:
- bewu@mozilla.com

Просмотреть файл

@ -0,0 +1,30 @@
-- Shredder job stats aggregated to one row per (shredder_run_date, job_group).
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.monitoring.shredder_run_stats`
AS
SELECT
  shredder_run_date,
  MIN(start_time) AS start_time,
  MAX(end_time) AS end_time,
  -- Wall-clock span from the earliest job start to the latest job end in the group,
  -- counted in whole hours
  TIMESTAMP_DIFF(MAX(end_time), MIN(start_time), HOUR) AS run_time_hours,
  TIMESTAMP_DIFF(MAX(end_time), MIN(start_time), HOUR) / 24 AS run_time_days,
  -- Total slot-seconds over the wall-clock span. The span is floored at 1 second:
  -- a group whose jobs all start and end within the same second has a 0-second
  -- span, and an unguarded division by zero would fail the whole query.
  SUM(total_slot_ms) / 1000 / GREATEST(
    TIMESTAMP_DIFF(MAX(end_time), MIN(start_time), SECOND),
    1
  ) AS avg_slots,
  -- Bucket tasks by the table they target; 'all' is the catch-all for every task
  -- that matches none of the patterns above it
  CASE
    WHEN task_id LIKE 'moz-fx-data-shared-prod.telemetry_stable.main_v%'
      THEN 'main'
    WHEN task_id LIKE 'moz-fx-data-shared-prod.telemetry_stable.main_use_counter_v%'
      THEN 'main_use_counters'
    WHEN task_id LIKE 'moz-fx-data-experiments.%'
      THEN 'experiments'
    ELSE 'all'
  END AS job_group,
  SUM(tib_processed) AS tib_processed,
  SUM(slot_hours) AS slot_hours,
  COUNT(*) AS num_jobs,
FROM
  `moz-fx-data-shared-prod.monitoring_derived.shredder_per_job_stats_v1`
GROUP BY
  job_group,
  shredder_run_date
ORDER BY
  job_group,
  shredder_run_date DESC

Просмотреть файл

@ -0,0 +1,15 @@
friendly_name: Shredder Per Job Stats
description: Runtime and compute stats for shredder jobs.
owners:
- bewu@mozilla.com
labels:
incremental: false
schedule: daily
scheduling:
dag_name: bqetl_monitoring
bigquery:
time_partitioning:
type: day
field: end_time
require_partition_filter: false
expiration_days: null

Просмотреть файл

@ -0,0 +1,43 @@
-- One row per shredder job: shredder state joined to the BigQuery job stats and,
-- where a match exists, to the deleted row counts.
SELECT
  -- Date parsed from the text after the first '$' in task_id; NULL when task_id
  -- has no '$' suffix or the suffix is not in %Y%m%d form
  SAFE.PARSE_DATE('%Y%m%d', SPLIT(task_id, '$')[SAFE_OFFSET(1)]) AS partition_date,
  task_id,
  shredder_state.job_id,
  shredder_state.end_date AS shredder_run_date,
  shredder_jobs.start_time,
  shredder_jobs.end_time,
  TIMESTAMP_DIFF(shredder_jobs.end_time, shredder_jobs.start_time, SECOND) / 60 AS run_time_minutes,
  -- Slot-seconds over job duration. TIMESTAMP_DIFF returns 0 for a job that
  -- finishes in under a second, and dividing by it would fail the entire query,
  -- so the duration is floored at 1 second.
  total_slot_ms / 1000 / GREATEST(
    TIMESTAMP_DIFF(shredder_jobs.end_time, shredder_jobs.start_time, SECOND),
    1
  ) AS avg_slots,
  total_bytes_processed,
  total_bytes_processed / 1024 / 1024 / 1024 / 1024 AS tib_processed,
  total_slot_ms,
  total_slot_ms / 1000 / 60 / 60 AS slot_hours,
  shredder_rows_deleted.deleted_row_count,
  shredder_rows_deleted.partition_id,
FROM
  `moz-fx-data-shredder.shredder_state.shredder_state` AS shredder_state
-- The second '.'-separated element of the shredder job_id is matched against
-- the job_id recorded in the jobs table
INNER JOIN
  `moz-fx-data-shared-prod.monitoring_derived.jobs_by_organization_v1` AS shredder_jobs
  ON (shredder_jobs.job_id = SPLIT(shredder_state.job_id, '.')[OFFSET(1)])
-- LEFT JOIN: jobs without a deleted-rows record are kept with NULL counts
LEFT JOIN
  `moz-fx-data-shared-prod.monitoring_derived.shredder_rows_deleted_v1` AS shredder_rows_deleted
  ON (shredder_jobs.job_id = SPLIT(shredder_rows_deleted.job_id, '.')[OFFSET(1)])
WHERE
  {% if is_init() %}
    -- Initial load: every finished job created on or after 2024-01-22
    shredder_state.job_created >= "2024-01-22"
    AND shredder_jobs.creation_time >= "2024-01-22"
    AND shredder_jobs.end_time IS NOT NULL
  {% else %}
    -- Regular run: jobs that ended on @submission_date and were created on that
    -- day or the day before
    DATE(shredder_state.job_created)
    BETWEEN DATE_SUB(@submission_date, INTERVAL 1 DAY)
    AND @submission_date
    AND DATE(shredder_jobs.creation_time)
    BETWEEN DATE_SUB(@submission_date, INTERVAL 1 DAY)
    AND @submission_date
    AND DATE(shredder_jobs.end_time) = @submission_date
  {% endif %}
-- Keep a single row per shredder job. The window has no ORDER BY, so when the
-- joins yield several rows for one job the surviving row is arbitrary.
-- NOTE(review): confirm whether a specific row (e.g. a particular partition_id)
-- should be preferred.
QUALIFY
  ROW_NUMBER() OVER (PARTITION BY shredder_state.job_id) = 1

Просмотреть файл

@ -0,0 +1,43 @@
fields:
- name: partition_date
type: DATE
mode: NULLABLE
- name: task_id
type: STRING
mode: NULLABLE
- name: job_id
type: STRING
mode: NULLABLE
- name: shredder_run_date
type: DATE
mode: NULLABLE
- name: start_time
type: TIMESTAMP
mode: NULLABLE
- name: end_time
type: TIMESTAMP
mode: NULLABLE
- name: run_time_minutes
type: FLOAT
mode: NULLABLE
- name: avg_slots
type: FLOAT
mode: NULLABLE
- name: total_bytes_processed
type: INTEGER
mode: NULLABLE
- name: tib_processed
type: FLOAT
mode: NULLABLE
- name: total_slot_ms
type: INTEGER
mode: NULLABLE
- name: slot_hours
type: FLOAT
mode: NULLABLE
- name: deleted_row_count
type: INTEGER
mode: NULLABLE
- name: partition_id
type: STRING
mode: NULLABLE