Create shredder job stats tables and views (#6115)
This commit is contained in:
Родитель
124f75b45f
Коммит
119b5f08d1
|
@ -145,6 +145,7 @@ dry_run:
|
|||
- sql/moz-fx-data-shared-prod/search_derived/mobile_search_aggregates_v1/query.sql
|
||||
- sql/moz-fx-data-shared-prod/monitoring_derived/telemetry_missing_columns_v1/view.sql
|
||||
- sql/moz-fx-data-shared-prod/monitoring_derived/table_partition_expirations_v1/query.sql
|
||||
- sql/moz-fx-data-shared-prod/monitoring_derived/shredder_per_job_stats_v1/query.sql
|
||||
# No matching signature for function IF
|
||||
- sql/moz-fx-data-shared-prod/static/fxa_amplitude_export_users_last_seen/query.sql
|
||||
# Duplicate UDF
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
friendly_name: Shredder Per Job Stats
|
||||
description: Runtime and compute stats for shredder jobs.
|
||||
owners:
|
||||
- bewu@mozilla.com
|
|
@ -0,0 +1,7 @@
|
|||
-- Pass-through view: exposes every column of the derived per-job shredder
-- stats table unchanged under the `monitoring` dataset.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.monitoring.shredder_per_job_stats`
AS
SELECT
  per_job_stats.*
FROM
  `moz-fx-data-shared-prod.monitoring_derived.shredder_per_job_stats_v1` AS per_job_stats
|
|
@ -0,0 +1,4 @@
|
|||
friendly_name: Shredder Per Table Stats
|
||||
description: Runtime and compute stats for shredder jobs, aggregated per target table.
|
||||
owners:
|
||||
- bewu@mozilla.com
|
|
@ -0,0 +1,23 @@
|
|||
-- Per-job shredder stats rolled up to one row per (table_id, shredder_run_date).
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.monitoring.shredder_per_table_stats`
AS
SELECT
  shredder_run_date,
  -- The text before the first '$' in task_id is used as the table id
  -- (presumably task_id looks like <table>$<partition>; confirm upstream).
  SPLIT(task_id, '$')[OFFSET(0)] AS table_id,
  MIN(start_time) AS start_time,
  MAX(end_time) AS end_time,
  -- Sum of the individual job durations (not wall-clock time for the table).
  -- Measured in seconds so the sub-minute part of each job is not discarded
  -- before summing; this also matches the denominator used for avg_slots.
  SUM(TIMESTAMP_DIFF(end_time, start_time, SECOND)) / 3600 AS run_time_hours,
  -- Slot-seconds divided by total job seconds; GREATEST(..., 1) prevents a
  -- division-by-zero error when every job in the group ran for under a second.
  SUM(total_slot_ms) / 1000 / GREATEST(
    SUM(TIMESTAMP_DIFF(end_time, start_time, SECOND)),
    1
  ) AS avg_slots,
  SUM(tib_processed) AS tib_processed,
  SUM(slot_hours) AS slot_hours,
  COUNT(*) AS num_jobs,
FROM
  `moz-fx-data-shared-prod.monitoring_derived.shredder_per_job_stats_v1`
GROUP BY
  table_id,
  shredder_run_date
ORDER BY
  slot_hours DESC
|
|
@ -0,0 +1,4 @@
|
|||
friendly_name: Shredder Run Stats
|
||||
description: Runtime and compute stats for shredder jobs, aggregated per shredder run date and job group.
|
||||
owners:
|
||||
- bewu@mozilla.com
|
|
@ -0,0 +1,30 @@
|
|||
-- Per-job shredder stats rolled up to one row per (job_group, shredder_run_date).
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.monitoring.shredder_run_stats`
AS
SELECT
  shredder_run_date,
  MIN(start_time) AS start_time,
  MAX(end_time) AS end_time,
  -- Span from the earliest job start to the latest job end in the group,
  -- at HOUR granularity (so the value is an integer number of hours).
  TIMESTAMP_DIFF(MAX(end_time), MIN(start_time), HOUR) AS run_time_hours,
  TIMESTAMP_DIFF(MAX(end_time), MIN(start_time), HOUR) / 24 AS run_time_days,
  -- Slot-seconds divided by the span in seconds. GREATEST(..., 1) prevents a
  -- division-by-zero error when the whole group starts and ends within the
  -- same second (same guard as the per-table stats view).
  SUM(total_slot_ms) / 1000 / GREATEST(
    TIMESTAMP_DIFF(MAX(end_time), MIN(start_time), SECOND),
    1
  ) AS avg_slots,
  -- Bucket jobs by the table named in task_id. Branches are evaluated in
  -- order; note that '_' in a LIKE pattern matches any single character.
  CASE
    WHEN task_id LIKE 'moz-fx-data-shared-prod.telemetry_stable.main_v%'
      THEN 'main'
    WHEN task_id LIKE 'moz-fx-data-shared-prod.telemetry_stable.main_use_counter_v%'
      THEN 'main_use_counters'
    WHEN task_id LIKE 'moz-fx-data-experiments.%'
      THEN 'experiments'
    -- Every task that matched none of the patterns above.
    ELSE 'all'
  END AS job_group,
  SUM(tib_processed) AS tib_processed,
  SUM(slot_hours) AS slot_hours,
  COUNT(*) AS num_jobs,
FROM
  `moz-fx-data-shared-prod.monitoring_derived.shredder_per_job_stats_v1`
GROUP BY
  job_group,
  shredder_run_date
ORDER BY
  job_group,
  shredder_run_date DESC
|
|
@ -0,0 +1,15 @@
|
|||
friendly_name: Shredder Per Job Stats
|
||||
description: Runtime and compute stats for shredder jobs.
|
||||
owners:
|
||||
- bewu@mozilla.com
|
||||
labels:
|
||||
incremental: false
|
||||
schedule: daily
|
||||
scheduling:
|
||||
dag_name: bqetl_monitoring
|
||||
bigquery:
|
||||
time_partitioning:
|
||||
type: day
|
||||
field: end_time
|
||||
require_partition_filter: false
|
||||
expiration_days: null
|
|
@ -0,0 +1,43 @@
|
|||
-- One row per shredder job: the shredder state record joined to the job's
-- entry in jobs_by_organization_v1 and, when available, to the rows-deleted
-- record for the same job.
SELECT
  -- The text after '$' in task_id parsed as YYYYMMDD; NULL when task_id has no
  -- '$' part or the part is not a valid date (SAFE_OFFSET / SAFE.PARSE_DATE).
  SAFE.PARSE_DATE('%Y%m%d', SPLIT(task_id, '$')[SAFE_OFFSET(1)]) AS partition_date,
  task_id,
  shredder_state.job_id,
  shredder_state.end_date AS shredder_run_date,
  shredder_jobs.start_time,
  shredder_jobs.end_time,
  TIMESTAMP_DIFF(shredder_jobs.end_time, shredder_jobs.start_time, SECOND) / 60 AS run_time_minutes,
  -- Slot-seconds divided by job duration in seconds. GREATEST(..., 1) prevents
  -- a division-by-zero error (which would fail the whole query) for jobs that
  -- start and end within the same second.
  total_slot_ms / 1000 / GREATEST(
    TIMESTAMP_DIFF(shredder_jobs.end_time, shredder_jobs.start_time, SECOND),
    1
  ) AS avg_slots,
  total_bytes_processed,
  -- bytes -> TiB (1024^4)
  total_bytes_processed / 1024 / 1024 / 1024 / 1024 AS tib_processed,
  total_slot_ms,
  -- milliseconds -> hours
  total_slot_ms / 1000 / 60 / 60 AS slot_hours,
  shredder_rows_deleted.deleted_row_count,
  shredder_rows_deleted.partition_id,
FROM
  `moz-fx-data-shredder.shredder_state.shredder_state` AS shredder_state
INNER JOIN
  `moz-fx-data-shared-prod.monitoring_derived.jobs_by_organization_v1` AS shredder_jobs
  -- The second '.'-separated component of the shredder job id is matched
  -- against jobs_by_organization_v1.job_id (presumably the first component is
  -- a project/location prefix; confirm against the shredder_state table).
  ON (shredder_jobs.job_id = SPLIT(shredder_state.job_id, '.')[OFFSET(1)])
LEFT JOIN
  `moz-fx-data-shared-prod.monitoring_derived.shredder_rows_deleted_v1` AS shredder_rows_deleted
  ON (shredder_jobs.job_id = SPLIT(shredder_rows_deleted.job_id, '.')[OFFSET(1)])
WHERE
  {% if is_init() %}
    -- Initialization: take every finished job created on or after 2024-01-22.
    shredder_state.job_created >= "2024-01-22"
    AND shredder_jobs.creation_time >= "2024-01-22"
    AND shredder_jobs.end_time IS NOT NULL
  {% else %}
    -- Regular run: jobs that ended on @submission_date and were created on
    -- that date or the day before.
    DATE(shredder_state.job_created)
    BETWEEN DATE_SUB(@submission_date, INTERVAL 1 DAY)
    AND @submission_date
    AND DATE(shredder_jobs.creation_time)
    BETWEEN DATE_SUB(@submission_date, INTERVAL 1 DAY)
    AND @submission_date
    AND DATE(shredder_jobs.end_time) = @submission_date
  {% endif %}
QUALIFY
  -- Keep a single row per shredder job id.
  -- NOTE(review): there is no ORDER BY in the window, so when the joins yield
  -- several rows for one job the surviving row is not deterministic.
  ROW_NUMBER() OVER (PARTITION BY shredder_state.job_id) = 1
|
|
@ -0,0 +1,43 @@
|
|||
fields:
|
||||
- name: partition_date
|
||||
type: DATE
|
||||
mode: NULLABLE
|
||||
- name: task_id
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
- name: job_id
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
- name: shredder_run_date
|
||||
type: DATE
|
||||
mode: NULLABLE
|
||||
- name: start_time
|
||||
type: TIMESTAMP
|
||||
mode: NULLABLE
|
||||
- name: end_time
|
||||
type: TIMESTAMP
|
||||
mode: NULLABLE
|
||||
- name: run_time_minutes
|
||||
type: FLOAT
|
||||
mode: NULLABLE
|
||||
- name: avg_slots
|
||||
type: FLOAT
|
||||
mode: NULLABLE
|
||||
- name: total_bytes_processed
|
||||
type: INTEGER
|
||||
mode: NULLABLE
|
||||
- name: tib_processed
|
||||
type: FLOAT
|
||||
mode: NULLABLE
|
||||
- name: total_slot_ms
|
||||
type: INTEGER
|
||||
mode: NULLABLE
|
||||
- name: slot_hours
|
||||
type: FLOAT
|
||||
mode: NULLABLE
|
||||
- name: deleted_row_count
|
||||
type: INTEGER
|
||||
mode: NULLABLE
|
||||
- name: partition_id
|
||||
type: STRING
|
||||
mode: NULLABLE
|
Загрузка…
Ссылка в новой задаче