Add shredder monitoring: a derived per-job stats table plus per-job, per-table and per-run views.
Line breaks and indentation were reconstructed from a whitespace-collapsed copy of this patch;
blob hashes on the index lines are carried over from that copy.

diff --git a/bqetl_project.yaml b/bqetl_project.yaml
index f392da74f5..74454cf403 100644
--- a/bqetl_project.yaml
+++ b/bqetl_project.yaml
@@ -145,6 +145,7 @@ dry_run:
     - sql/moz-fx-data-shared-prod/search_derived/mobile_search_aggregates_v1/query.sql
     - sql/moz-fx-data-shared-prod/monitoring_derived/telemetry_missing_columns_v1/view.sql
     - sql/moz-fx-data-shared-prod/monitoring_derived/table_partition_expirations_v1/query.sql
+    - sql/moz-fx-data-shared-prod/monitoring_derived/shredder_per_job_stats_v1/query.sql
     # No matching signature for function IF
     - sql/moz-fx-data-shared-prod/static/fxa_amplitude_export_users_last_seen/query.sql
     # Duplicate UDF
diff --git a/sql/moz-fx-data-shared-prod/monitoring/shredder_per_job_stats/metadata.yaml b/sql/moz-fx-data-shared-prod/monitoring/shredder_per_job_stats/metadata.yaml
new file mode 100644
index 0000000000..6a5ea7d18d
--- /dev/null
+++ b/sql/moz-fx-data-shared-prod/monitoring/shredder_per_job_stats/metadata.yaml
@@ -0,0 +1,4 @@
+friendly_name: Shredder Per Job Stats
+description: Runtime and compute stats for shredder jobs.
+owners:
+  - bewu@mozilla.com
diff --git a/sql/moz-fx-data-shared-prod/monitoring/shredder_per_job_stats/view.sql b/sql/moz-fx-data-shared-prod/monitoring/shredder_per_job_stats/view.sql
new file mode 100644
index 0000000000..b8b2482fd3
--- /dev/null
+++ b/sql/moz-fx-data-shared-prod/monitoring/shredder_per_job_stats/view.sql
@@ -0,0 +1,8 @@
+-- Exposes every column of the derived per-job shredder stats table.
+CREATE OR REPLACE VIEW
+  `moz-fx-data-shared-prod.monitoring.shredder_per_job_stats`
+AS
+SELECT
+  *
+FROM
+  `moz-fx-data-shared-prod.monitoring_derived.shredder_per_job_stats_v1`
diff --git a/sql/moz-fx-data-shared-prod/monitoring/shredder_per_table_stats/metadata.yaml b/sql/moz-fx-data-shared-prod/monitoring/shredder_per_table_stats/metadata.yaml
new file mode 100644
index 0000000000..cdb526b5f5
--- /dev/null
+++ b/sql/moz-fx-data-shared-prod/monitoring/shredder_per_table_stats/metadata.yaml
@@ -0,0 +1,4 @@
+friendly_name: Shredder Per Table Stats
+description: Runtime and compute stats for shredder jobs, aggregated per target table.
+owners:
+  - bewu@mozilla.com
diff --git a/sql/moz-fx-data-shared-prod/monitoring/shredder_per_table_stats/view.sql b/sql/moz-fx-data-shared-prod/monitoring/shredder_per_table_stats/view.sql
new file mode 100644
index 0000000000..3f1c1d8eea
--- /dev/null
+++ b/sql/moz-fx-data-shared-prod/monitoring/shredder_per_table_stats/view.sql
@@ -0,0 +1,27 @@
+-- Rolls up per-job shredder stats to one row per table_id and shredder_run_date.
+CREATE OR REPLACE VIEW
+  `moz-fx-data-shared-prod.monitoring.shredder_per_table_stats`
+AS
+SELECT
+  shredder_run_date,
+  -- The text before the first '$' in task_id is used as the table id
+  SPLIT(task_id, '$')[OFFSET(0)] AS table_id,
+  MIN(start_time) AS start_time,
+  MAX(end_time) AS end_time,
+  -- Summed in seconds so that jobs shorter than a minute are not truncated to zero
+  SUM(TIMESTAMP_DIFF(end_time, start_time, SECOND)) / 3600 AS run_time_hours,
+  -- GREATEST(..., 1) avoids dividing by zero when the summed run time is under a second
+  SUM(total_slot_ms) / 1000 / GREATEST(
+    SUM(TIMESTAMP_DIFF(end_time, start_time, SECOND)),
+    1
+  ) AS avg_slots,
+  SUM(tib_processed) AS tib_processed,
+  SUM(slot_hours) AS slot_hours,
+  COUNT(*) AS num_jobs,
+FROM
+  `moz-fx-data-shared-prod.monitoring_derived.shredder_per_job_stats_v1`
+GROUP BY
+  table_id,
+  shredder_run_date
+ORDER BY
+  slot_hours DESC
diff --git a/sql/moz-fx-data-shared-prod/monitoring/shredder_run_stats/metadata.yaml b/sql/moz-fx-data-shared-prod/monitoring/shredder_run_stats/metadata.yaml
new file mode 100644
index 0000000000..7cc1c05a5c
--- /dev/null
+++ b/sql/moz-fx-data-shared-prod/monitoring/shredder_run_stats/metadata.yaml
@@ -0,0 +1,4 @@
+friendly_name: Shredder Run Stats
+description: Aggregated runtime and compute stats.
+owners:
+  - bewu@mozilla.com
diff --git a/sql/moz-fx-data-shared-prod/monitoring/shredder_run_stats/view.sql b/sql/moz-fx-data-shared-prod/monitoring/shredder_run_stats/view.sql
new file mode 100644
index 0000000000..66b43f4b7e
--- /dev/null
+++ b/sql/moz-fx-data-shared-prod/monitoring/shredder_run_stats/view.sql
@@ -0,0 +1,36 @@
+-- Rolls up per-job shredder stats to one row per job_group and shredder_run_date.
+CREATE OR REPLACE VIEW
+  `moz-fx-data-shared-prod.monitoring.shredder_run_stats`
+AS
+SELECT
+  shredder_run_date,
+  MIN(start_time) AS start_time,
+  MAX(end_time) AS end_time,
+  -- Span from earliest start to latest end; TIMESTAMP_DIFF with HOUR yields whole hours
+  TIMESTAMP_DIFF(MAX(end_time), MIN(start_time), HOUR) AS run_time_hours,
+  TIMESTAMP_DIFF(MAX(end_time), MIN(start_time), HOUR) / 24 AS run_time_days,
+  -- GREATEST(..., 1) avoids dividing by zero when the whole span is under a second
+  SUM(total_slot_ms) / 1000 / GREATEST(
+    TIMESTAMP_DIFF(MAX(end_time), MIN(start_time), SECOND),
+    1
+  ) AS avg_slots,
+  CASE
+    WHEN task_id LIKE 'moz-fx-data-shared-prod.telemetry_stable.main_v%'
+      THEN 'main'
+    WHEN task_id LIKE 'moz-fx-data-shared-prod.telemetry_stable.main_use_counter_v%'
+      THEN 'main_use_counters'
+    WHEN task_id LIKE 'moz-fx-data-experiments.%'
+      THEN 'experiments'
+    ELSE 'all'
+  END AS job_group,
+  SUM(tib_processed) AS tib_processed,
+  SUM(slot_hours) AS slot_hours,
+  COUNT(*) AS num_jobs,
+FROM
+  `moz-fx-data-shared-prod.monitoring_derived.shredder_per_job_stats_v1`
+GROUP BY
+  job_group,
+  shredder_run_date
+ORDER BY
+  job_group,
+  shredder_run_date DESC
diff --git a/sql/moz-fx-data-shared-prod/monitoring_derived/shredder_per_job_stats_v1/metadata.yaml b/sql/moz-fx-data-shared-prod/monitoring_derived/shredder_per_job_stats_v1/metadata.yaml
new file mode 100644
index 0000000000..27b72bc9e3
--- /dev/null
+++ b/sql/moz-fx-data-shared-prod/monitoring_derived/shredder_per_job_stats_v1/metadata.yaml
@@ -0,0 +1,15 @@
+friendly_name: Shredder Per Job Stats
+description: Runtime and compute stats for shredder jobs.
+owners:
+  - bewu@mozilla.com
+labels:
+  incremental: false
+  schedule: daily
+scheduling:
+  dag_name: bqetl_monitoring
+bigquery:
+  time_partitioning:
+    type: day
+    field: end_time
+    require_partition_filter: false
+    expiration_days: null
diff --git a/sql/moz-fx-data-shared-prod/monitoring_derived/shredder_per_job_stats_v1/query.sql b/sql/moz-fx-data-shared-prod/monitoring_derived/shredder_per_job_stats_v1/query.sql
new file mode 100644
index 0000000000..fb55500285
--- /dev/null
+++ b/sql/moz-fx-data-shared-prod/monitoring_derived/shredder_per_job_stats_v1/query.sql
@@ -0,0 +1,46 @@
+-- Joins shredder_state to jobs_by_organization_v1 and shredder_rows_deleted_v1,
+-- keeping one row per shredder job.
+SELECT
+  SAFE.PARSE_DATE('%Y%m%d', SPLIT(task_id, '$')[SAFE_OFFSET(1)]) AS partition_date,
+  task_id,
+  shredder_state.job_id,
+  shredder_state.end_date AS shredder_run_date,
+  shredder_jobs.start_time,
+  shredder_jobs.end_time,
+  TIMESTAMP_DIFF(shredder_jobs.end_time, shredder_jobs.start_time, SECOND) / 60 AS run_time_minutes,
+  -- GREATEST(..., 1) keeps jobs that finish in under a second from dividing by zero
+  total_slot_ms / 1000 / GREATEST(
+    TIMESTAMP_DIFF(shredder_jobs.end_time, shredder_jobs.start_time, SECOND),
+    1
+  ) AS avg_slots,
+  total_bytes_processed,
+  total_bytes_processed / 1024 / 1024 / 1024 / 1024 AS tib_processed,
+  total_slot_ms,
+  total_slot_ms / 1000 / 60 / 60 AS slot_hours,
+  shredder_rows_deleted.deleted_row_count,
+  shredder_rows_deleted.partition_id,
+FROM
+  `moz-fx-data-shredder.shredder_state.shredder_state` AS shredder_state
+INNER JOIN
+  `moz-fx-data-shared-prod.monitoring_derived.jobs_by_organization_v1` AS shredder_jobs
+  ON (shredder_jobs.job_id = SPLIT(shredder_state.job_id, '.')[OFFSET(1)])
+LEFT JOIN
+  `moz-fx-data-shared-prod.monitoring_derived.shredder_rows_deleted_v1` AS shredder_rows_deleted
+  ON (shredder_jobs.job_id = SPLIT(shredder_rows_deleted.job_id, '.')[OFFSET(1)])
+WHERE
+  {% if is_init() %}
+    shredder_state.job_created >= "2024-01-22"
+    AND shredder_jobs.creation_time >= "2024-01-22"
+    AND shredder_jobs.end_time IS NOT NULL
+  {% else %}
+    DATE(shredder_state.job_created)
+      BETWEEN DATE_SUB(@submission_date, INTERVAL 1 DAY)
+      AND @submission_date
+    AND DATE(shredder_jobs.creation_time)
+      BETWEEN DATE_SUB(@submission_date, INTERVAL 1 DAY)
+      AND @submission_date
+    AND DATE(shredder_jobs.end_time) = @submission_date
+  {% endif %}
+QUALIFY
+  -- No ORDER BY: when a job matches several joined rows, the row kept is arbitrary
+  ROW_NUMBER() OVER (PARTITION BY shredder_state.job_id) = 1
diff --git a/sql/moz-fx-data-shared-prod/monitoring_derived/shredder_per_job_stats_v1/schema.yaml b/sql/moz-fx-data-shared-prod/monitoring_derived/shredder_per_job_stats_v1/schema.yaml
new file mode 100644
index 0000000000..8cf8a37b01
--- /dev/null
+++ b/sql/moz-fx-data-shared-prod/monitoring_derived/shredder_per_job_stats_v1/schema.yaml
@@ -0,0 +1,43 @@
+fields:
+- name: partition_date
+  type: DATE
+  mode: NULLABLE
+- name: task_id
+  type: STRING
+  mode: NULLABLE
+- name: job_id
+  type: STRING
+  mode: NULLABLE
+- name: shredder_run_date
+  type: DATE
+  mode: NULLABLE
+- name: start_time
+  type: TIMESTAMP
+  mode: NULLABLE
+- name: end_time
+  type: TIMESTAMP
+  mode: NULLABLE
+- name: run_time_minutes
+  type: FLOAT
+  mode: NULLABLE
+- name: avg_slots
+  type: FLOAT
+  mode: NULLABLE
+- name: total_bytes_processed
+  type: INTEGER
+  mode: NULLABLE
+- name: tib_processed
+  type: FLOAT
+  mode: NULLABLE
+- name: total_slot_ms
+  type: INTEGER
+  mode: NULLABLE
+- name: slot_hours
+  type: FLOAT
+  mode: NULLABLE
+- name: deleted_row_count
+  type: INTEGER
+  mode: NULLABLE
+- name: partition_id
+  type: STRING
+  mode: NULLABLE