add query_id and username columns to bigquery_usage_v2 table - joining JOBS_BY_ORG and JOBS_BY_PROJECT tables (#4203)
* add query_id and username columns to bigquery_usage_v2 table - joining JOBS_BY_ORG and JOBS_BY_PROJECT tables
* change jobs_by_project query to use variable {project} instead of
* adjust formatting
* refactor query to use jobs_by_organization_derived table
* remove data ops from codeowner of bigquery_table_usage_v2
* add marlene as a code owner to the bigquery_table_usage_v2 table
* add marlene as owner to DAG for bq_table_usage_v2
This commit is contained in:
Родитель
e3208aeecc
Коммит
7040463033
|
@ -15,6 +15,5 @@
|
|||
# Contextual Services
|
||||
/sql/moz-fx-data-shared-prod/contextual_services_derived/request_payload_suggest_v2 @mozilla/request_payload_reviewers
|
||||
/sql/moz-fx-data-shared-prod/contextual_services_derived/request_payload_tiles_v2 @mozilla/request_payload_reviewers
|
||||
/sql/moz-fx-data-shared-prod/monitoring_derived/bigquery_usage_v2 @mozilla/dataops
|
||||
/sql/moz-fx-data-shared-prod/contextual_services_derived/suggest_revenue_levers_daily_v1 @mozilla/revenue_forecasting_data_reviewers
|
||||
/sql/moz-fx-data-shared-prod/monitoring_derived/jobs_by_organization_v1 @mozilla/dataops
|
||||
|
|
|
@ -138,7 +138,7 @@ with DAG(
|
|||
+ ["--date", "{{ ds }}"],
|
||||
docker_image="gcr.io/moz-fx-data-airflow-prod-88e0/bigquery-etl:latest",
|
||||
owner="wichan@mozilla.com",
|
||||
email=["ascholtz@mozilla.com", "wichan@mozilla.com"],
|
||||
email=["ascholtz@mozilla.com", "mhirose@mozilla.com", "wichan@mozilla.com"],
|
||||
)
|
||||
|
||||
monitoring_derived__column_size__v1 = gke_command(
|
||||
|
|
|
@ -2,6 +2,7 @@ friendly_name: BigQuery Query Usage
|
|||
description: BigQuery usage, partitioned by day.
|
||||
owners:
|
||||
- wichan@mozilla.com
|
||||
- mhirose@mozilla.com
|
||||
labels:
|
||||
incremental: true
|
||||
schedule: daily
|
||||
|
|
|
@ -11,13 +11,6 @@ from argparse import ArgumentParser
|
|||
|
||||
from google.cloud import bigquery
|
||||
|
||||
DEFAULT_PROJECTS = [
|
||||
"mozdata",
|
||||
"moz-fx-data-shared-prod",
|
||||
"moz-fx-data-marketing-prod",
|
||||
"moz-fx-data-bq-data-science",
|
||||
]
|
||||
|
||||
parser = ArgumentParser(description=__doc__)
|
||||
parser.add_argument("--date", required=True) # expect string with format yyyy-mm-dd
|
||||
parser.add_argument("--project", default="moz-fx-data-shared-prod")
|
||||
|
@ -28,42 +21,90 @@ parser.add_argument("--destination_table", default="bigquery_usage_v2")
|
|||
|
||||
def create_query(date, project):
|
||||
"""Create query with filter for source projects."""
|
||||
return f"""
|
||||
SELECT
|
||||
t1.project_id AS source_project,
|
||||
DATE('{date}') AS creation_date,
|
||||
return """
|
||||
WITH jobs_by_org AS (
|
||||
SELECT
|
||||
t1.project_id AS source_project,
|
||||
creation_date,
|
||||
job_id,
|
||||
job_type,
|
||||
reservation_id,
|
||||
cache_hit,
|
||||
state,
|
||||
statement_type,
|
||||
referenced_tables.project_id AS reference_project_id,
|
||||
referenced_tables.dataset_id AS reference_dataset_id,
|
||||
referenced_tables.table_id AS reference_table_id,
|
||||
destination_table.project_id AS destination_project_id,
|
||||
destination_table.dataset_id AS destination_dataset_id,
|
||||
destination_table.table_id AS destination_table_id,
|
||||
user_email,
|
||||
end_time-start_time as task_duration,
|
||||
ROUND(total_bytes_processed / 1024 / 1024 / 1024 / 1024, 4)
|
||||
AS total_terabytes_processed,
|
||||
ROUND(total_bytes_billed / 1024 / 1024 / 1024 / 1024, 4)
|
||||
AS total_terabytes_billed,
|
||||
total_slot_ms,
|
||||
error_result.location AS error_location,
|
||||
error_result.reason AS error_reason,
|
||||
error_result.message AS error_message,
|
||||
query_info_resource_warning AS resource_warning,
|
||||
FROM
|
||||
`moz-fx-data-shared-prod.monitoring_derived.jobs_by_organization_v1` AS t1
|
||||
LEFT JOIN
|
||||
UNNEST(referenced_tables) AS referenced_tables
|
||||
),
|
||||
jobs_by_project AS (
|
||||
SELECT
|
||||
jp.project_id AS source_project,
|
||||
date(creation_time) as creation_date,
|
||||
job_id,
|
||||
referenced_tables.project_id AS reference_project_id,
|
||||
referenced_tables.dataset_id AS reference_dataset_id,
|
||||
referenced_tables.table_id AS reference_table_id,
|
||||
user_email,
|
||||
REGEXP_EXTRACT(query, r'Username: (.*?),') AS username,
|
||||
REGEXP_EXTRACT(query, r'Query ID: (\\w+),') AS query_id,
|
||||
FROM
|
||||
`moz-fx-data-shared-prod.region-us.INFORMATION_SCHEMA.JOBS_BY_PROJECT` jp
|
||||
LEFT JOIN
|
||||
UNNEST(referenced_tables) AS referenced_tables
|
||||
)
|
||||
SELECT
|
||||
jo.source_project,
|
||||
jo.creation_date,
|
||||
jo.job_id,
|
||||
jo.job_type,
|
||||
jo.reservation_id,
|
||||
jo.cache_hit,
|
||||
jo.state,
|
||||
jo.statement_type,
|
||||
jp.query_id,
|
||||
jo.reference_project_id,
|
||||
jo.reference_dataset_id,
|
||||
jo.reference_table_id,
|
||||
jo.destination_project_id,
|
||||
jo.destination_dataset_id,
|
||||
jo.destination_table_id,
|
||||
jo.user_email,
|
||||
jp.username,
|
||||
jo.task_duration,
|
||||
jo.total_terabytes_processed,
|
||||
jo.total_terabytes_billed,
|
||||
jo.total_slot_ms,
|
||||
jo.error_location,
|
||||
jo.error_reason,
|
||||
jo.error_message,
|
||||
jo.resource_warning
|
||||
FROM jobs_by_org jo
|
||||
LEFT JOIN jobs_by_project jp
|
||||
USING(source_project,
|
||||
creation_date,
|
||||
job_id,
|
||||
job_type,
|
||||
reservation_id,
|
||||
cache_hit,
|
||||
state,
|
||||
statement_type,
|
||||
referenced_tables.project_id AS reference_project_id,
|
||||
dataset_id AS reference_dataset_id,
|
||||
table_id AS reference_table_id,
|
||||
destination_table.project_id AS destination_project_id,
|
||||
destination_table.dataset_id AS destination_dataset_id,
|
||||
destination_table.table_id AS destination_table_id,
|
||||
user_email,
|
||||
end_time-start_time as task_duration,
|
||||
ROUND(total_bytes_processed / 1024 / 1024 / 1024 / 1024, 4)
|
||||
AS total_terabytes_processed,
|
||||
ROUND(total_bytes_billed / 1024 / 1024 / 1024 / 1024, 4)
|
||||
AS total_terabytes_billed,
|
||||
total_slot_ms,
|
||||
error_result.location AS error_location,
|
||||
error_result.reason AS error_reason,
|
||||
error_result.message AS error_message,
|
||||
query_info.resource_warning AS resource_warning,
|
||||
FROM
|
||||
`{project}.region-us.INFORMATION_SCHEMA.JOBS_BY_ORGANIZATION` AS t1
|
||||
LEFT JOIN
|
||||
UNNEST(referenced_tables) AS referenced_tables
|
||||
WHERE
|
||||
DATE(creation_time) = '{date}'
|
||||
AND (t1.project_id IN UNNEST({DEFAULT_PROJECTS})
|
||||
OR referenced_tables.project_id IN UNNEST({DEFAULT_PROJECTS})
|
||||
OR destination_table.project_id IN UNNEST({DEFAULT_PROJECTS}))
|
||||
reference_project_id,
|
||||
reference_dataset_id,
|
||||
reference_table_id)
|
||||
WHERE creation_date > '2023-03-01'
|
||||
"""
|
||||
|
||||
|
||||
|
|
|
@ -40,6 +40,11 @@ fields:
|
|||
type: STRING
|
||||
description: The type of query statement
|
||||
|
||||
- mode: NULLABLE
|
||||
name: query_id
|
||||
type: STRING
|
||||
description: The id of the query
|
||||
|
||||
- mode: NULLABLE
|
||||
name: reference_project_id
|
||||
type: STRING
|
||||
|
@ -75,6 +80,11 @@ fields:
|
|||
type: STRING
|
||||
description: Email address or service account of the user who ran the job
|
||||
|
||||
- mode: NULLABLE
|
||||
name: username
|
||||
type: STRING
|
||||
description: The name of the user who ran the job
|
||||
|
||||
- mode: NULLABLE
|
||||
name: task_duration
|
||||
type: INTERVAL
|
||||
|
|
Загрузка…
Ссылка в новой задаче