Initial version of a mozregression aggregates dataset (#1760)

This commit is contained in:
William Lachance 2021-02-11 09:43:34 -05:00 коммит произвёл GitHub
Родитель f08fb90d27
Коммит d7e7503d86
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
7 изменённых файлов: 179 добавлений и 0 удалений

Просмотреть файл

@ -387,3 +387,22 @@ bqetl_desktop_platform:
]
retries: 2
retry_delay: 30m
# DAG entry for internal developer-tooling datasets (e.g. mozregression).
# The owner, e-mail, retry and schedule values below are the ones that appear
# in the generated bqetl_internal_tooling Airflow DAG.
bqetl_internal_tooling:
description: >
This DAG schedules queries for populating queries related to Mozilla's
internal developer tooling (e.g. mozregression).
default_args:
depends_on_past: false
email:
- wlachance@mozilla.com
- telemetry-alerts@mozilla.com
email_on_failure: true
email_on_retry: true
end_date: null
owner: wlachance@mozilla.com
retries: 2
retry_delay: 30m  # appears as timedelta(seconds=1800) in the generated DAG
start_date: '2020-06-01'
schedule_interval: 0 4 * * *  # cron expression: every day at 04:00

Просмотреть файл

@ -0,0 +1,64 @@
# Generated via https://github.com/mozilla/bigquery-etl/blob/master/bigquery_etl/query_scheduling/generate_airflow_dags.py
from airflow import DAG
from airflow.operators.sensors import ExternalTaskSensor
import datetime
from utils.gcp import bigquery_etl_query, gke_command
# Markdown description handed to the DAG constructor as ``doc_md``.
docs = """
### bqetl_internal_tooling
Built from bigquery-etl repo, [`dags/bqetl_internal_tooling.py`](https://github.com/mozilla/bigquery-etl/blob/master/dags/bqetl_internal_tooling.py)
#### Description
This DAG schedules queries for populating queries related to Mozilla's internal developer tooling (e.g. mozregression).
#### Owner
wlachance@mozilla.com
"""

# Default arguments handed to the DAG constructor: ownership, alert e-mails
# and the retry policy (two retries, thirty minutes apart).
default_args = dict(
    owner="wlachance@mozilla.com",
    start_date=datetime.datetime(2020, 6, 1),
    end_date=None,
    email=["wlachance@mozilla.com", "telemetry-alerts@mozilla.com"],
    depends_on_past=False,
    retry_delay=datetime.timedelta(minutes=30),
    email_on_failure=True,
    email_on_retry=True,
    retries=2,
)
# DAG "bqetl_internal_tooling": runs the mozregression aggregates query after
# the copy_deduplicate_all task of the copy_deduplicate DAG.
with DAG(
    "bqetl_internal_tooling",
    default_args=default_args,
    schedule_interval="0 4 * * *",
    doc_md=docs,
) as dag:
    # Query task writing to
    # moz-fx-data-shared-prod.org_mozilla_mozregression_derived.mozregression_aggregates_v1,
    # with "submission_date" as the date partition parameter.
    mozregression_aggregates__v1 = bigquery_etl_query(
        task_id="mozregression_aggregates__v1",
        project_id="moz-fx-data-shared-prod",
        dataset_id="org_mozilla_mozregression_derived",
        destination_table="mozregression_aggregates_v1",
        date_partition_parameter="submission_date",
        owner="wlachance@mozilla.com",
        email=["telemetry-alerts@mozilla.com", "wlachance@mozilla.com"],
        depends_on_past=False,
        dag=dag,
    )

    # Sensor on copy_deduplicate.copy_deduplicate_all.
    # NOTE(review): the three-hour execution_delta presumably mirrors the gap
    # between this DAG's 04:00 schedule and copy_deduplicate's - confirm
    # against that DAG's schedule.
    wait_for_copy_deduplicate_all = ExternalTaskSensor(
        task_id="wait_for_copy_deduplicate_all",
        external_dag_id="copy_deduplicate",
        external_task_id="copy_deduplicate_all",
        execution_delta=datetime.timedelta(hours=3),
        check_existence=True,
        mode="reschedule",
        pool="DATA_ENG_EXTERNALTASKSENSOR",
    )

    # The query task is downstream of the sensor.
    wait_for_copy_deduplicate_all >> mozregression_aggregates__v1

Просмотреть файл

@ -40,6 +40,21 @@ with DAG(
) as dag:
docker_image = "mozilla/bigquery-etl:latest"
# Pod task that runs script/publish_public_data_json against the
# mozregression aggregates query file, passing the run date as the
# submission_date query parameter.
# NOTE(review): the destination table is "mozregression_aggregates" plus a
# ds_nodash partition decorator (no "_v1" suffix) - confirm this is the form
# the publish script expects.
export_public_data_json_mozregression_aggregates__v1 = GKEPodOperator(
    task_id="export_public_data_json_mozregression_aggregates__v1",
    name="export_public_data_json_mozregression_aggregates__v1",
    arguments=[
        "script/publish_public_data_json",
        "--query_file=sql/moz-fx-data-shared-prod/org_mozilla_mozregression_derived/mozregression_aggregates_v1/query.sql",
        "--destination_table=mozregression_aggregates${{ds_nodash}}",
        "--dataset_id=org_mozilla_mozregression_derived",
        "--project_id=moz-fx-data-shared-prod",
        "--parameter=submission_date:DATE:{{ds}}",
    ],
    image=docker_image,
    dag=dag,
)
export_public_data_json_telemetry_derived__ssl_ratios__v1 = GKEPodOperator(
task_id="export_public_data_json_telemetry_derived__ssl_ratios__v1",
name="export_public_data_json_telemetry_derived__ssl_ratios__v1",
@ -55,6 +70,19 @@ with DAG(
dag=dag,
)
# Sensor on the mozregression_aggregates__v1 task of the
# bqetl_internal_tooling DAG; the JSON export below is downstream of it.
# NOTE(review): no execution_delta is given, so this presumably relies on both
# DAGs sharing the same schedule - confirm against this DAG's
# schedule_interval.
wait_for_mozregression_aggregates__v1 = ExternalTaskSensor(
    task_id="wait_for_mozregression_aggregates__v1",
    external_dag_id="bqetl_internal_tooling",
    external_task_id="mozregression_aggregates__v1",
    pool="DATA_ENG_EXTERNALTASKSENSOR",
    mode="reschedule",
    check_existence=True,
)
(
    wait_for_mozregression_aggregates__v1
    >> export_public_data_json_mozregression_aggregates__v1
)
wait_for_telemetry_derived__ssl_ratios__v1 = ExternalTaskSensor(
task_id="wait_for_telemetry_derived__ssl_ratios__v1",
external_dag_id="bqetl_ssl_ratios",
@ -78,6 +106,7 @@ with DAG(
# Make public_data_gcs_metadata depend on each of the listed JSON export tasks.
public_data_gcs_metadata.set_upstream(
[
export_public_data_json_mozregression_aggregates__v1,
export_public_data_json_telemetry_derived__ssl_ratios__v1,
]
)

Просмотреть файл

@ -0,0 +1,7 @@
-- View over the derived mozregression_aggregates_v1 table; it passes every
-- column through unchanged.
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.org_mozilla_mozregression.mozregression_aggregates`
AS
SELECT
*
FROM
`moz-fx-data-shared-prod.org_mozilla_mozregression_derived.mozregression_aggregates_v1`

Просмотреть файл

@ -0,0 +1,26 @@
-- Creates (or replaces) the date-partitioned aggregates table from
-- mozregression usage rows dated after 2020-04-01, skipping versions whose
-- display string contains ".dev".
CREATE OR REPLACE TABLE
  `moz-fx-data-shared-prod`.org_mozilla_mozregression_derived.mozregression_aggregates_v1
PARTITION BY
  date
AS
SELECT
  DATE(submission_timestamp) AS date,
  client_info.app_display_version AS mozregression_version,
  metrics.string.usage_variant AS mozregression_variant,
  metrics.string.usage_app AS app_used,
  normalized_os AS os,
  -- OS version is truncated at the "minor" component before grouping.
  mozfun.norm.truncate_version(normalized_os_version, "minor") AS os_version,
  COUNT(DISTINCT client_info.client_id) AS distinct_clients,
  COUNT(*) AS total_uses
FROM
  `moz-fx-data-shared-prod`.org_mozilla_mozregression.usage
WHERE
  DATE(submission_timestamp) > DATE '2020-04-01'
  AND client_info.app_display_version NOT LIKE '%.dev%'
GROUP BY
  date,
  mozregression_version,
  mozregression_variant,
  app_used,
  os,
  os_version;

Просмотреть файл

@ -0,0 +1,13 @@
# Metadata for the mozregression aggregates query: description, labels,
# owners, and the Airflow DAG/task it is scheduled under.
# NOTE(review): public_bigquery / public_json presumably opt the table into
# public publication, justified by the listed review bug - confirm.
description: Aggregated metrics of mozregression usage
friendly_name: mozregression aggregates
labels:
incremental: true
public_bigquery: true
public_json: true
review_bugs:
- '1691105'
owners:
- wlachance@mozilla.com
scheduling:
dag_name: bqetl_internal_tooling
task_name: mozregression_aggregates__v1

Просмотреть файл

@ -0,0 +1,21 @@
-- Aggregates mozregression usage rows for the single day @submission_date.
-- One output row per (date, version, variant, app, os, os_version) with the
-- number of distinct clients and the total number of usage rows. Versions
-- whose display string contains ".dev" are excluded.
SELECT
  DATE(submission_timestamp) AS date,
  client_info.app_display_version AS mozregression_version,
  metrics.string.usage_variant AS mozregression_variant,
  metrics.string.usage_app AS app_used,
  normalized_os AS os,
  -- Use the same truncated OS version as the CREATE OR REPLACE TABLE statement
  -- for mozregression_aggregates_v1. Selecting the raw normalized_os_version
  -- here would make daily partitions disagree with the table's os_version
  -- column definition and granularity.
  mozfun.norm.truncate_version(normalized_os_version, "minor") AS os_version,
  COUNT(DISTINCT client_info.client_id) AS distinct_clients,
  COUNT(*) AS total_uses
FROM
  `moz-fx-data-shared-prod`.org_mozilla_mozregression.usage
WHERE
  DATE(submission_timestamp) = @submission_date
  AND client_info.app_display_version NOT LIKE '%.dev%'
GROUP BY
  date,
  mozregression_version,
  mozregression_variant,
  app_used,
  os,
  os_version;