Initial version of a mozregression aggregates dataset (#1760)
This commit is contained in:
Родитель
f08fb90d27
Коммит
d7e7503d86
19
dags.yaml
19
dags.yaml
|
@ -387,3 +387,22 @@ bqetl_desktop_platform:
|
||||||
]
|
]
|
||||||
retries: 2
|
retries: 2
|
||||||
retry_delay: 30m
|
retry_delay: 30m
|
||||||
|
|
||||||
|
bqetl_internal_tooling:
|
||||||
|
description: >
|
||||||
|
This DAG schedules queries for populating tables related to Mozilla's
|
||||||
|
internal developer tooling (e.g. mozregression).
|
||||||
|
default_args:
|
||||||
|
depends_on_past: false
|
||||||
|
email:
|
||||||
|
- wlachance@mozilla.com
|
||||||
|
- telemetry-alerts@mozilla.com
|
||||||
|
email_on_failure: true
|
||||||
|
email_on_retry: true
|
||||||
|
end_date: null
|
||||||
|
owner: wlachance@mozilla.com
|
||||||
|
retries: 2
|
||||||
|
retry_delay: 30m
|
||||||
|
start_date: '2020-06-01'
|
||||||
|
schedule_interval: 0 4 * * *
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,64 @@
|
||||||
|
# Generated via https://github.com/mozilla/bigquery-etl/blob/master/bigquery_etl/query_scheduling/generate_airflow_dags.py
|
||||||
|
|
||||||
|
from airflow import DAG
|
||||||
|
from airflow.operators.sensors import ExternalTaskSensor
|
||||||
|
import datetime
|
||||||
|
from utils.gcp import bigquery_etl_query, gke_command
|
||||||
|
|
||||||
|
# Markdown text handed to the DAG below through its `doc_md` argument.
docs = """
### bqetl_internal_tooling

Built from bigquery-etl repo, [`dags/bqetl_internal_tooling.py`](https://github.com/mozilla/bigquery-etl/blob/master/dags/bqetl_internal_tooling.py)

#### Description

This DAG schedules queries for populating queries related to Mozilla's internal developer tooling (e.g. mozregression).

#### Owner

wlachance@mozilla.com
"""


# Task defaults passed to the DAG as `default_args`.
default_args = dict(
    owner="wlachance@mozilla.com",
    start_date=datetime.datetime(2020, 6, 1),
    end_date=None,
    email=["wlachance@mozilla.com", "telemetry-alerts@mozilla.com"],
    depends_on_past=False,
    retry_delay=datetime.timedelta(minutes=30),
    email_on_failure=True,
    email_on_retry=True,
    retries=2,
)

with DAG(
    dag_id="bqetl_internal_tooling",
    schedule_interval="0 4 * * *",  # cron expression: 04:00 every day
    default_args=default_args,
    doc_md=docs,
) as dag:

    # Query task writing to
    # moz-fx-data-shared-prod.org_mozilla_mozregression_derived.mozregression_aggregates_v1.
    mozregression_aggregates__v1 = bigquery_etl_query(
        task_id="mozregression_aggregates__v1",
        project_id="moz-fx-data-shared-prod",
        dataset_id="org_mozilla_mozregression_derived",
        destination_table="mozregression_aggregates_v1",
        date_partition_parameter="submission_date",
        owner="wlachance@mozilla.com",
        email=["telemetry-alerts@mozilla.com", "wlachance@mozilla.com"],
        depends_on_past=False,
        dag=dag,
    )

    # Sensor on the `copy_deduplicate_all` task of the `copy_deduplicate` DAG.
    # The 3 hour execution_delta presumably accounts for that DAG being
    # scheduled 3 hours before this one -- its schedule is not visible here,
    # confirm against the copy_deduplicate DAG definition.
    wait_for_copy_deduplicate_all = ExternalTaskSensor(
        task_id="wait_for_copy_deduplicate_all",
        external_dag_id="copy_deduplicate",
        external_task_id="copy_deduplicate_all",
        execution_delta=datetime.timedelta(hours=3),
        check_existence=True,
        mode="reschedule",
        pool="DATA_ENG_EXTERNALTASKSENSOR",
    )

    # The aggregate query runs only after the sensor has succeeded.
    wait_for_copy_deduplicate_all >> mozregression_aggregates__v1
|
|
@ -40,6 +40,21 @@ with DAG(
|
||||||
) as dag:
|
) as dag:
|
||||||
docker_image = "mozilla/bigquery-etl:latest"
|
docker_image = "mozilla/bigquery-etl:latest"
|
||||||
|
|
||||||
|
export_public_data_json_mozregression_aggregates__v1 = GKEPodOperator(
|
||||||
|
task_id="export_public_data_json_mozregression_aggregates__v1",
|
||||||
|
name="export_public_data_json_mozregression_aggregates__v1",
|
||||||
|
arguments=["script/publish_public_data_json"]
|
||||||
|
+ [
|
||||||
|
"--query_file=sql/moz-fx-data-shared-prod/org_mozilla_mozregression_derived/mozregression_aggregates_v1/query.sql"
|
||||||
|
]
|
||||||
|
+ ["--destination_table=mozregression_aggregates${{ds_nodash}}"]
|
||||||
|
+ ["--dataset_id=org_mozilla_mozregression_derived"]
|
||||||
|
+ ["--project_id=moz-fx-data-shared-prod"]
|
||||||
|
+ ["--parameter=submission_date:DATE:{{ds}}"],
|
||||||
|
image=docker_image,
|
||||||
|
dag=dag,
|
||||||
|
)
|
||||||
|
|
||||||
export_public_data_json_telemetry_derived__ssl_ratios__v1 = GKEPodOperator(
|
export_public_data_json_telemetry_derived__ssl_ratios__v1 = GKEPodOperator(
|
||||||
task_id="export_public_data_json_telemetry_derived__ssl_ratios__v1",
|
task_id="export_public_data_json_telemetry_derived__ssl_ratios__v1",
|
||||||
name="export_public_data_json_telemetry_derived__ssl_ratios__v1",
|
name="export_public_data_json_telemetry_derived__ssl_ratios__v1",
|
||||||
|
@ -55,6 +70,19 @@ with DAG(
|
||||||
dag=dag,
|
dag=dag,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
wait_for_mozregression_aggregates__v1 = ExternalTaskSensor(
|
||||||
|
task_id="wait_for_mozregression_aggregates__v1",
|
||||||
|
external_dag_id="bqetl_internal_tooling",
|
||||||
|
external_task_id="mozregression_aggregates__v1",
|
||||||
|
check_existence=True,
|
||||||
|
mode="reschedule",
|
||||||
|
pool="DATA_ENG_EXTERNALTASKSENSOR",
|
||||||
|
)
|
||||||
|
|
||||||
|
export_public_data_json_mozregression_aggregates__v1.set_upstream(
|
||||||
|
wait_for_mozregression_aggregates__v1
|
||||||
|
)
|
||||||
|
|
||||||
wait_for_telemetry_derived__ssl_ratios__v1 = ExternalTaskSensor(
|
wait_for_telemetry_derived__ssl_ratios__v1 = ExternalTaskSensor(
|
||||||
task_id="wait_for_telemetry_derived__ssl_ratios__v1",
|
task_id="wait_for_telemetry_derived__ssl_ratios__v1",
|
||||||
external_dag_id="bqetl_ssl_ratios",
|
external_dag_id="bqetl_ssl_ratios",
|
||||||
|
@ -78,6 +106,7 @@ with DAG(
|
||||||
|
|
||||||
public_data_gcs_metadata.set_upstream(
|
public_data_gcs_metadata.set_upstream(
|
||||||
[
|
[
|
||||||
|
export_public_data_json_mozregression_aggregates__v1,
|
||||||
export_public_data_json_telemetry_derived__ssl_ratios__v1,
|
export_public_data_json_telemetry_derived__ssl_ratios__v1,
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
|
@ -0,0 +1,7 @@
|
||||||
|
-- View exposing every column of the derived mozregression_aggregates_v1 table
-- under the org_mozilla_mozregression dataset.
CREATE OR REPLACE VIEW `moz-fx-data-shared-prod.org_mozilla_mozregression.mozregression_aggregates` AS
SELECT
  *
FROM
  `moz-fx-data-shared-prod.org_mozilla_mozregression_derived.mozregression_aggregates_v1`
|
|
@ -0,0 +1,26 @@
|
||||||
|
-- Creates (or replaces) the date-partitioned mozregression_aggregates_v1 table
-- and fills it from the mozregression `usage` table for every submission
-- date after 2020-04-01.
CREATE OR REPLACE TABLE
  `moz-fx-data-shared-prod.org_mozilla_mozregression_derived.mozregression_aggregates_v1`
PARTITION BY
  date
AS
SELECT
  DATE(submission_timestamp) AS date,
  client_info.app_display_version AS mozregression_version,
  metrics.string.usage_variant AS mozregression_variant,
  metrics.string.usage_app AS app_used,
  normalized_os AS os,
  -- Keep only the major.minor part of the OS version.
  mozfun.norm.truncate_version(normalized_os_version, "minor") AS os_version,
  COUNT(DISTINCT client_info.client_id) AS distinct_clients,
  COUNT(*) AS total_uses
FROM
  `moz-fx-data-shared-prod.org_mozilla_mozregression.usage`
WHERE
  -- Skip versions containing ".dev" (presumably development builds).
  client_info.app_display_version NOT LIKE '%.dev%'
  AND DATE(submission_timestamp) > DATE '2020-04-01'
GROUP BY
  date,
  mozregression_version,
  mozregression_variant,
  app_used,
  os,
  os_version;
|
|
@ -0,0 +1,13 @@
|
||||||
|
description: Aggregated metrics of mozregression usage
|
||||||
|
friendly_name: mozregression aggregates
|
||||||
|
labels:
|
||||||
|
incremental: true
|
||||||
|
public_bigquery: true
|
||||||
|
public_json: true
|
||||||
|
review_bugs:
|
||||||
|
- '1691105'
|
||||||
|
owners:
|
||||||
|
- wlachance@mozilla.com
|
||||||
|
scheduling:
|
||||||
|
dag_name: bqetl_internal_tooling
|
||||||
|
task_name: mozregression_aggregates__v1
|
|
@ -0,0 +1,21 @@
|
||||||
|
-- Daily mozregression usage aggregates for the partition selected by
-- @submission_date: distinct clients and total uses per mozregression
-- version, variant, application used, OS and OS version.
SELECT
  DATE(submission_timestamp) AS date,
  client_info.app_display_version AS mozregression_version,
  metrics.string.usage_variant AS mozregression_variant,
  metrics.string.usage_app AS app_used,
  normalized_os AS os,
  -- Truncate to major.minor exactly as the query that creates and backfills
  -- mozregression_aggregates_v1 does, so incrementally written partitions
  -- use the same os_version granularity as the backfilled ones.
  mozfun.norm.truncate_version(normalized_os_version, "minor") AS os_version,
  count(DISTINCT(client_info.client_id)) AS distinct_clients,
  count(*) AS total_uses
FROM
  `moz-fx-data-shared-prod`.org_mozilla_mozregression.usage
WHERE
  DATE(submission_timestamp) = @submission_date
  -- Skip versions containing ".dev" (presumably development builds).
  AND client_info.app_display_version NOT LIKE '%.dev%'
GROUP BY
  date,
  mozregression_version,
  mozregression_variant,
  app_used,
  os,
  os_version;
|
Загрузка…
Ссылка в новой задаче