Initial version of a mozregression aggregates dataset (#1760)
This commit is contained in:
Родитель
f08fb90d27
Коммит
d7e7503d86
19
dags.yaml
19
dags.yaml
|
@ -387,3 +387,22 @@ bqetl_desktop_platform:
|
|||
]
|
||||
retries: 2
|
||||
retry_delay: 30m
|
||||
|
||||
bqetl_internal_tooling:
|
||||
description: >
|
||||
This DAG schedules queries for populating queries related to Mozilla's
|
||||
internal developer tooling (e.g. mozregression).
|
||||
default_args:
|
||||
depends_on_past: false
|
||||
email:
|
||||
- wlachance@mozilla.com
|
||||
- telemetry-alerts@mozilla.com
|
||||
email_on_failure: true
|
||||
email_on_retry: true
|
||||
end_date: null
|
||||
owner: wlachance@mozilla.com
|
||||
retries: 2
|
||||
retry_delay: 30m
|
||||
start_date: '2020-06-01'
|
||||
schedule_interval: 0 4 * * *
|
||||
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
# Generated via https://github.com/mozilla/bigquery-etl/blob/master/bigquery_etl/query_scheduling/generate_airflow_dags.py
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.operators.sensors import ExternalTaskSensor
|
||||
import datetime
|
||||
from utils.gcp import bigquery_etl_query, gke_command
|
||||
|
||||
docs = """
|
||||
### bqetl_internal_tooling
|
||||
|
||||
Built from bigquery-etl repo, [`dags/bqetl_internal_tooling.py`](https://github.com/mozilla/bigquery-etl/blob/master/dags/bqetl_internal_tooling.py)
|
||||
|
||||
#### Description
|
||||
|
||||
This DAG schedules queries for populating queries related to Mozilla's internal developer tooling (e.g. mozregression).
|
||||
|
||||
#### Owner
|
||||
|
||||
wlachance@mozilla.com
|
||||
"""
|
||||
|
||||
|
||||
# Arguments shared by the tasks of this DAG; passed to ``DAG(default_args=...)``.
default_args = {
    "owner": "wlachance@mozilla.com",
    # Midnight on 2020-06-01 (hour and minute default to 0).
    "start_date": datetime.datetime(2020, 6, 1),
    "end_date": None,
    "email": [
        "wlachance@mozilla.com",
        "telemetry-alerts@mozilla.com",
    ],
    "depends_on_past": False,
    # 30 minutes between retry attempts.
    "retry_delay": datetime.timedelta(minutes=30),
    "email_on_failure": True,
    "email_on_retry": True,
    "retries": 2,
}
|
||||
|
||||
# DAG definition: one BigQuery query task gated on an external sensor.
with DAG(
    "bqetl_internal_tooling",
    schedule_interval="0 4 * * *",
    default_args=default_args,
    doc_md=docs,
) as dag:

    # Runs the query that fills mozregression_aggregates_v1, with the
    # destination partition selected through the submission_date parameter.
    mozregression_aggregates__v1 = bigquery_etl_query(
        task_id="mozregression_aggregates__v1",
        project_id="moz-fx-data-shared-prod",
        dataset_id="org_mozilla_mozregression_derived",
        destination_table="mozregression_aggregates_v1",
        date_partition_parameter="submission_date",
        owner="wlachance@mozilla.com",
        email=["telemetry-alerts@mozilla.com", "wlachance@mozilla.com"],
        depends_on_past=False,
        dag=dag,
    )

    # Blocks until copy_deduplicate.copy_deduplicate_all has finished for the
    # execution that is 3 hours earlier than this DAG's own.
    # NOTE(review): presumably copy_deduplicate is scheduled at 01:00 -- verify
    # against that DAG's schedule.
    wait_for_copy_deduplicate_all = ExternalTaskSensor(
        task_id="wait_for_copy_deduplicate_all",
        external_dag_id="copy_deduplicate",
        external_task_id="copy_deduplicate_all",
        execution_delta=datetime.timedelta(hours=3),
        check_existence=True,
        mode="reschedule",
        pool="DATA_ENG_EXTERNALTASKSENSOR",
    )

    # The sensor is the upstream dependency of the query task.
    wait_for_copy_deduplicate_all >> mozregression_aggregates__v1
|
|
@ -40,6 +40,21 @@ with DAG(
|
|||
) as dag:
|
||||
docker_image = "mozilla/bigquery-etl:latest"
|
||||
|
||||
export_public_data_json_mozregression_aggregates__v1 = GKEPodOperator(
|
||||
task_id="export_public_data_json_mozregression_aggregates__v1",
|
||||
name="export_public_data_json_mozregression_aggregates__v1",
|
||||
arguments=["script/publish_public_data_json"]
|
||||
+ [
|
||||
"--query_file=sql/moz-fx-data-shared-prod/org_mozilla_mozregression_derived/mozregression_aggregates_v1/query.sql"
|
||||
]
|
||||
+ ["--destination_table=mozregression_aggregates${{ds_nodash}}"]
|
||||
+ ["--dataset_id=org_mozilla_mozregression_derived"]
|
||||
+ ["--project_id=moz-fx-data-shared-prod"]
|
||||
+ ["--parameter=submission_date:DATE:{{ds}}"],
|
||||
image=docker_image,
|
||||
dag=dag,
|
||||
)
|
||||
|
||||
export_public_data_json_telemetry_derived__ssl_ratios__v1 = GKEPodOperator(
|
||||
task_id="export_public_data_json_telemetry_derived__ssl_ratios__v1",
|
||||
name="export_public_data_json_telemetry_derived__ssl_ratios__v1",
|
||||
|
@ -55,6 +70,19 @@ with DAG(
|
|||
dag=dag,
|
||||
)
|
||||
|
||||
wait_for_mozregression_aggregates__v1 = ExternalTaskSensor(
|
||||
task_id="wait_for_mozregression_aggregates__v1",
|
||||
external_dag_id="bqetl_internal_tooling",
|
||||
external_task_id="mozregression_aggregates__v1",
|
||||
check_existence=True,
|
||||
mode="reschedule",
|
||||
pool="DATA_ENG_EXTERNALTASKSENSOR",
|
||||
)
|
||||
|
||||
export_public_data_json_mozregression_aggregates__v1.set_upstream(
|
||||
wait_for_mozregression_aggregates__v1
|
||||
)
|
||||
|
||||
wait_for_telemetry_derived__ssl_ratios__v1 = ExternalTaskSensor(
|
||||
task_id="wait_for_telemetry_derived__ssl_ratios__v1",
|
||||
external_dag_id="bqetl_ssl_ratios",
|
||||
|
@ -78,6 +106,7 @@ with DAG(
|
|||
|
||||
public_data_gcs_metadata.set_upstream(
|
||||
[
|
||||
export_public_data_json_mozregression_aggregates__v1,
|
||||
export_public_data_json_telemetry_derived__ssl_ratios__v1,
|
||||
]
|
||||
)
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
-- View that selects every column of the derived mozregression_aggregates_v1 table.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.org_mozilla_mozregression.mozregression_aggregates`
AS
SELECT
  *
FROM
  `moz-fx-data-shared-prod.org_mozilla_mozregression_derived.mozregression_aggregates_v1`
|
|
@ -0,0 +1,26 @@
|
|||
-- (Re)creates the date-partitioned aggregates table and fills it from the
-- mozregression usage pings submitted after 2020-04-01, leaving out any
-- client version that contains ".dev".
CREATE OR REPLACE TABLE
  `moz-fx-data-shared-prod.org_mozilla_mozregression_derived.mozregression_aggregates_v1`
PARTITION BY
  date
AS
SELECT
  DATE(submission_timestamp) AS date,
  client_info.app_display_version AS mozregression_version,
  metrics.string.usage_variant AS mozregression_variant,
  metrics.string.usage_app AS app_used,
  normalized_os AS os,
  mozfun.norm.truncate_version(normalized_os_version, "minor") AS os_version,
  -- One row per dimension combination: unique clients and raw ping count.
  COUNT(DISTINCT client_info.client_id) AS distinct_clients,
  COUNT(*) AS total_uses
FROM
  `moz-fx-data-shared-prod.org_mozilla_mozregression.usage`
WHERE
  DATE(submission_timestamp) > '2020-04-01'
  AND client_info.app_display_version NOT LIKE '%.dev%'
GROUP BY
  date,
  mozregression_version,
  mozregression_variant,
  app_used,
  os,
  os_version;
|
|
@ -0,0 +1,13 @@
|
|||
description: Aggregated metrics of mozregression usage
|
||||
friendly_name: mozregression aggregates
|
||||
labels:
|
||||
incremental: true
|
||||
public_bigquery: true
|
||||
public_json: true
|
||||
review_bugs:
|
||||
- '1691105'
|
||||
owners:
|
||||
- wlachance@mozilla.com
|
||||
scheduling:
|
||||
dag_name: bqetl_internal_tooling
|
||||
task_name: mozregression_aggregates__v1
|
|
@ -0,0 +1,21 @@
|
|||
-- Aggregates mozregression usage pings for a single day (@submission_date),
-- leaving out any client version that contains ".dev". Output columns mirror
-- the CREATE OR REPLACE TABLE statement that builds mozregression_aggregates_v1.
SELECT
  DATE(submission_timestamp) AS date,
  client_info.app_display_version AS mozregression_version,
  metrics.string.usage_variant AS mozregression_variant,
  metrics.string.usage_app AS app_used,
  normalized_os AS os,
  -- Truncate the OS version exactly as the table-creating statement does;
  -- selecting the raw normalized_os_version here would make the daily
  -- partitions disagree with the rows (and column definition) already in
  -- the table.
  mozfun.norm.truncate_version(normalized_os_version, "minor") AS os_version,
  count(DISTINCT(client_info.client_id)) AS distinct_clients,
  count(*) AS total_uses
FROM
  `moz-fx-data-shared-prod`.org_mozilla_mozregression.usage
WHERE
  DATE(submission_timestamp) = @submission_date
  AND client_info.app_display_version NOT LIKE '%.dev%'
GROUP BY
  date,
  mozregression_version,
  mozregression_variant,
  app_used,
  os,
  os_version;
|
Загрузка…
Ссылка в новой задаче