From 5cbdfa0dcc28df341e0b25ee76ca2e20adf42746 Mon Sep 17 00:00:00 2001
From: Jeff Klukas
Date: Wed, 3 Jun 2020 09:44:02 -0400
Subject: [PATCH] Add bqetl_amo_stats DAG

---
Reviewer notes (this region is not part of the commit message):
The line structure of this patch was restored from a whitespace-collapsed
copy. Line breaks follow the original "+" markers and every hunk matches the
line counts in its "@@" header; indentation inside the hunks is reconstructed
from YAML/black conventions and should be confirmed against the target tree.
The friendly_name in sql/amo_prod/amo_stats_installs_v1/metadata.yaml was
corrected from the copy-pasted "AMO Stats DAU" to "AMO Stats Installs", so the
"index" blob id recorded for that file no longer describes the edited content.

 dags.yaml                                     |  9 ++
 dags/bqetl_amo_stats.py                       | 84 +++++++++++++++++++
 sql/amo_dev/amo_stats_dau_v1/metadata.yaml    | 11 +++
 .../amo_stats_installs_v1/metadata.yaml       | 11 +++
 sql/amo_prod/amo_stats_dau_v1/metadata.yaml   | 18 ++++
 .../amo_stats_installs_v1/metadata.yaml       | 21 +++++
 6 files changed, 154 insertions(+)
 create mode 100644 dags/bqetl_amo_stats.py
 create mode 100644 sql/amo_dev/amo_stats_dau_v1/metadata.yaml
 create mode 100644 sql/amo_dev/amo_stats_installs_v1/metadata.yaml
 create mode 100644 sql/amo_prod/amo_stats_dau_v1/metadata.yaml
 create mode 100644 sql/amo_prod/amo_stats_installs_v1/metadata.yaml

diff --git a/dags.yaml b/dags.yaml
index 5d2cd24c5a..f69ad101d2 100644
--- a/dags.yaml
+++ b/dags.yaml
@@ -37,6 +37,15 @@ bqetl_deviations:
     retries: 2
     retry_delay: 30m
 
+bqetl_amo_stats:
+  schedule_interval: 0 1 * * *
+  default_args:
+    owner: jklukas@mozilla.com
+    start_date: '2020-06-01'
+    email: ['telemetry-alerts@mozilla.com', 'jklukas@mozilla.com']
+    retries: 2
+    retry_delay: 30m
+
 # DAG for exporting query data marked as public to GCS
 # queries should not be explicitly assigned to this DAG (it's done automatically)
 bqetl_public_data_json:
diff --git a/dags/bqetl_amo_stats.py b/dags/bqetl_amo_stats.py
new file mode 100644
index 0000000000..72e1658ec5
--- /dev/null
+++ b/dags/bqetl_amo_stats.py
@@ -0,0 +1,84 @@
+# Generated via query_scheduling/generate_airflow_dags
+
+from airflow import DAG
+from airflow.operators.sensors import ExternalTaskSensor
+import datetime
+from utils.gcp import bigquery_etl_query
+
+default_args = {
+    "owner": "jklukas@mozilla.com",
+    "start_date": datetime.datetime(2020, 6, 1, 0, 0),
+    "email": ["telemetry-alerts@mozilla.com", "jklukas@mozilla.com"],
+    "depends_on_past": False,
+    "retry_delay": datetime.timedelta(seconds=1800),
+    "email_on_failure": True,
+    "email_on_retry": True,
+    "retries": 2,
+}
+
+with DAG(
+    "bqetl_amo_stats", default_args=default_args, schedule_interval="0 1 * * *"
+) as dag:
+
+    amo_dev__amo_stats_dau__v1 = bigquery_etl_query(
+        task_id="amo_dev__amo_stats_dau__v1",
+        destination_table="amo_stats_dau_v1",
+        dataset_id="amo_dev",
+        project_id="moz-fx-data-shared-prod",
+        owner="jklukas@mozilla.com",
+        email=["jklukas@mozilla.com"],
+        date_partition_parameter="submission_date",
+        depends_on_past=False,
+        dag=dag,
+    )
+
+    amo_dev__amo_stats_installs__v1 = bigquery_etl_query(
+        task_id="amo_dev__amo_stats_installs__v1",
+        destination_table="amo_stats_installs_v1",
+        dataset_id="amo_dev",
+        project_id="moz-fx-data-shared-prod",
+        owner="jklukas@mozilla.com",
+        email=["jklukas@mozilla.com"],
+        date_partition_parameter="submission_date",
+        depends_on_past=False,
+        dag=dag,
+    )
+
+    amo_prod__amo_stats_dau__v1 = bigquery_etl_query(
+        task_id="amo_prod__amo_stats_dau__v1",
+        destination_table="amo_stats_dau_v1",
+        dataset_id="amo_prod",
+        project_id="moz-fx-data-shared-prod",
+        owner="jklukas@mozilla.com",
+        email=["jklukas@mozilla.com"],
+        date_partition_parameter="submission_date",
+        depends_on_past=False,
+        dag=dag,
+    )
+
+    amo_prod__amo_stats_installs__v1 = bigquery_etl_query(
+        task_id="amo_prod__amo_stats_installs__v1",
+        destination_table="amo_stats_installs_v1",
+        dataset_id="amo_prod",
+        project_id="moz-fx-data-shared-prod",
+        owner="jklukas@mozilla.com",
+        email=["jklukas@mozilla.com"],
+        date_partition_parameter="submission_date",
+        depends_on_past=False,
+        dag=dag,
+    )
+
+    amo_dev__amo_stats_dau__v1.set_upstream(amo_prod__amo_stats_dau__v1)
+
+    amo_dev__amo_stats_installs__v1.set_upstream(amo_prod__amo_stats_installs__v1)
+
+    wait_for_main_summary_clients_daily = ExternalTaskSensor(
+        task_id="wait_for_main_summary_clients_daily",
+        external_dag_id="main_summary",
+        external_task_id="clients_daily",
+        dag=dag,
+    )
+
+    amo_prod__amo_stats_dau__v1.set_upstream(wait_for_main_summary_clients_daily)
+
+    amo_prod__amo_stats_installs__v1.set_upstream(wait_for_main_summary_clients_daily)
diff --git a/sql/amo_dev/amo_stats_dau_v1/metadata.yaml b/sql/amo_dev/amo_stats_dau_v1/metadata.yaml
new file mode 100644
index 0000000000..d596134826
--- /dev/null
+++ b/sql/amo_dev/amo_stats_dau_v1/metadata.yaml
@@ -0,0 +1,11 @@
+friendly_name: AMO Stats DAU dev/stage
+description: >
+  Reduced stats table for dev and stage versions of the AMO service.
+owners:
+  - jklukas@mozilla.com
+labels:
+  application: amo
+  incremental: true
+  schedule: daily
+scheduling:
+  dag_name: bqetl_amo_stats
diff --git a/sql/amo_dev/amo_stats_installs_v1/metadata.yaml b/sql/amo_dev/amo_stats_installs_v1/metadata.yaml
new file mode 100644
index 0000000000..09001702c2
--- /dev/null
+++ b/sql/amo_dev/amo_stats_installs_v1/metadata.yaml
@@ -0,0 +1,11 @@
+friendly_name: AMO Installs dev/stage
+description: >
+  Reduced daily installs table for dev and stage versions of the AMO service.
+owners:
+  - jklukas@mozilla.com
+labels:
+  application: amo
+  incremental: true
+  schedule: daily
+scheduling:
+  dag_name: bqetl_amo_stats
diff --git a/sql/amo_prod/amo_stats_dau_v1/metadata.yaml b/sql/amo_prod/amo_stats_dau_v1/metadata.yaml
new file mode 100644
index 0000000000..fbe246c4a3
--- /dev/null
+++ b/sql/amo_prod/amo_stats_dau_v1/metadata.yaml
@@ -0,0 +1,18 @@
+friendly_name: AMO Stats DAU
+description: >
+  Daily user statistics to power addons.mozilla.org stats pages. See bug 1572873.
+
+  Each row in this table represents a particular addon on a particular day
+  and provides all the information needed to populate the various
+  "Daily Users" plots for the AMO stats dashboard.
+owners:
+  - jklukas@mozilla.com
+labels:
+  application: amo
+  incremental: true
+  schedule: daily
+scheduling:
+  dag_name: bqetl_amo_stats
+  depends_on:
+    - task_id: clients_daily
+      dag_name: main_summary
diff --git a/sql/amo_prod/amo_stats_installs_v1/metadata.yaml b/sql/amo_prod/amo_stats_installs_v1/metadata.yaml
new file mode 100644
index 0000000000..1c3421f709
--- /dev/null
+++ b/sql/amo_prod/amo_stats_installs_v1/metadata.yaml
@@ -0,0 +1,21 @@
+friendly_name: AMO Stats Installs
+description: >
+  Daily install statistics to power addons.mozilla.org stats pages. See bug 1572873.
+
+  This query looks backward in time by two days in order to allow
+  some delay in installs actually being reported, which means each
+  submission_date partition actually reflects installs from two days
+  prior. We adjust for this in the user-facing view on top of this
+  table (telemetry.amo_stats_installs), where we replace
+  `submission_date` with `install_date`.
+owners:
+  - jklukas@mozilla.com
+labels:
+  application: amo
+  incremental: true
+  schedule: daily
+scheduling:
+  dag_name: bqetl_amo_stats
+  depends_on:
+    - task_id: clients_daily
+      dag_name: main_summary