This commit is contained in:
Anna Scholtz 2020-07-10 12:35:45 -07:00
Родитель 8d773e5fff
Коммит 41b05f58d3
33 изменённых файлов: 139 добавлений и 178 удалений

Просмотреть файл

@ -18,7 +18,7 @@ bqetl_ssl_ratios:
retry_delay: 30m
bqetl_deviations:
schedule_interval: 0 2 * * *
schedule_interval: 0 4 * * *
default_args:
owner: ascholtz@mozilla.com
start_date: '2020-03-29'
@ -36,7 +36,7 @@ bqetl_amo_stats:
retry_delay: 30m
bqetl_vrbrowser:
schedule_interval: 0 3 * * *
schedule_interval: 0 2 * * *
default_args:
owner: jklukas@mozilla.com
start_date: '2019-07-25'
@ -54,7 +54,7 @@ bqetl_core:
retry_delay: 5m
bqetl_nondesktop:
schedule_interval: 0 2 * * *
schedule_interval: 0 3 * * *
default_args:
owner: jklukas@mozilla.com
start_date: '2019-07-25'
@ -63,7 +63,7 @@ bqetl_nondesktop:
retry_delay: 5m
bqetl_mobile_search:
schedule_interval: 0 3 * * *
schedule_interval: 0 2 * * *
default_args:
owner: bewu@mozilla.com
start_date: '2019-07-25'
@ -81,7 +81,7 @@ bqetl_gud:
retry_delay: 5m
bqetl_messaging_system:
schedule_interval: 0 3 * * *
schedule_interval: 0 2 * * *
default_args:
owner: najiang@mozilla.com
start_date: '2019-07-25'
@ -90,7 +90,7 @@ bqetl_messaging_system:
retry_delay: 5m
bqetl_activity_stream:
schedule_interval: 0 3 * * *
schedule_interval: 0 2 * * *
default_args:
owner: jklukas@mozilla.com
start_date: '2019-07-25'
@ -115,15 +115,6 @@ bqetl_addons:
email: ['telemetry-alerts@mozilla.com', 'bmiroglio@mozilla.com']
retries: 2
retry_delay: 30m
bqetl_clients_daily:
schedule_interval: 0 1 * * *
default_args:
owner: dthorn@mozilla.com
start_date: '2018-11-27'
email: ['telemetry-alerts@mozilla.com', 'dthorn@mozilla.com', 'jklukas@mozilla.com', 'frank@mozilla.com']
retries: 2
retry_delay: 30m
bqetl_devtools:
schedule_interval: 0 3 * * *
@ -135,7 +126,7 @@ bqetl_devtools:
retry_delay: 30m
bqetl_main_summary:
schedule_interval: 0 1 * * *
schedule_interval: 0 2 * * *
default_args:
owner: dthorn@mozilla.com
start_date: '2018-11-27'
@ -164,7 +155,7 @@ bqetl_document_sample:
# DAG for exporting query data marked as public to GCS
# queries should not be explicitly assigned to this DAG (it's done automatically)
bqetl_public_data_json:
schedule_interval: 0 2 * * *
schedule_interval: 0 4 * * *
default_args:
owner: ascholtz@mozilla.com
start_date: '2020-04-14'

Просмотреть файл

@ -17,7 +17,7 @@ default_args = {
}
with DAG(
"bqetl_activity_stream", default_args=default_args, schedule_interval="0 3 * * *"
"bqetl_activity_stream", default_args=default_args, schedule_interval="0 2 * * *"
) as dag:
activity_stream_bi__impression_stats_flat__v1 = bigquery_etl_query(
@ -36,7 +36,7 @@ with DAG(
task_id="wait_for_copy_deduplicate_copy_deduplicate_all",
external_dag_id="copy_deduplicate",
external_task_id="copy_deduplicate_all",
execution_delta=datetime.timedelta(seconds=7200),
execution_delta=datetime.timedelta(seconds=3600),
check_existence=True,
mode="reschedule",
dag=dag,

Просмотреть файл

@ -82,9 +82,9 @@ with DAG(
)
wait_for_telemetry_derived__clients_last_seen__v1 = ExternalTaskSensor(
task_id="wait_for_telemetry_derived__clients_last_seen__v1",
external_dag_id="bqetl_clients_daily",
external_dag_id="bqetl_main_summary",
external_task_id="telemetry_derived__clients_last_seen__v1",
execution_delta=datetime.timedelta(seconds=7200),
execution_delta=datetime.timedelta(seconds=3600),
check_existence=True,
mode="reschedule",
)

Просмотреть файл

@ -98,9 +98,9 @@ with DAG(
wait_for_telemetry_derived__clients_daily__v6 = ExternalTaskSensor(
task_id="wait_for_telemetry_derived__clients_daily__v6",
external_dag_id="bqetl_clients_daily",
external_dag_id="bqetl_main_summary",
external_task_id="telemetry_derived__clients_daily__v6",
execution_delta=datetime.timedelta(seconds=7200),
execution_delta=datetime.timedelta(seconds=3600),
check_existence=True,
mode="reschedule",
)

Просмотреть файл

@ -1,120 +0,0 @@
# Generated via https://github.com/mozilla/bigquery-etl/blob/master/bigquery_etl/query_scheduling/generate_airflow_dags.py
from airflow import DAG
from airflow.operators.sensors import ExternalTaskSensor
import datetime
from utils.gcp import bigquery_etl_query
# Task-level defaults applied to every operator created in this DAG unless a
# task passes its own value for the same argument.
default_args = dict(
    owner="dthorn@mozilla.com",
    start_date=datetime.datetime(2018, 11, 27),
    email=[
        "telemetry-alerts@mozilla.com",
        "dthorn@mozilla.com",
        "jklukas@mozilla.com",
        "frank@mozilla.com",
    ],
    depends_on_past=False,
    retry_delay=datetime.timedelta(seconds=1800),
    email_on_failure=True,
    email_on_retry=True,
    retries=2,
)

with DAG(
    "bqetl_clients_daily",
    default_args=default_args,
    schedule_interval="0 1 * * *",
) as dag:
    # All queries in this DAG write to the same project and dataset.
    shared_target = {
        "dataset_id": "telemetry_derived",
        "project_id": "moz-fx-data-shared-prod",
    }

    # --- Query tasks -------------------------------------------------------

    telemetry_derived__clients_first_seen__v1 = bigquery_etl_query(
        task_id="telemetry_derived__clients_first_seen__v1",
        destination_table="clients_first_seen_v1",
        owner="jklukas@mozilla.com",
        email=["jklukas@mozilla.com"],
        start_date=datetime.datetime(2020, 5, 5),
        # No partition parameter is passed; the date is supplied explicitly
        # through `parameters` instead.
        date_partition_parameter=None,
        depends_on_past=True,
        parameters=["submission_date:DATE:{{ds}}"],
        priority_weight=80,
        dag=dag,
        **shared_target,
    )

    firefox_desktop_exact_mau28_by_client_count_dimensions = bigquery_etl_query(
        task_id="firefox_desktop_exact_mau28_by_client_count_dimensions",
        destination_table="firefox_desktop_exact_mau28_by_client_count_dimensions_v1",
        owner="jklukas@mozilla.com",
        email=["jklukas@mozilla.com"],
        date_partition_parameter="submission_date",
        depends_on_past=False,
        dag=dag,
        **shared_target,
    )

    telemetry_derived__clients_daily__v6 = bigquery_etl_query(
        task_id="telemetry_derived__clients_daily__v6",
        destination_table="clients_daily_v6",
        owner="dthorn@mozilla.com",
        email=["dthorn@mozilla.com"],
        start_date=datetime.datetime(2019, 11, 5),
        date_partition_parameter="submission_date",
        depends_on_past=False,
        dag=dag,
        **shared_target,
    )

    telemetry_derived__clients_last_seen__v1 = bigquery_etl_query(
        task_id="telemetry_derived__clients_last_seen__v1",
        destination_table="clients_last_seen_v1",
        owner="dthorn@mozilla.com",
        email=["dthorn@mozilla.com", "jklukas@mozilla.com"],
        start_date=datetime.datetime(2019, 4, 15),
        date_partition_parameter="submission_date",
        depends_on_past=True,
        priority_weight=85,
        dag=dag,
        **shared_target,
    )

    firefox_desktop_exact_mau28_by_dimensions = bigquery_etl_query(
        task_id="firefox_desktop_exact_mau28_by_dimensions",
        destination_table="firefox_desktop_exact_mau28_by_dimensions_v1",
        owner="relud@mozilla.com",
        email=["relud@mozilla.com"],
        date_partition_parameter="submission_date",
        depends_on_past=False,
        dag=dag,
        **shared_target,
    )

    # --- Cross-DAG dependency ----------------------------------------------

    # Sensor on the main_summary task that lives in the bqetl_main_summary
    # DAG. No `dag=` argument is passed; the sensor is created inside the
    # `with DAG(...)` block.
    wait_for_telemetry_derived__main_summary__v4 = ExternalTaskSensor(
        task_id="wait_for_telemetry_derived__main_summary__v4",
        external_dag_id="bqetl_main_summary",
        external_task_id="telemetry_derived__main_summary__v4",
        check_existence=True,
        mode="reschedule",
    )

    # --- Dependency wiring (upstream -> downstream) --------------------------
    #
    #   main_summary sensor -> clients_daily -> clients_first_seen
    #                                        -> clients_last_seen -> mau28_by_client_count_dimensions
    #                                                             -> mau28_by_dimensions

    wait_for_telemetry_derived__main_summary__v4.set_downstream(
        telemetry_derived__clients_daily__v6
    )
    telemetry_derived__clients_daily__v6.set_downstream(
        [
            telemetry_derived__clients_first_seen__v1,
            telemetry_derived__clients_last_seen__v1,
        ]
    )
    telemetry_derived__clients_last_seen__v1.set_downstream(
        [
            firefox_desktop_exact_mau28_by_client_count_dimensions,
            firefox_desktop_exact_mau28_by_dimensions,
        ]
    )

Просмотреть файл

@ -21,7 +21,7 @@ default_args = {
}
with DAG(
"bqetl_deviations", default_args=default_args, schedule_interval="0 2 * * *"
"bqetl_deviations", default_args=default_args, schedule_interval="0 4 * * *"
) as dag:
telemetry_derived__deviations__v1 = bigquery_etl_query(
@ -40,6 +40,7 @@ with DAG(
task_id="wait_for_anomdtct_anomdtct",
external_dag_id="anomdtct",
external_task_id="anomdtct",
execution_delta=datetime.timedelta(seconds=3600),
check_existence=True,
mode="reschedule",
dag=dag,

Просмотреть файл

@ -35,9 +35,9 @@ with DAG(
wait_for_telemetry_derived__clients_daily__v6 = ExternalTaskSensor(
task_id="wait_for_telemetry_derived__clients_daily__v6",
external_dag_id="bqetl_clients_daily",
external_dag_id="bqetl_main_summary",
external_task_id="telemetry_derived__clients_daily__v6",
execution_delta=datetime.timedelta(seconds=7200),
execution_delta=datetime.timedelta(seconds=3600),
check_existence=True,
mode="reschedule",
)

Просмотреть файл

@ -34,9 +34,9 @@ with DAG(
wait_for_telemetry_derived__clients_daily__v6 = ExternalTaskSensor(
task_id="wait_for_telemetry_derived__clients_daily__v6",
external_dag_id="bqetl_clients_daily",
external_dag_id="bqetl_main_summary",
external_task_id="telemetry_derived__clients_daily__v6",
execution_delta=datetime.timedelta(seconds=7200),
execution_delta=datetime.timedelta(seconds=3600),
check_existence=True,
mode="reschedule",
)

Просмотреть файл

@ -92,9 +92,9 @@ with DAG("bqetl_gud", default_args=default_args, schedule_interval="0 3 * * *")
wait_for_telemetry_derived__clients_last_seen__v1 = ExternalTaskSensor(
task_id="wait_for_telemetry_derived__clients_last_seen__v1",
external_dag_id="bqetl_clients_daily",
external_dag_id="bqetl_main_summary",
external_task_id="telemetry_derived__clients_last_seen__v1",
execution_delta=datetime.timedelta(seconds=7200),
execution_delta=datetime.timedelta(seconds=3600),
check_existence=True,
mode="reschedule",
)

Просмотреть файл

@ -34,9 +34,9 @@ with DAG(
wait_for_telemetry_derived__clients_daily__v6 = ExternalTaskSensor(
task_id="wait_for_telemetry_derived__clients_daily__v6",
external_dag_id="bqetl_clients_daily",
external_dag_id="bqetl_main_summary",
external_task_id="telemetry_derived__clients_daily__v6",
execution_delta=datetime.timedelta(seconds=7200),
execution_delta=datetime.timedelta(seconds=3600),
check_existence=True,
mode="reschedule",
)

Просмотреть файл

@ -22,9 +22,36 @@ default_args = {
}
with DAG(
"bqetl_main_summary", default_args=default_args, schedule_interval="0 1 * * *"
"bqetl_main_summary", default_args=default_args, schedule_interval="0 2 * * *"
) as dag:
telemetry_derived__clients_first_seen__v1 = bigquery_etl_query(
task_id="telemetry_derived__clients_first_seen__v1",
destination_table="clients_first_seen_v1",
dataset_id="telemetry_derived",
project_id="moz-fx-data-shared-prod",
owner="jklukas@mozilla.com",
email=["jklukas@mozilla.com"],
start_date=datetime.datetime(2020, 5, 5, 0, 0),
date_partition_parameter=None,
depends_on_past=True,
parameters=["submission_date:DATE:{{ds}}"],
priority_weight=80,
dag=dag,
)
firefox_desktop_exact_mau28_by_client_count_dimensions = bigquery_etl_query(
task_id="firefox_desktop_exact_mau28_by_client_count_dimensions",
destination_table="firefox_desktop_exact_mau28_by_client_count_dimensions_v1",
dataset_id="telemetry_derived",
project_id="moz-fx-data-shared-prod",
owner="jklukas@mozilla.com",
email=["jklukas@mozilla.com"],
date_partition_parameter="submission_date",
depends_on_past=False,
dag=dag,
)
telemetry_derived__main_summary__v4 = bigquery_etl_query(
task_id="telemetry_derived__main_summary__v4",
destination_table="main_summary_v4",
@ -41,6 +68,53 @@ with DAG(
dag=dag,
)
telemetry_derived__clients_daily__v6 = bigquery_etl_query(
task_id="telemetry_derived__clients_daily__v6",
destination_table="clients_daily_v6",
dataset_id="telemetry_derived",
project_id="moz-fx-data-shared-prod",
owner="dthorn@mozilla.com",
email=["dthorn@mozilla.com"],
start_date=datetime.datetime(2019, 11, 5, 0, 0),
date_partition_parameter="submission_date",
depends_on_past=False,
dag=dag,
)
telemetry_derived__clients_last_seen__v1 = bigquery_etl_query(
task_id="telemetry_derived__clients_last_seen__v1",
destination_table="clients_last_seen_v1",
dataset_id="telemetry_derived",
project_id="moz-fx-data-shared-prod",
owner="dthorn@mozilla.com",
email=["dthorn@mozilla.com", "jklukas@mozilla.com"],
start_date=datetime.datetime(2019, 4, 15, 0, 0),
date_partition_parameter="submission_date",
depends_on_past=True,
priority_weight=85,
dag=dag,
)
firefox_desktop_exact_mau28_by_dimensions = bigquery_etl_query(
task_id="firefox_desktop_exact_mau28_by_dimensions",
destination_table="firefox_desktop_exact_mau28_by_dimensions_v1",
dataset_id="telemetry_derived",
project_id="moz-fx-data-shared-prod",
owner="relud@mozilla.com",
email=["relud@mozilla.com"],
date_partition_parameter="submission_date",
depends_on_past=False,
dag=dag,
)
telemetry_derived__clients_first_seen__v1.set_upstream(
telemetry_derived__clients_daily__v6
)
firefox_desktop_exact_mau28_by_client_count_dimensions.set_upstream(
telemetry_derived__clients_last_seen__v1
)
wait_for_copy_deduplicate_copy_deduplicate_main_ping = ExternalTaskSensor(
task_id="wait_for_copy_deduplicate_copy_deduplicate_main_ping",
external_dag_id="copy_deduplicate",
@ -54,3 +128,15 @@ with DAG(
telemetry_derived__main_summary__v4.set_upstream(
wait_for_copy_deduplicate_copy_deduplicate_main_ping
)
telemetry_derived__clients_daily__v6.set_upstream(
telemetry_derived__main_summary__v4
)
telemetry_derived__clients_last_seen__v1.set_upstream(
telemetry_derived__clients_daily__v6
)
firefox_desktop_exact_mau28_by_dimensions.set_upstream(
telemetry_derived__clients_last_seen__v1
)

Просмотреть файл

@ -17,7 +17,7 @@ default_args = {
}
with DAG(
"bqetl_messaging_system", default_args=default_args, schedule_interval="0 3 * * *"
"bqetl_messaging_system", default_args=default_args, schedule_interval="0 2 * * *"
) as dag:
messaging_system_derived__onboarding_users_daily__v1 = bigquery_etl_query(
@ -132,7 +132,7 @@ with DAG(
task_id="wait_for_copy_deduplicate_copy_deduplicate_all",
external_dag_id="copy_deduplicate",
external_task_id="copy_deduplicate_all",
execution_delta=datetime.timedelta(seconds=7200),
execution_delta=datetime.timedelta(seconds=3600),
check_existence=True,
mode="reschedule",
dag=dag,

Просмотреть файл

@ -17,7 +17,7 @@ default_args = {
}
with DAG(
"bqetl_mobile_search", default_args=default_args, schedule_interval="0 3 * * *"
"bqetl_mobile_search", default_args=default_args, schedule_interval="0 2 * * *"
) as dag:
search_derived__mobile_search_clients_daily__v1 = bigquery_etl_query(
@ -48,7 +48,7 @@ with DAG(
task_id="wait_for_copy_deduplicate_copy_deduplicate_all",
external_dag_id="copy_deduplicate",
external_task_id="copy_deduplicate_all",
execution_delta=datetime.timedelta(seconds=7200),
execution_delta=datetime.timedelta(seconds=3600),
check_existence=True,
mode="reschedule",
dag=dag,

Просмотреть файл

@ -17,7 +17,7 @@ default_args = {
}
with DAG(
"bqetl_nondesktop", default_args=default_args, schedule_interval="0 2 * * *"
"bqetl_nondesktop", default_args=default_args, schedule_interval="0 3 * * *"
) as dag:
telemetry_derived__firefox_nondesktop_day_2_7_activation__v1 = bigquery_etl_query(
@ -60,6 +60,7 @@ with DAG(
task_id="wait_for_telemetry_derived__core_clients_last_seen__v1",
external_dag_id="bqetl_core",
external_task_id="telemetry_derived__core_clients_last_seen__v1",
execution_delta=datetime.timedelta(seconds=3600),
check_existence=True,
mode="reschedule",
)
@ -71,7 +72,7 @@ with DAG(
task_id="wait_for_copy_deduplicate_baseline_clients_last_seen",
external_dag_id="copy_deduplicate",
external_task_id="baseline_clients_last_seen",
execution_delta=datetime.timedelta(seconds=3600),
execution_delta=datetime.timedelta(seconds=7200),
check_existence=True,
mode="reschedule",
dag=dag,

Просмотреть файл

@ -18,7 +18,7 @@ default_args = {
}
with DAG(
"bqetl_public_data_json", default_args=default_args, schedule_interval="0 2 * * *"
"bqetl_public_data_json", default_args=default_args, schedule_interval="0 4 * * *"
) as dag:
docker_image = "mozilla/bigquery-etl:latest"
@ -52,6 +52,7 @@ with DAG(
task_id="wait_for_telemetry_derived__ssl_ratios__v1",
external_dag_id="bqetl_ssl_ratios",
external_task_id="telemetry_derived__ssl_ratios__v1",
execution_delta=datetime.timedelta(seconds=7200),
check_existence=True,
mode="reschedule",
)

Просмотреть файл

@ -84,7 +84,7 @@ with DAG(
task_id="wait_for_telemetry_derived__main_summary__v4",
external_dag_id="bqetl_main_summary",
external_task_id="telemetry_derived__main_summary__v4",
execution_delta=datetime.timedelta(seconds=7200),
execution_delta=datetime.timedelta(seconds=3600),
check_existence=True,
mode="reschedule",
)

Просмотреть файл

@ -21,7 +21,7 @@ default_args = {
}
with DAG(
"bqetl_vrbrowser", default_args=default_args, schedule_interval="0 3 * * *"
"bqetl_vrbrowser", default_args=default_args, schedule_interval="0 2 * * *"
) as dag:
org_mozilla_vrbrowser_derived__baseline_daily__v1 = bigquery_etl_query(
@ -76,7 +76,7 @@ with DAG(
task_id="wait_for_copy_deduplicate_copy_deduplicate_all",
external_dag_id="copy_deduplicate",
external_task_id="copy_deduplicate_all",
execution_delta=datetime.timedelta(seconds=7200),
execution_delta=datetime.timedelta(seconds=3600),
check_existence=True,
mode="reschedule",
dag=dag,

Просмотреть файл

@ -10,4 +10,4 @@ scheduling:
depends_on:
- dag_name: copy_deduplicate
task_id: copy_deduplicate_all
execution_delta: 2h
execution_delta: 1h

Просмотреть файл

@ -9,4 +9,4 @@ scheduling:
depends_on:
- dag_name: copy_deduplicate
task_id: copy_deduplicate_all
execution_delta: 2h
execution_delta: 1h

Просмотреть файл

@ -9,4 +9,4 @@ scheduling:
depends_on:
- dag_name: copy_deduplicate
task_id: copy_deduplicate_all
execution_delta: 2h
execution_delta: 1h

Просмотреть файл

@ -9,4 +9,4 @@ scheduling:
depends_on:
- dag_name: copy_deduplicate
task_id: copy_deduplicate_all
execution_delta: 2h
execution_delta: 1h

Просмотреть файл

@ -13,4 +13,4 @@ scheduling:
depends_on:
- dag_name: copy_deduplicate
task_id: copy_deduplicate_all
execution_delta: 2h
execution_delta: 1h

Просмотреть файл

@ -13,4 +13,4 @@ scheduling:
depends_on:
- dag_name: copy_deduplicate
task_id: copy_deduplicate_all
execution_delta: 2h
execution_delta: 1h

Просмотреть файл

@ -11,4 +11,4 @@ scheduling:
depends_on:
- dag_name: copy_deduplicate
task_id: copy_deduplicate_all
execution_delta: 2h
execution_delta: 1h

Просмотреть файл

@ -8,5 +8,5 @@ labels:
application: firefox
schedule: daily
scheduling:
dag_name: bqetl_clients_daily
dag_name: bqetl_main_summary
start_date: '2019-11-05'

Просмотреть файл

@ -10,7 +10,7 @@ labels:
incremental: true
schedule: daily
scheduling:
dag_name: bqetl_clients_daily
dag_name: bqetl_main_summary
start_date: '2020-05-05'
priority: 80
depends_on_past: true

Просмотреть файл

@ -8,7 +8,7 @@ labels:
application: firefox
schedule: daily
scheduling:
dag_name: bqetl_clients_daily
dag_name: bqetl_main_summary
priority: 85
start_date: '2019-04-15'
email: ['dthorn@mozilla.com', 'jklukas@mozilla.com']

Просмотреть файл

@ -15,3 +15,4 @@ scheduling:
depends_on:
- task_id: anomdtct
dag_name: anomdtct
execution_delta: 1h

Просмотреть файл

@ -8,5 +8,5 @@ labels:
schedule: daily
incremental: true
scheduling:
dag_name: bqetl_clients_daily
dag_name: bqetl_main_summary
task_name: firefox_desktop_exact_mau28_by_client_count_dimensions

Просмотреть файл

@ -8,5 +8,5 @@ labels:
schedule: daily
incremental: true
scheduling:
dag_name: bqetl_clients_daily
dag_name: bqetl_main_summary
task_name: firefox_desktop_exact_mau28_by_dimensions

Просмотреть файл

@ -14,4 +14,4 @@ scheduling:
depends_on:
- dag_name: copy_deduplicate
task_id: baseline_clients_last_seen
execution_delta: 1h
execution_delta: 2h

Просмотреть файл

@ -13,4 +13,4 @@ scheduling:
depends_on:
- dag_name: copy_deduplicate
task_id: baseline_clients_last_seen
execution_delta: 1h
execution_delta: 2h

Просмотреть файл

@ -12,4 +12,4 @@ scheduling:
depends_on:
- dag_name: copy_deduplicate
task_id: baseline_clients_last_seen
execution_delta: 1h
execution_delta: 2h