Move check for DAGs being up to date after DAGs have been generated (#3508)

This commit is contained in:
Anna Scholtz 2023-01-17 08:49:29 -08:00 коммит произвёл GitHub
Родитель 8e2210525c
Коммит 031320dfcf
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
5 изменённых файлов: 35 добавлений и 21 удалений

Просмотреть файл

@@ -206,7 +206,12 @@ jobs:
name: Generate DAGs
command: |
PATH="venv/bin:$PATH" script/bqetl dag generate
mv dags/ /tmp/workspace/generated-sql
cp -R dags/ /tmp/workspace/generated-sql
- run:
name: Verify that DAGs were correctly generated and are up-to-date
command: |
git diff --exit-code dags/
diff <(git ls-files dags/*.py) <(ls dags/*.py)
# although this task overwrites the content produced by generate-sql,
# the behaviour here is additive: generated DAGs are just added to
# the generated-sql output
@@ -260,23 +265,6 @@ jobs:
command: |
cd telemetry-airflow
bash bin/test-parse
verify-dags-up-to-date:
# todo: remove this step once we rely on generate-dags completely
docker: *docker
steps:
- checkout
- *restore_venv_cache
- *build
- *restore_mvn_cache
- *java_deps
- run:
name: Generate DAGs
command: PATH="venv/bin:$PATH" script/bqetl dag generate
- run:
name: Verify that DAGs were correctly generated and are up-to-date
command: |
git diff --exit-code
diff <(git ls-files dags/*.py) <(ls dags/*.py)
validate-docs:
docker: *docker
steps:
@@ -588,7 +576,6 @@ workflows:
- validate-dags:
requires:
- generate-dags
- verify-dags-up-to-date
- validate-docs:
requires:
- generate-sql

Просмотреть файл

@@ -142,7 +142,7 @@ EXTERNAL_TASKS = {
task_id="clients_last_seen_joined",
schedule_interval="0 1 * * *",
date_partition_offset=-1,
): ["*.clients_last_seen_joined"],
): ["*.clients_last_seen_joined*"],
# *_stable.* should be matched last since all
# pattern before are downstream dependencies of
# copy_deduplicate_all.

Просмотреть файл

@@ -81,6 +81,21 @@ with DAG(
fenix_derived__new_profile_activation__v1.set_upstream(
wait_for_baseline_clients_last_seen
)
wait_for_copy_deduplicate_all = ExternalTaskSensor(
task_id="wait_for_copy_deduplicate_all",
external_dag_id="copy_deduplicate",
external_task_id="copy_deduplicate_all",
execution_delta=datetime.timedelta(days=-1, seconds=82800),
check_existence=True,
mode="reschedule",
allowed_states=ALLOWED_STATES,
failed_states=FAILED_STATES,
pool="DATA_ENG_EXTERNALTASKSENSOR",
)
fenix_derived__new_profile_activation__v1.set_upstream(
wait_for_copy_deduplicate_all
)
wait_for_search_derived__mobile_search_clients_daily__v1 = ExternalTaskSensor(
task_id="wait_for_search_derived__mobile_search_clients_daily__v1",
external_dag_id="bqetl_mobile_search",

Просмотреть файл

@@ -16,3 +16,16 @@ bigquery:
scheduling:
dag_name: bqetl_mobile_activation
date_partition_parameter: submission_date
referenced_tables:
- - 'moz-fx-data-shared-prod'
- 'org_mozilla_firefox_stable'
- 'first_session_v1'
- - 'moz-fx-data-shared-prod'
- 'search_derived'
- 'mobile_search_clients_daily_v1'
- - 'moz-fx-data-shared-prod'
- 'fenix'
- 'baseline_clients_first_seen'
- - 'moz-fx-data-shared-prod'
- 'fenix'
- 'baseline_clients_last_seen'

Просмотреть файл

@@ -7,7 +7,6 @@ description: |-
owners:
- loines@mozilla.com
- shong@mozilla.com
- cdowhygelund@mozilla.com
- ascholtz@mozilla.com
- anicholson@mozilla.com
labels: