feat: create two new dags for the Firefox-CI ETL (#2039)

This adds two new DAGs:

* fxci_metric_export - Retrieves metrics from Google Cloud Monitoring
  and inserts them into BigQuery
* fxci_pulse_export - Connects to Taskcluster pulse queues, drains the
  events and inserts them into BigQuery

Bug: 1904928

Co-authored-by: akkomar <akkomar@users.noreply.github.com>
This commit is contained in:
Andrew Halberstadt 2024-07-12 12:15:26 -04:00 коммит произвёл GitHub
Родитель e2fc9a799c
Коммит b0ec26b650
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
2 изменённых файлов: 149 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,72 @@
"""
Exports Firefox-CI worker data from Google Cloud Monitoring to BigQuery.
The container is defined in [fxci-etl](https://github.com/mozilla-releng/fxci-etl).
"""
from datetime import datetime, timedelta
from airflow import DAG
from airflow.providers.cncf.kubernetes.secret import Secret
from operators.gcp_container_operator import GKEPodOperator
from utils.tags import Tag
# Task-level defaults for this DAG; handed to DAG(default_args=...) below.
default_args = {
    "owner": "ahalberstadt@mozilla.com",
    "depends_on_past": False,
    "start_date": datetime(2024, 7, 8),
    # E-mail on a final failure only; individual retries do not notify.
    "email_on_failure": True,
    "email_on_retry": False,
    # A failed task is retried once, 30 minutes after the failed attempt.
    "retries": 1,
    "retry_delay": timedelta(minutes=30),
}
# Tags attached to the DAG: marks it as impact tier 3 (tier semantics are
# defined by utils.tags.Tag).
tags = [Tag.ImpactTier.tier_3]
# Non-secret environment variables given to the pod via env_vars= below.
env_vars = {
    # BigQuery project and dataset settings.
    "FXCI_ETL_BIGQUERY_PROJECT": "moz-fx-data-shared-prod",
    "FXCI_ETL_BIGQUERY_DATASET": "fxci",
    # Storage project and bucket settings.
    "FXCI_ETL_STORAGE_PROJECT": "moz-fx-dev-releng",
    "FXCI_ETL_STORAGE_BUCKET": "fxci-etl",
}
# Kubernetes secrets exposed to the pod as environment variables. Both
# variables are filled from the same key of the "airflow-gke-secrets" secret.
secrets = [
    Secret(
        deploy_type="env",
        deploy_target=env_name,
        secret="airflow-gke-secrets",
        key="fxci_etl_secret__gcp-credentials",
    )
    for env_name in (
        "FXCI_ETL_STORAGE_CREDENTIALS",
        "FXCI_ETL_MONITORING_CREDENTIALS",
    )
]
# Daily DAG with a single GKEPodOperator task that runs
# `fxci-etl metric export -vv` in the fxci-etl container image.
with DAG(
    "fxci_metric_export",
    default_args=default_args,
    doc_md=__doc__,  # the module docstring doubles as the DAG documentation
    schedule_interval="@daily",
    tags=tags,
) as dag:
    fxci_metric_export = GKEPodOperator(
        task_id="fxci_metric_export",
        dag=dag,
        gcp_conn_id="google_cloud_airflow_gke",
        image="gcr.io/moz-fx-data-airflow-prod-88e0/fxci-taskcluster-export_docker_etl:latest",
        # Arguments handed to the container.
        arguments=["fxci-etl", "metric", "export", "-vv"],
        env_vars=env_vars,
        secrets=secrets,
        email=["ahalberstadt@mozilla.com"],
    )

77
dags/fxci_pulse_export.py Normal file
Просмотреть файл

@ -0,0 +1,77 @@
"""
Exports Firefox-CI task and run data from Taskcluster to BigQuery.
This connects to and drains three separate Taskcluster pulse queues, and
exports each message into BigQuery.
The container is defined in [fxci-etl](https://github.com/mozilla-releng/fxci-etl).
"""
from datetime import datetime, timedelta
from airflow import DAG
from airflow.providers.cncf.kubernetes.secret import Secret
from operators.gcp_container_operator import GKEPodOperator
from utils.tags import Tag
# Task-level defaults for this DAG; handed to DAG(default_args=...) below.
default_args = {
    "owner": "ahalberstadt@mozilla.com",
    "depends_on_past": False,
    "start_date": datetime(2024, 7, 8),
    # E-mail on a final failure only; individual retries do not notify.
    "email_on_failure": True,
    "email_on_retry": False,
    # A failed task is retried once, 30 minutes after the failed attempt.
    "retries": 1,
    "retry_delay": timedelta(minutes=30),
}
# Tags attached to the DAG: marks it as impact tier 3 (tier semantics are
# defined by utils.tags.Tag).
tags = [Tag.ImpactTier.tier_3]
# Non-secret environment variables given to the pod via env_vars= below.
env_vars = {
    # BigQuery project and dataset settings.
    "FXCI_ETL_BIGQUERY_PROJECT": "moz-fx-data-shared-prod",
    "FXCI_ETL_BIGQUERY_DATASET": "fxci",
    # Storage project and bucket settings.
    "FXCI_ETL_STORAGE_PROJECT": "moz-fx-dev-releng",
    "FXCI_ETL_STORAGE_BUCKET": "fxci-etl",
    # Pulse user name; the matching password is supplied separately through
    # the FXCI_ETL_PULSE_PASSWORD secret.
    "FXCI_ETL_PULSE_USER": "fxci-etl",
}
# Kubernetes secrets exposed to the pod as environment variables. Each pair is
# (environment variable name, key inside the "airflow-gke-secrets" secret).
secrets = [
    Secret(
        deploy_type="env",
        deploy_target=env_name,
        secret="airflow-gke-secrets",
        key=secret_key,
    )
    for env_name, secret_key in (
        ("FXCI_ETL_STORAGE_CREDENTIALS", "fxci_etl_secret__gcp-credentials"),
        ("FXCI_ETL_PULSE_PASSWORD", "fxci_etl_secret__pulse-password"),
    )
]
# DAG with a single GKEPodOperator task that runs `fxci-etl pulse drain -vv`
# in the fxci-etl container image. The cron expression fires at minute 30 of
# every sixth hour.
with DAG(
    "fxci_pulse_export",
    default_args=default_args,
    doc_md=__doc__,  # the module docstring doubles as the DAG documentation
    schedule_interval="30 */6 * * *",
    tags=tags,
) as dag:
    fxci_pulse_export = GKEPodOperator(
        task_id="fxci_pulse_export",
        dag=dag,
        gcp_conn_id="google_cloud_airflow_gke",
        image="gcr.io/moz-fx-data-airflow-prod-88e0/fxci-taskcluster-export_docker_etl:latest",
        # Arguments handed to the container.
        arguments=["fxci-etl", "pulse", "drain", "-vv"],
        env_vars=env_vars,
        secrets=secrets,
        email=["ahalberstadt@mozilla.com"],
    )