chore: Remove data observability POC DAGs (#2049)

Anna Scholtz 2024-07-22 12:21:23 -07:00 committed by GitHub
Parent 23970b61a5
Commit 79b3199fd4
No key found matching this signature
GPG key ID: B5690EEEBB952194
6 changed files: 0 additions and 257 deletions

View File

@@ -1,72 +0,0 @@
"""
DAG to test out the datahub Airflow integration.
This is just a POC. Ignore in Airflow triage
"""
from datetime import datetime, timedelta
from airflow import DAG
from datahub_airflow_plugin.operators.datahub_assertion_operator import (
DataHubAssertionOperator,
)
from utils.gcp import bigquery_etl_query
from utils.tags import Tag
default_args = {
"owner": "ascholtz@mozilla.com",
"email": [
"ascholtz@mozilla.com",
],
"depends_on_past": False,
"start_date": datetime(2024, 5, 21),
"email_on_failure": False,
"email_on_retry": False,
"retries": 2,
"retry_delay": timedelta(minutes=30),
}
tags = [Tag.ImpactTier.tier_3, Tag.Triage.no_triage]
with DAG(
"datahub_poc",
default_args=default_args,
schedule_interval="15 3 * * *",
doc_md=__doc__,
tags=tags,
) as dag:
latest_versions = bigquery_etl_query(
reattach_on_restart=True,
task_id="latest_versions",
destination_table="latest_versions_v1",
dataset_id="telemetry_derived",
sql_file_path="sql/data-observability-dev/telemetry_derived/latest_versions_v1/query.sql",
project_id="data-observability-dev",
date_partition_parameter=None,
arguments=("--replace",),
dag=dag,
)
datahub__telemetry_derived__latest_versions_v1 = DataHubAssertionOperator(
task_id="datahub__telemetry_derived__latest_versions_v1",
datahub_rest_conn_id="datahub_rest_default",
urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,data-observability-dev.telemetry_derived.latest_versions_v1,PROD)",
dag=dag,
)
latest_versions >> datahub__telemetry_derived__latest_versions_v1
datahub__fenix_derived__metrics_clients_last_seen_v1 = DataHubAssertionOperator(
task_id="datahub__fenix_derived__metrics_clients_last_seen_v1",
datahub_rest_conn_id="datahub_rest_default",
urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,data-observability-dev.fenix_derived.metrics_clients_last_seen_v1,PROD)",
dag=dag,
)
datahub__fenix_derived__firefox_android_anonymised_v1 = DataHubAssertionOperator(
task_id="datahub__fenix_derived__firefox_android_anonymised_v1",
datahub_rest_conn_id="datahub_rest_default",
urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,data-observability-dev.fenix_derived.firefox_android_anonymised_v1,PROD)",
dag=dag,
)

View File

@@ -1,78 +0,0 @@
"""
DAG to test out the BigEye Airflow integration.
This is just a POC. Ignore in Airflow triage
"""
from datetime import datetime, timedelta
from airflow import DAG
from bigeye_airflow.operators.run_metrics_operator import RunMetricsOperator
from utils.gcp import bigquery_etl_query
from utils.tags import Tag
default_args = {
"owner": "ascholtz@mozilla.com",
"email": [
"ascholtz@mozilla.com",
],
"depends_on_past": False,
"start_date": datetime(2024, 5, 21),
"email_on_failure": False,
"email_on_retry": False,
"retries": 2,
"retry_delay": timedelta(minutes=30),
}
tags = [Tag.ImpactTier.tier_3, Tag.Triage.no_triage]
with DAG(
"bigeye_poc",
default_args=default_args,
schedule_interval="0 3 * * *",
doc_md=__doc__,
tags=tags,
) as dag:
latest_versions = bigquery_etl_query(
reattach_on_restart=True,
task_id="latest_versions",
destination_table="latest_versions_v1",
dataset_id="telemetry_derived",
sql_file_path="sql/data-observability-dev/telemetry_derived/latest_versions_v1/query.sql",
project_id="data-observability-dev",
date_partition_parameter=None,
arguments=("--replace",),
)
bigeye__telemetry_derived__latest_versions_v1 = RunMetricsOperator(
task_id="bigeye__telemetry_derived__latest_versions_v1",
connection_id="bigeye_connection",
warehouse_id=1817,
schema_name="data-observability-dev.telemetry_derived",
table_name="latest_versions_v1",
circuit_breaker_mode=True,
dag=dag,
)
latest_versions >> bigeye__telemetry_derived__latest_versions_v1
bigeye__fenix_derived__metrics_clients_last_seen_v1 = RunMetricsOperator(
task_id="bigeye__fenix_derived__metrics_clients_last_seen_v1",
connection_id="bigeye_connection",
warehouse_id=1817,
schema_name="data-observability-dev.fenix_derived",
table_name="metrics_clients_last_seen_v1",
circuit_breaker_mode=True,
dag=dag,
)
bigeye__fenix_derived__firefox_android_anonymised_v1 = RunMetricsOperator(
task_id="bigeye__fenix_derived__firefox_android_anonymised_v1",
connection_id="bigeye_connection",
warehouse_id=1817,
schema_name="data-observability-dev.fenix_derived",
table_name="firefox_android_anonymised_v1",
circuit_breaker_mode=True,
dag=dag,
)

View File

@@ -1,70 +0,0 @@
"""
DAG to test out the monte_carlo Airflow integration.
This is just a POC. Ignore in Airflow triage
"""
from datetime import datetime, timedelta
from airflow import DAG
from airflow_mcd.operators import SimpleCircuitBreakerOperator
from utils.gcp import bigquery_etl_query
from utils.tags import Tag
default_args = {
"owner": "ascholtz@mozilla.com",
"email": [
"ascholtz@mozilla.com",
],
"depends_on_past": False,
"start_date": datetime(2024, 6, 3),
"email_on_failure": False,
"email_on_retry": False,
"retries": 2,
"retry_delay": timedelta(minutes=30),
}
tags = [Tag.ImpactTier.tier_3, Tag.Triage.no_triage]
with DAG(
"monte_carlo_poc",
default_args=default_args,
schedule_interval="30 3 * * *",
doc_md=__doc__,
tags=tags,
) as dag:
latest_versions = bigquery_etl_query(
reattach_on_restart=True,
task_id="latest_versions",
destination_table="latest_versions_v1",
dataset_id="telemetry_derived",
sql_file_path="sql/data-observability-dev/telemetry_derived/latest_versions_v1/query.sql",
project_id="data-observability-dev",
date_partition_parameter=None,
arguments=("--replace",),
)
monte_carlo__telemetry_derived__latest_versions_v1 = SimpleCircuitBreakerOperator(
task_id="monte_carlo__telemetry_derived__latest_versions_v1",
mcd_session_conn_id="monte_carlo_default_session_id",
rule_uuid="438a4215-ab4a-40f0-9c34-64346fb5c486",
)
latest_versions >> monte_carlo__telemetry_derived__latest_versions_v1
monte_carlo__fenix_derived__metrics_clients_last_seen_v1 = (
SimpleCircuitBreakerOperator(
task_id="monte_carlo__fenix_derived__metrics_clients_last_seen_v1",
mcd_session_conn_id="monte_carlo_default_session_id",
rule_uuid="3f9bd6be-e330-446c-a045-9394633b2c31",
)
)
monte_carlo__fenix_derived__metrics_clients_last_seen_v1 = (
SimpleCircuitBreakerOperator(
task_id="monte_carlo__fenix_derived__firefox_android_anonymised_v1",
mcd_session_conn_id="monte_carlo_default_session_id",
rule_uuid="eb9c26c5-604f-4c71-b7bf-df3d818c55f4",
)
)

View File

@@ -22,9 +22,6 @@ bigeye-airflow
acryl-datahub-airflow-plugin
gql
# Monte Carlo integration
airflow-mcd
# Required for /app/dags/fivetran_acoustic.py, /app/dags/utils/acoustic/acoustic_client.py
xmltodict

View File

@@ -9,7 +9,6 @@ acryl-datahub-airflow-plugin==0.13.2.4
aiofiles==23.2.1
aiohttp==3.9.3
aiosignal==1.3.1
airflow-mcd==0.3.0
airflow-provider-fivetran-async==2.0.2
alembic==1.13.1
amqp==5.2.0
@@ -71,7 +70,6 @@ connexion==2.14.2
cron-descriptor==1.4.3
croniter==2.0.1
cryptography==41.0.7
dataclasses-json==0.6.6
db-dtypes==1.2.0
decorator==5.1.1
deprecated==1.2.14
@@ -233,7 +231,6 @@ pyarrow==14.0.2
pyasn1==0.5.1
pyasn1-modules==0.3.0
pyathena==3.3.0
pycarlo==0.9.8
pycparser==2.21
pydantic==2.6.2
pydantic-core==2.16.3
@@ -242,7 +239,6 @@ pygments==2.17.2
pyjwt==2.8.0
pyopenssl==24.0.0
pyparsing==3.1.1
python-box==7.1.1
python-daemon==3.0.1
python-dateutil==2.8.2
python-nvd3==0.15.0
@@ -269,7 +265,6 @@ s3transfer==0.8.2
scramp==1.4.4
sentry-sdk==1.40.5
setproctitle==1.3.3
sgqlc==16.3
shapely==2.0.3
six==1.16.0
slack-sdk==3.27.0

View File

@@ -158,34 +158,5 @@
"port": null,
"schema": null,
"extra": "{\"refresh_token\": \"dummy_refresh_token\"}"
},
"datahub_rest_default": {
"conn_type": "datahub-rest",
"description": "Used to authenticate to DataHub",
"password": null,
"host": "https://mozilla.acryl.io/gms",
"port": null,
"schema": null,
"extra": null
},
"bigeye_connection": {
"conn_type": "http",
"description": "Used to authenticate to BigEye",
"login": "",
"password": null,
"host": "app.bigeye.com",
"port": null,
"schema": "https",
"extra": ""
},
"monte_carlo_default_session_id": {
"conn_type": "http",
"description": "Used to authenticate to Monte Carlo",
"login": "",
"password": null,
"host": "",
"port": null,
"schema": "https",
"extra": ""
}
}