chore: Remove data observability POC DAGs (#2049)
This commit is contained in:
Родитель
23970b61a5
Коммит
79b3199fd4
|
@ -1,72 +0,0 @@
|
|||
"""
|
||||
DAG to test out the datahub Airflow integration.
|
||||
|
||||
This is just a POC. Ignore in Airflow triage
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from datahub_airflow_plugin.operators.datahub_assertion_operator import (
|
||||
DataHubAssertionOperator,
|
||||
)
|
||||
|
||||
from utils.gcp import bigquery_etl_query
|
||||
from utils.tags import Tag
|
||||
|
||||
# Task-level defaults passed to the DAG through ``default_args``.
default_args = {
    "owner": "ascholtz@mozilla.com",
    "email": ["ascholtz@mozilla.com"],
    "depends_on_past": False,
    "start_date": datetime(2024, 5, 21),
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 2,
    "retry_delay": timedelta(minutes=30),
}

tags = [Tag.ImpactTier.tier_3, Tag.Triage.no_triage]

dag = DAG(
    dag_id="datahub_poc",
    default_args=default_args,
    schedule_interval="15 3 * * *",
    doc_md=__doc__,
    tags=tags,
)

with dag:
    # Keyword arguments that are identical for every DataHub assertion task.
    assertion_kwargs = {
        "datahub_rest_conn_id": "datahub_rest_default",
        "dag": dag,
    }

    # Query task that writes telemetry_derived.latest_versions_v1 in the
    # data-observability-dev project.
    latest_versions = bigquery_etl_query(
        task_id="latest_versions",
        project_id="data-observability-dev",
        dataset_id="telemetry_derived",
        destination_table="latest_versions_v1",
        sql_file_path="sql/data-observability-dev/telemetry_derived/latest_versions_v1/query.sql",
        date_partition_parameter=None,
        arguments=("--replace",),
        reattach_on_restart=True,
        dag=dag,
    )

    datahub__telemetry_derived__latest_versions_v1 = DataHubAssertionOperator(
        task_id="datahub__telemetry_derived__latest_versions_v1",
        urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,data-observability-dev.telemetry_derived.latest_versions_v1,PROD)",
        **assertion_kwargs,
    )

    # Only this assertion is chained after the query; the two fenix_derived
    # assertions below have no upstream task in this DAG.
    latest_versions >> datahub__telemetry_derived__latest_versions_v1

    datahub__fenix_derived__metrics_clients_last_seen_v1 = DataHubAssertionOperator(
        task_id="datahub__fenix_derived__metrics_clients_last_seen_v1",
        urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,data-observability-dev.fenix_derived.metrics_clients_last_seen_v1,PROD)",
        **assertion_kwargs,
    )

    datahub__fenix_derived__firefox_android_anonymised_v1 = DataHubAssertionOperator(
        task_id="datahub__fenix_derived__firefox_android_anonymised_v1",
        urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,data-observability-dev.fenix_derived.firefox_android_anonymised_v1,PROD)",
        **assertion_kwargs,
    )
|
|
@ -1,78 +0,0 @@
|
|||
"""
|
||||
DAG to test out the BigEye Airflow integration.
|
||||
|
||||
This is just a POC. Ignore in Airflow triage
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from bigeye_airflow.operators.run_metrics_operator import RunMetricsOperator
|
||||
|
||||
from utils.gcp import bigquery_etl_query
|
||||
from utils.tags import Tag
|
||||
|
||||
# Task-level defaults passed to the DAG through ``default_args``.
default_args = {
    "owner": "ascholtz@mozilla.com",
    "email": ["ascholtz@mozilla.com"],
    "depends_on_past": False,
    "start_date": datetime(2024, 5, 21),
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 2,
    "retry_delay": timedelta(minutes=30),
}

tags = [Tag.ImpactTier.tier_3, Tag.Triage.no_triage]

with DAG(
    dag_id="bigeye_poc",
    default_args=default_args,
    schedule_interval="0 3 * * *",
    doc_md=__doc__,
    tags=tags,
) as dag:
    # Settings shared by all three BigEye metric-run tasks.
    bigeye_kwargs = {
        "connection_id": "bigeye_connection",
        "warehouse_id": 1817,
        "circuit_breaker_mode": True,
        "dag": dag,
    }

    # Query task that writes telemetry_derived.latest_versions_v1 in the
    # data-observability-dev project. No explicit ``dag`` argument is passed
    # here, matching the original call.
    latest_versions = bigquery_etl_query(
        task_id="latest_versions",
        project_id="data-observability-dev",
        dataset_id="telemetry_derived",
        destination_table="latest_versions_v1",
        sql_file_path="sql/data-observability-dev/telemetry_derived/latest_versions_v1/query.sql",
        date_partition_parameter=None,
        arguments=("--replace",),
        reattach_on_restart=True,
    )

    bigeye__telemetry_derived__latest_versions_v1 = RunMetricsOperator(
        task_id="bigeye__telemetry_derived__latest_versions_v1",
        schema_name="data-observability-dev.telemetry_derived",
        table_name="latest_versions_v1",
        **bigeye_kwargs,
    )

    # Only this metric run is chained after the query; the two fenix_derived
    # runs below have no upstream task in this DAG.
    latest_versions >> bigeye__telemetry_derived__latest_versions_v1

    bigeye__fenix_derived__metrics_clients_last_seen_v1 = RunMetricsOperator(
        task_id="bigeye__fenix_derived__metrics_clients_last_seen_v1",
        schema_name="data-observability-dev.fenix_derived",
        table_name="metrics_clients_last_seen_v1",
        **bigeye_kwargs,
    )

    bigeye__fenix_derived__firefox_android_anonymised_v1 = RunMetricsOperator(
        task_id="bigeye__fenix_derived__firefox_android_anonymised_v1",
        schema_name="data-observability-dev.fenix_derived",
        table_name="firefox_android_anonymised_v1",
        **bigeye_kwargs,
    )
|
|
@ -1,70 +0,0 @@
|
|||
"""
|
||||
DAG to test out the monte_carlo Airflow integration.
|
||||
|
||||
This is just a POC. Ignore in Airflow triage
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow_mcd.operators import SimpleCircuitBreakerOperator
|
||||
|
||||
from utils.gcp import bigquery_etl_query
|
||||
from utils.tags import Tag
|
||||
|
||||
# Task-level defaults passed to the DAG through ``default_args``.
default_args = {
    "owner": "ascholtz@mozilla.com",
    "email": [
        "ascholtz@mozilla.com",
    ],
    "depends_on_past": False,
    "start_date": datetime(2024, 6, 3),
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 2,
    "retry_delay": timedelta(minutes=30),
}

tags = [Tag.ImpactTier.tier_3, Tag.Triage.no_triage]

with DAG(
    "monte_carlo_poc",
    default_args=default_args,
    schedule_interval="30 3 * * *",
    doc_md=__doc__,
    tags=tags,
) as dag:
    # Query task that writes telemetry_derived.latest_versions_v1 in the
    # data-observability-dev project.
    latest_versions = bigquery_etl_query(
        reattach_on_restart=True,
        task_id="latest_versions",
        destination_table="latest_versions_v1",
        dataset_id="telemetry_derived",
        sql_file_path="sql/data-observability-dev/telemetry_derived/latest_versions_v1/query.sql",
        project_id="data-observability-dev",
        date_partition_parameter=None,
        arguments=("--replace",),
    )

    # Each circuit-breaker task is tied to one Monte Carlo rule through its
    # ``rule_uuid`` and uses the same session connection id.
    monte_carlo__telemetry_derived__latest_versions_v1 = SimpleCircuitBreakerOperator(
        task_id="monte_carlo__telemetry_derived__latest_versions_v1",
        mcd_session_conn_id="monte_carlo_default_session_id",
        rule_uuid="438a4215-ab4a-40f0-9c34-64346fb5c486",
    )

    # Only this check is chained after the query; the two fenix_derived checks
    # below have no upstream task in this DAG.
    latest_versions >> monte_carlo__telemetry_derived__latest_versions_v1

    monte_carlo__fenix_derived__metrics_clients_last_seen_v1 = (
        SimpleCircuitBreakerOperator(
            task_id="monte_carlo__fenix_derived__metrics_clients_last_seen_v1",
            mcd_session_conn_id="monte_carlo_default_session_id",
            rule_uuid="3f9bd6be-e330-446c-a045-9394633b2c31",
        )
    )

    # Variable named after its task_id. The original re-used the
    # ``..._metrics_clients_last_seen_v1`` name here, overwriting the binding
    # created just above.
    monte_carlo__fenix_derived__firefox_android_anonymised_v1 = (
        SimpleCircuitBreakerOperator(
            task_id="monte_carlo__fenix_derived__firefox_android_anonymised_v1",
            mcd_session_conn_id="monte_carlo_default_session_id",
            rule_uuid="eb9c26c5-604f-4c71-b7bf-df3d818c55f4",
        )
    )
|
|
@ -22,9 +22,6 @@ bigeye-airflow
|
|||
acryl-datahub-airflow-plugin
|
||||
gql
|
||||
|
||||
# Monte Carlo integration
|
||||
airflow-mcd
|
||||
|
||||
# Required for /app/dags/fivetran_acoustic.py, /app/dags/utils/acoustic/acoustic_client.py
|
||||
xmltodict
|
||||
|
||||
|
|
|
@ -9,7 +9,6 @@ acryl-datahub-airflow-plugin==0.13.2.4
|
|||
aiofiles==23.2.1
|
||||
aiohttp==3.9.3
|
||||
aiosignal==1.3.1
|
||||
airflow-mcd==0.3.0
|
||||
airflow-provider-fivetran-async==2.0.2
|
||||
alembic==1.13.1
|
||||
amqp==5.2.0
|
||||
|
@ -71,7 +70,6 @@ connexion==2.14.2
|
|||
cron-descriptor==1.4.3
|
||||
croniter==2.0.1
|
||||
cryptography==41.0.7
|
||||
dataclasses-json==0.6.6
|
||||
db-dtypes==1.2.0
|
||||
decorator==5.1.1
|
||||
deprecated==1.2.14
|
||||
|
@ -233,7 +231,6 @@ pyarrow==14.0.2
|
|||
pyasn1==0.5.1
|
||||
pyasn1-modules==0.3.0
|
||||
pyathena==3.3.0
|
||||
pycarlo==0.9.8
|
||||
pycparser==2.21
|
||||
pydantic==2.6.2
|
||||
pydantic-core==2.16.3
|
||||
|
@ -242,7 +239,6 @@ pygments==2.17.2
|
|||
pyjwt==2.8.0
|
||||
pyopenssl==24.0.0
|
||||
pyparsing==3.1.1
|
||||
python-box==7.1.1
|
||||
python-daemon==3.0.1
|
||||
python-dateutil==2.8.2
|
||||
python-nvd3==0.15.0
|
||||
|
@ -269,7 +265,6 @@ s3transfer==0.8.2
|
|||
scramp==1.4.4
|
||||
sentry-sdk==1.40.5
|
||||
setproctitle==1.3.3
|
||||
sgqlc==16.3
|
||||
shapely==2.0.3
|
||||
six==1.16.0
|
||||
slack-sdk==3.27.0
|
||||
|
|
|
@ -158,34 +158,5 @@
|
|||
"port": null,
|
||||
"schema": null,
|
||||
"extra": "{\"refresh_token\": \"dummy_refresh_token\"}"
|
||||
},
|
||||
"datahub_rest_default": {
|
||||
"conn_type": "datahub-rest",
|
||||
"description": "Used to authenticate to DataHub",
|
||||
"password": null,
|
||||
"host": "https://mozilla.acryl.io/gms",
|
||||
"port": null,
|
||||
"schema": null,
|
||||
"extra": null
|
||||
},
|
||||
"bigeye_connection": {
|
||||
"conn_type": "http",
|
||||
"description": "Used to authenticate to BigEye",
|
||||
"login": "",
|
||||
"password": null,
|
||||
"host": "app.bigeye.com",
|
||||
"port": null,
|
||||
"schema": "https",
|
||||
"extra": ""
|
||||
},
|
||||
"monte_carlo_default_session_id": {
|
||||
"conn_type": "http",
|
||||
"description": "Used to authenticate to Monte Carlo",
|
||||
"login": "",
|
||||
"password": null,
|
||||
"host": "",
|
||||
"port": null,
|
||||
"schema": "https",
|
||||
"extra": ""
|
||||
}
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче