Enabled the taar_ensemble weekly job (#522)
* split off taar weekly jobs into a separate script * added ExternalTaskSensor dependency on main_summary * fixed dependency to point to clients_daily instead of main_summary * fixes as per review: renamed `dag_weekly` to `taar_weekly` for the weekly taar dag; corrected external_task_id and external_dag_id * Added a `start_date` argument to the task * removed Frank as owner and set myself as the owner of the task; removed Frank from alert recipients
This commit is contained in:
Родитель
9c4fbc380f
Коммит
7820515428
|
@ -0,0 +1,59 @@
|
|||
"""
|
||||
This configures a weekly DAG to run the TAAR Ensemble job.
|
||||
"""
|
||||
from airflow import DAG
|
||||
from airflow.operators.sensors import ExternalTaskSensor
|
||||
from datetime import datetime, timedelta
|
||||
from airflow.operators.moz_databricks import MozDatabricksSubmitRunOperator
|
||||
from utils.mozetl import mozetl_envvar
|
||||
|
||||
# Default task arguments; passed as `default_args` to the `taar_weekly` DAG.
default_args_weekly = {
    "owner": "vng@mozilla.com",
    "depends_on_past": False,
    "start_date": datetime(2019, 5, 31),
    "email": ["telemetry-alerts@mozilla.com"],
    "email_on_failure": True,
    "email_on_retry": True,
    "retries": 2,
    "retry_delay": timedelta(minutes=30),
}
|
||||
|
||||
|
||||
|
||||
# Weekly DAG for the TAAR jobs, configured with `default_args_weekly`.
taar_weekly = DAG(
    "taar_weekly",
    default_args=default_args_weekly,
    schedule_interval="@weekly",
)
|
||||
|
||||
# Sensor on the `clients_daily` task of the `main_summary` DAG; the ensemble
# job is scheduled downstream of it.
wait_for_clients_daily = ExternalTaskSensor(
    task_id="clients_daily",
    external_dag_id="main_summary",
    external_task_id="clients_daily",
    # main_summary waits one hour, execution date is beginning of the week
    execution_delta=timedelta(days=-7, hours=-1),
    dag=taar_weekly,
)
|
||||
|
||||
|
||||
# Submits the "TAAR Ensemble Model" job through MozDatabricksSubmitRunOperator.
taar_ensemble = MozDatabricksSubmitRunOperator(
    task_id="taar_ensemble",
    job_name="TAAR Ensemble Model",
    owner="vng@mozilla.com",
    email=["vng@mozilla.com", "mlopatka@mozilla.com"],
    execution_timeout=timedelta(hours=11),
    instance_count=5,
    instance_type="i3.2xlarge",
    spot_bid_price_percent=100,
    max_instance_count=60,
    enable_autoscale=True,
    # Airflow expects a datetime for start_date, not a "YYYYMMDD" string.
    start_date=datetime(2019, 5, 27),
    pypi_libs=[
        "mozilla-taar3==0.4.5",
        "mozilla-srgutil==0.1.10",
        "python-decouple==3.1",
    ],
    env=mozetl_envvar(
        "taar_ensemble",
        {
            "date": "{{ ds_nodash }}",
        },
    ),
    uri="https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-databricks.sh",
    output_visibility="private",
    # Attach to the DAG explicitly so its default_args (retries, retry_delay,
    # email_on_retry, ...) are applied to this task instead of relying on the
    # DAG being inferred from the upstream relationship below.
    dag=taar_weekly,
)

# Run the ensemble job only after the clients_daily sensor has completed.
taar_ensemble.set_upstream(wait_for_clients_daily)
|
|
@ -79,7 +79,7 @@ class MozDatabricksSubmitRunOperator(DatabricksSubmitRunOperator):
|
|||
:param python_version: the default python runtime on the cluster (python 3.5.2)
|
||||
See https://docs.databricks.com/release-notes/runtime/4.3.html#system-environment
|
||||
for more details.
|
||||
|
||||
:param pypi_libs: PyPI libraries to install. ex: "['pylib1==0.1', 'pylib2==3.1']"
|
||||
:param kwargs: Keyword arguments to pass to DatabricksSubmitRunOperator
|
||||
"""
|
||||
if python_version not in (2, 3):
|
||||
|
|
Загрузка…
Ссылка в новой задаче