Enabled the taar_ensemble weekly job (#522)

* split off taar weekly jobs into a separate script

* added ExternalTaskSensor dependency on main_summary

* fixed dependency to point to clients_daily instead of main_summary

* fixes as per review

renamed `dag_weekly` to `taar_weekly` for weekly taar dag

corrected external_task_id and external_dag_id

* Added a `start_date` argument to the task

* removed Frank as owner and set myself as the owner of the task

removed frank from alert recipient
This commit is contained in:
Victor Ng 2019-06-05 15:34:35 -04:00 коммит произвёл Anthony Miyaguchi
Родитель 9c4fbc380f
Коммит 7820515428
2 изменённых файлов: 60 добавлений и 1 удалений

59
dags/taar_weekly.py Normal file
Просмотреть файл

@ -0,0 +1,59 @@
"""
This configures a weekly DAG to run the TAAR Ensemble job.
"""
from airflow import DAG
from airflow.operators.sensors import ExternalTaskSensor
from datetime import datetime, timedelta
from airflow.operators.moz_databricks import MozDatabricksSubmitRunOperator
from utils.mozetl import mozetl_envvar
# Task-level defaults that are passed to the weekly DAG as ``default_args``.
default_args_weekly = {
    # Ownership and scheduling basics.
    "owner": "vng@mozilla.com",
    "depends_on_past": False,
    "start_date": datetime(2019, 5, 31),
    # Alerting: mail the shared alerts list on both failures and retries.
    "email": ["telemetry-alerts@mozilla.com"],
    "email_on_failure": True,
    "email_on_retry": True,
    # Retry policy: two retries, spaced thirty minutes apart.
    "retries": 2,
    "retry_delay": timedelta(minutes=30),
}
# The weekly DAG object; tasks attached to it inherit default_args_weekly.
taar_weekly = DAG(
    dag_id="taar_weekly",
    schedule_interval="@weekly",
    default_args=default_args_weekly,
)
# Sensor that blocks until the `clients_daily` task of the `main_summary` DAG
# has finished for the matching run.
#
# Offset rationale (from the original author): main_summary waits one hour,
# and the weekly execution date is the beginning of the week.
# NOTE(review): ExternalTaskSensor subtracts execution_delta from this DAG's
# execution date, so a negative delta targets a *later* main_summary run --
# confirm that this is the intended upstream run.
wait_for_clients_daily = ExternalTaskSensor(
    dag=taar_weekly,
    task_id="clients_daily",
    external_dag_id="main_summary",
    external_task_id="clients_daily",
    execution_delta=timedelta(days=-7, hours=-1),
)
# Submits the "TAAR Ensemble Model" job through the Databricks operator.
# NOTE(review): no `dag=` is passed here, so default_args_weekly presumably
# does not apply when the operator is constructed; owner, email and
# start_date are therefore spelled out explicitly -- confirm.
taar_ensemble = MozDatabricksSubmitRunOperator(
    task_id="taar_ensemble",
    job_name="TAAR Ensemble Model",
    owner="vng@mozilla.com",
    email=["vng@mozilla.com", "mlopatka@mozilla.com"],
    execution_timeout=timedelta(hours=11),
    # Cluster sizing: start at 5 instances and allow autoscaling up to 60.
    instance_count=5,
    instance_type="i3.2xlarge",
    spot_bid_price_percent=100,
    max_instance_count=60,
    enable_autoscale=True,
    # Must be a datetime: Airflow compares a task's start_date against
    # execution dates, and a bare string such as '20190527' is not a valid
    # start_date value.
    start_date=datetime(2019, 5, 27),
    pypi_libs=[
        "mozilla-taar3==0.4.5",
        "mozilla-srgutil==0.1.10",
        "python-decouple==3.1",
    ],
    # `ds_nodash` is an Airflow template macro; presumably rendered to the
    # run's execution date as YYYYMMDD -- confirm `env` is a templated field.
    env=mozetl_envvar(
        "taar_ensemble",
        {
            "date": "{{ ds_nodash }}",
        },
    ),
    uri="https://raw.githubusercontent.com/mozilla/python_mozetl/master/bin/mozetl-databricks.sh",
    output_visibility="private",
)

# The ensemble job runs only after the clients_daily sensor has succeeded.
taar_ensemble.set_upstream(wait_for_clients_daily)

Просмотреть файл

@ -79,7 +79,7 @@ class MozDatabricksSubmitRunOperator(DatabricksSubmitRunOperator):
:param python_version: the default python runtime on the cluster (python 3.5.2)
See https://docs.databricks.com/release-notes/runtime/4.3.html#system-environment
for more details.
:param pypi_libs: PyPI libraries to install. ex: "['pylib1==0.1', 'pylib2==3.1']"
:param kwargs: Keyword arguments to pass to DatabricksSubmitRunOperator
"""
if python_version not in (2, 3):