Maxime Beauchemin 2015-05-26 15:47:58 -04:00
Parent bffb403077
Commit 59d17609a7
5 changed files: 125 additions and 33 deletions

View file

@@ -1,3 +1,4 @@
+from collections import defaultdict
 from datetime import datetime
 import getpass
 import logging
@@ -305,7 +306,7 @@ class SchedulerJob(BaseJob):
                     execution_date=next_schedule,
                 )
                 ti.refresh_from_db()
-                if ti.is_runnable():
+                if ti.is_queueable():
                     logging.debug('Queuing next run: ' + str(ti))
                     executor.queue_task_instance(ti)
         # Releasing the lock
@@ -321,6 +322,37 @@ class SchedulerJob(BaseJob):
             session.close()
 
+    @utils.provide_session
+    def prioritize_queued(self, session, executor, dagbag):
+        # Prioritizing queued task instances
+        pools = {p.pool: p for p in session.query(models.Pool).all()}
+        TI = models.TaskInstance
+        queued_tis = (
+            session.query(TI)
+            .filter(TI.state == State.QUEUED)
+            .all()
+        )
+        d = defaultdict(list)
+        for ti in queued_tis:
+            d[ti.pool].append(ti)
+
+        for pool, tis in d.items():
+            open_slots = pools[pool].open_slots(session=session)
+            if open_slots > 0:
+                tis = sorted(
+                    tis, key=lambda ti: ti.priority_weight, reverse=True)
+                for ti in tis[:open_slots]:
+                    task = None
+                    try:
+                        task = dagbag.dags[ti.dag_id].get_task(ti.task_id)
+                    except:
+                        logging.error("Queued task {} seems gone".format(ti))
+                    if task:
+                        ti.task = task
+                        executor.queue_task_instance(ti)
+
     def _execute(self):
         dag_id = self.dag_id
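The new prioritize_queued() boils down to: group the QUEUED task instances by pool, and for each pool that still has open slots, promote the highest-priority instances first, at most one per open slot. A minimal standalone sketch of that selection logic, using tuples as hypothetical stand-ins for TaskInstance rows (illustrative only, not Airflow code):

    from collections import defaultdict

    # (pool, priority_weight, task_id): hypothetical stand-ins for queued TaskInstances
    queued = [
        ('etl', 1, 'load_a'), ('etl', 5, 'load_b'), ('etl', 3, 'load_c'),
        ('ml', 2, 'train'),
    ]
    open_slots = {'etl': 2, 'ml': 0}  # free slots per pool at this scheduler pass

    by_pool = defaultdict(list)
    for ti in queued:
        by_pool[ti[0]].append(ti)

    promoted = []
    for pool, tis in by_pool.items():
        slots = open_slots.get(pool, 0)
        if slots > 0:
            # highest priority_weight first, capped at the number of open slots
            tis = sorted(tis, key=lambda ti: ti[1], reverse=True)
            promoted.extend(tis[:slots])

    print(promoted)  # [('etl', 5, 'load_b'), ('etl', 3, 'load_c')]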
@@ -331,17 +363,19 @@ class SchedulerJob(BaseJob):
         utils.pessimistic_connection_handling()
 
-        # Sleep time (seconds) between scheduler runs
         logging.basicConfig(level=logging.DEBUG)
         logging.info("Starting the scheduler")
 
-        # This should get new code
         dagbag = models.DagBag(self.subdir, sync_to_db=True)
         executor = dagbag.executor
         executor.start()
         i = 0
         while (not self.test_mode) or i < 1:
+            try:
+                self.prioritize_queued(executor=executor, dagbag=dagbag)
+            except Exception as e:
+                logging.exception(e)
             i += 1
             try:
                 if i % self.refresh_dags_every == 0:

View file

@@ -24,7 +24,7 @@ from airflow.configuration import conf
 from airflow import settings
 from airflow import utils
 from airflow.utils import State
-from airflow.utils import apply_defaults
+from airflow.utils import apply_defaults, provide_session
 
 Base = declarative_base()
 ID_LEN = 250
@@ -314,6 +314,8 @@ class Connection(Base):
                 return hooks.PrestoHook(presto_conn_id=self.conn_id)
             elif self.conn_type == 'hiveserver2':
                 return hooks.HiveServer2Hook(hiveserver2_conn_id=self.conn_id)
+            elif self.conn_type == 'sqlite':
+                return hooks.SqliteHook(sqlite_conn_id=self.conn_id)
         except:
             return None
@@ -515,12 +517,13 @@ class TaskInstance(Base):
         """
         return (self.dag_id, self.task_id, self.execution_date)
 
-    def is_runnable(self):
+    def is_queueable(self):
         """
         Returns a boolean on whether the task instance has met all dependencies
         and is ready to run. It considers the task's state, the state
         of its dependencies, depends_on_past and makes sure the execution
-        isn't in the future.
+        isn't in the future. It doesn't take into
+        account whether the pool has a slot for it to run.
         """
         if self.execution_date > datetime.now() - self.task.schedule_interval:
             return False
@@ -530,12 +533,18 @@ class TaskInstance(Base):
             return False
         elif (
                 self.state in State.runnable() and
-                self.are_dependencies_met() and
-                not self.pool_full()):
+                self.are_dependencies_met()):
             return True
         else:
             return False
 
+    def is_runnable(self):
+        """
+        Returns whether a task is ready to run AND there's room in the
+        queue.
+        """
+        return self.is_queueable() and not self.pool_full()
+
     def are_dependents_done(self, main_session=None):
         """
         Checks whether the dependents of this task instance have all succeeded.
@@ -626,7 +635,8 @@ class TaskInstance(Base):
         return self.state == State.UP_FOR_RETRY and \
             self.end_date + self.task.retry_delay < datetime.now()
 
-    def pool_full(self, session=None):
+    @provide_session
+    def pool_full(self, session):
         """
         Returns a boolean as to whether the slot pool has room for this
         task to run
@@ -634,18 +644,6 @@ class TaskInstance(Base):
         if not self.task.pool:
             return False
-        close_session = False
-        if not session:
-            session = settings.Session()
-            close_session = True
-        running = (
-            session
-            .query(TaskInstance)
-            .filter(TaskInstance.pool == self.task.pool)
-            .filter(TaskInstance.state == State.RUNNING)
-            .count()
-        )
         pool = (
             session
             .query(Pool)
@@ -654,12 +652,9 @@ class TaskInstance(Base):
         )
         if not pool:
             return False
-        if close_session:
-            session.expunge_all()
-            session.commit()
-            session.close()
-        return running >= pool.slots
+        open_slots = pool.open_slots(session=session)
+        return open_slots <= 0
 
     def run(
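The refactor splits the old check in two: is_queueable() covers state, dependencies and execution date only, while is_runnable() additionally requires a free pool slot, and pool_full() now reduces to Pool.open_slots() <= 0. A worked example of that arithmetic with illustrative numbers (no database involved):

    slots = 5        # Pool.slots configured for the pool
    running = 5      # TaskInstances in the pool currently in state RUNNING

    used_slots = running
    open_slots = slots - used_slots   # what Pool.open_slots() returns
    pool_full = open_slots <= 0       # what TaskInstance.pool_full() returns

    is_queueable = True               # deps met, right state, not in the future
    is_runnable = is_queueable and not pool_full

    print(open_slots, pool_full, is_runnable)  # 0 True False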
@@ -707,6 +702,7 @@ class TaskInstance(Base):
                 "Next run after {0}".format(next_run)
             )
         elif force or self.state in State.runnable():
+            self.start_date = datetime.now()
             if not force and task.pool and self.pool_full(session):
                 self.state = State.QUEUED
                 session.commit()
@@ -719,7 +715,6 @@ class TaskInstance(Base):
             if not test_mode:
                 session.add(Log(State.RUNNING, self))
             self.state = State.RUNNING
-            self.start_date = datetime.now()
             self.end_date = None
             if not test_mode:
                 session.merge(self)
@@ -1804,4 +1799,26 @@ class Pool(Base):
     description = Column(Text)
 
     def __repr__(self):
-        return self.id
+        return self.pool
+
+    @provide_session
+    def used_slots(self, session):
+        """
+        Returns the number of slots used at the moment
+        """
+        running = (
+            session
+            .query(TaskInstance)
+            .filter(TaskInstance.pool == self.pool)
+            .filter(TaskInstance.state == State.RUNNING)
+            .count()
+        )
+        return running
+
+    @provide_session
+    def open_slots(self, session):
+        """
+        Returns the number of slots open at the moment
+        """
+        used_slots = self.used_slots(session=session)
+        return self.slots - used_slots
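Assuming an Airflow environment with these changes applied, the new Pool accessors could be exercised roughly as below; this is a sketch, not part of the commit, the pool name 'etl' is hypothetical, and the explicit session is optional since @provide_session creates one when it is omitted.

    from airflow import models, settings

    session = settings.Session()
    pool = session.query(models.Pool).filter(models.Pool.pool == 'etl').first()
    if pool:
        # used_slots counts RUNNING TaskInstances in the pool;
        # open_slots is pool.slots minus that count
        print(pool.used_slots(session=session), pool.open_slots(session=session))
    session.close()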

View file

@@ -35,7 +35,7 @@ class State(object):
     UP_FOR_RETRY = "up_for_retry"
 
     state_color = {
-        QUEUED: 'grey',
+        QUEUED: 'gray',
         RUNNING: 'lime',
         SUCCESS: 'green',
         SHUTDOWN: 'orange',
@@ -120,6 +120,14 @@ def initdb():
                 port=10001))
         session.commit()
 
+    conn = session.query(C).filter(C.conn_id == 'mysql_default').first()
+    if not conn:
+        session.add(
+            models.Connection(
+                conn_id='mysql_default', conn_type='mysql',
+                host='localhost'))
+        session.commit()
+
     conn = session.query(C).filter(C.conn_id == 'sqlite_default').first()
     if not conn:
         home = conf.get('core', 'AIRFLOW_HOME')
@@ -215,8 +223,31 @@ def readfile(filepath):
     return content
 
 
+def provide_session(func):
+    """
+    Function decorator that provides a session if it isn't provided.
+    If you want to reuse a session or run the function as part of a
+    database transaction, pass it to the function; if not, this wrapper
+    will create one and close it for you.
+    """
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        needs_session = False
+        if 'session' not in kwargs:
+            needs_session = True
+            session = settings.Session()
+            kwargs['session'] = session
+        result = func(*args, **kwargs)
+        if needs_session:
+            session.expunge_all()
+            session.commit()
+            session.close()
+        return result
+    return wrapper
+
+
 def apply_defaults(func):
-    '''
+    """
     Function decorator that Looks for an argument named "default_args", and
     fills the unspecified arguments from it.
@@ -224,7 +255,7 @@ def apply_defaults(func):
     calling a function, and that this can be quite confusing with multi-level
     inheritance and argument defaults, this decorator also alerts with
     specific information about the missing arguments.
-    '''
+    """
     @wraps(func)
     def wrapper(*args, **kwargs):
         if len(args) > 1:
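The decorator's contract: when the caller passes session= explicitly, it is used as-is and left open; otherwise a session is created, committed and closed around the call. A self-contained sketch of the same pattern with a dummy session class standing in for settings.Session (illustrative, not Airflow code):

    from functools import wraps

    class DummySession:
        def expunge_all(self):
            pass
        def commit(self):
            print("commit")
        def close(self):
            print("close")

    def provide_session(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            needs_session = 'session' not in kwargs
            if needs_session:
                kwargs['session'] = DummySession()
            result = func(*args, **kwargs)
            if needs_session:
                kwargs['session'].expunge_all()
                kwargs['session'].commit()
                kwargs['session'].close()
            return result
        return wrapper

    @provide_session
    def count_rows(session):
        return 42

    count_rows()                        # decorator-owned session: prints "commit" then "close"
    count_rows(session=DummySession())  # caller-owned session: left open, nothing printed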

View file

@@ -1509,6 +1509,7 @@ class ConnectionModelView(SuperUserMixin, ModelView):
             ('presto', 'Presto',),
             ('s3', 'S3',),
             ('samba', 'Samba',),
+            ('sqlite', 'Sqlite',),
         ]
     }
 mv = ConnectionModelView(
@@ -1719,7 +1720,13 @@ mv = VariableView(
 admin.add_view(mv)
 
 
+def pool_link(v, c, m, p):
+    url = '/admin/taskinstance/?flt1_pool_equals=' + m.pool
+    return Markup("<a href='{url}'>{m.pool}</a>".format(**locals()))
+
+
 class PoolModelView(SuperUserMixin, ModelView):
-    pass
+    column_formatters = dict(pool=pool_link)
+    named_filter_urls = True
 
 mv = PoolModelView(models.Pool, Session, name="Pools", category="Admin")
 admin.add_view(mv)

View file

@@ -44,6 +44,9 @@ span.started{
 span.error{
     background-color:red;
 }
+span.queued{
+    background-color:gray;
+}
 .tooltip-inner {
     text-align:left !important;
     font-size: 12px;