This commit is contained in:
Maxime 2015-01-19 08:13:57 +00:00
Родитель 6af313cdac
Коммит 0790c1ce54
5 изменённых файлов: 49 добавлений и 3 удалений

Просмотреть файл

@ -5,5 +5,6 @@ from presto_check_operator import PrestoCheckOperator
from sensors import SqlSensor
from sensors import ExternalTaskSensor
from sensors import HivePartitionSensor
from sensors import HdfsSensor
from email_operator import EmailOperator
from dummy_operator import DummyOperator

Просмотреть файл

@ -11,6 +11,8 @@ from airflow.models import State
from airflow.models import TaskInstance
from airflow.utils import apply_defaults
from snakebite.client import HAClient, Namenode
class BaseSensorOperator(BaseOperator): class BaseSensorOperator(BaseOperator):
@ -121,7 +123,7 @@ class ExternalTaskSensor(BaseSensorOperator):
class HivePartitionSensor(BaseSensorOperator): class HivePartitionSensor(BaseSensorOperator):
""" """
Waits for the apparation of a partition in Hive Waits for the apparition of a partition in Hive
""" """
template_fields = ('table', 'partition',) template_fields = ('table', 'partition',)
__mapper_args__ = { __mapper_args__ = {
@ -150,3 +152,43 @@ class HivePartitionSensor(BaseSensorOperator):
'partition {self.partition}'.format(**locals())) 'partition {self.partition}'.format(**locals()))
return self.hook.check_for_partition( return self.hook.check_for_partition(
self.schema, self.table, self.partition) self.schema, self.table, self.partition)
class HdfsSensor(BaseSensorOperator):
    """
    Waits for a file or folder to land in HDFS.

    :param filepath: HDFS path (file or directory) whose existence is polled
    :param hdfs_conn_id: connection id in the connections table providing
        the namenode host and port
    :raises Exception: if ``hdfs_conn_id`` is not found in the repository
    """
    template_fields = ('filepath',)
    __mapper_args__ = {
        'polymorphic_identity': 'HdfsSensor'
    }

    @apply_defaults
    def __init__(
            self,
            filepath,
            hdfs_conn_id='hdfs_default',
            *args, **kwargs):
        super(HdfsSensor, self).__init__(*args, **kwargs)
        self.filepath = filepath
        # Resolve the namenode host/port from the connections table.
        session = settings.Session()
        db = session.query(DB).filter(DB.conn_id == hdfs_conn_id).first()
        if not db:
            # Close the session before bailing out so the connection
            # is not leaked on the error path.
            session.close()
            raise Exception("conn_id doesn't exist in the repository")
        self.host = db.host
        self.port = db.port
        namenodes = [Namenode(self.host, self.port)]
        self.sb = HAClient(namenodes)
        session.commit()
        session.close()

    def poke(self):
        """Return True when ``self.filepath`` exists in HDFS, else False."""
        # snakebite is chatty at INFO; keep its logger quiet while poking.
        logging.getLogger("snakebite").setLevel(logging.WARNING)
        logging.info(
            'Poking for file {self.filepath} '.format(**locals()))
        try:
            # ls raises if the path does not exist yet; any successful
            # listing (even of an empty directory) means it has landed.
            files = [f for f in self.sb.ls([self.filepath])]
        except Exception:
            # Path absent or transient HDFS error -- keep poking.
            return False
        logging.debug('Found: %s', files)
        return True

Просмотреть файл

@ -1061,12 +1061,13 @@ class ConnectionModelView(LoginMixin, ModelView):
column_list = ('conn_id', 'conn_type', 'host', 'port') column_list = ('conn_id', 'conn_type', 'host', 'port')
form_choices = { form_choices = {
'conn_type': [ 'conn_type': [
('ftp', 'FTP',),
('hdfs', 'HDFS',),
('hive', 'Hive',), ('hive', 'Hive',),
('presto', 'Presto',),
('mysql', 'MySQL',), ('mysql', 'MySQL',),
('oracle', 'Oracle',), ('oracle', 'Oracle',),
('presto', 'Presto',),
('samba', 'Samba',), ('samba', 'Samba',),
('ftp', 'FTP',),
] ]
} }
mv = ConnectionModelView( mv = ConnectionModelView(

Просмотреть файл

@ -18,6 +18,7 @@ pyhive
python-dateutil
requests
setproctitle
snakebite
sphinx
sphinx_rtd_theme
Sphinx-PyPI-upload

Просмотреть файл

@ -21,6 +21,7 @@ setup(
'pandas',
'pygments', 'pyhive',
'python-dateutil', 'requests', 'setproctitle',
'snakebite',
'sphinx', 'sphinx-rtd-theme', 'Sphinx-PyPI-upload',
'sqlalchemy', 'thrift', 'tornado'
], ],