[AIRFLOW-4085] FileSensor now takes glob patterns for `filepath` (#5358)
This commit is contained in:
Родитель
ba9e521e71
Коммит
c9e2d04fde
|
@ -39,6 +39,9 @@ assists users migrating to a new version.
|
|||
<!-- END doctoc generated TOC please keep comment here to allow auto update -->
|
||||
## Airflow Master
|
||||
|
||||
### Changes to FileSensor
|
||||
FileSensor is now takes a glob pattern, not just a filename. If the filename you are looking for has `*`, `?`, or `[` in it then you should replace these with `[*]`, `[?]`, and `[[]`.
|
||||
|
||||
### Change dag loading duration metric name
|
||||
Change DAG file loading duration metric from
|
||||
`dag.loading-duration.<dag_id>` to `dag.loading-duration.<dag_file>`. This is to
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#
|
||||
|
||||
import os
|
||||
import stat
|
||||
from glob import glob
|
||||
|
||||
from airflow.contrib.hooks.fs_hook import FSHook
|
||||
from airflow.sensors.base_sensor_operator import BaseSensorOperator
|
||||
|
@ -37,7 +37,7 @@ class FileSensor(BaseSensorOperator):
|
|||
connection id
|
||||
:type fs_conn_id: str
|
||||
:param filepath: File or folder name (relative to
|
||||
the base path set within the connection)
|
||||
the base path set within the connection), can be a glob.
|
||||
:type fs_conn_id: str
|
||||
"""
|
||||
template_fields = ('filepath',)
|
||||
|
@ -58,14 +58,12 @@ class FileSensor(BaseSensorOperator):
|
|||
basepath = hook.get_path()
|
||||
full_path = os.path.join(basepath, self.filepath)
|
||||
self.log.info('Poking for file %s', full_path)
|
||||
try:
|
||||
if stat.S_ISDIR(os.stat(full_path).st_mode):
|
||||
for _, _, files in os.walk(full_path):
|
||||
if len(files):
|
||||
return True
|
||||
else:
|
||||
# full_path was a file directly
|
||||
|
||||
for path in glob(full_path):
|
||||
if os.path.isfile(path):
|
||||
return True
|
||||
except OSError:
|
||||
return False
|
||||
|
||||
for _, _, files in os.walk(full_path):
|
||||
if len(files) > 0:
|
||||
return True
|
||||
return False
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
import unittest
|
||||
import shutil
|
||||
import tempfile
|
||||
import os.path
|
||||
|
||||
from airflow import models, DAG
|
||||
from airflow.exceptions import AirflowSensorTimeout
|
||||
|
@ -78,6 +79,7 @@ class TestFileSensor(unittest.TestCase):
|
|||
fs_conn_id='fs_default',
|
||||
dag=self.dag,
|
||||
timeout=0,
|
||||
poke_interval=1
|
||||
)
|
||||
task._hook = self.hook
|
||||
try:
|
||||
|
@ -95,6 +97,7 @@ class TestFileSensor(unittest.TestCase):
|
|||
fs_conn_id='fs_default',
|
||||
dag=self.dag,
|
||||
timeout=0,
|
||||
poke_interval=1
|
||||
)
|
||||
task._hook = self.hook
|
||||
try:
|
||||
|
@ -134,6 +137,57 @@ class TestFileSensor(unittest.TestCase):
|
|||
task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
|
||||
ignore_ti_state=True)
|
||||
|
||||
def test_wildcard_file(self):
|
||||
suffix = '.txt'
|
||||
with tempfile.NamedTemporaryFile(suffix=suffix) as tmp:
|
||||
fileglob = os.path.join(os.path.dirname(tmp.name), '*' + suffix)
|
||||
task = FileSensor(
|
||||
task_id='test',
|
||||
filepath=fileglob,
|
||||
fs_conn_id='fs_default',
|
||||
dag=self.dag,
|
||||
timeout=0,
|
||||
)
|
||||
task._hook = self.hook
|
||||
task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
|
||||
ignore_ti_state=True)
|
||||
|
||||
def test_subdirectory_not_empty(self):
|
||||
suffix = '.txt'
|
||||
dir_ = tempfile.mkdtemp()
|
||||
subdir = tempfile.mkdtemp(dir=dir_)
|
||||
|
||||
with tempfile.NamedTemporaryFile(suffix=suffix, dir=subdir):
|
||||
task = FileSensor(
|
||||
task_id='test',
|
||||
filepath=dir_,
|
||||
fs_conn_id='fs_default',
|
||||
dag=self.dag,
|
||||
timeout=0,
|
||||
)
|
||||
task._hook = self.hook
|
||||
task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
|
||||
ignore_ti_state=True)
|
||||
shutil.rmtree(dir_)
|
||||
|
||||
def test_subdirectory_empty(self):
|
||||
dir_ = tempfile.mkdtemp()
|
||||
tempfile.mkdtemp(dir=dir_)
|
||||
task = FileSensor(
|
||||
task_id='test',
|
||||
filepath=dir_,
|
||||
fs_conn_id='fs_default',
|
||||
dag=self.dag,
|
||||
timeout=0,
|
||||
poke_interval=1
|
||||
)
|
||||
task._hook = self.hook
|
||||
|
||||
with self.assertRaises(AirflowSensorTimeout):
|
||||
task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
|
||||
ignore_ti_state=True)
|
||||
shutil.rmtree(dir_)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
Загрузка…
Ссылка в новой задаче