# incubator-airflow/tests/lineage/test_lineage.py

# 114 lines
# 4.1 KiB
# Python

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import unittest
from airflow.lineage import AUTO
from airflow.lineage.entities import File
from airflow.models import DAG, TaskInstance as TI
from airflow.operators.dummy import DummyOperator
from airflow.utils import timezone
# Fixed execution date shared by every TaskInstance and context built in these tests;
# it is also the value the templated file URLs are expected to render to.
DEFAULT_DATE = timezone.datetime(2016, 1, 1)
class TestLineage(unittest.TestCase):
    """Exercise inlet/outlet handling around ``pre_execute`` and ``post_execute``."""

    @staticmethod
    def _context_for(task):
        """Build the context dict handed to ``pre_execute``/``post_execute`` for ``task``.

        ``execution_date`` is set in the context in order to avoid creating
        task instances.
        """
        return {
            "ti": TI(task=task, execution_date=DEFAULT_DATE),
            "execution_date": DEFAULT_DATE,
        }

    def test_lineage(self):
        """Check manual, empty, ``AUTO`` and task-id inlets on a small chain of tasks."""
        dag = DAG(dag_id='test_prepare_lineage', start_date=DEFAULT_DATE)

        first_template = "/tmp/does_not_exist_1-{}"
        second_template = "/tmp/does_not_exist_2-{}"
        third_path = "/tmp/does_not_exist_3"

        # The first two files carry a template placeholder in their URL; the
        # third one is a plain path.
        templated_in = File(first_template.format("{{ execution_date }}"))
        templated_out = File(second_template.format("{{ execution_date }}"))
        static_out = File(third_path)

        # URLs the templated files are expected to have after pre_execute.
        expected_in_url = first_template.format(DEFAULT_DATE)
        expected_out_url = second_template.format(DEFAULT_DATE)

        with dag:
            leave1 = DummyOperator(task_id='leave1', inlets=templated_in, outlets=[templated_out])
            leave2 = DummyOperator(task_id='leave2')
            level1 = DummyOperator(task_id='upstream_level_1', inlets=AUTO, outlets=static_out)
            level2 = DummyOperator(task_id='upstream_level_2')
            level3 = DummyOperator(task_id='upstream_level_3', inlets=["leave1", "upstream_level_1"])

            # leave1 and leave2 both feed level1, which chains on to level2 and level3.
            wiring = (
                (leave1, level1),
                (leave2, level1),
                (level1, level2),
                (level2, level3),
            )
            for upstream, downstream in wiring:
                upstream.set_downstream(downstream)

        dag.clear()

        leave1_ctx = self._context_for(leave1)
        leave2_ctx = self._context_for(leave2)
        level1_ctx = self._context_for(level1)
        level3_ctx = self._context_for(level3)

        # prepare with manual inlets and outlets
        leave1.pre_execute(leave1_ctx)
        assert [entity.url for entity in leave1.inlets] == [expected_in_url]
        assert [entity.url for entity in leave1.outlets] == [expected_out_url]

        # post process with no backend
        leave1.post_execute(leave1_ctx)

        # leave2 declares no inlets at all
        leave2.pre_execute(leave2_ctx)
        assert len(leave2.inlets) == 0
        leave2.post_execute(leave2_ctx)

        # level1 uses AUTO inlets: its single inlet must carry leave1's outlet URL
        level1.pre_execute(level1_ctx)
        assert [entity.url for entity in level1.inlets] == [expected_out_url]
        assert level1.outlets[0] == static_out
        level1.post_execute(level1_ctx)

        # level2 is skipped on purpose

        # level3 names two task ids as inlets and must end up with two inlets
        level3.pre_execute(level3_ctx)
        assert len(level3.inlets) == 2
        level3.post_execute(level3_ctx)

    def test_lineage_render(self):
        """Check inlets/outlets are rendered when they are added after initialization."""
        dag = DAG(dag_id='test_lineage_render', start_date=DEFAULT_DATE)

        with dag:
            task = DummyOperator(task_id='task1')

        url_template = "/tmp/does_not_exist_1-{}"
        templated_file = File(url_template.format("{{ execution_date }}"))
        expected_url = url_template.format(DEFAULT_DATE)

        # The same templated file is attached as both inlet and outlet, only
        # after the operator has been constructed.
        task.inlets.append(templated_file)
        task.outlets.append(templated_file)

        task.pre_execute(self._context_for(task))

        assert task.inlets[0].url == expected_url
        assert task.outlets[0].url == expected_url