Perf_kit was a separate folder and it was a problem when we tried to
build it from Docker-embedded sources, because there was a hidden,
implicit dependency between tests (conftest) and perf.

Perf_kit is now moved to tests to be available in the CI image
also when we run tests without the sources mounted.
This will be changed back in #10441, and moving perf_kit now
is needed for that change to work.
This commit is contained in:
Jarek Potiuk 2020-08-22 21:53:07 +02:00 коммит произвёл GitHub
Родитель ee7ca128a1
Коммит 7ee7d7cf3f
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
16 изменённых файлов: 112 добавлений и 53 удалений

Просмотреть файл

@ -50,7 +50,6 @@ services:
- ../../../pylintrc:/opt/airflow/pylintrc:cached - ../../../pylintrc:/opt/airflow/pylintrc:cached
- ../../../pytest.ini:/opt/airflow/pytest.ini:cached - ../../../pytest.ini:/opt/airflow/pytest.ini:cached
- ../../../scripts:/opt/airflow/scripts:cached - ../../../scripts:/opt/airflow/scripts:cached
- ../../../scripts/perf:/opt/airflow/scripts/perf:cached
- ../../../scripts/in_container/entrypoint_ci.sh:/entrypoint:cached - ../../../scripts/in_container/entrypoint_ci.sh:/entrypoint:cached
- ../../../setup.cfg:/opt/airflow/setup.cfg:cached - ../../../setup.cfg:/opt/airflow/setup.cfg:cached
- ../../../setup.py:/opt/airflow/setup.py:cached - ../../../setup.py:/opt/airflow/setup.py:cached

Просмотреть файл

@ -46,7 +46,6 @@ function generate_local_mounts_list {
"$prefix"pylintrc:/opt/airflow/pylintrc:cached "$prefix"pylintrc:/opt/airflow/pylintrc:cached
"$prefix"pytest.ini:/opt/airflow/pytest.ini:cached "$prefix"pytest.ini:/opt/airflow/pytest.ini:cached
"$prefix"scripts:/opt/airflow/scripts:cached "$prefix"scripts:/opt/airflow/scripts:cached
"$prefix"scripts/perf:/opt/airflow/scripts/perf:cached
"$prefix"scripts/in_container/entrypoint_ci.sh:/entrypoint:cached "$prefix"scripts/in_container/entrypoint_ci.sh:/entrypoint:cached
"$prefix"setup.cfg:/opt/airflow/setup.cfg:cached "$prefix"setup.cfg:/opt/airflow/setup.cfg:cached
"$prefix"setup.py:/opt/airflow/setup.py:cached "$prefix"setup.py:/opt/airflow/setup.py:cached

Просмотреть файл

@ -33,12 +33,7 @@ os.environ["AIRFLOW__CORE__UNIT_TEST_MODE"] = "True"
os.environ["AWS_DEFAULT_REGION"] = (os.environ.get("AWS_DEFAULT_REGION") or "us-east-1") os.environ["AWS_DEFAULT_REGION"] = (os.environ.get("AWS_DEFAULT_REGION") or "us-east-1")
os.environ["CREDENTIALS_DIR"] = (os.environ.get('CREDENTIALS_DIR') or "/files/airflow-breeze-config/keys") os.environ["CREDENTIALS_DIR"] = (os.environ.get('CREDENTIALS_DIR') or "/files/airflow-breeze-config/keys")
perf_directory = os.path.abspath(os.path.join(tests_directory, os.pardir, 'scripts', 'perf')) from tests.utils.perf.perf_kit.sqlalchemy import ( # noqa isort:skip # pylint: disable=wrong-import-position
if perf_directory not in sys.path:
sys.path.append(perf_directory)
from perf_kit.sqlalchemy import ( # noqa: E402 isort:skip # pylint: disable=wrong-import-position
count_queries, trace_queries count_queries, trace_queries
) )

Просмотреть файл

@ -63,7 +63,7 @@ from tests.test_utils.mock_executor import MockExecutor
ROOT_FOLDER = os.path.realpath( ROOT_FOLDER = os.path.realpath(
os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir) os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir)
) )
PERF_DAGS_FOLDER = os.path.join(ROOT_FOLDER, "scripts", "perf", "dags") PERF_DAGS_FOLDER = os.path.join(ROOT_FOLDER, "tests", "utils", "perf", "dags")
ELASTIC_DAG_FILE = os.path.join(PERF_DAGS_FOLDER, "elastic_dag.py") ELASTIC_DAG_FILE = os.path.join(PERF_DAGS_FOLDER, "elastic_dag.py")
TEST_DAG_FOLDER = os.environ['AIRFLOW__CORE__DAGS_FOLDER'] TEST_DAG_FOLDER = os.environ['AIRFLOW__CORE__DAGS_FOLDER']

Просмотреть файл

Просмотреть файл

Просмотреть файл

Просмотреть файл

@ -32,21 +32,21 @@ Content
======= =======
The following decorators and context managers are included. The following decorators and context managers are included.
.. autofunction:: perf_kit.memory.trace_memory .. autofunction:: tests.utils.perf.perf_kit.memory.trace_memory
.. autofunction:: perf_kit.python.pyspy .. autofunction:: tests.utils.perf.perf_kit.python.pyspy
.. autofunction:: perf_kit.python.profiled .. autofunction:: tests.utils.perf.perf_kit.python.profiled
.. autofunction:: perf_kit.repeat_and_time.timing .. autofunction:: tests.utils.perf.perf_kit.repeat_and_time.timing
.. autofunction:: perf_kit.repeat_and_time.repeat .. autofunction:: tests.utils.perf.perf_kit.repeat_and_time.repeat
.. autofunction:: perf_kit.repeat_and_time.timeout .. autofunction:: tests.utils.perf.perf_kit.repeat_and_time.timeout
.. autofunction:: perf_kit.sqlalchemy.trace_queries .. autofunction:: tests.utils.perf.perf_kit.sqlalchemy.trace_queries
.. autofunction:: perf_kit.sqlalchemy.count_queries .. autofunction:: tests.utils.perf.perf_kit.sqlalchemy.count_queries
Documentation for each function is provided in the function docstrings. Each module also has an example in Documentation for each function is provided in the function docstrings. Each module also has an example in
the main section of the module. the main section of the module.
@ -54,11 +54,12 @@ the main section of the module.
Examples Examples
======== ========
If you want to run an all example for ``perf_kit.sqlalchemy``, you can run the following command. If you want to run an all example for ``tests.utils.perf.perf_kit.sqlalchemy``, you can run the
following command.
.. code-block:: bash .. code-block:: bash
python -m perf_kit.sqlalchemy python -m tests.utils.perf.perf_kit.sqlalchemy
If you want to know how to use these functions, it is worth to familiarize yourself with these examples. If you want to know how to use these functions, it is worth to familiarize yourself with these examples.
@ -98,7 +99,7 @@ queries in it.
self.assertEqual(prev_local.isoformat(), "2018-03-24T03:00:00+01:00") self.assertEqual(prev_local.isoformat(), "2018-03-24T03:00:00+01:00")
self.assertEqual(prev.isoformat(), "2018-03-24T02:00:00+00:00") self.assertEqual(prev.isoformat(), "2018-03-24T02:00:00+00:00")
from perf_kit.sqlalchemy import trace_queries from tests.utils.perf.perf_kit.sqlalchemy import trace_queries
@trace_queries @trace_queries
def test_bulk_sync_to_db(self): def test_bulk_sync_to_db(self):

Просмотреть файл

@ -35,6 +35,7 @@ def _human_readable_size(size, decimal_places=3):
class TraceMemoryResult: class TraceMemoryResult:
"""Trace results of memory,"""
def __init__(self): def __init__(self):
self.before = 0 self.before = 0
self.after = 0 self.after = 0

Просмотреть файл

@ -45,7 +45,7 @@ def pyspy():
cap_add: cap_add:
- SYS_PTRACE - SYS_PTRACE
In the case of Airflow Breeze, you should modify the ``scripts/perf/perf_kit/python.py`` file. In the case of Airflow Breeze, you should modify the ``tests/utils/perf/perf_kit/python.py`` file.
""" """
pid = str(os.getpid()) pid = str(os.getpid())
suffix = datetime.datetime.now().isoformat() suffix = datetime.datetime.now().isoformat()
@ -66,24 +66,28 @@ def profiled(print_callers=False):
This decorator provide deterministic profiling. It uses ``cProfile`` internally. It generates statistic This decorator provide deterministic profiling. It uses ``cProfile`` internally. It generates statistic
and print on the screen. and print on the screen.
""" """
pr = cProfile.Profile() profile = cProfile.Profile()
pr.enable() profile.enable()
try: try:
yield yield
finally: finally:
pr.disable() profile.disable()
s = io.StringIO() stat = io.StringIO()
ps = pstats.Stats(pr, stream=s).sort_stats("cumulative") pstatistics = pstats.Stats(profile, stream=stat).sort_stats("cumulative")
if print_callers: if print_callers:
ps.print_callers() pstatistics.print_callers()
else: else:
ps.print_stats() pstatistics.print_stats()
print(s.getvalue()) print(stat.getvalue())
if __name__ == "__main__": if __name__ == "__main__":
def case(): def case():
"""
Load modules.
:return:
"""
import logging import logging
import airflow import airflow

Просмотреть файл

@ -23,6 +23,7 @@ import time
class TimingResult: class TimingResult:
"""Timing result."""
def __init__(self): def __init__(self):
self.start_time = 0 self.start_time = 0
self.end_time = 0 self.end_time = 0
@ -65,7 +66,7 @@ def repeat(repeat_count=5):
@functools.wraps(f) @functools.wraps(f)
def wrap(*args, **kwargs): def wrap(*args, **kwargs):
last_result = None last_result = None
for i in range(repeat_count): for _ in range(repeat_count):
last_result = f(*args, **kwargs) last_result = f(*args, **kwargs)
return last_result return last_result
@ -75,7 +76,7 @@ def repeat(repeat_count=5):
class TimeoutException(Exception): class TimeoutException(Exception):
pass """Exception when the test timeo uts"""
@contextlib.contextmanager @contextlib.contextmanager
@ -109,13 +110,13 @@ def timeout(seconds=1):
if __name__ == "__main__": if __name__ == "__main__":
def monte_carlo(total=10000): def monte_carlo(total=10000):
# Monte Carlo """Monte Carlo"""
inside = 0 inside = 0
for i in range(0, total): for _ in range(0, total):
x2 = random.random() ** 2 x_val = random.random() ** 2
y2 = random.random() ** 2 y_val = random.random() ** 2
if math.sqrt(x2 + y2) < 1.0: if math.sqrt(x_val + y_val) < 1.0:
inside += 1 inside += 1
return (float(inside) / total) * 4 return (float(inside) / total) * 4
@ -134,15 +135,16 @@ if __name__ == "__main__":
@timing(REPEAT_COUNT) @timing(REPEAT_COUNT)
@repeat(REPEAT_COUNT) @repeat(REPEAT_COUNT)
@timing() @timing()
def pi(): def get_pi():
"""Returns PI value:"""
return monte_carlo() return monte_carlo()
result = pi() res = get_pi()
print("PI: ", result) print("PI: ", res)
print() print()
# Example 3: # Example 3:
with timing(): with timing():
result = monte_carlo() res = monte_carlo()
print("PI: ", result) print("PI: ", res)

Просмотреть файл

@ -32,6 +32,7 @@ def _pretty_format_sql(text: str):
return text return text
# noinspection PyUnusedLocal
class TraceQueries: class TraceQueries:
""" """
Tracking SQL queries in a code block. Tracking SQL queries in a code block.
@ -61,11 +62,46 @@ class TraceQueries:
self.print_fn = print_fn self.print_fn = print_fn
self.query_count = 0 self.query_count = 0
def before_cursor_execute(self, conn, cursor, statement, parameters, context, executemany): def before_cursor_execute(self,
conn,
cursor, # pylint: disable=unused-argument
statement, # pylint: disable=unused-argument
parameters, # pylint: disable=unused-argument
context, # pylint: disable=unused-argument
executemany): # pylint: disable=unused-argument
"""
Executed before cursor.
:param conn: connection
:param cursor: cursor
:param statement: statement
:param parameters: parameters
:param context: context
:param executemany: whether many statements executed
:return:
"""
conn.info.setdefault("query_start_time", []).append(time.monotonic()) conn.info.setdefault("query_start_time", []).append(time.monotonic())
self.query_count += 1 self.query_count += 1
def after_cursor_execute(self, conn, cursor, statement, parameters, context, executemany): def after_cursor_execute(self,
conn,
cursor, # pylint: disable=unused-argument
statement,
parameters,
context, # pylint: disable=unused-argument
executemany): # pylint: disable=unused-argument
"""
Executed after cursor.
:param conn: connection
:param cursor: cursor
:param statement: statement
:param parameters: parameters
:param context: context
:param executemany: whether many statements executed
:return:
"""
total = time.monotonic() - conn.info["query_start_time"].pop() total = time.monotonic() - conn.info["query_start_time"].pop()
file_names = [ file_names = [
f"{f.filename}:{f.name}:{f.lineno}" f"{f.filename}:{f.name}:{f.lineno}"
@ -102,7 +138,8 @@ class TraceQueries:
event.listen(airflow.settings.engine, "before_cursor_execute", self.before_cursor_execute) event.listen(airflow.settings.engine, "before_cursor_execute", self.before_cursor_execute)
event.listen(airflow.settings.engine, "after_cursor_execute", self.after_cursor_execute) event.listen(airflow.settings.engine, "after_cursor_execute", self.after_cursor_execute)
def __exit__(self, type_, value, traceback): # noinspection PyShadowingNames
def __exit__(self, type_, value, traceback): # pylint: disable=redefined-outer-name
import airflow.settings import airflow.settings
event.remove(airflow.settings.engine, "before_cursor_execute", self.before_cursor_execute) event.remove(airflow.settings.engine, "before_cursor_execute", self.before_cursor_execute)
event.remove(airflow.settings.engine, "after_cursor_execute", self.after_cursor_execute) event.remove(airflow.settings.engine, "after_cursor_execute", self.after_cursor_execute)
@ -112,6 +149,9 @@ trace_queries = TraceQueries # pylint: disable=invalid-name
class CountQueriesResult: class CountQueriesResult:
"""
Counter for number of queries.
"""
def __init__(self): def __init__(self):
self.count = 0 self.count = 0
@ -136,13 +176,30 @@ class CountQueries:
event.listen(airflow.settings.engine, "after_cursor_execute", self.after_cursor_execute) event.listen(airflow.settings.engine, "after_cursor_execute", self.after_cursor_execute)
return self.result return self.result
def __exit__(self, type_, value, traceback): # noinspection PyShadowingNames
def __exit__(self, type_, value, traceback): # pylint: disable=redefined-outer-name
import airflow.settings import airflow.settings
event.remove(airflow.settings.engine, "after_cursor_execute", self.after_cursor_execute) event.remove(airflow.settings.engine, "after_cursor_execute", self.after_cursor_execute)
self.print_fn(f"Count SQL queries: {self.result.count}") self.print_fn(f"Count SQL queries: {self.result.count}")
def after_cursor_execute(self, *args, **kwargs): def after_cursor_execute(self,
conn, # pylint: disable=unused-argument
cursor, # pylint: disable=unused-argument
statement, # pylint: disable=unused-argument
parameters, # pylint: disable=unused-argument
context, # pylint: disable=unused-argument
executemany): # pylint: disable=unused-argument
"""
Executed after cursor.
:param conn: connection
:param cursor: cursor
:param statement: statement
:param parameters: parameters
:param context: context
:param executemany: whether many statements executed
"""
self.result.count += 1 self.result.count += 1
@ -152,6 +209,7 @@ if __name__ == "__main__":
# Example: # Example:
def case(): def case():
"Case of logging om/"
import logging import logging
from unittest import mock from unittest import mock

Просмотреть файл

@ -101,15 +101,15 @@ def get_executor_under_test(dotted_path):
if dotted_path == "MockExecutor": if dotted_path == "MockExecutor":
try: try:
# Run against master and 1.10.x releases # Run against master and 1.10.x releases
from tests.test_utils.mock_executor import MockExecutor as Executor from tests.test_utils.mock_executor import MockExecutor as executor
except ImportError: except ImportError:
from tests.executors.test_executor import TestExecutor as Executor from tests.executors.test_executor import TestExecutor as executor
else: else:
Executor = ExecutorLoader.load_executor(dotted_path) executor = ExecutorLoader.load_executor(dotted_path)
# Change this to try other executors # Change this to try other executors
class ShortCircuitExecutor(ShortCircuitExecutorMixin, Executor): class ShortCircuitExecutor(ShortCircuitExecutorMixin, executor):
""" """
Placeholder class that implements the inheritance hierarchy Placeholder class that implements the inheritance hierarchy
""" """
@ -153,16 +153,16 @@ def create_dag_runs(dag, num_runs, session):
try: try:
from airflow.utils.types import DagRunType from airflow.utils.types import DagRunType
ID_PREFIX = f'{DagRunType.SCHEDULED.value}__' id_prefix = f'{DagRunType.SCHEDULED.value}__'
except ImportError: except ImportError:
from airflow.models.dagrun import DagRun from airflow.models.dagrun import DagRun
ID_PREFIX = DagRun.ID_PREFIX id_prefix = DagRun.ID_PREFIX # pylint: disable=no-member
next_run_date = dag.normalize_schedule(dag.start_date or min(t.start_date for t in dag.tasks)) next_run_date = dag.normalize_schedule(dag.start_date or min(t.start_date for t in dag.tasks))
for _ in range(num_runs): for _ in range(num_runs):
dag.create_dagrun( dag.create_dagrun(
run_id=ID_PREFIX + next_run_date.isoformat(), run_id=id_prefix + next_run_date.isoformat(),
execution_date=next_run_date, execution_date=next_run_date,
start_date=timezone.utcnow(), start_date=timezone.utcnow(),
state=State.RUNNING, state=State.RUNNING,

Просмотреть файл