Runner - Add signal handler in runner (#530)
Add signal handler in runner to gracefully exit when receiving SIGINT (<kbd>Ctrl</kbd>+<kbd>C</kbd>) or SIGTERM during benchmark execution.
This commit is contained in:
Родитель
4c0d96e5d8
Коммит
a1cd3c9475
1
setup.py
1
setup.py
|
@ -198,6 +198,7 @@ setup(
|
|||
'types-pkg_resources',
|
||||
'types-pyyaml',
|
||||
'typing-extensions>=3.10',
|
||||
'urllib3<2.0',
|
||||
'vcrpy>=4.1.1',
|
||||
'yapf==0.31.0',
|
||||
],
|
||||
|
|
|
@ -59,11 +59,12 @@ class AnsibleClient():
|
|||
self._config['cmdline'] += ' --ask-pass --ask-become-pass'
|
||||
logger.info(self._config)
|
||||
|
||||
def run(self, ansible_config, sudo=False): # pragma: no cover
|
||||
def run(self, ansible_config, cancel_callback=None, sudo=False): # pragma: no cover
|
||||
"""Run Ansible runner.
|
||||
|
||||
Args:
|
||||
ansible_config (dict): Ansible config dict.
|
||||
cancel_callback (Callable): Callback polled by ansible_runner during the run; returning True cancels it. Defaults to None.
|
||||
sudo (bool): Run as sudo or not. Defaults to False.
|
||||
|
||||
Returns:
|
||||
|
@ -73,7 +74,7 @@ class AnsibleClient():
|
|||
logger.info('Run as sudo ...')
|
||||
ansible_config['cmdline'] += ' --become'
|
||||
with tempfile.TemporaryDirectory(prefix='ansible') as tmpdir:
|
||||
r = ansible_runner.run(private_data_dir=tmpdir, **ansible_config)
|
||||
r = ansible_runner.run(private_data_dir=tmpdir, cancel_callback=cancel_callback, **ansible_config)
|
||||
logger.debug(r.stats)
|
||||
if r.rc == 0:
|
||||
logger.info('Run succeed, return code {}.'.format(r.rc))
|
||||
|
|
|
@ -4,8 +4,10 @@
|
|||
"""SuperBench Runner."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import random
|
||||
import signal
|
||||
from pathlib import Path
|
||||
from pprint import pformat
|
||||
from collections import defaultdict
|
||||
|
@ -233,6 +235,18 @@ class SuperBenchRunner():
|
|||
)
|
||||
)
|
||||
|
||||
def __signal_handler(self, signum, frame):
|
||||
"""Handle SIGINT/SIGTERM for the runner: log the signal name, clean up, and exit.
|
||||
|
||||
Args:
|
||||
signum (int): Number of the received signal.
|
||||
frame (FrameType): Stack frame that was executing when the signal arrived (unused).
|
||||
"""
|
||||
if signum == signal.SIGINT or signum == signal.SIGTERM:
|
||||
logger.info('Killed by %s, exiting ...', signal.Signals(signum).name)
|
||||
self.cleanup()
|
||||
sys.exit(128 + signum)  # exit status is 128 + signal number, as shells report for signal-terminated processes
|
||||
|
||||
def __create_results_summary(self): # pragma: no cover
|
||||
"""Create the result summary file of all nodes."""
|
||||
all_results = list()
|
||||
|
@ -438,12 +452,17 @@ class SuperBenchRunner():
|
|||
# we do not expect timeout in ansible unless subprocess hangs
|
||||
ansible_runner_config['timeout'] = timeout + 60
|
||||
|
||||
rc = self._ansible_client.run(ansible_runner_config, sudo=(not self._docker_config.skip))
|
||||
# pass a no-op cancel_callback so ansible_runner does not install its own SIGINT/SIGTERM handler and the main process's handler stays in effect
|
||||
rc = self._ansible_client.run(
|
||||
ansible_runner_config, cancel_callback=lambda: None, sudo=(not self._docker_config.skip)
|
||||
)
|
||||
return rc
|
||||
|
||||
def run(self):
|
||||
"""Run the SuperBench benchmarks distributedly."""
|
||||
self.check_env()
|
||||
signal.signal(signal.SIGINT, self.__signal_handler)
|
||||
signal.signal(signal.SIGTERM, self.__signal_handler)
|
||||
for benchmark_name in self._sb_benchmarks:
|
||||
if benchmark_name not in self._sb_enabled_benchmarks:
|
||||
continue
|
||||
|
|
Загрузка…
Ссылка в новой задаче