Output stack trace when failing to shut down node cleanly (#5055)

This commit is contained in:
Takuro Sato 2023-02-27 08:50:22 +00:00 коммит произвёл GitHub
Родитель 1b57244ce6
Коммит 06888e47c1
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
6 изменённых файлов: 73 добавлений и 2 удалений

Просмотреть файл

@ -1 +1 @@
This looks like a "job" for Threading Canary!!!
This looks like a "job" for Threading Canary!!!

Просмотреть файл

@ -46,3 +46,6 @@
- import_role:
name: perf-tool
tasks_from: install.yml
- import_role:
name: lldb
tasks_from: install.yml

Просмотреть файл

@ -0,0 +1,8 @@
- name: Include vars
include_vars: common.yml
- name: Install debs
apt:
name: "lldb"
update_cache: yes
become: yes

Просмотреть файл

@ -0,0 +1 @@
workspace: "/tmp/"

Просмотреть файл

@ -517,6 +517,45 @@ class LocalRemote(CmdMixin):
ignore_error_patterns=ignore_error_patterns,
)
def _print_stack_trace(self):
    """
    Log a backtrace of every thread of the process behind self.proc.

    lldb is attached to the process by pid, asked for "thread backtrace all"
    and then quit; whatever it printed (stdout and stderr merged) is logged.
    This is a best-effort diagnostic: every failure (lldb missing, lldb
    timing out, lldb exiting with an error, ...) is logged and swallowed,
    so this method never raises.
    """
    # shutil.which returns None (not an empty string) when the executable
    # cannot be found on PATH.
    if shutil.which("lldb") is None:
        LOG.info("Couldn't find lldb installed")
        return

    lldb_timeout = 20
    try:
        command = [
            "lldb",
            "--one-line",
            f"process attach --pid {self.proc.pid}",
            "--one-line",
            "thread backtrace all",
            "--one-line",
            "quit",
        ]
        if os.geteuid() != 0:
            # Add sudo if not root
            command.insert(0, "sudo")
        completed_lldb_process = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            # To avoid errors on decoding lldb output as utf-8.
            # We should find a way to force lldb to use utf-8.
            errors="ignore",
            timeout=lldb_timeout,
            check=True,
        )
        LOG.info(f"stack trace: {completed_lldb_process.stdout}")
    except subprocess.TimeoutExpired:
        LOG.info(
            f"Failed to get stack trace. lldb did not finish within {lldb_timeout} seconds."
        )
    except Exception as e:
        # Deliberately broad: failing to collect diagnostics must not mask
        # the original problem the caller is reporting.
        LOG.info(f"Failed to get stack trace: {e}")
def stop(self, ignore_error_patterns=None):
"""
Disconnect the client, and therefore shut down the command as well.
@ -524,7 +563,16 @@ class LocalRemote(CmdMixin):
LOG.info("[{}] closing".format(self.hostname))
if self.proc:
self.proc.terminate()
self.proc.wait(10)
timeout = 10
try:
self.proc.wait(timeout)
except subprocess.TimeoutExpired:
LOG.exception(
f"Process didn't finish within {timeout} seconds. Tyring to get stack trace..."
)
self._print_stack_trace()
raise
exit_code = self.proc.returncode
if exit_code is not None and exit_code < 0:
signal_str = signal.strsignal(-exit_code)

Просмотреть файл

@ -17,6 +17,17 @@ pip install -q -U -r ../tests/requirements.txt
pip install -q -U -r ../tests/perf-system/requirements.txt
echo "Python environment successfully setup"
# Temporary: install lldb on demand if it is not already available.
# This block can be deleted once lldb is included in the CI images.
if ! command -v lldb > /dev/null 2>&1; then
    # Only prefix with sudo when not already running as root.
    SUDO=""
    if [ "$EUID" != 0 ]; then
        SUDO="sudo"
    fi
    # apt-get rather than apt: the apt front end does not guarantee a
    # stable command-line interface for use in scripts.
    $SUDO apt-get update
    $SUDO apt-get install -y lldb
fi
# Export where the VENV has been set, so tests running
# a sandbox.sh can inherit it rather create a new one
VENV_DIR=$(realpath env)