* Add a sufficiently sized data disk in kdump test

* Move print_additional_info just before panic

* Use execute_async and remove kill_on_timeout parameter

* Fix linter errors by moving the check into a new internal function

* Remove whitespace

* Use black format
This commit is contained in:
Bala 2024-10-22 15:38:37 +05:30 коммит произвёл GitHub
Родитель b80e5bcfc6
Коммит 717e7c400b
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
2 изменённых файлов: 130 добавлений и 6 удалений

Просмотреть файл

@ -1,6 +1,7 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import math
import re
from pathlib import PurePath, PurePosixPath
from time import sleep
@ -12,6 +13,8 @@ from lisa.base_tools import Cat, Sed, Service, Wget
from lisa.executable import Tool
from lisa.operating_system import CBLMariner, Debian, Oracle, Posix, Redhat, Suse
from lisa.tools import Find, Gcc
from lisa.tools.lsblk import Lsblk
from lisa.tools.lscpu import Lscpu
from lisa.tools.make import Make
from lisa.tools.sysctl import Sysctl
from lisa.tools.tar import Tar
@ -451,6 +454,10 @@ class KdumpBase(Tool):
# Check if memory is reserved for crash kernel
self._check_crashkernel_memory_reserved()
def capture_info(self) -> None:
    """
    Hook for printing additional info before panic.

    The base implementation does nothing; override it to print extra
    diagnostics.
    """
    return None
class KdumpRedhat(KdumpBase):
@property
@ -597,11 +604,58 @@ class KdumpCBLMariner(KdumpBase):
self.node.os.install_packages("kexec-tools")
return self._check_exists()
def enable_kdump_service(self) -> None:
    """
    Adjust /etc/kdump.conf, fix up nr_cpus, then enable the kdump service.

    The actual enabling is delegated to the parent class implementation.
    """
    conf_path = "/etc/kdump.conf"
    sed_tool = self.node.tools[Sed]

    # Each entry is (match_lines, regexp, replacement), applied in order.
    conf_edits = (
        # Comment out force_no_rebuild if it is present
        ("^force_no_rebuild", "force_no_rebuild", "#force_no_rebuild"),
        # Uncomment mariner_2_initrd_use_suffix. Otherwise it will replace
        # the original initrd file which will cause a reboot-loop
        (
            "mariner_2_initrd_use_suffix",
            "#mariner_2_initrd_use_suffix",
            "mariner_2_initrd_use_suffix",
        ),
    )
    for line_filter, pattern, new_text in conf_edits:
        sed_tool.substitute(
            match_lines=line_filter,
            regexp=pattern,
            replacement=new_text,
            file=conf_path,
            sudo=True,
        )

    # Make sure nr_cpus matches the core count before enabling the service
    self.ensure_nr_cpus()
    super().enable_kdump_service()
def ensure_nr_cpus(self) -> None:
    """
    Rewrite nr_cpus in /etc/sysconfig/kdump based on the core count.

    The new value is ceil(core_count / 56). Only an existing
    ``nr_cpus=<value>`` token on the KDUMP_COMMANDLINE_APPEND line is
    replaced.
    """
    cores = self.node.tools[Lscpu].get_core_count()
    wanted_nr_cpus = math.ceil(cores / 56)
    # Swap nr_cpus=<whatever> for nr_cpus=<wanted_nr_cpus>
    self.node.tools[Sed].substitute(
        regexp="nr_cpus=[^[:space:]]*",
        replacement=f"nr_cpus={wanted_nr_cpus}",
        match_lines="^KDUMP_COMMANDLINE_APPEND",
        file="/etc/sysconfig/kdump",
        sudo=True,
    )
def calculate_crashkernel_size(self, total_memory: str) -> str:
    """
    Return an empty string regardless of ``total_memory``.

    For x86 and arm64 Mariner, the default setting is 256M, so no size is
    calculated here.
    """
    return ""
def _get_crashkernel_cfg_file(self) -> str:
    """
    Return the path of the crashkernel config file for this OS version.

    Major version 3 and above use the grub.d drop-in file; anything older
    uses /boot/mariner.cfg.
    """
    uses_grub_dropin = self.node.os.information.version.major >= 3
    return (
        "/etc/default/grub.d/51_kexec_tools.cfg"
        if uses_grub_dropin
        else "/boot/mariner.cfg"
    )
def _get_crashkernel_cfg_cmdline(self) -> str:
@ -609,3 +663,54 @@ class KdumpCBLMariner(KdumpBase):
def _get_crashkernel_update_cmd(self, crashkernel: str) -> str:
    """
    Return an empty string; ``crashkernel`` is not used by this
    implementation.
    """
    return ""
def config_resource_disk_dump_path(self, dump_path: str) -> None:
    """
    Create ``dump_path`` on the node and make it the kdump dump path.

    Meant for systems with so much memory that the default /var/crash may
    not be big enough to store the dump file. The directory is created, the
    path is remembered in ``self.dump_path``, and /etc/kdump.conf is updated.
    """
    mkdir_failure = f"Fail to create dir {dump_path}"
    self.node.execute(
        f"mkdir -p {dump_path}",
        shell=True,
        sudo=True,
        expected_exit_code=0,
        expected_exit_code_failure_message=mkdir_failure,
    )
    self.dump_path = dump_path

    conf_path = "/etc/kdump.conf"
    sed_tool = self.node.tools[Sed]
    # Comment out the existing path entry, then append the new one
    sed_tool.substitute(
        regexp="path",
        replacement="#path",
        match_lines="^path",
        file=conf_path,
        sudo=True,
    )
    sed_tool.append(f"path {self.dump_path}", conf_path, sudo=True)
def capture_info(self) -> None:
    """
    Log the current kdump-related state of the node.

    Covers the kernel command line, the crashkernel config file,
    /etc/sysconfig/kdump, the sysrq value, the ``lsblk -l`` output,
    /etc/fstab and /etc/kdump.conf, in that order.
    """
    cat = self.node.tools[Cat]

    def _read(path: str) -> str:
        # Always re-run with sudo so the logged content is current
        return cat.run(path, force_run=True, sudo=True).stdout

    cmdline = _read("/proc/cmdline")
    self._log.info(f"Current kernel command line: {cmdline}")

    # e.g. /etc/default/grub.d/51_kexec_tools.cfg
    cfg_cmdline = _read(self._get_crashkernel_cfg_file())
    self._log.info(f"Current kernel cmdline in config file: {cfg_cmdline}")

    sysconfig = _read("/etc/sysconfig/kdump")
    self._log.info(f"Current kdump configuration: {sysconfig}")

    sysrq = _read("/proc/sys/kernel/sysrq")
    self._log.info(f"Current sysrq value: {sysrq}")

    # lsblk -l output
    partitions = self.node.tools[Lsblk].run("-l", force_run=True).stdout
    self._log.info(f"Current disk partitions: {partitions}")

    fstab = _read("/etc/fstab")
    self._log.info(f"Current fstab: {fstab}")

    kdump_conf = _read("/etc/kdump.conf")
    self._log.info(f"Current kdump configuration: {kdump_conf}")

Просмотреть файл

@ -268,6 +268,24 @@ class KdumpCrash(TestSuite):
dump_path = mount_point + "/crash"
return dump_path
def _is_system_with_more_memory(self, node: Node) -> bool:
    """
    Tell whether the node's memory is large relative to its OS disk.

    Returns True when the total memory string contains "T", or when it
    contains "G" and its numeric value is greater than the OS disk size
    minus 10. Returns False otherwise, including when the OS disk size is
    not available as an int.
    """
    total_memory = node.tools[Free].get_total_memory()

    if "T" in total_memory:
        return True
    if "G" not in total_memory:
        return False

    disk = node.capability.disk
    if not disk or not isinstance(disk.os_disk_size, int):
        return False

    # Keep a 10-unit margin below the OS disk size
    return float(total_memory.strip("G")) > (disk.os_disk_size - 10)
def _kdump_test(self, node: Node, log_path: Path, log: Logger) -> None:
try:
self._check_supported(node)
@ -281,14 +299,14 @@ class KdumpCrash(TestSuite):
if self.is_auto:
self.crash_kernel = "auto"
if "T" in total_memory and float(total_memory.strip("T")) > 1:
# System memory is more than 1T, need to change the dump path
if self._is_system_with_more_memory(node):
# System memory is more than the OS disk size, need to change the dump path
# and increase the timeout duration
kdump.config_resource_disk_dump_path(
self._get_resource_disk_dump_path(node)
)
self.timeout_of_dump_crash = 1200
if float(total_memory.strip("T")) > 6:
if "T" in total_memory and float(total_memory.strip("T")) > 6:
self.timeout_of_dump_crash = 2000
kdump.config_crashkernel_memory(self.crash_kernel)
@ -310,14 +328,15 @@ class KdumpCrash(TestSuite):
echo.write_to_file("1", node.get_pure_path("/proc/sys/kernel/sysrq"), sudo=True)
node.execute("sync", shell=True, sudo=True)
kdump.capture_info()
try:
# Trigger kdump. After execute the trigger cmd, the VM will be disconnected
# We set a timeout time 10.
node.execute(
node.execute_async(
self.trigger_kdump_cmd,
shell=True,
sudo=True,
timeout=10,
)
except Exception as identifier:
log.debug(f"ignorable ssh exception: {identifier}")