зеркало из https://github.com/microsoft/lisa.git
Fix kdump test for Mariner (#3475)
* Add sufficient sized data-disk in kdump test * Move print_additional_info just before panic * Use execute_async and remove kill_on_timeout parameter * Fix linter errors by moving the check into a new internal function * Remove whitespace * Use black format
This commit is contained in:
Родитель
b80e5bcfc6
Коммит
717e7c400b
|
@ -1,6 +1,7 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
import math
|
||||
import re
|
||||
from pathlib import PurePath, PurePosixPath
|
||||
from time import sleep
|
||||
|
@ -12,6 +13,8 @@ from lisa.base_tools import Cat, Sed, Service, Wget
|
|||
from lisa.executable import Tool
|
||||
from lisa.operating_system import CBLMariner, Debian, Oracle, Posix, Redhat, Suse
|
||||
from lisa.tools import Find, Gcc
|
||||
from lisa.tools.lsblk import Lsblk
|
||||
from lisa.tools.lscpu import Lscpu
|
||||
from lisa.tools.make import Make
|
||||
from lisa.tools.sysctl import Sysctl
|
||||
from lisa.tools.tar import Tar
|
||||
|
@ -451,6 +454,10 @@ class KdumpBase(Tool):
|
|||
# Check if memory is reserved for crash kernel
|
||||
self._check_crashkernel_memory_reserved()
|
||||
|
||||
def capture_info(self) -> None:
    """
    Hook for subclasses: override to print additional info before the panic.

    The base implementation deliberately does nothing.
    """
    return None
|
||||
|
||||
|
||||
class KdumpRedhat(KdumpBase):
|
||||
@property
|
||||
|
@ -597,15 +604,113 @@ class KdumpCBLMariner(KdumpBase):
|
|||
self.node.os.install_packages("kexec-tools")
|
||||
return self._check_exists()
|
||||
|
||||
def enable_kdump_service(self) -> None:
    """
    Adjust /etc/kdump.conf, fix up nr_cpus, then enable the kdump service
    through the base class implementation.
    """
    conf_path = "/etc/kdump.conf"
    # (match_lines, regexp, replacement) triples, applied in this order:
    #   1. comment out force_no_rebuild if it is present
    #   2. uncomment mariner_2_initrd_use_suffix; otherwise the original
    #      initrd file gets replaced, which causes a reboot loop
    conf_edits = (
        ("^force_no_rebuild", "force_no_rebuild", "#force_no_rebuild"),
        (
            "mariner_2_initrd_use_suffix",
            "#mariner_2_initrd_use_suffix",
            "mariner_2_initrd_use_suffix",
        ),
    )
    sed = self.node.tools[Sed]
    for line_filter, pattern, new_text in conf_edits:
        sed.substitute(
            match_lines=line_filter,
            regexp=pattern,
            replacement=new_text,
            file=conf_path,
            sudo=True,
        )

    # Make sure the nr_cpus setting fits the core count of this node
    self.ensure_nr_cpus()

    super().enable_kdump_service()
|
||||
|
||||
def ensure_nr_cpus(self) -> None:
    """
    Rewrite the nr_cpus value on the KDUMP_COMMANDLINE_APPEND line of
    /etc/sysconfig/kdump to ceil(core_count / 56).
    """
    cores = self.node.tools[Lscpu].get_core_count()
    nr_cpus_value = math.ceil(cores / 56)
    # Replace nr_cpus=<whatever> with nr_cpus=<nr_cpus_value>
    self.node.tools[Sed].substitute(
        match_lines="^KDUMP_COMMANDLINE_APPEND",
        regexp="nr_cpus=[^[:space:]]*",
        replacement=f"nr_cpus={nr_cpus_value}",
        file="/etc/sysconfig/kdump",
        sudo=True,
    )
|
||||
|
||||
def calculate_crashkernel_size(self, total_memory: str) -> str:
    """
    Return an empty crashkernel size regardless of total_memory.

    For x86 and arm64 Mariner, the default setting is 256M, so no size is
    computed here.
    """
    no_explicit_size = ""
    return no_explicit_size
|
||||
|
||||
def _get_crashkernel_cfg_file(self) -> str:
    """
    Return the path of the file holding the crashkernel kernel cmdline.

    The path is selected from the OS major version read from
    self.node.os.information.version.major: 3 and above use the grub.d
    drop-in file, older versions use /boot/mariner.cfg.
    """
    # The unconditional early return of "/boot/mariner.cfg" was removed:
    # it made the version check below unreachable.
    if self.node.os.information.version.major >= 3:
        return "/etc/default/grub.d/51_kexec_tools.cfg"
    return "/boot/mariner.cfg"
|
||||
|
||||
def _get_crashkernel_cfg_cmdline(self) -> str:
    """
    Return the name of the cmdline entry used for the crashkernel setting.
    """
    cmdline_key = "mariner_cmdline"
    return cmdline_key
|
||||
|
||||
def _get_crashkernel_update_cmd(self, crashkernel: str) -> str:
    """
    Return an empty update command; the crashkernel argument is not used.
    """
    no_command = ""
    return no_command
|
||||
|
||||
def config_resource_disk_dump_path(self, dump_path: str) -> None:
    """
    Redirect the kdump output directory to dump_path.

    Used when the default /var/crash may be too small to store the dump
    file. The directory is created, remembered in self.dump_path, and
    /etc/kdump.conf is updated to point at it.
    """
    mkdir_cmd = f"mkdir -p {dump_path}"
    self.node.execute(
        mkdir_cmd,
        expected_exit_code=0,
        expected_exit_code_failure_message=(f"Fail to create dir {dump_path}"),
        shell=True,
        sudo=True,
    )
    self.dump_path = dump_path

    conf_path = "/etc/kdump.conf"
    sed = self.node.tools[Sed]
    # Comment out any existing "path" line before adding the new one
    sed.substitute(
        match_lines="^path",
        regexp="path",
        replacement="#path",
        file=conf_path,
        sudo=True,
    )
    sed.append(f"path {self.dump_path}", conf_path, sudo=True)
|
||||
|
||||
def capture_info(self) -> None:
    """
    Log kernel cmdline, kdump configuration files, sysrq value, block
    devices and fstab, in that order.
    """
    cat = self.node.tools[Cat]

    def log_file(path: str, label: str) -> None:
        # Always re-read the file (force_run) so a cached result is not logged
        content = cat.run(path, force_run=True, sudo=True).stdout
        self._log.info(f"{label}: {content}")

    log_file("/proc/cmdline", "Current kernel command line")
    # The crashkernel config file path comes from _get_crashkernel_cfg_file()
    log_file(
        self._get_crashkernel_cfg_file(),
        "Current kernel cmdline in config file",
    )
    log_file("/etc/sysconfig/kdump", "Current kdump configuration")
    log_file("/proc/sys/kernel/sysrq", "Current sysrq value")

    # lsblk -l output
    partitions = self.node.tools[Lsblk].run("-l", force_run=True)
    self._log.info(f"Current disk partitions: {partitions.stdout}")

    log_file("/etc/fstab", "Current fstab")
    log_file("/etc/kdump.conf", "Current kdump configuration")
|
||||
|
|
|
@ -268,6 +268,24 @@ class KdumpCrash(TestSuite):
|
|||
dump_path = mount_point + "/crash"
|
||||
return dump_path
|
||||
|
||||
def _is_system_with_more_memory(self, node: Node) -> bool:
    """
    Tell whether system memory is large compared to the OS disk.

    Returns True when the total memory string contains "T", or when it
    contains "G" and its numeric value is greater than
    (os_disk_size - 10). Returns False otherwise, including when the disk
    capability is missing or os_disk_size is not an int.
    NOTE(review): assumes os_disk_size uses the same unit as the "G"
    memory value - confirm.
    """
    total_memory = node.tools[Free].get_total_memory()

    if "T" in total_memory:
        return True
    if "G" not in total_memory:
        return False

    disk = node.capability.disk
    if not disk or not isinstance(disk.os_disk_size, int):
        return False

    memory_value = float(total_memory.strip("G"))
    return memory_value > (disk.os_disk_size - 10)
|
||||
|
||||
def _kdump_test(self, node: Node, log_path: Path, log: Logger) -> None:
|
||||
try:
|
||||
self._check_supported(node)
|
||||
|
@ -281,14 +299,14 @@ class KdumpCrash(TestSuite):
|
|||
if self.is_auto:
|
||||
self.crash_kernel = "auto"
|
||||
|
||||
if "T" in total_memory and float(total_memory.strip("T")) > 1:
|
||||
# System memory is more than 1T, need to change the dump path
|
||||
if self._is_system_with_more_memory(node):
|
||||
# System memory is large relative to the OS disk size, need to change the dump path
|
||||
# and increase the timeout duration
|
||||
kdump.config_resource_disk_dump_path(
|
||||
self._get_resource_disk_dump_path(node)
|
||||
)
|
||||
self.timeout_of_dump_crash = 1200
|
||||
if float(total_memory.strip("T")) > 6:
|
||||
if "T" in total_memory and float(total_memory.strip("T")) > 6:
|
||||
self.timeout_of_dump_crash = 2000
|
||||
|
||||
kdump.config_crashkernel_memory(self.crash_kernel)
|
||||
|
@ -310,14 +328,15 @@ class KdumpCrash(TestSuite):
|
|||
echo.write_to_file("1", node.get_pure_path("/proc/sys/kernel/sysrq"), sudo=True)
|
||||
node.execute("sync", shell=True, sudo=True)
|
||||
|
||||
kdump.capture_info()
|
||||
|
||||
try:
|
||||
# Trigger kdump. After executing the trigger cmd, the VM will be disconnected
|
||||
# We set a timeout of 10.
|
||||
node.execute(
|
||||
node.execute_async(
|
||||
self.trigger_kdump_cmd,
|
||||
shell=True,
|
||||
sudo=True,
|
||||
timeout=10,
|
||||
)
|
||||
except Exception as identifier:
|
||||
log.debug(f"ignorable ssh exception: {identifier}")
|
||||
|
|
Загрузка…
Ссылка в новой задаче