NVMe testcase fixes for ASAP VMs

This commit is contained in:
SrikanthMyakam 2024-09-15 19:51:45 +05:30
Родитель f17921bf29
Коммит be1313ba74
3 изменённых файлов: 180 добавлений и 86 удалений

Просмотреть файл

@ -16,6 +16,7 @@ from lisa.schema import FeatureSettings
from lisa.tools import Ls, Lspci, Nvmecli
from lisa.tools.lspci import PciDevice
from lisa.util import field_metadata, get_matched_str
from lisa.util.constants import DEVICE_TYPE_NVME
class Nvme(Feature):
@ -42,6 +43,9 @@ class Nvme(Feature):
# /dev/nvme0n1p15 -> /dev/nvme0n1
NVME_NAMESPACE_PATTERN = re.compile(r"/dev/nvme[0-9]+n[0-9]+", re.M)
# /dev/nvme0n1p15 -> /dev/nvme0
NVME_DEVICE_PATTERN = re.compile(r"/dev/nvme[0-9]+", re.M)
_pci_device_name = "Non-Volatile memory controller"
_ls_devices: str = ""
@ -63,6 +67,11 @@ class Nvme(Feature):
matched_result = self._device_pattern.match(row)
if matched_result:
devices_list.append(matched_result.group("device_name"))
node_disk = self._node.features[Disk]
if node_disk.get_os_disk_controller_type() == schema.DiskControllerType.NVME:
os_disk_nvme_device = self.get_os_disk_nvme_device()
# Removing OS disk/device from the list.
devices_list.remove(os_disk_nvme_device)
return devices_list
def get_namespaces(self) -> List[str]:
@ -78,7 +87,13 @@ class Nvme(Feature):
return namespaces
def get_namespaces_from_cli(self) -> List[str]:
    """Return the NVMe namespaces reported by nvme-cli, without the OS disk.

    When the node's OS disk controller type is NVMe, the namespace that
    hosts the OS disk is dropped from the result so that callers only see
    the remaining namespaces.

    Returns:
        The namespace paths from ``Nvmecli.get_namespaces()``, minus the OS
        disk namespace when applicable.
    """
    namespaces_list = self._node.tools[Nvmecli].get_namespaces()
    node_disk = self._node.features[Disk]
    if node_disk.get_os_disk_controller_type() == schema.DiskControllerType.NVME:
        os_disk_nvme_namespace = self.get_os_disk_nvme_namespace()
        # Removing OS disk/device from the list. The membership check avoids
        # a ValueError from list.remove() when the OS namespace could not be
        # resolved or is not part of the nvme-cli output.
        if os_disk_nvme_namespace in namespaces_list:
            namespaces_list.remove(os_disk_nvme_namespace)
    return namespaces_list
def get_os_disk_nvme_namespace(self) -> str:
node_disk = self._node.features[Disk]
@ -93,10 +108,22 @@ class Nvme(Feature):
)
return os_partition_namespace
def get_os_disk_nvme_device(self) -> str:
    """Return the NVMe controller device that hosts the OS disk.

    The device path is extracted from the OS disk namespace with
    ``NVME_DEVICE_PATTERN`` (for example ``/dev/nvme0n1`` -> ``/dev/nvme0``).

    Returns:
        The device path, or an empty string when no OS disk namespace is
        available.
    """
    # Default to an empty string so the function never returns an unbound
    # local when the OS disk namespace is empty.
    os_disk_nvme_device = ""
    os_disk_nvme_namespace = self.get_os_disk_nvme_namespace()
    # Sample os_boot_partition when disk controller type is NVMe:
    # name: /dev/nvme0n1p15, disk: nvme, mount_point: /boot/efi, type: vfat
    if os_disk_nvme_namespace:
        os_disk_nvme_device = get_matched_str(
            os_disk_nvme_namespace,
            self.NVME_DEVICE_PATTERN,
        )
    return os_disk_nvme_device
def get_devices_from_lspci(self) -> List[PciDevice]:
devices_from_lspci = []
lspci_tool = self._node.tools[Lspci]
device_list = lspci_tool.get_devices()
device_list = lspci_tool.get_devices_by_type(DEVICE_TYPE_NVME, use_pci_ids=True)
devices_from_lspci = [
x for x in device_list if self._pci_device_name == x.device_class
]

Просмотреть файл

@ -163,13 +163,25 @@ class Lspci(Tool):
return self._check_exists()
def get_device_names_by_type(
    self, device_type: str, force_run: bool = False, use_pci_ids: bool = False
) -> List[str]:
    """Return the PCI slots of all devices of the given type.

    Args:
        device_type: Key into ``DEVICE_TYPE_DICT`` (case-insensitive).
        force_run: Forwarded to ``get_devices``.
        use_pci_ids: When True, match devices by controller, vendor and
            device ID (``CONTROLLER_ID_DICT`` / ``VENDOR_ID_DICT`` /
            ``DEVICE_ID_DICT``) instead of by device class name.

    Returns:
        The ``slot`` of every matching device.

    Raises:
        LisaException: If ``device_type`` is not a key of
            ``DEVICE_TYPE_DICT``.
    """
    # Normalize once; every lookup table below is keyed by the upper-case name.
    type_key = device_type.upper()
    if type_key not in DEVICE_TYPE_DICT:
        raise LisaException(f"pci_type '{device_type}' is not recognized.")
    devices_list = self.get_devices(force_run)
    if use_pci_ids:
        return [
            device.slot
            for device in devices_list
            if device.controller_id in CONTROLLER_ID_DICT[type_key]
            and device.vendor_id in VENDOR_ID_DICT[type_key]
            and device.device_id in DEVICE_ID_DICT[type_key]
        ]
    class_names = DEVICE_TYPE_DICT[type_key]
    return [x.slot for x in devices_list if x.device_class in class_names]
def get_devices_by_type(
@ -318,7 +330,7 @@ class LspciBSD(Lspci):
_disabled_devices: Set[str] = set()
def get_device_names_by_type(
self, device_type: str, force_run: bool = False
self, device_type: str, force_run: bool = False, use_pci_ids: bool = False
) -> List[str]:
output = self.node.execute("pciconf -l", sudo=True).stdout
if device_type.upper() not in self._DEVICE_DRIVER_MAPPING.keys():

Просмотреть файл

@ -11,26 +11,33 @@ from lisa import (
TestCaseMetadata,
TestSuite,
TestSuiteMetadata,
constants,
simple_requirement,
)
from lisa.features import Nvme, NvmeSettings, Sriov
from lisa.sut_orchestrator.azure.platform_ import AzurePlatform
from lisa.tools import Cat, Echo, Fdisk, Lscpu, Lspci, Mount, Nvmecli
from lisa.tools import Cat, Df, Echo, Fdisk, Lscpu, Lspci, Mkfs, Mount, Nvmecli
from lisa.tools.fdisk import FileSystem
from lisa.util.constants import DEVICE_TYPE_NVME, DEVICE_TYPE_SRIOV
def _format_mount_disk(
    node: Node,
    namespace: str,
    file_system: FileSystem,
    use_partitions: bool = True,
) -> None:
    """Prepare an NVMe namespace with a fresh filesystem and mount it.

    The mount point is the last path component of ``namespace`` (for
    example ``/dev/nvme0n1`` is mounted at ``nvme0n1``).

    Args:
        node: Node on which the tools are run.
        namespace: Namespace path of the disk to prepare.
        file_system: Filesystem type to create.
        use_partitions: When True, create a partition and mount
            ``<namespace>p1``; otherwise format and mount the namespace
            itself.
    """
    mount_point = namespace.rpartition("/")[-1]
    fdisk = node.tools[Fdisk]
    mount = node.tools[Mount]
    # Start from a clean state: unmount first, then drop any existing
    # partitions before creating the new layout.
    mount.umount(namespace, mount_point)
    fdisk.delete_partitions(namespace)
    if use_partitions:
        fdisk.make_partition(namespace, file_system)
        mount.mount(f"{namespace}p1", mount_point)
    else:
        format_disk = node.tools[Mkfs]
        format_disk.mkfs(f"{namespace}", file_system)
        mount.mount(f"{namespace}", mount_point)
@TestSuiteMetadata(
@ -97,75 +104,28 @@ class NvmeTestSuite(TestSuite):
),
)
def verify_nvme_function(self, node: Node) -> None:
    """Verify the basic function of every NVMe disk using partitioned disks.

    The checks themselves live in ``_verify_nvme_function``, which is
    called here with its default ``use_partitions=True``.
    """
    # Delegate to the shared helper instead of keeping a second copy of the
    # same steps inline.
    self._verify_nvme_function(node)
@TestCaseMetadata(
description="""
The test case is same as `verify_nvme_function`, except it uses
unpartitioned disks.
This test case will do following things for each NVMe device.
1. Get the number of errors from nvme-cli before operations.
2. Create filesystem and mount it.
3. Create a txt file on the partition, content is 'TestContent'.
4. Create a file 'data' on the partition, get the md5sum value.
5. Umount and remount the partition.
6. Get the txt file content, compare the value.
7. Compare the number of errors from nvme-cli after operations.
""",
priority=2,
requirement=simple_requirement(
supported_features=[Nvme],
),
)
def verify_nvme_function_unpartitioned(self, node: Node) -> None:
    # Runs the shared NVMe function checks with use_partitions=False, so the
    # filesystem is created on, and mounted from, the namespace itself
    # rather than a "p1" partition.
    self._verify_nvme_function(node, use_partitions=False)
@TestCaseMetadata(
description="""
@ -187,6 +147,7 @@ class NvmeTestSuite(TestSuite):
nvme = node.features[Nvme]
nvme_namespaces = nvme.get_raw_nvme_disks()
mount = node.tools[Mount]
df = node.tools[Df]
for namespace in nvme_namespaces:
mount_point = namespace.rpartition("/")[-1]
@ -202,12 +163,16 @@ class NvmeTestSuite(TestSuite):
message=f"{mount_point} not exist or fstrim command enounter "
"unexpected error."
)
# 3. Create a 300 gb file 'data' using dd command in the partition.
# 80% of free space is used to create a file.
free_space_gb = int(df.get_filesystem_available_space(mount_point) * 0.8)
# limit the free space to 300GB to avoid long time operation.
free_space_gb = min(free_space_gb, 300)
# 3. Create a file 'data' using dd command in the partition.
cmd_result = node.execute(
f"dd if=/dev/zero of={mount_point}/data bs=1G count=300",
f"dd if=/dev/zero of={mount_point}/data bs=1G count={free_space_gb}",
shell=True,
sudo=True,
timeout=1200,
)
cmd_result.assert_exit_code(
message=f"{mount_point}/data is not created successfully, "
@ -350,7 +315,9 @@ class NvmeTestSuite(TestSuite):
description="""
This test case will
1. Disable NVME devices.
2. Enable NVME device.
2. Enable PCI devices.
3. Get NVMe devices slots.
4. Check NVMe devices are back after rescan.
""",
priority=2,
requirement=simple_requirement(
@ -360,9 +327,20 @@ class NvmeTestSuite(TestSuite):
def verify_nvme_rescind(self, node: Node) -> None:
    """Disable the NVMe PCI devices, re-enable them and check they return.

    The value returned by ``disable_devices_by_type`` is used as the
    expected number of NVMe device slots after the devices are re-enabled.
    """
    lspci = node.tools[Lspci]
    # 1. Disable NVME devices (only once; the returned value is the
    #    expected slot count for step 4).
    before_pci_count = lspci.disable_devices_by_type(
        device_type=DEVICE_TYPE_NVME, use_pci_ids=True
    )
    # 2. Enable PCI devices.
    lspci.enable_devices()
    # 3. Get NVMe devices slots.
    after_devices_slots = lspci.get_device_names_by_type(
        DEVICE_TYPE_NVME, force_run=True, use_pci_ids=True
    )
    # 4. Check NVMe devices are back after rescan.
    assert_that(
        after_devices_slots,
        "After rescan, the disabled NVMe PCI devices should be back.",
    ).is_length(before_pci_count)
@TestCaseMetadata(
description="""
@ -381,18 +359,20 @@ class NvmeTestSuite(TestSuite):
)
def verify_nvme_sriov_rescind(self, node: Node) -> None:
    """Disable and re-enable NVMe and SR-IOV PCI devices, one type at a time.

    For each device type, the value returned by ``disable_devices_by_type``
    is used as the expected number of device slots after re-enabling.
    """
    lspci = node.tools[Lspci]
    device_types = [DEVICE_TYPE_NVME, DEVICE_TYPE_SRIOV]
    for device_type in device_types:
        # 1. Disable PCI devices.
        before_pci_count = lspci.disable_devices_by_type(device_type, True)
        # 2. Enable PCI devices.
        lspci.enable_devices()
        # 3. Get PCI devices slots.
        after_devices_slots = lspci.get_device_names_by_type(
            device_type, force_run=True, use_pci_ids=True
        )
        # 4. Check PCI devices are back after rescan.
        assert_that(
            after_devices_slots,
            f"After rescan, the disabled {device_type} PCI devices should be back.",
        ).is_length(before_pci_count)
def _verify_nvme_disk(self, environment: Environment, node: Node) -> None:
@ -431,3 +411,78 @@ class NvmeTestSuite(TestSuite):
assert_that(nvme_namespace).described_as(
"nvme devices count should be equal to [vCPU/8]."
).is_length(expected_count)
def _verify_nvme_function(self, node: Node, use_partitions: bool = True) -> None:
    """Verify the basic function of all NVMe disks on the node.

    For each raw NVMe disk: record the nvme-cli error count, format and
    mount the disk, write a text file and a data file, remount, and check
    that the file content, the md5sum and the error count are unchanged.

    Args:
        node: Node under test.
        use_partitions: When True the data lives on ``<namespace>p1``;
            otherwise the filesystem sits directly on the namespace.
    """
    raw_disks = node.features[Nvme].get_raw_nvme_disks()
    nvme_cli = node.tools[Nvmecli]
    cat = node.tools[Cat]
    mount = node.tools[Mount]

    for namespace in raw_disks:
        mount_point = namespace.rpartition("/")[-1]
        text_file = f"{mount_point}/testfile.txt"
        data_file = f"{mount_point}/data"
        # Device that actually carries the filesystem.
        mount_source = f"{namespace}p1" if use_partitions else f"{namespace}"

        # 1. Get the number of errors from nvme-cli before operations.
        errors_before = nvme_cli.get_error_count(namespace)

        # 2. Create a partition (if requested), filesystem and mount it.
        _format_mount_disk(node, namespace, FileSystem.ext4, use_partitions)

        # 3. Create a txt file on the disk, content is 'TestContent'.
        echo_result = node.execute(
            f"echo TestContent > {text_file}", shell=True, sudo=True
        )
        echo_result.assert_exit_code(message=f"{text_file} may not exist.")

        # 4. Create a file 'data' on the disk, get the md5sum value.
        dd_result = node.execute(
            f"dd if=/dev/zero of={data_file} bs=10M count=100",
            shell=True,
            sudo=True,
        )
        dd_result.assert_exit_code(
            message=f"{data_file} is not created successfully, "
            "please check the disk space."
        )
        initial_md5 = node.execute(f"md5sum {data_file}", shell=True, sudo=True)
        initial_md5.assert_exit_code(
            message=f"{data_file} not exist or md5sum command encountered"
            " unexpected error."
        )

        # 5. Umount and remount the same device.
        mount.umount(namespace, mount_point, erase=False)
        mount.mount(mount_source, mount_point)

        # 6. Get the txt file content, compare the value.
        file_content = cat.run(text_file, shell=True, sudo=True)
        assert_that(
            file_content.stdout,
            f"content of {text_file} should keep consistent "
            "after umount and re-mount.",
        ).is_equal_to("TestContent")

        # 7. Get md5sum value of file 'data', compare with initial value.
        final_md5 = node.execute(f"md5sum {data_file}", shell=True, sudo=True)
        assert_that(
            initial_md5.stdout,
            f"md5sum of {data_file} should keep consistent "
            "after umount and re-mount.",
        ).is_equal_to(final_md5.stdout)

        # 8. Compare the number of errors from nvme-cli after operations.
        errors_after = nvme_cli.get_error_count(namespace)
        assert_that(
            errors_before,
            "error-log should not increase after operations.",
        ).is_equal_to(errors_after)

        mount.umount(disk_name=namespace, point=mount_point)