added retries for agent cgroups test (#3075)

* retries for agent cgroups test

* pylint warn

* addressed comment
This commit is contained in:
Nageswara Nandigam 2024-03-01 12:47:14 -08:00 коммит произвёл GitHub
Родитель dd6c465ade
Коммит cc94d46faa
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
2 изменённых файлов: 33 добавлений и 24 удалений

Просмотреть файл

@ -8,6 +8,7 @@ from azurelinuxagent.common.utils import shellutil
from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION
from tests_e2e.tests.lib.agent_log import AgentLog from tests_e2e.tests.lib.agent_log import AgentLog
from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.logging import log
from tests_e2e.tests.lib.retry import retry_if_false
BASE_CGROUP = '/sys/fs/cgroup' BASE_CGROUP = '/sys/fs/cgroup'
AGENT_CGROUP_NAME = 'WALinuxAgent' AGENT_CGROUP_NAME = 'WALinuxAgent'
@ -93,23 +94,27 @@ def verify_agent_cgroup_assigned_correctly():
This method checks agent is running and assigned to the correct cgroup using service status output This method checks agent is running and assigned to the correct cgroup using service status output
""" """
log.info("===== Verifying the daemon and the agent are assigned to the same correct cgroup using systemd") log.info("===== Verifying the daemon and the agent are assigned to the same correct cgroup using systemd")
service_status = shellutil.run_command(["systemctl", "status", systemd.get_agent_unit_name()])
log.info("Agent service status output:\n%s", service_status)
is_active = False
is_cgroup_assigned = False
cgroup_mount_path = get_agent_cgroup_mount_path() cgroup_mount_path = get_agent_cgroup_mount_path()
is_active_pattern = re.compile(r".*Active:\s+active.*") service_status = ""
for line in service_status.splitlines(): def check_agent_service_cgroup():
if re.match(is_active_pattern, line): is_active = False
is_active = True is_cgroup_assigned = False
elif cgroup_mount_path in line: service_status = shellutil.run_command(["systemctl", "status", systemd.get_agent_unit_name()])
is_cgroup_assigned = True log.info("Agent service status output:\n%s", service_status)
is_active_pattern = re.compile(r".*Active:\s+active.*")
if not is_active: for line in service_status.splitlines():
fail('walinuxagent service was not active/running. Service status:{0}'.format(service_status)) if re.match(is_active_pattern, line):
if not is_cgroup_assigned: is_active = True
fail('walinuxagent service was not assigned to the expected cgroup:{0}'.format(cgroup_mount_path)) elif cgroup_mount_path in line:
is_cgroup_assigned = True
return is_active and is_cgroup_assigned
# The check can run before the correct cgroup is assigned and reflected in the service status, so retry the check a few times
if not retry_if_false(check_agent_service_cgroup):
fail('walinuxagent service was not assigned to the expected cgroup:{0}. Current agent status:{1}'.format(cgroup_mount_path, service_status))
log.info("Successfully verified the agent cgroup assigned correctly by systemd\n") log.info("Successfully verified the agent cgroup assigned correctly by systemd\n")

Просмотреть файл

@ -61,22 +61,26 @@ def verify_agent_cgroup_created_on_file_system():
""" """
log.info("===== Verifying the agent cgroup paths exist on file system") log.info("===== Verifying the agent cgroup paths exist on file system")
agent_cgroup_mount_path = get_agent_cgroup_mount_path() agent_cgroup_mount_path = get_agent_cgroup_mount_path()
all_agent_cgroup_controllers_path_exist = True log.info("expected agent cgroup mount path: %s", agent_cgroup_mount_path)
missing_agent_cgroup_controllers_path = [] missing_agent_cgroup_controllers_path = []
verified_agent_cgroup_controllers_path = [] verified_agent_cgroup_controllers_path = []
log.info("expected agent cgroup mount path: %s", agent_cgroup_mount_path) def is_agent_cgroup_controllers_path_exist():
all_controllers_path_exist = True
for controller in AGENT_CONTROLLERS: for controller in AGENT_CONTROLLERS:
agent_controller_path = os.path.join(BASE_CGROUP, controller, agent_cgroup_mount_path[1:]) agent_controller_path = os.path.join(BASE_CGROUP, controller, agent_cgroup_mount_path[1:])
if not os.path.exists(agent_controller_path): if not os.path.exists(agent_controller_path):
all_agent_cgroup_controllers_path_exist = False all_controllers_path_exist = False
missing_agent_cgroup_controllers_path.append(agent_controller_path) missing_agent_cgroup_controllers_path.append(agent_controller_path)
else: else:
verified_agent_cgroup_controllers_path.append(agent_controller_path) verified_agent_cgroup_controllers_path.append(agent_controller_path)
return all_controllers_path_exist
if not all_agent_cgroup_controllers_path_exist: # The check can run before the agent sets up its cgroup configuration, so retry the check a few times
if not retry_if_false(is_agent_cgroup_controllers_path_exist):
fail("Agent's cgroup paths couldn't be found on file system. Missing agent cgroups path :{0}.\n Verified agent cgroups path:{1}".format(missing_agent_cgroup_controllers_path, verified_agent_cgroup_controllers_path)) fail("Agent's cgroup paths couldn't be found on file system. Missing agent cgroups path :{0}.\n Verified agent cgroups path:{1}".format(missing_agent_cgroup_controllers_path, verified_agent_cgroup_controllers_path))
log.info('Verified all agent cgroup paths are present.\n {0}'.format(verified_agent_cgroup_controllers_path)) log.info('Verified all agent cgroup paths are present.\n {0}'.format(verified_agent_cgroup_controllers_path))