diff --git a/tests_e2e/tests/lib/cgroup_helpers.py b/tests_e2e/tests/lib/cgroup_helpers.py
index 6da2865c2..5c552ef19 100644
--- a/tests_e2e/tests/lib/cgroup_helpers.py
+++ b/tests_e2e/tests/lib/cgroup_helpers.py
@@ -8,6 +8,7 @@ from azurelinuxagent.common.utils import shellutil
 from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION
 from tests_e2e.tests.lib.agent_log import AgentLog
 from tests_e2e.tests.lib.logging import log
+from tests_e2e.tests.lib.retry import retry_if_false
 
 BASE_CGROUP = '/sys/fs/cgroup'
 AGENT_CGROUP_NAME = 'WALinuxAgent'
@@ -93,23 +94,29 @@ def verify_agent_cgroup_assigned_correctly():
     This method checks agent is running and assigned to the correct cgroup using service status output
     """
     log.info("===== Verifying the daemon and the agent are assigned to the same correct cgroup using systemd")
-    service_status = shellutil.run_command(["systemctl", "status", systemd.get_agent_unit_name()])
-    log.info("Agent service status output:\n%s", service_status)
-    is_active = False
-    is_cgroup_assigned = False
     cgroup_mount_path = get_agent_cgroup_mount_path()
-    is_active_pattern = re.compile(r".*Active:\s+active.*")
+    service_status = ""
 
-    for line in service_status.splitlines():
-        if re.match(is_active_pattern, line):
-            is_active = True
-        elif cgroup_mount_path in line:
-            is_cgroup_assigned = True
+    def check_agent_service_cgroup():
+        # Rebind the enclosing variable so the failure message below reports the last status that was read
+        nonlocal service_status
+        is_active = False
+        is_cgroup_assigned = False
+        service_status = shellutil.run_command(["systemctl", "status", systemd.get_agent_unit_name()])
+        log.info("Agent service status output:\n%s", service_status)
+        is_active_pattern = re.compile(r".*Active:\s+active.*")
 
-    if not is_active:
-        fail('walinuxagent service was not active/running. Service status:{0}'.format(service_status))
-    if not is_cgroup_assigned:
-        fail('walinuxagent service was not assigned to the expected cgroup:{0}'.format(cgroup_mount_path))
+        for line in service_status.splitlines():
+            if re.match(is_active_pattern, line):
+                is_active = True
+            elif cgroup_mount_path in line:
+                is_cgroup_assigned = True
+
+        return is_active and is_cgroup_assigned
+
+    # The check can run before the correct cgroup is assigned and reflected in the service status, so retry it a few times
+    if not retry_if_false(check_agent_service_cgroup):
+        fail('walinuxagent service was not assigned to the expected cgroup:{0}. Current agent status:{1}'.format(cgroup_mount_path, service_status))
 
     log.info("Successfully verified the agent cgroup assigned correctly by systemd\n")
 
diff --git a/tests_e2e/tests/scripts/agent_cgroups-check_cgroups_agent.py b/tests_e2e/tests/scripts/agent_cgroups-check_cgroups_agent.py
index 064f30400..4f6444462 100755
--- a/tests_e2e/tests/scripts/agent_cgroups-check_cgroups_agent.py
+++ b/tests_e2e/tests/scripts/agent_cgroups-check_cgroups_agent.py
@@ -61,22 +61,29 @@ def verify_agent_cgroup_created_on_file_system():
     """
     log.info("===== Verifying the agent cgroup paths exist on file system")
     agent_cgroup_mount_path = get_agent_cgroup_mount_path()
-    all_agent_cgroup_controllers_path_exist = True
+    log.info("expected agent cgroup mount path: %s", agent_cgroup_mount_path)
+
     missing_agent_cgroup_controllers_path = []
     verified_agent_cgroup_controllers_path = []
 
-    log.info("expected agent cgroup mount path: %s", agent_cgroup_mount_path)
+    def is_agent_cgroup_controllers_path_exist():
+        # Start every attempt from empty lists so retries do not accumulate stale or duplicate paths
+        del missing_agent_cgroup_controllers_path[:]
+        del verified_agent_cgroup_controllers_path[:]
+        all_controllers_path_exist = True
 
-    for controller in AGENT_CONTROLLERS:
-        agent_controller_path = os.path.join(BASE_CGROUP, controller, agent_cgroup_mount_path[1:])
+        for controller in AGENT_CONTROLLERS:
+            agent_controller_path = os.path.join(BASE_CGROUP, controller, agent_cgroup_mount_path[1:])
 
-        if not os.path.exists(agent_controller_path):
-            all_agent_cgroup_controllers_path_exist = False
-            missing_agent_cgroup_controllers_path.append(agent_controller_path)
-        else:
-            verified_agent_cgroup_controllers_path.append(agent_controller_path)
+            if not os.path.exists(agent_controller_path):
+                all_controllers_path_exist = False
+                missing_agent_cgroup_controllers_path.append(agent_controller_path)
+            else:
+                verified_agent_cgroup_controllers_path.append(agent_controller_path)
+        return all_controllers_path_exist
 
-    if not all_agent_cgroup_controllers_path_exist:
+    # Test check can happen before agent setup cgroup configuration. So, retrying the check for few times
+    if not retry_if_false(is_agent_cgroup_controllers_path_exist):
         fail("Agent's cgroup paths couldn't be found on file system. Missing agent cgroups path :{0}.\n Verified agent cgroups path:{1}".format(missing_agent_cgroup_controllers_path, verified_agent_cgroup_controllers_path))
 
     log.info('Verified all agent cgroup paths are present.\n {0}'.format(verified_agent_cgroup_controllers_path))