* refactor cgroup controllers (#13)

* Refactor Cgroup, CpuCgroup, MemoryCgroup to ControllerMetrics, CpuMetrics, MemoryMetrics

* Create methods to get unit/process cgroup representation

* Refactoring changes

* Refactoring changes

* Fix e2e test

* Fix unintentional comment change

* Remove unneeded comments

* Clean up comments and make code more readable

* Simplify get controller metrics

* Clean up cgroupapi

* Cleanup cgroup -> controllermetrics changes

* Clean up cgroup configurator

* Fix unit tests for agent.py

* Fix cgroupapi tests

* Fix cgroupconfigurator and tests

* Rename controller metrics tests

* Ignore pylint issues

* Improve test coverage for cgroupapi

* Rename cgroup to metrics

* Update cgroup.procs to accurately represent file

* Do not track metrics if controller is not mounted

* We should set cpu quota before tracking cpu metrics

* Pylint

* address pr comments (#14)

* Address Nag's comments

* pylint

* pylint

* remove lambda (#15)
This commit is contained in:
maddieford 2024-06-16 11:41:57 -07:00 коммит произвёл GitHub
Родитель cc6501d6dd
Коммит 610e12b3f1
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
20 изменённых файлов: 983 добавлений и 663 удалений

Просмотреть файл

@ -31,7 +31,7 @@ import threading
from azurelinuxagent.common.exception import CGroupsException from azurelinuxagent.common.exception import CGroupsException
from azurelinuxagent.ga import logcollector, cgroupconfigurator from azurelinuxagent.ga import logcollector, cgroupconfigurator
from azurelinuxagent.ga.cgroup import AGENT_LOG_COLLECTOR, CpuCgroup, MemoryCgroup from azurelinuxagent.ga.controllermetrics import AGENT_LOG_COLLECTOR, CpuMetrics
from azurelinuxagent.ga.cgroupapi import get_cgroup_api, log_cgroup_warning, InvalidCgroupMountpointException from azurelinuxagent.ga.cgroupapi import get_cgroup_api, log_cgroup_warning, InvalidCgroupMountpointException
import azurelinuxagent.common.conf as conf import azurelinuxagent.common.conf as conf
@ -208,8 +208,7 @@ class Agent(object):
# Check the cgroups unit # Check the cgroups unit
log_collector_monitor = None log_collector_monitor = None
cpu_cgroup_path = None tracked_metrics = []
memory_cgroup_path = None
if CollectLogsHandler.is_enabled_monitor_cgroups_check(): if CollectLogsHandler.is_enabled_monitor_cgroups_check():
try: try:
cgroup_api = get_cgroup_api() cgroup_api = get_cgroup_api()
@ -220,40 +219,27 @@ class Agent(object):
log_cgroup_warning("Unable to determine which cgroup version to use: {0}".format(ustr(e)), send_event=True) log_cgroup_warning("Unable to determine which cgroup version to use: {0}".format(ustr(e)), send_event=True)
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE) sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)
cpu_cgroup_path, memory_cgroup_path = cgroup_api.get_process_cgroup_paths("self") log_collector_cgroup = cgroup_api.get_process_cgroup(process_id="self", cgroup_name=AGENT_LOG_COLLECTOR)
cpu_slice_matches = False tracked_metrics = log_collector_cgroup.get_controller_metrics()
memory_slice_matches = False
if cpu_cgroup_path is not None:
cpu_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in cpu_cgroup_path)
if memory_cgroup_path is not None:
memory_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in memory_cgroup_path)
if not cpu_slice_matches or not memory_slice_matches:
log_cgroup_warning("The Log Collector process is not in the proper cgroups:", send_event=False)
if not cpu_slice_matches:
log_cgroup_warning("\tunexpected cpu slice: {0}".format(cpu_cgroup_path), send_event=False)
if not memory_slice_matches:
log_cgroup_warning("\tunexpected memory slice: {0}".format(memory_cgroup_path), send_event=False)
if len(tracked_metrics) != len(log_collector_cgroup.get_supported_controllers()):
log_cgroup_warning("At least one required controller is missing. The following controllers are required for the log collector to run: {0}".format(log_collector_cgroup.get_supported_controllers()))
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE) sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)
def initialize_cgroups_tracking(cpu_cgroup_path, memory_cgroup_path): if not log_collector_cgroup.check_in_expected_slice(cgroupconfigurator.LOGCOLLECTOR_SLICE):
cpu_cgroup = CpuCgroup(AGENT_LOG_COLLECTOR, cpu_cgroup_path) log_cgroup_warning("The Log Collector process is not in the proper cgroups", send_event=False)
msg = "Started tracking cpu cgroup {0}".format(cpu_cgroup) sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)
logger.info(msg)
cpu_cgroup.initialize_cpu_usage()
memory_cgroup = MemoryCgroup(AGENT_LOG_COLLECTOR, memory_cgroup_path)
msg = "Started tracking memory cgroup {0}".format(memory_cgroup)
logger.info(msg)
return [cpu_cgroup, memory_cgroup]
try: try:
log_collector = LogCollector(is_full_mode) log_collector = LogCollector(is_full_mode)
# Running log collector resource(CPU, Memory) monitoring only if agent starts the log collector. # Running log collector resource monitoring only if agent starts the log collector.
# If Log collector start by any other means, then it will not be monitored. # If Log collector start by any other means, then it will not be monitored.
if CollectLogsHandler.is_enabled_monitor_cgroups_check(): if CollectLogsHandler.is_enabled_monitor_cgroups_check():
tracked_cgroups = initialize_cgroups_tracking(cpu_cgroup_path, memory_cgroup_path) for metric in tracked_metrics:
log_collector_monitor = get_log_collector_monitor_handler(tracked_cgroups) if isinstance(metric, CpuMetrics):
metric.initialize_cpu_usage()
break
log_collector_monitor = get_log_collector_monitor_handler(tracked_metrics)
log_collector_monitor.run() log_collector_monitor.run()
archive = log_collector.collect_logs_and_get_archive() archive = log_collector.collect_logs_and_get_archive()
logger.info("Log collection successfully completed. Archive can be found at {0} " logger.info("Log collection successfully completed. Archive can be found at {0} "

Просмотреть файл

@ -24,7 +24,7 @@ import uuid
from azurelinuxagent.common import logger from azurelinuxagent.common import logger
from azurelinuxagent.common.event import WALAEventOperation, add_event from azurelinuxagent.common.event import WALAEventOperation, add_event
from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
from azurelinuxagent.common.conf import get_agent_pid_file_path from azurelinuxagent.common.conf import get_agent_pid_file_path
from azurelinuxagent.common.exception import CGroupsException, ExtensionErrorCodes, ExtensionError, \ from azurelinuxagent.common.exception import CGroupsException, ExtensionErrorCodes, ExtensionError, \
@ -185,14 +185,14 @@ def get_cgroup_api():
if available_unified_controllers != "": if available_unified_controllers != "":
raise CGroupsException("Detected hybrid cgroup mode, but there are controllers available to be enabled in unified hierarchy: {0}".format(available_unified_controllers)) raise CGroupsException("Detected hybrid cgroup mode, but there are controllers available to be enabled in unified hierarchy: {0}".format(available_unified_controllers))
cgroup_api = SystemdCgroupApiv1() cgroup_api_v1 = SystemdCgroupApiv1()
# Previously the agent supported users mounting cgroup v1 controllers in locations other than the systemd # Previously the agent supported users mounting cgroup v1 controllers in locations other than the systemd
# default ('/sys/fs/cgroup'). The agent no longer supports this scenario. If either the cpu or memory # default ('/sys/fs/cgroup'). The agent no longer supports this scenario. If any agent supported controller is
# controller is mounted in a location other than the systemd default, raise Exception. # mounted in a location other than the systemd default, raise Exception.
if not cgroup_api.are_mountpoints_systemd_created(): if not cgroup_api_v1.are_mountpoints_systemd_created():
raise InvalidCgroupMountpointException("Expected cgroup controllers to be mounted at '{0}', but at least one is not. v1 mount points: \n{1}".format(CGROUP_FILE_SYSTEM_ROOT, json.dumps(cgroup_api.get_controller_root_paths()))) raise InvalidCgroupMountpointException("Expected cgroup controllers to be mounted at '{0}', but at least one is not. v1 mount points: \n{1}".format(CGROUP_FILE_SYSTEM_ROOT, json.dumps(cgroup_api_v1.get_controller_mountpoints())))
log_cgroup_info("Using cgroup v1 for resource enforcement and monitoring") log_cgroup_info("Using cgroup v1 for resource enforcement and monitoring")
return cgroup_api return cgroup_api_v1
raise CGroupsException("{0} has an unexpected file type: {1}".format(CGROUP_FILE_SYSTEM_ROOT, root_hierarchy_mode)) raise CGroupsException("{0} has an unexpected file type: {1}".format(CGROUP_FILE_SYSTEM_ROOT, root_hierarchy_mode))
@ -202,7 +202,6 @@ class _SystemdCgroupApi(object):
Cgroup interface via systemd. Contains common api implementations between cgroup v1 and v2. Cgroup interface via systemd. Contains common api implementations between cgroup v1 and v2.
""" """
def __init__(self): def __init__(self):
self._agent_unit_name = None
self._systemd_run_commands = [] self._systemd_run_commands = []
self._systemd_run_commands_lock = threading.RLock() self._systemd_run_commands_lock = threading.RLock()
@ -213,55 +212,36 @@ class _SystemdCgroupApi(object):
with self._systemd_run_commands_lock: with self._systemd_run_commands_lock:
return self._systemd_run_commands[:] return self._systemd_run_commands[:]
def get_controller_root_paths(self): def get_unit_cgroup(self, unit_name, cgroup_name):
""" """
Cgroup version specific. Returns a tuple with the root paths for the cpu and memory controllers; the values can Cgroup version specific. Returns a representation of the unit cgroup.
be None if the corresponding controller is not mounted or enabled at the root cgroup.
:param unit_name: The unit to return the cgroup of.
:param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes.
""" """
raise NotImplementedError() raise NotImplementedError()
def get_unit_cgroup_paths(self, unit_name): def get_cgroup_from_relative_path(self, relative_path, cgroup_name):
""" """
Returns a tuple with the path of the cpu and memory cgroups for the given unit. Cgroup version specific. Returns a representation of the cgroup at the provided relative path.
The values returned can be None if the controller is not mounted or enabled.
:param relative_path: The relative path to return the cgroup of.
:param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes.
""" """
# Ex: ControlGroup=/azure.slice/walinuxagent.service raise NotImplementedError()
# controlgroup_path[1:] = azure.slice/walinuxagent.service
controlgroup_path = systemd.get_unit_property(unit_name, "ControlGroup")
cpu_root_path, memory_root_path = self.get_controller_root_paths()
cpu_cgroup_path = os.path.join(cpu_root_path, controlgroup_path[1:]) \ def get_process_cgroup(self, process_id, cgroup_name):
if cpu_root_path is not None else None
memory_cgroup_path = os.path.join(memory_root_path, controlgroup_path[1:]) \
if memory_root_path is not None else None
return cpu_cgroup_path, memory_cgroup_path
def get_process_cgroup_paths(self, process_id):
""" """
Returns a tuple with the path of the cpu and memory cgroups for the given process. Cgroup version specific. Returns a representation of the process' cgroup.
The 'process_id' can be a numeric PID or the string "self" for the current process.
The values returned can be None if the controller is not mounted or enabled. :param process_id: A numeric PID to return the cgroup of, or the string "self" to return the cgroup of the current process.
:param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes.
""" """
cpu_cgroup_relative_path, memory_cgroup_relative_path = self.get_process_cgroup_relative_paths(process_id) raise NotImplementedError()
cpu_root_path, memory_root_path = self.get_controller_root_paths() def log_root_paths(self):
cpu_cgroup_path = os.path.join(cpu_root_path, cpu_cgroup_relative_path) \
if cpu_root_path is not None and cpu_cgroup_relative_path is not None else None
memory_cgroup_path = os.path.join(memory_root_path, memory_cgroup_relative_path) \
if memory_root_path is not None and memory_cgroup_relative_path is not None else None
return cpu_cgroup_path, memory_cgroup_path
def get_process_cgroup_relative_paths(self, process_id):
""" """
Cgroup version specific. Returns a tuple with the path of the cpu and memory cgroups for the given process Cgroup version specific. Logs the root paths of the cgroup filesystem/controllers.
(relative to the root path of the corresponding controller).
The 'process_id' can be a numeric PID or the string "self" for the current process.
The values returned can be None if the controller is not mounted or enabled.
""" """
raise NotImplementedError() raise NotImplementedError()
@ -279,11 +259,6 @@ class _SystemdCgroupApi(object):
unit_not_found = "Unit {0} not found.".format(scope_name) unit_not_found = "Unit {0} not found.".format(scope_name)
return unit_not_found in stderr or scope_name not in stderr return unit_not_found in stderr or scope_name not in stderr
@staticmethod
def get_processes_in_cgroup(cgroup_path):
with open(os.path.join(cgroup_path, "cgroup.procs"), "r") as cgroup_procs:
return [int(pid) for pid in cgroup_procs.read().split()]
class SystemdCgroupApiv1(_SystemdCgroupApi): class SystemdCgroupApiv1(_SystemdCgroupApi):
""" """
@ -293,7 +268,8 @@ class SystemdCgroupApiv1(_SystemdCgroupApi):
super(SystemdCgroupApiv1, self).__init__() super(SystemdCgroupApiv1, self).__init__()
self._cgroup_mountpoints = self._get_controller_mountpoints() self._cgroup_mountpoints = self._get_controller_mountpoints()
def _get_controller_mountpoints(self): @staticmethod
def _get_controller_mountpoints():
""" """
In v1, each controller is mounted at a different path. Use findmnt to get each path. In v1, each controller is mounted at a different path. Use findmnt to get each path.
@ -304,7 +280,8 @@ class SystemdCgroupApiv1(_SystemdCgroupApi):
/sys/fs/cgroup/cpu,cpuacct cgroup cgroup rw,nosuid,nodev,noexec,relatime,cpu,cpuacct /sys/fs/cgroup/cpu,cpuacct cgroup cgroup rw,nosuid,nodev,noexec,relatime,cpu,cpuacct
etc etc
Returns a dictionary of the controller-path mappings. Returns a dictionary of the controller-path mappings. The dictionary only includes the controllers which are
supported by the agent.
""" """
mount_points = {} mount_points = {}
for line in shellutil.run_command(['findmnt', '-t', 'cgroup', '--noheadings']).splitlines(): for line in shellutil.run_command(['findmnt', '-t', 'cgroup', '--noheadings']).splitlines():
@ -315,51 +292,91 @@ class SystemdCgroupApiv1(_SystemdCgroupApi):
if match is not None: if match is not None:
path = match.group('path') path = match.group('path')
controller = match.group('controller') controller = match.group('controller')
if controller is not None and path is not None: if controller is not None and path is not None and controller in CgroupV1.get_supported_controllers():
mount_points[controller] = path mount_points[controller] = path
return mount_points return mount_points
def get_controller_mountpoints(self):
"""
Returns a dictionary of controller-mountpoint mappings.
"""
return self._cgroup_mountpoints
def are_mountpoints_systemd_created(self): def are_mountpoints_systemd_created(self):
""" """
Systemd mounts each controller at '/sys/fs/cgroup/<controller>'. Returns True if both cpu and memory Systemd mounts each controller at '/sys/fs/cgroup/<controller>'. Returns True if all mounted controllers which
mountpoints match this pattern, False otherwise. are supported by the agent have mountpoints which match this pattern, False otherwise.
The agent does not support cgroup usage if the default root systemd mountpoint (/sys/fs/cgroup) is not used. The agent does not support cgroup usage if the default root systemd mountpoint (/sys/fs/cgroup) is not used.
This method is used to check if any users are using non-systemd mountpoints. If they are, the agent drop-in This method is used to check if any users are using non-systemd mountpoints. If they are, the agent drop-in
files will be cleaned up in cgroupconfigurator. files will be cleaned up in cgroupconfigurator.
""" """
cpu_mountpoint = self._cgroup_mountpoints.get('cpu,cpuacct') for controller, mount_point in self._cgroup_mountpoints.items():
memory_mountpoint = self._cgroup_mountpoints.get('memory') if mount_point != os.path.join(CGROUP_FILE_SYSTEM_ROOT, controller):
if cpu_mountpoint is not None and cpu_mountpoint != os.path.join(CGROUP_FILE_SYSTEM_ROOT, 'cpu,cpuacct'): return False
return False
if memory_mountpoint is not None and memory_mountpoint != os.path.join(CGROUP_FILE_SYSTEM_ROOT, 'memory'):
return False
return True return True
def get_controller_root_paths(self): @staticmethod
# Return a tuple representing the mountpoints for cpu and memory. Either should be None if the corresponding def _get_process_relative_controller_paths(process_id):
# controller is not mounted. """
return self._cgroup_mountpoints.get('cpu,cpuacct'), self._cgroup_mountpoints.get('memory') Returns the relative paths of the cgroup for the given process as a dict of controller-path mappings. The result
only includes controllers which are supported.
The contents of the /proc/{process_id}/cgroup file are similar to
# cat /proc/1218/cgroup
10:memory:/system.slice/walinuxagent.service
3:cpu,cpuacct:/system.slice/walinuxagent.service
etc
def get_process_cgroup_relative_paths(self, process_id): :param process_id: A numeric PID to return the relative paths of, or the string "self" to return the relative paths of the current process.
# The contents of the file are similar to """
# # cat /proc/1218/cgroup conroller_relative_paths = {}
# 10:memory:/system.slice/walinuxagent.service
# 3:cpu,cpuacct:/system.slice/walinuxagent.service
# etc
cpu_path = None
memory_path = None
for line in fileutil.read_file("/proc/{0}/cgroup".format(process_id)).splitlines(): for line in fileutil.read_file("/proc/{0}/cgroup".format(process_id)).splitlines():
match = re.match(r'\d+:(?P<controller>(memory|.*cpuacct.*)):(?P<path>.+)', line) match = re.match(r'\d+:(?P<controller>.+):(?P<path>.+)', line)
if match is not None: if match is not None:
controller = match.group('controller') controller = match.group('controller')
path = match.group('path').lstrip('/') if match.group('path') != '/' else None path = match.group('path').lstrip('/') if match.group('path') != '/' else None
if controller == 'memory': if path is not None and controller in CgroupV1.get_supported_controllers():
memory_path = path conroller_relative_paths[controller] = path
else:
cpu_path = path
return cpu_path, memory_path return conroller_relative_paths
def get_unit_cgroup(self, unit_name, cgroup_name):
unit_cgroup_relative_path = systemd.get_unit_property(unit_name, "ControlGroup")
unit_controller_paths = {}
for controller, mountpoint in self._cgroup_mountpoints.items():
unit_controller_paths[controller] = os.path.join(mountpoint, unit_cgroup_relative_path[1:])
return CgroupV1(cgroup_name=cgroup_name, controller_mountpoints=self._cgroup_mountpoints,
controller_paths=unit_controller_paths)
def get_cgroup_from_relative_path(self, relative_path, cgroup_name):
controller_paths = {}
for controller, mountpoint in self._cgroup_mountpoints.items():
controller_paths[controller] = os.path.join(mountpoint, relative_path)
return CgroupV1(cgroup_name=cgroup_name, controller_mountpoints=self._cgroup_mountpoints,
controller_paths=controller_paths)
def get_process_cgroup(self, process_id, cgroup_name):
relative_controller_paths = self._get_process_relative_controller_paths(process_id)
process_controller_paths = {}
for controller, mountpoint in self._cgroup_mountpoints.items():
relative_controller_path = relative_controller_paths.get(controller)
if relative_controller_path is not None:
process_controller_paths[controller] = os.path.join(mountpoint, relative_controller_path)
return CgroupV1(cgroup_name=cgroup_name, controller_mountpoints=self._cgroup_mountpoints,
controller_paths=process_controller_paths)
def log_root_paths(self):
for controller in CgroupV1.get_supported_controllers():
mount_point = self._cgroup_mountpoints.get(controller)
if mount_point is None:
log_cgroup_info("The {0} controller is not mounted".format(controller), send_event=False)
else:
log_cgroup_info("The {0} controller is mounted at {1}".format(controller, mount_point), send_event=False)
def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr, def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr,
error_code=ExtensionErrorCodes.PluginUnknownFailure): error_code=ExtensionErrorCodes.PluginUnknownFailure):
@ -385,25 +402,14 @@ class SystemdCgroupApiv1(_SystemdCgroupApi):
log_cgroup_info("Started extension in unit '{0}'".format(scope_name), send_event=False) log_cgroup_info("Started extension in unit '{0}'".format(scope_name), send_event=False)
cpu_cgroup = None cpu_metrics = None
try: try:
cgroup_relative_path = os.path.join('azure.slice/azure-vmextensions.slice', extension_slice_name) cgroup_relative_path = os.path.join('azure.slice/azure-vmextensions.slice', extension_slice_name)
cgroup = self.get_cgroup_from_relative_path(cgroup_relative_path, extension_name)
cpu_cgroup_mountpoint, memory_cgroup_mountpoint = self.get_controller_root_paths() for metrics in cgroup.get_controller_metrics():
if isinstance(metrics, CpuMetrics):
if cpu_cgroup_mountpoint is None: cpu_metrics = metrics
log_cgroup_info("The CPU controller is not mounted; will not track resource usage", send_event=False) CGroupsTelemetry.track_cgroup(metrics)
else:
cpu_cgroup_path = os.path.join(cpu_cgroup_mountpoint, cgroup_relative_path)
cpu_cgroup = CpuCgroup(extension_name, cpu_cgroup_path)
CGroupsTelemetry.track_cgroup(cpu_cgroup)
if memory_cgroup_mountpoint is None:
log_cgroup_info("The Memory controller is not mounted; will not track resource usage", send_event=False)
else:
memory_cgroup_path = os.path.join(memory_cgroup_mountpoint, cgroup_relative_path)
memory_cgroup = MemoryCgroup(extension_name, memory_cgroup_path)
CGroupsTelemetry.track_cgroup(memory_cgroup)
except IOError as e: except IOError as e:
if e.errno == 2: # 'No such file or directory' if e.errno == 2: # 'No such file or directory'
@ -415,7 +421,7 @@ class SystemdCgroupApiv1(_SystemdCgroupApi):
# Wait for process completion or timeout # Wait for process completion or timeout
try: try:
return handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout, return handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout,
stderr=stderr, error_code=error_code, cpu_cgroup=cpu_cgroup) stderr=stderr, error_code=error_code, cpu_metrics=cpu_metrics)
except ExtensionError as e: except ExtensionError as e:
# The extension didn't terminate successfully. Determine whether it was due to systemd errors or # The extension didn't terminate successfully. Determine whether it was due to systemd errors or
# extension errors. # extension errors.
@ -448,7 +454,7 @@ class SystemdCgroupApiv2(_SystemdCgroupApi):
def __init__(self): def __init__(self):
super(SystemdCgroupApiv2, self).__init__() super(SystemdCgroupApiv2, self).__init__()
self._root_cgroup_path = self._get_root_cgroup_path() self._root_cgroup_path = self._get_root_cgroup_path()
self._controllers_enabled_at_root = self._get_controllers_enabled_at_root(self._root_cgroup_path) if self._root_cgroup_path is not None else [] self._controllers_enabled_at_root = self._get_controllers_enabled_at_root(self._root_cgroup_path) if self._root_cgroup_path != "" else []
@staticmethod @staticmethod
def _get_root_cgroup_path(): def _get_root_cgroup_path():
@ -459,7 +465,7 @@ class SystemdCgroupApiv2(_SystemdCgroupApi):
$ findmnt -t cgroup2 --noheadings $ findmnt -t cgroup2 --noheadings
/sys/fs/cgroup cgroup2 cgroup2 rw,nosuid,nodev,noexec,relatime,nsdelegate,memory_recursiveprot /sys/fs/cgroup cgroup2 cgroup2 rw,nosuid,nodev,noexec,relatime,nsdelegate,memory_recursiveprot
Returns None if the root cgroup cannot be determined from the output above. Returns empty string if the root cgroup cannot be determined from the output above.
""" """
# #
for line in shellutil.run_command(['findmnt', '-t', 'cgroup2', '--noheadings']).splitlines(): for line in shellutil.run_command(['findmnt', '-t', 'cgroup2', '--noheadings']).splitlines():
@ -470,7 +476,13 @@ class SystemdCgroupApiv2(_SystemdCgroupApi):
root_cgroup_path = match.group('path') root_cgroup_path = match.group('path')
if root_cgroup_path is not None: if root_cgroup_path is not None:
return root_cgroup_path return root_cgroup_path
return None return ""
def get_root_cgroup_path(self):
"""
Returns the unified cgroup mountpoint.
"""
return self._root_cgroup_path
@staticmethod @staticmethod
def _get_controllers_enabled_at_root(root_cgroup_path): def _get_controllers_enabled_at_root(root_cgroup_path):
@ -478,47 +490,229 @@ class SystemdCgroupApiv2(_SystemdCgroupApi):
Returns a list of the controllers enabled at the root cgroup. The cgroup.subtree_control file at the root shows Returns a list of the controllers enabled at the root cgroup. The cgroup.subtree_control file at the root shows
a space separated list of the controllers which are enabled to control resource distribution from the root a space separated list of the controllers which are enabled to control resource distribution from the root
cgroup to its children. If a controller is listed here, then that controller is available to enable in children cgroup to its children. If a controller is listed here, then that controller is available to enable in children
cgroups. cgroups. Returns only the enabled controllers which are supported by the agent.
$ cat /sys/fs/cgroup/cgroup.subtree_control $ cat /sys/fs/cgroup/cgroup.subtree_control
cpuset cpu io memory hugetlb pids rdma misc cpuset cpu io memory hugetlb pids rdma misc
""" """
controllers_enabled_at_root = []
enabled_controllers_file = os.path.join(root_cgroup_path, 'cgroup.subtree_control') enabled_controllers_file = os.path.join(root_cgroup_path, 'cgroup.subtree_control')
if os.path.exists(enabled_controllers_file): if os.path.exists(enabled_controllers_file):
controllers_enabled_at_root = fileutil.read_file(enabled_controllers_file).rstrip().split() controllers_enabled_at_root = fileutil.read_file(enabled_controllers_file).rstrip().split()
return controllers_enabled_at_root return list(set(controllers_enabled_at_root) & set(CgroupV2.get_supported_controllers()))
return []
def get_controller_root_paths(self): @staticmethod
# Return a tuple representing the root cgroups for cpu and memory. Either should be None if the corresponding def _get_process_relative_cgroup_path(process_id):
# controller is not enabled at the root. This check is necessary because all non-root "cgroup.subtree_control" """
# files can only contain controllers which are enabled in the parent's "cgroup.subtree_control" file. Returns the relative path of the cgroup for the given process.
The contents of the /proc/{process_id}/cgroup file are similar to
# cat /proc/1218/cgroup
0::/azure.slice/walinuxagent.service
root_cpu_path = None :param process_id: A numeric PID to return the relative path of, or the string "self" to return the relative path of the current process.
root_memory_path = None """
if self._root_cgroup_path is not None: relative_path = ""
if 'cpu' in self._controllers_enabled_at_root:
root_cpu_path = self._root_cgroup_path
if 'memory' in self._controllers_enabled_at_root:
root_memory_path = self._root_cgroup_path
return root_cpu_path, root_memory_path
def get_process_cgroup_relative_paths(self, process_id):
# The contents of the file are similar to
# # cat /proc/1218/cgroup
# 0::/azure.slice/walinuxagent.service
cpu_path = None
memory_path = None
for line in fileutil.read_file("/proc/{0}/cgroup".format(process_id)).splitlines(): for line in fileutil.read_file("/proc/{0}/cgroup".format(process_id)).splitlines():
match = re.match(r'0::(?P<path>\S+)', line) match = re.match(r'0::(?P<path>\S+)', line)
if match is not None: if match is not None:
path = match.group('path').lstrip('/') if match.group('path') != '/' else None relative_path = match.group('path').lstrip('/') if match.group('path') != '/' else ""
memory_path = path
cpu_path = path
return cpu_path, memory_path return relative_path
def get_unit_cgroup(self, unit_name, cgroup_name):
unit_cgroup_relative_path = systemd.get_unit_property(unit_name, "ControlGroup")
unit_cgroup_path = ""
if self._root_cgroup_path != "":
unit_cgroup_path = os.path.join(self._root_cgroup_path, unit_cgroup_relative_path[1:])
return CgroupV2(cgroup_name=cgroup_name, root_cgroup_path=self._root_cgroup_path, cgroup_path=unit_cgroup_path, enabled_controllers=self._controllers_enabled_at_root)
def get_cgroup_from_relative_path(self, relative_path, cgroup_name):
cgroup_path = ""
if self._root_cgroup_path != "":
cgroup_path = os.path.join(self._root_cgroup_path, relative_path)
return CgroupV2(cgroup_name=cgroup_name, root_cgroup_path=self._root_cgroup_path, cgroup_path=cgroup_path, enabled_controllers=self._controllers_enabled_at_root)
def get_process_cgroup(self, process_id, cgroup_name):
relative_path = self._get_process_relative_cgroup_path(process_id)
cgroup_path = ""
if self._root_cgroup_path != "":
cgroup_path = os.path.join(self._root_cgroup_path, relative_path)
return CgroupV2(cgroup_name=cgroup_name, root_cgroup_path=self._root_cgroup_path, cgroup_path=cgroup_path, enabled_controllers=self._controllers_enabled_at_root)
def log_root_paths(self):
log_cgroup_info("The root cgroup path is {0}".format(self._root_cgroup_path), send_event=False)
for controller in CgroupV2.get_supported_controllers():
if controller in self._controllers_enabled_at_root:
log_cgroup_info("The {0} controller is enabled at the root cgroup".format(controller), send_event=False)
else:
log_cgroup_info("The {0} controller is not enabled at the root cgroup".format(controller), send_event=False)
def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr, def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr,
error_code=ExtensionErrorCodes.PluginUnknownFailure): error_code=ExtensionErrorCodes.PluginUnknownFailure):
raise NotImplementedError() raise NotImplementedError()
class Cgroup(object):
MEMORY_CONTROLLER = "memory"
def __init__(self, cgroup_name):
self._cgroup_name = cgroup_name
@staticmethod
def get_supported_controllers():
"""
Cgroup version specific. Returns a list of the controllers which the agent supports.
"""
raise NotImplementedError()
def check_in_expected_slice(self, expected_slice):
"""
Cgroup version specific. Returns True if the cgroup is in the expected slice, False otherwise.
:param expected_slice: The slice the cgroup is expected to be in.
"""
raise NotImplementedError()
def get_controller_metrics(self, expected_relative_path=None):
"""
Cgroup version specific. Returns a list of the metrics for the agent supported controllers which are
mounted/enabled for the cgroup.
:param expected_relative_path: The expected relative path of the cgroup. If provided, only metrics for controllers at this expected path will be returned.
"""
raise NotImplementedError()
def get_processes(self):
"""
Cgroup version specific. Returns a list of all the process ids in the cgroup.
"""
raise NotImplementedError()
class CgroupV1(Cgroup):
    """
    Cgroup v1 representation: every controller has its own mountpoint and its own path for the cgroup.
    """
    CPU_CONTROLLER = "cpu,cpuacct"

    def __init__(self, cgroup_name, controller_mountpoints, controller_paths):
        """
        :param cgroup_name: Name of the cgroup; it appears in log messages and is handed to the metrics objects.
        :param controller_mountpoints: Dictionary mapping each mounted, agent-supported controller to its mountpoint.
        :param controller_paths: Dictionary mapping each mounted, agent-supported controller to the absolute
                                 path of that controller for this cgroup.
        """
        super(CgroupV1, self).__init__(cgroup_name=cgroup_name)
        self._controller_mountpoints = controller_mountpoints
        self._controller_paths = controller_paths

    @staticmethod
    def get_supported_controllers():
        """
        Returns the controllers the agent supports on cgroup v1 (cpu first, then memory).
        """
        return [CgroupV1.CPU_CONTROLLER, CgroupV1.MEMORY_CONTROLLER]

    def check_in_expected_slice(self, expected_slice):
        """
        Returns True only if every controller path contains the expected slice. A warning is logged for
        each controller path that does not, so all mismatches are reported rather than just the first.
        """
        all_in_slice = True
        for name, location in self._controller_paths.items():
            if expected_slice in location:
                continue
            log_cgroup_warning("The {0} controller for the {1} cgroup is not mounted in the expected slice. Expected slice: {2}. Actual controller path: {3}".format(name, self._cgroup_name, expected_slice, location), send_event=False)
            all_in_slice = False
        return all_in_slice

    def get_controller_metrics(self, expected_relative_path=None):
        """
        Builds the metrics objects for each supported controller that is mounted, has a path for this
        cgroup and, when expected_relative_path is given, sits exactly at mountpoint/expected_relative_path.
        Controllers failing any of those checks are skipped with a warning.
        """
        tracked = []
        for controller in self.get_supported_controllers():
            mountpoint = self._controller_mountpoints.get(controller)
            path = self._controller_paths.get(controller)

            if mountpoint is None:
                log_cgroup_warning("{0} controller is not mounted; will not track metrics".format(controller), send_event=False)
                continue

            if path is None:
                log_cgroup_warning("{0} is not mounted for the {1} cgroup; will not track metrics".format(controller, self._cgroup_name), send_event=False)
                continue

            if expected_relative_path is not None:
                expected_path = os.path.join(mountpoint, expected_relative_path)
                if path != expected_path:
                    log_cgroup_warning("The {0} controller is not mounted at the expected path for the {1} cgroup; will not track metrics. Actual cgroup path:[{2}] Expected:[{3}]".format(controller, self._cgroup_name, path, expected_path), send_event=False)
                    continue

            # Choose the metrics type matching the controller; anything else is ignored.
            if controller == self.CPU_CONTROLLER:
                controller_metrics = CpuMetrics(self._cgroup_name, path)
            elif controller == self.MEMORY_CONTROLLER:
                controller_metrics = MemoryMetrics(self._cgroup_name, path)
            else:
                controller_metrics = None

            if controller_metrics is None:
                continue

            log_cgroup_info("{0} metrics for cgroup: {1}".format(controller, controller_metrics), send_event=False)
            tracked.append(controller_metrics)

        return tracked

    def get_controller_procs_path(self, controller):
        """
        Returns the path of the cgroup.procs file for the given controller, or an empty string when the
        controller has no (or an empty) path for this cgroup.
        """
        location = self._controller_paths.get(controller)
        if location is None or location == "":
            return ""
        return os.path.join(location, "cgroup.procs")

    def get_processes(self):
        """
        Returns the process ids listed in the cgroup.procs file of every controller path, without
        duplicates. Controllers whose cgroup.procs file does not exist are skipped.
        """
        found = set()
        for controller in self._controller_paths.keys():
            procs_file = self.get_controller_procs_path(controller)
            if not os.path.exists(procs_file):
                continue
            with open(procs_file, "r") as handle:
                found.update(int(token) for token in handle.read().split())
        return list(found)
class CgroupV2(Cgroup):
    """
    Cgroup v2 representation: a single cgroup path under one root cgroup path.
    """
    CPU_CONTROLLER = "cpu"

    def __init__(self, cgroup_name, root_cgroup_path, cgroup_path, enabled_controllers):
        """
        :param cgroup_name: Name of the cgroup; it appears in log messages.
        :param root_cgroup_path: The root cgroup path as a string; may be empty.
        :param cgroup_path: The absolute path of the cgroup as a string; may be empty.
        :param enabled_controllers: List of the agent-supported controllers (strings) enabled at the root cgroup.
        """
        super(CgroupV2, self).__init__(cgroup_name)
        self._root_cgroup_path = root_cgroup_path
        self._cgroup_path = cgroup_path
        self._enabled_controllers = enabled_controllers

    @staticmethod
    def get_supported_controllers():
        """
        Returns the controllers the agent supports on cgroup v2 (cpu first, then memory).
        """
        return [CgroupV2.CPU_CONTROLLER, CgroupV2.MEMORY_CONTROLLER]

    def check_in_expected_slice(self, expected_slice):
        """
        Returns True if the cgroup path contains the expected slice; otherwise logs a warning and returns False.
        """
        if expected_slice in self._cgroup_path:
            return True
        log_cgroup_warning("The {0} cgroup is not in the expected slice. Expected slice: {1}. Actual cgroup path: {2}".format(self._cgroup_name, expected_slice, self._cgroup_path), send_event=False)
        return False

    def get_controller_metrics(self, expected_relative_path=None):
        """
        Not available yet for cgroup v2; always raises NotImplementedError.
        """
        # TODO - Implement controller metrics for cgroup v2
        raise NotImplementedError()

    def get_procs_path(self):
        """
        Returns the path of this cgroup's cgroup.procs file, or an empty string when the cgroup path is empty.
        """
        if self._cgroup_path == "":
            return ""
        return os.path.join(self._cgroup_path, "cgroup.procs")

    def get_processes(self):
        """
        Returns the process ids listed in this cgroup's cgroup.procs file, without duplicates.
        The result is an empty list when the file does not exist.
        """
        found = set()
        procs_file = self.get_procs_path()
        if os.path.exists(procs_file):
            with open(procs_file, "r") as handle:
                found.update(int(token) for token in handle.read().split())
        return list(found)

Просмотреть файл

@ -23,7 +23,7 @@ import threading
from azurelinuxagent.common import conf from azurelinuxagent.common import conf
from azurelinuxagent.common import logger from azurelinuxagent.common import logger
from azurelinuxagent.ga.cgroup import CpuCgroup, AGENT_NAME_TELEMETRY, MetricsCounter, MemoryCgroup from azurelinuxagent.ga.controllermetrics import CpuMetrics, AGENT_NAME_TELEMETRY, MetricsCounter, MemoryMetrics
from azurelinuxagent.ga.cgroupapi import SystemdRunError, EXTENSION_SLICE_PREFIX, CGroupUtil, SystemdCgroupApiv2, \ from azurelinuxagent.ga.cgroupapi import SystemdRunError, EXTENSION_SLICE_PREFIX, CGroupUtil, SystemdCgroupApiv2, \
log_cgroup_info, log_cgroup_warning, get_cgroup_api, InvalidCgroupMountpointException log_cgroup_info, log_cgroup_warning, get_cgroup_api, InvalidCgroupMountpointException
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
@ -130,9 +130,8 @@ class CGroupConfigurator(object):
self._agent_cgroups_enabled = False self._agent_cgroups_enabled = False
self._extensions_cgroups_enabled = False self._extensions_cgroups_enabled = False
self._cgroups_api = None self._cgroups_api = None
self._agent_cpu_cgroup_path = None self._agent_cgroup = None
self._agent_memory_cgroup_path = None self._agent_memory_metrics = None
self._agent_memory_cgroup = None
self._check_cgroups_lock = threading.RLock() # Protect the check_cgroups which is called from Monitor thread and main loop. self._check_cgroups_lock = threading.RLock() # Protect the check_cgroups which is called from Monitor thread and main loop.
def initialize(self): def initialize(self):
@ -189,28 +188,30 @@ class CGroupConfigurator(object):
self.__setup_azure_slice() self.__setup_azure_slice()
cpu_controller_root, memory_controller_root = self.__get_cgroup_controller_roots() # Log mount points/root paths for cgroup controllers
self._agent_cpu_cgroup_path, self._agent_memory_cgroup_path = self.__get_agent_cgroup_paths(agent_slice, self._cgroups_api.log_root_paths()
cpu_controller_root,
memory_controller_root) # Get agent cgroup
self._agent_cgroup = self._cgroups_api.get_process_cgroup(process_id="self", cgroup_name=AGENT_NAME_TELEMETRY)
if conf.get_cgroup_disable_on_process_check_failure() and self._check_fails_if_processes_found_in_agent_cgroup_before_enable(agent_slice): if conf.get_cgroup_disable_on_process_check_failure() and self._check_fails_if_processes_found_in_agent_cgroup_before_enable(agent_slice):
reason = "Found unexpected processes in the agent cgroup before agent enable cgroups." reason = "Found unexpected processes in the agent cgroup before agent enable cgroups."
self.disable(reason, DisableCgroups.ALL) self.disable(reason, DisableCgroups.ALL)
return return
if self._agent_cpu_cgroup_path is not None or self._agent_memory_cgroup_path is not None: # Get metrics to track
metrics = self._agent_cgroup.get_controller_metrics(expected_relative_path=os.path.join(agent_slice, systemd.get_agent_unit_name()))
if len(metrics) > 0:
self.enable() self.enable()
if self._agent_cpu_cgroup_path is not None: for metric in metrics:
log_cgroup_info("Agent CPU cgroup: {0}".format(self._agent_cpu_cgroup_path)) for prop in metric.get_unit_properties():
self.__set_cpu_quota(conf.get_agent_cpu_quota()) log_cgroup_info('{0}: {1}'.format(prop, systemd.get_unit_property(systemd.get_agent_unit_name(), prop)))
CGroupsTelemetry.track_cgroup(CpuCgroup(AGENT_NAME_TELEMETRY, self._agent_cpu_cgroup_path)) if isinstance(metric, CpuMetrics):
self.__set_cpu_quota(conf.get_agent_cpu_quota())
if self._agent_memory_cgroup_path is not None: elif isinstance(metric, MemoryMetrics):
log_cgroup_info("Agent Memory cgroup: {0}".format(self._agent_memory_cgroup_path)) self._agent_memory_metrics = metric
self._agent_memory_cgroup = MemoryCgroup(AGENT_NAME_TELEMETRY, self._agent_memory_cgroup_path) CGroupsTelemetry.track_cgroup(metric)
CGroupsTelemetry.track_cgroup(self._agent_memory_cgroup)
except Exception as exception: except Exception as exception:
log_cgroup_warning("Error initializing cgroups: {0}".format(ustr(exception))) log_cgroup_warning("Error initializing cgroups: {0}".format(ustr(exception)))
@ -229,21 +230,6 @@ class CGroupConfigurator(object):
return False return False
return True return True
def __get_cgroup_controller_roots(self):
cpu_controller_root, memory_controller_root = self._cgroups_api.get_controller_root_paths()
if cpu_controller_root is not None:
log_cgroup_info("The CPU cgroup controller root path is {0}".format(cpu_controller_root), send_event=False)
else:
log_cgroup_warning("The CPU cgroup controller is not mounted or enabled")
if memory_controller_root is not None:
log_cgroup_info("The memory cgroup controller root path is {0}".format(memory_controller_root), send_event=False)
else:
log_cgroup_warning("The memory cgroup controller is not mounted or enabled")
return cpu_controller_root, memory_controller_root
@staticmethod @staticmethod
def __setup_azure_slice(): def __setup_azure_slice():
""" """
@ -416,47 +402,6 @@ class CGroupConfigurator(object):
return True return True
return False return False
def __get_agent_cgroup_paths(self, agent_slice, cpu_controller_root, memory_controller_root):
agent_unit_name = systemd.get_agent_unit_name()
expected_relative_path = os.path.join(agent_slice, agent_unit_name)
cpu_cgroup_relative_path, memory_cgroup_relative_path = self._cgroups_api.get_process_cgroup_relative_paths(
"self")
if cpu_cgroup_relative_path is None:
log_cgroup_warning("The agent's process is not within a CPU cgroup")
else:
if cpu_cgroup_relative_path == expected_relative_path:
log_cgroup_info('CPUAccounting: {0}'.format(systemd.get_unit_property(agent_unit_name, "CPUAccounting")))
log_cgroup_info('CPUQuota: {0}'.format(systemd.get_unit_property(agent_unit_name, "CPUQuotaPerSecUSec")))
else:
log_cgroup_warning(
"The Agent is not in the expected CPU cgroup; will not enable monitoring. Cgroup:[{0}] Expected:[{1}]".format(cpu_cgroup_relative_path, expected_relative_path))
cpu_cgroup_relative_path = None # Set the path to None to prevent monitoring
if memory_cgroup_relative_path is None:
log_cgroup_warning("The agent's process is not within a memory cgroup")
else:
if memory_cgroup_relative_path == expected_relative_path:
memory_accounting = systemd.get_unit_property(agent_unit_name, "MemoryAccounting")
log_cgroup_info('MemoryAccounting: {0}'.format(memory_accounting))
else:
log_cgroup_warning(
"The Agent is not in the expected memory cgroup; will not enable monitoring. CGroup:[{0}] Expected:[{1}]".format(memory_cgroup_relative_path, expected_relative_path))
memory_cgroup_relative_path = None # Set the path to None to prevent monitoring
if cpu_controller_root is not None and cpu_cgroup_relative_path is not None:
agent_cpu_cgroup_path = os.path.join(cpu_controller_root, cpu_cgroup_relative_path)
else:
agent_cpu_cgroup_path = None
if memory_controller_root is not None and memory_cgroup_relative_path is not None:
agent_memory_cgroup_path = os.path.join(memory_controller_root, memory_cgroup_relative_path)
else:
agent_memory_cgroup_path = None
return agent_cpu_cgroup_path, agent_memory_cgroup_path
def supported(self): def supported(self):
return self._cgroups_supported return self._cgroups_supported
@ -496,7 +441,11 @@ class CGroupConfigurator(object):
elif disable_cgroups == DisableCgroups.AGENT: # disable agent elif disable_cgroups == DisableCgroups.AGENT: # disable agent
self._agent_cgroups_enabled = False self._agent_cgroups_enabled = False
self.__reset_agent_cpu_quota() self.__reset_agent_cpu_quota()
CGroupsTelemetry.stop_tracking(CpuCgroup(AGENT_NAME_TELEMETRY, self._agent_cpu_cgroup_path)) agent_metrics = self._agent_cgroup.get_controller_metrics()
for metric in agent_metrics:
if isinstance(metric, CpuMetrics):
CGroupsTelemetry.stop_tracking(metric)
break
log_cgroup_warning("Disabling resource usage monitoring. Reason: {0}".format(reason), op=WALAEventOperation.CGroupsDisabled) log_cgroup_warning("Disabling resource usage monitoring. Reason: {0}".format(reason), op=WALAEventOperation.CGroupsDisabled)
@ -612,11 +561,7 @@ class CGroupConfigurator(object):
""" """
unexpected = [] unexpected = []
agent_cgroup_proc_names = [] agent_cgroup_proc_names = []
# Now we call _check_processes_in_agent_cgroup before we enable the cgroups or any one of the controller is not mounted, agent cgroup paths can be None.
# so we need to check both.
cgroup_path = self._agent_cpu_cgroup_path if self._agent_cpu_cgroup_path is not None else self._agent_memory_cgroup_path
if cgroup_path is None:
return
try: try:
daemon = os.getppid() daemon = os.getppid()
extension_handler = os.getpid() extension_handler = os.getpid()
@ -624,12 +569,12 @@ class CGroupConfigurator(object):
agent_commands.update(shellutil.get_running_commands()) agent_commands.update(shellutil.get_running_commands())
systemd_run_commands = set() systemd_run_commands = set()
systemd_run_commands.update(self._cgroups_api.get_systemd_run_commands()) systemd_run_commands.update(self._cgroups_api.get_systemd_run_commands())
agent_cgroup = self._cgroups_api.get_processes_in_cgroup(cgroup_path) agent_cgroup_proccesses = self._agent_cgroup.get_processes()
# get the running commands again in case new commands started or completed while we were fetching the processes in the cgroup; # get the running commands again in case new commands started or completed while we were fetching the processes in the cgroup;
agent_commands.update(shellutil.get_running_commands()) agent_commands.update(shellutil.get_running_commands())
systemd_run_commands.update(self._cgroups_api.get_systemd_run_commands()) systemd_run_commands.update(self._cgroups_api.get_systemd_run_commands())
for process in agent_cgroup: for process in agent_cgroup_proccesses:
agent_cgroup_proc_names.append(self.__format_process(process)) agent_cgroup_proc_names.append(self.__format_process(process))
# Note that the agent uses systemd-run to start extensions; systemd-run belongs to the agent cgroup, though the extensions don't. # Note that the agent uses systemd-run to start extensions; systemd-run belongs to the agent cgroup, though the extensions don't.
if process in (daemon, extension_handler) or process in systemd_run_commands: if process in (daemon, extension_handler) or process in systemd_run_commands:
@ -753,8 +698,8 @@ class CGroupConfigurator(object):
raise CGroupsException("The agent has been throttled for {0} seconds".format(metric.value)) raise CGroupsException("The agent has been throttled for {0} seconds".format(metric.value))
def check_agent_memory_usage(self): def check_agent_memory_usage(self):
if self.enabled() and self._agent_memory_cgroup: if self.enabled() and self._agent_memory_metrics is not None:
metrics = self._agent_memory_cgroup.get_tracked_metrics() metrics = self._agent_memory_metrics.get_tracked_metrics()
current_usage = 0 current_usage = 0
for metric in metrics: for metric in metrics:
if metric.counter == MetricsCounter.TOTAL_MEM_USAGE: if metric.counter == MetricsCounter.TOTAL_MEM_USAGE:
@ -780,59 +725,37 @@ class CGroupConfigurator(object):
return 0 return 0
def start_tracking_unit_cgroups(self, unit_name): def start_tracking_unit_cgroups(self, unit_name):
"""
TODO: Start tracking Memory Cgroups
"""
try: try:
cpu_cgroup_path, memory_cgroup_path = self._cgroups_api.get_unit_cgroup_paths(unit_name) cgroup = self._cgroups_api.get_unit_cgroup(unit_name, unit_name)
metrics = cgroup.get_controller_metrics()
if cpu_cgroup_path is None: for metric in metrics:
log_cgroup_info("The CPU controller is not mounted or enabled; will not track resource usage", send_event=False) CGroupsTelemetry.track_cgroup(metric)
else:
CGroupsTelemetry.track_cgroup(CpuCgroup(unit_name, cpu_cgroup_path))
if memory_cgroup_path is None:
log_cgroup_info("The Memory controller is not mounted or enabled; will not track resource usage", send_event=False)
else:
CGroupsTelemetry.track_cgroup(MemoryCgroup(unit_name, memory_cgroup_path))
except Exception as exception: except Exception as exception:
log_cgroup_info("Failed to start tracking resource usage for the extension: {0}".format(ustr(exception)), send_event=False) log_cgroup_info("Failed to start tracking resource usage for the extension: {0}".format(ustr(exception)), send_event=False)
def stop_tracking_unit_cgroups(self, unit_name): def stop_tracking_unit_cgroups(self, unit_name):
"""
TODO: remove Memory cgroups from tracked list.
"""
try: try:
cpu_cgroup_path, memory_cgroup_path = self._cgroups_api.get_unit_cgroup_paths(unit_name) cgroup = self._cgroups_api.get_unit_cgroup(unit_name, unit_name)
metrics = cgroup.get_controller_metrics()
if cpu_cgroup_path is not None: for metric in metrics:
CGroupsTelemetry.stop_tracking(CpuCgroup(unit_name, cpu_cgroup_path)) CGroupsTelemetry.stop_tracking(metric)
if memory_cgroup_path is not None:
CGroupsTelemetry.stop_tracking(MemoryCgroup(unit_name, memory_cgroup_path))
except Exception as exception: except Exception as exception:
log_cgroup_info("Failed to stop tracking resource usage for the extension service: {0}".format(ustr(exception)), send_event=False) log_cgroup_info("Failed to stop tracking resource usage for the extension service: {0}".format(ustr(exception)), send_event=False)
def stop_tracking_extension_cgroups(self, extension_name): def stop_tracking_extension_cgroups(self, extension_name):
"""
TODO: remove extension Memory cgroups from tracked list
"""
try: try:
extension_slice_name = CGroupUtil.get_extension_slice_name(extension_name) extension_slice_name = CGroupUtil.get_extension_slice_name(extension_name)
cgroup_relative_path = os.path.join(_AZURE_VMEXTENSIONS_SLICE, cgroup_relative_path = os.path.join(_AZURE_VMEXTENSIONS_SLICE, extension_slice_name)
extension_slice_name)
cpu_root_path, memory_root_path = self._cgroups_api.get_controller_root_paths() cgroup = self._cgroups_api.get_cgroup_from_relative_path(relative_path=cgroup_relative_path,
cpu_cgroup_path = os.path.join(cpu_root_path, cgroup_relative_path) cgroup_name=extension_name)
memory_cgroup_path = os.path.join(memory_root_path, cgroup_relative_path) metrics = cgroup.get_controller_metrics()
for metric in metrics:
if cpu_cgroup_path is not None: CGroupsTelemetry.stop_tracking(metric)
CGroupsTelemetry.stop_tracking(CpuCgroup(extension_name, cpu_cgroup_path))
if memory_cgroup_path is not None:
CGroupsTelemetry.stop_tracking(MemoryCgroup(extension_name, memory_cgroup_path))
except Exception as exception: except Exception as exception:
log_cgroup_info("Failed to stop tracking resource usage for the extension service: {0}".format(ustr(exception)), send_event=False) log_cgroup_info("Failed to stop tracking resource usage for the extension service: {0}".format(ustr(exception)), send_event=False)

Просмотреть файл

@ -17,7 +17,7 @@ import errno
import threading import threading
from azurelinuxagent.common import logger from azurelinuxagent.common import logger
from azurelinuxagent.ga.cgroup import CpuCgroup from azurelinuxagent.ga.controllermetrics import CpuMetrics
from azurelinuxagent.common.future import ustr from azurelinuxagent.common.future import ustr
@ -41,7 +41,7 @@ class CGroupsTelemetry(object):
""" """
Adds the given item to the dictionary of tracked cgroups Adds the given item to the dictionary of tracked cgroups
""" """
if isinstance(cgroup, CpuCgroup): if isinstance(cgroup, CpuMetrics):
# set the current cpu usage # set the current cpu usage
cgroup.initialize_cpu_usage() cgroup.initialize_cpu_usage()

Просмотреть файл

@ -25,7 +25,7 @@ from azurelinuxagent.ga import logcollector, cgroupconfigurator
import azurelinuxagent.common.conf as conf import azurelinuxagent.common.conf as conf
from azurelinuxagent.common import logger from azurelinuxagent.common import logger
from azurelinuxagent.ga.cgroup import MetricsCounter from azurelinuxagent.ga.controllermetrics import MetricsCounter
from azurelinuxagent.common.event import elapsed_milliseconds, add_event, WALAEventOperation, report_metric from azurelinuxagent.common.event import elapsed_milliseconds, add_event, WALAEventOperation, report_metric
from azurelinuxagent.common.future import ustr from azurelinuxagent.common.future import ustr
from azurelinuxagent.ga.interfaces import ThreadHandlerInterface from azurelinuxagent.ga.interfaces import ThreadHandlerInterface

Просмотреть файл

@ -88,7 +88,7 @@ class MetricsCounter(object):
re_user_system_times = re.compile(r'user (\d+)\nsystem (\d+)\n') re_user_system_times = re.compile(r'user (\d+)\nsystem (\d+)\n')
class CGroup(object): class ControllerMetrics(object):
def __init__(self, name, cgroup_path): def __init__(self, name, cgroup_path):
""" """
Initialize _data collection for the Memory controller Initialize _data collection for the Memory controller
@ -169,10 +169,16 @@ class CGroup(object):
""" """
raise NotImplementedError() raise NotImplementedError()
def get_unit_properties(self):
"""
Returns a list of the unit properties to collect for the controller.
"""
raise NotImplementedError()
class CpuCgroup(CGroup):
class CpuMetrics(ControllerMetrics):
def __init__(self, name, cgroup_path): def __init__(self, name, cgroup_path):
super(CpuCgroup, self).__init__(name, cgroup_path) super(CpuMetrics, self).__init__(name, cgroup_path)
self._osutil = get_osutil() self._osutil = get_osutil()
self._previous_cgroup_cpu = None self._previous_cgroup_cpu = None
@ -306,10 +312,13 @@ class CpuCgroup(CGroup):
return tracked return tracked
def get_unit_properties(self):
return ["CPUAccounting", "CPUQuotaPerSecUSec"]
class MemoryCgroup(CGroup):
class MemoryMetrics(ControllerMetrics):
def __init__(self, name, cgroup_path): def __init__(self, name, cgroup_path):
super(MemoryCgroup, self).__init__(name, cgroup_path) super(MemoryMetrics, self).__init__(name, cgroup_path)
self._counter_not_found_error_count = 0 self._counter_not_found_error_count = 0
@ -390,3 +399,6 @@ class MemoryCgroup(CGroup):
MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.SWAP_MEM_USAGE, self.name, MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.SWAP_MEM_USAGE, self.name,
self.try_swap_memory_usage(), _REPORT_EVERY_HOUR) self.try_swap_memory_usage(), _REPORT_EVERY_HOUR)
] ]
def get_unit_properties(self):
return["MemoryAccounting"]

Просмотреть файл

@ -31,7 +31,7 @@ from azurelinuxagent.common.future import ustr
TELEMETRY_MESSAGE_MAX_LEN = 3200 TELEMETRY_MESSAGE_MAX_LEN = 3200
def wait_for_process_completion_or_timeout(process, timeout, cpu_cgroup): def wait_for_process_completion_or_timeout(process, timeout, cpu_metrics):
""" """
Utility function that waits for the process to complete within the given time frame. This function will terminate Utility function that waits for the process to complete within the given time frame. This function will terminate
the process if when the given time frame elapses. the process if when the given time frame elapses.
@ -47,7 +47,7 @@ def wait_for_process_completion_or_timeout(process, timeout, cpu_cgroup):
throttled_time = 0 throttled_time = 0
if timeout == 0: if timeout == 0:
throttled_time = get_cpu_throttled_time(cpu_cgroup) throttled_time = get_cpu_throttled_time(cpu_metrics)
os.killpg(os.getpgid(process.pid), signal.SIGKILL) os.killpg(os.getpgid(process.pid), signal.SIGKILL)
else: else:
# process completed or forked; sleep 1 sec to give the child process (if any) a chance to start # process completed or forked; sleep 1 sec to give the child process (if any) a chance to start
@ -57,7 +57,7 @@ def wait_for_process_completion_or_timeout(process, timeout, cpu_cgroup):
return timeout == 0, return_code, throttled_time return timeout == 0, return_code, throttled_time
def handle_process_completion(process, command, timeout, stdout, stderr, error_code, cpu_cgroup=None): def handle_process_completion(process, command, timeout, stdout, stderr, error_code, cpu_metrics=None):
""" """
Utility function that waits for process completion and retrieves its output (stdout and stderr) if it completed Utility function that waits for process completion and retrieves its output (stdout and stderr) if it completed
before the timeout period. Otherwise, the process will get killed and an ExtensionError will be raised. before the timeout period. Otherwise, the process will get killed and an ExtensionError will be raised.
@ -68,15 +68,15 @@ def handle_process_completion(process, command, timeout, stdout, stderr, error_c
:param stdout: Must be a file since we seek on it when parsing the subprocess output :param stdout: Must be a file since we seek on it when parsing the subprocess output
:param stderr: Must be a file since we seek on it when parsing the subprocess outputs :param stderr: Must be a file since we seek on it when parsing the subprocess outputs
:param error_code: The error code to set if we raise an ExtensionError :param error_code: The error code to set if we raise an ExtensionError
:param cpu_cgroup: Reference the cpu cgroup name and path :param cpu_metrics: References the cpu metrics for the cgroup
:return: :return:
""" """
# Wait for process completion or timeout # Wait for process completion or timeout
timed_out, return_code, throttled_time = wait_for_process_completion_or_timeout(process, timeout, cpu_cgroup) timed_out, return_code, throttled_time = wait_for_process_completion_or_timeout(process, timeout, cpu_metrics)
process_output = read_output(stdout, stderr) process_output = read_output(stdout, stderr)
if timed_out: if timed_out:
if cpu_cgroup is not None: # Report CPUThrottledTime when timeout happens if cpu_metrics is not None: # Report CPUThrottledTime when timeout happens
raise ExtensionError("Timeout({0});CPUThrottledTime({1}secs): {2}\n{3}".format(timeout, throttled_time, command, process_output), raise ExtensionError("Timeout({0});CPUThrottledTime({1}secs): {2}\n{3}".format(timeout, throttled_time, command, process_output),
code=ExtensionErrorCodes.PluginHandlerScriptTimedout) code=ExtensionErrorCodes.PluginHandlerScriptTimedout)
@ -211,14 +211,14 @@ def format_stdout_stderr(stdout, stderr):
return to_s(stdout, -1*max_len_each, stderr, -1*max_len_each) return to_s(stdout, -1*max_len_each, stderr, -1*max_len_each)
def get_cpu_throttled_time(cpu_cgroup): def get_cpu_throttled_time(cpu_metrics):
""" """
return the throttled time for the given cgroup. return the throttled time for the given cgroup.
""" """
throttled_time = 0 throttled_time = 0
if cpu_cgroup is not None: if cpu_metrics is not None:
try: try:
throttled_time = cpu_cgroup.get_cpu_throttled_time(read_previous_throttled_time=False) throttled_time = cpu_metrics.get_cpu_throttled_time(read_previous_throttled_time=False)
except Exception as e: except Exception as e:
logger.warn("Failed to get cpu throttled time for the extension: {0}", ustr(e)) logger.warn("Failed to get cpu throttled time for the extension: {0}", ustr(e))

Просмотреть файл

@ -22,7 +22,7 @@ import threading
import azurelinuxagent.common.conf as conf import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.logger as logger import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.networkutil as networkutil import azurelinuxagent.common.utils.networkutil as networkutil
from azurelinuxagent.ga.cgroup import MetricValue, MetricsCategory, MetricsCounter from azurelinuxagent.ga.controllermetrics import MetricValue, MetricsCategory, MetricsCounter
from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
from azurelinuxagent.common.errorstate import ErrorState from azurelinuxagent.common.errorstate import ErrorState

Просмотреть файл

@ -19,7 +19,7 @@ import shutil
import subprocess import subprocess
import tempfile import tempfile
from azurelinuxagent.ga.cgroup import CpuCgroup from azurelinuxagent.ga.controllermetrics import CpuMetrics
from azurelinuxagent.common.exception import ExtensionError, ExtensionErrorCodes from azurelinuxagent.common.exception import ExtensionError, ExtensionErrorCodes
from azurelinuxagent.common.future import ustr from azurelinuxagent.common.future import ustr
from azurelinuxagent.ga.extensionprocessutil import format_stdout_stderr, read_output, \ from azurelinuxagent.ga.extensionprocessutil import format_stdout_stderr, read_output, \
@ -52,7 +52,7 @@ class TestProcessUtils(AgentTestCase):
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE) stderr=subprocess.PIPE)
timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=5, cpu_cgroup=None) timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=5, cpu_metrics=None)
self.assertEqual(timed_out, False) self.assertEqual(timed_out, False)
self.assertEqual(ret, 0) self.assertEqual(ret, 0)
@ -70,7 +70,8 @@ class TestProcessUtils(AgentTestCase):
# We don't actually mock the kill, just wrap it so we can assert its call count # We don't actually mock the kill, just wrap it so we can assert its call count
with patch('azurelinuxagent.ga.extensionprocessutil.os.killpg', wraps=os.killpg) as patch_kill: with patch('azurelinuxagent.ga.extensionprocessutil.os.killpg', wraps=os.killpg) as patch_kill:
with patch('time.sleep') as mock_sleep: with patch('time.sleep') as mock_sleep:
timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=timeout, cpu_cgroup=None) timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=timeout,
cpu_metrics=None)
# We're mocking sleep to avoid prolonging the test execution time, but we still want to make sure # We're mocking sleep to avoid prolonging the test execution time, but we still want to make sure
# we're "waiting" the correct amount of time before killing the process # we're "waiting" the correct amount of time before killing the process
@ -89,7 +90,7 @@ class TestProcessUtils(AgentTestCase):
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE) stderr=subprocess.PIPE)
timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=5, cpu_cgroup=None) timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=5, cpu_metrics=None)
self.assertEqual(timed_out, False) self.assertEqual(timed_out, False)
self.assertEqual(ret, 2) self.assertEqual(ret, 2)
@ -105,12 +106,8 @@ class TestProcessUtils(AgentTestCase):
stderr=stderr, stderr=stderr,
preexec_fn=os.setsid) preexec_fn=os.setsid)
process_output = handle_process_completion(process=process, process_output = handle_process_completion(process=process, command=command, timeout=5, stdout=stdout,
command=command, stderr=stderr, error_code=42)
timeout=5,
stdout=stdout,
stderr=stderr,
error_code=42)
expected_output = "[stdout]\ndummy stdout\n\n\n[stderr]\ndummy stderr\n" expected_output = "[stdout]\ndummy stdout\n\n\n[stderr]\ndummy stderr\n"
self.assertEqual(process_output, expected_output) self.assertEqual(process_output, expected_output)
@ -130,12 +127,8 @@ class TestProcessUtils(AgentTestCase):
stderr=stderr, stderr=stderr,
preexec_fn=os.setsid) preexec_fn=os.setsid)
handle_process_completion(process=process, handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout,
command=command, stderr=stderr, error_code=42)
timeout=timeout,
stdout=stdout,
stderr=stderr,
error_code=42)
# We're mocking sleep to avoid prolonging the test execution time, but we still want to make sure # We're mocking sleep to avoid prolonging the test execution time, but we still want to make sure
# we're "waiting" the correct amount of time before killing the process and raising an exception # we're "waiting" the correct amount of time before killing the process and raising an exception
@ -158,7 +151,7 @@ class TestProcessUtils(AgentTestCase):
test_file = os.path.join(self.tmp_dir, "cpu.stat") test_file = os.path.join(self.tmp_dir, "cpu.stat")
shutil.copyfile(os.path.join(data_dir, "cgroups", "cpu.stat_t0"), shutil.copyfile(os.path.join(data_dir, "cgroups", "cpu.stat_t0"),
test_file) # throttled_time = 50 test_file) # throttled_time = 50
cgroup = CpuCgroup("test", self.tmp_dir) cgroup = CpuMetrics("test", self.tmp_dir)
process = subprocess.Popen(command, # pylint: disable=subprocess-popen-preexec-fn process = subprocess.Popen(command, # pylint: disable=subprocess-popen-preexec-fn
shell=True, shell=True,
cwd=self.tmp_dir, cwd=self.tmp_dir,
@ -167,13 +160,8 @@ class TestProcessUtils(AgentTestCase):
stderr=stderr, stderr=stderr,
preexec_fn=os.setsid) preexec_fn=os.setsid)
handle_process_completion(process=process, handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout,
command=command, stderr=stderr, error_code=42, cpu_metrics=cgroup)
timeout=timeout,
stdout=stdout,
stderr=stderr,
error_code=42,
cpu_cgroup=cgroup)
# We're mocking sleep to avoid prolonging the test execution time, but we still want to make sure # We're mocking sleep to avoid prolonging the test execution time, but we still want to make sure
# we're "waiting" the correct amount of time before killing the process and raising an exception # we're "waiting" the correct amount of time before killing the process and raising an exception
@ -200,11 +188,7 @@ class TestProcessUtils(AgentTestCase):
stderr=stderr, stderr=stderr,
preexec_fn=os.setsid) preexec_fn=os.setsid)
handle_process_completion(process=process, handle_process_completion(process=process, command=command, timeout=4, stdout=stdout, stderr=stderr,
command=command,
timeout=4,
stdout=stdout,
stderr=stderr,
error_code=error_code) error_code=error_code)
self.assertEqual(context_manager.exception.code, error_code) self.assertEqual(context_manager.exception.code, error_code)

Просмотреть файл

@ -0,0 +1,3 @@
123
234
345

Просмотреть файл

@ -24,10 +24,11 @@ import tempfile
from azurelinuxagent.common.exception import CGroupsException from azurelinuxagent.common.exception import CGroupsException
from azurelinuxagent.ga.cgroupapi import SystemdCgroupApiv1, SystemdCgroupApiv2, CGroupUtil, get_cgroup_api, \ from azurelinuxagent.ga.cgroupapi import SystemdCgroupApiv1, SystemdCgroupApiv2, CGroupUtil, get_cgroup_api, \
InvalidCgroupMountpointException InvalidCgroupMountpointException, CgroupV1, CgroupV2
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
from azurelinuxagent.common.osutil import systemd from azurelinuxagent.common.osutil import systemd
from azurelinuxagent.common.utils import fileutil from azurelinuxagent.common.utils import fileutil
from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics
from tests.lib.mock_cgroup_environment import mock_cgroup_v1_environment, mock_cgroup_v2_environment, \ from tests.lib.mock_cgroup_environment import mock_cgroup_v1_environment, mock_cgroup_v2_environment, \
mock_cgroup_hybrid_environment mock_cgroup_hybrid_environment
from tests.lib.mock_environment import MockCommand from tests.lib.mock_environment import MockCommand
@ -85,7 +86,7 @@ class CGroupUtilTestCase(AgentTestCase):
class SystemdCgroupsApiTestCase(AgentTestCase): class SystemdCgroupsApiTestCase(AgentTestCase):
def test_get_cgroup_api_raises_exception_when_systemd_mount_point_does_not_exist(self): def test_get_cgroup_api_raises_exception_when_systemd_mountpoint_does_not_exist(self):
with mock_cgroup_v1_environment(self.tmp_dir): with mock_cgroup_v1_environment(self.tmp_dir):
# Mock os.path.exists to return False for the os.path.exists(CGROUP_FILE_SYSTEM_ROOT) check # Mock os.path.exists to return False for the os.path.exists(CGROUP_FILE_SYSTEM_ROOT) check
with patch("os.path.exists", return_value=False): with patch("os.path.exists", return_value=False):
@ -151,106 +152,16 @@ class SystemdCgroupsApiTestCase(AgentTestCase):
class SystemdCgroupsApiv1TestCase(AgentTestCase): class SystemdCgroupsApiv1TestCase(AgentTestCase):
def test_get_unit_cgroup_paths_should_return_the_cgroup_v1_mount_points(self): def test_get_controller_mountpoints_should_return_only_supported_controllers(self):
with mock_cgroup_v1_environment(self.tmp_dir):
cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service")
self.assertIn(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service',
"The mount point for the CPU controller is incorrect")
self.assertIn(memory, '/sys/fs/cgroup/memory/system.slice/extension.service',
"The mount point for the memory controller is incorrect")
def test_get_unit_cgroup_path_should_return_None_if_either_cgroup_v1_controller_not_mounted(self):
with mock_cgroup_v1_environment(self.tmp_dir):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_controller_root_paths', return_value=('/sys/fs/cgroup/cpu,cpuacct', None)):
cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service")
self.assertIn(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service',
"The mount point for the CPU controller is incorrect")
self.assertIsNone(memory,
"The mount point for the memory controller is None so unit cgroup should be None")
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_controller_root_paths', return_value=(None, '/sys/fs/cgroup/memory')):
cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service")
self.assertIsNone(cpu, "The mount point for the cpu controller is None so unit cgroup should be None")
self.assertIn(memory, '/sys/fs/cgroup/memory/system.slice/extension.service',
"The mount point for the memory controller is incorrect")
def test_get_process_cgroup_paths_should_return_the_cgroup_v1_mount_points(self):
with mock_cgroup_v1_environment(self.tmp_dir):
cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
self.assertIn(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service',
"The mount point for the CPU controller is incorrect")
self.assertIn(memory, '/sys/fs/cgroup/memory/system.slice/walinuxagent.service',
"The mount point for the memory controller is incorrect")
def test_get_process_cgroup_path_should_return_None_if_either_cgroup_v1_controller_not_mounted(self):
with mock_cgroup_v1_environment(self.tmp_dir):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_controller_root_paths', return_value=('/sys/fs/cgroup/cpu,cpuacct', None)):
cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
self.assertIn(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service',
"The mount point for the CPU controller is incorrect")
self.assertIsNone(memory,
"The mount point for the memory controller is None so unit cgroup should be None")
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_controller_root_paths', return_value=(None, '/sys/fs/cgroup/memory')):
cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
self.assertIsNone(cpu, "The mount point for the CPU controller is None so unit cgroup should be None")
self.assertIn(memory, '/sys/fs/cgroup/memory/system.slice/walinuxagent.service',
"The mount point for the memory controller is incorrect")
def test_get_process_cgroup_v1_path_should_return_None_if_either_relative_path_is_None(self):
with mock_cgroup_v1_environment(self.tmp_dir):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_relative_paths', return_value=('system.slice/walinuxagent.service', None)):
cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
self.assertIn(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service',
"The mount point for the CPU controller is incorrect")
self.assertIsNone(memory,
"The relative cgroup path for the memory controller is None so unit cgroup should be None")
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_relative_paths', return_value=(None, 'system.slice/walinuxagent.service')):
cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
self.assertIsNone(cpu, "The relative cgroup path for the cpu controller is None so unit cgroup should be None")
self.assertIn(memory, '/sys/fs/cgroup/memory/system.slice/walinuxagent.service',
"The mount point for the memory controller is incorrect")
def test_get_controller_root_paths_should_return_the_cgroup_v1_controller_mount_points(self):
with mock_cgroup_v1_environment(self.tmp_dir):
cpu, memory = get_cgroup_api().get_controller_root_paths()
self.assertEqual(cpu, '/sys/fs/cgroup/cpu,cpuacct', "The root cgroup for the CPU controller is incorrect")
self.assertEqual(memory, '/sys/fs/cgroup/memory', "The root cgroup for the memory controller is incorrect")
def test_get_controller_root_paths_should_return_None_if_either_controller_not_mounted(self):
with mock_cgroup_v1_environment(self.tmp_dir):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/sys/fs/cgroup/memory', 'io': '/sys/fs/cgroup/io'}):
cpu, memory = get_cgroup_api().get_controller_root_paths()
self.assertIsNone(cpu, "The CPU controller is mot mounted, so the cpu controller path should be None")
self.assertEqual(memory, '/sys/fs/cgroup/memory', "The root cgroup for the memory controller is incorrect")
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct', 'io': '/sys/fs/cgroup/io'}):
cpu, memory = get_cgroup_api().get_controller_root_paths()
self.assertIsNone(memory, "The memory controller is mot mounted, so the memory controller path should be None")
self.assertEqual(cpu, '/sys/fs/cgroup/cpu,cpuacct', "The root cgroup for the cpu controller is incorrect")
def test_get_controller_mountpoints_should_return_all_controller_mount_points(self):
with mock_cgroup_v1_environment(self.tmp_dir): with mock_cgroup_v1_environment(self.tmp_dir):
cgroup_api = get_cgroup_api() cgroup_api = get_cgroup_api()
# Expected value comes from findmnt output in the mocked environment # Expected value comes from findmnt output in the mocked environment
self.assertEqual(cgroup_api._get_controller_mountpoints(), { self.assertEqual(cgroup_api._get_controller_mountpoints(), {
'systemd': '/sys/fs/cgroup/systemd',
'devices': '/sys/fs/cgroup/devices',
'rdma': '/sys/fs/cgroup/rdma',
'perf_event': '/sys/fs/cgroup/perf_event',
'net_cls,net_prio': '/sys/fs/cgroup/net_cls,net_prio',
'blkio': '/sys/fs/cgroup/blkio',
'cpuset': '/sys/fs/cgroup/cpuset',
'misc': '/sys/fs/cgroup/misc',
'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct', 'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct',
'memory': '/sys/fs/cgroup/memory', 'memory': '/sys/fs/cgroup/memory'
'freezer': '/sys/fs/cgroup/freezer',
'hugetlb': '/sys/fs/cgroup/hugetlb',
'pids': '/sys/fs/cgroup/pids',
}, "The controller mountpoints are not correct") }, "The controller mountpoints are not correct")
def test_are_mountpoints_systemd_created_should_return_False_if_cpu_or_memory_are_not_systemd_mountpoints(self): def test_are_mountpoints_systemd_created_should_return_False_if_mountpoints_are_not_systemd(self):
with mock_cgroup_v1_environment(self.tmp_dir): with mock_cgroup_v1_environment(self.tmp_dir):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/custom/mountpoint/path', 'memory': '/custom/mountpoint/path'}): with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/custom/mountpoint/path', 'memory': '/custom/mountpoint/path'}):
self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created()) self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created())
@ -261,23 +172,123 @@ class SystemdCgroupsApiv1TestCase(AgentTestCase):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/custom/mountpoint/path'}): with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/custom/mountpoint/path'}):
self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created()) self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created())
def test_are_mountpoints_systemd_created_should_return_True_if_cpu_and_memory_are_systemd_mountpoints(self): def test_are_mountpoints_systemd_created_should_return_True_if_mountpoints_are_systemd(self):
with mock_cgroup_v1_environment(self.tmp_dir): with mock_cgroup_v1_environment(self.tmp_dir):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup', 'memory': '/sys/fs/cgroup'}): with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct', 'memory': '/sys/fs/cgroup/memory'}):
self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created()) self.assertTrue(SystemdCgroupApiv1().are_mountpoints_systemd_created())
# are_mountpoints_systemd_created should only check controllers which are mounted # are_mountpoints_systemd_created should only check controllers which are mounted
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup'}): with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'}):
self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created()) self.assertTrue(SystemdCgroupApiv1().are_mountpoints_systemd_created())
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/sys/fs/cgroup'}): with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/sys/fs/cgroup/memory'}):
self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created()) self.assertTrue(SystemdCgroupApiv1().are_mountpoints_systemd_created())
def test_get_cpu_and_memory_cgroup_relative_paths_for_process_should_return_the_cgroup_v1_relative_paths(self): with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={}):
self.assertTrue(SystemdCgroupApiv1().are_mountpoints_systemd_created())
def test_get_relative_paths_for_process_should_return_the_cgroup_v1_relative_paths(self):
with mock_cgroup_v1_environment(self.tmp_dir): with mock_cgroup_v1_environment(self.tmp_dir):
cpu, memory = get_cgroup_api().get_process_cgroup_relative_paths('self') relative_paths = get_cgroup_api()._get_process_relative_controller_paths('self')
self.assertEqual(cpu, "system.slice/walinuxagent.service", "The relative path for the CPU cgroup is incorrect") self.assertEqual(len(relative_paths), 2)
self.assertEqual(memory, "system.slice/walinuxagent.service", "The relative memory for the CPU cgroup is incorrect") self.assertEqual(relative_paths.get('cpu,cpuacct'), "system.slice/walinuxagent.service", "The relative path for the CPU cgroup is incorrect")
self.assertEqual(relative_paths.get('memory'), "system.slice/walinuxagent.service", "The relative memory for the memory cgroup is incorrect")
def test_get_unit_cgroup_should_return_correct_paths_for_cgroup_v1(self):
    # In the mocked cgroup v1 environment, the cgroup returned for a unit should be a CgroupV1
    # carrying the cpu,cpuacct and memory mountpoints and the unit's path under each of them.
    expected_mountpoints = {
        'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct',
        'memory': '/sys/fs/cgroup/memory'
    }
    expected_paths = {
        'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service',
        'memory': '/sys/fs/cgroup/memory/system.slice/extension.service'
    }

    with mock_cgroup_v1_environment(self.tmp_dir):
        api = get_cgroup_api()
        unit_cgroup = api.get_unit_cgroup(unit_name="extension.service", cgroup_name="extension")

        self.assertIsInstance(unit_cgroup, CgroupV1)
        self.assertEqual(unit_cgroup._cgroup_name, "extension")
        self.assertEqual(unit_cgroup._controller_mountpoints, expected_mountpoints)
        self.assertEqual(unit_cgroup._controller_paths, expected_paths)
def test_get_unit_cgroup_should_return_only_mounted_controllers_v1(self):
    # Each scenario is (mountpoints reported by the patched _get_controller_mountpoints,
    # mountpoints expected on the cgroup, controller paths expected on the cgroup).
    # Expected values are separate literals so the comparison never aliases the patched return value.
    scenarios = [
        (
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'},
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'},
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service'}
        ),
        ({}, {}, {})
    ]

    with mock_cgroup_v1_environment(self.tmp_dir):
        for mounted, expected_mountpoints, expected_paths in scenarios:
            with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value=mounted):
                unit_cgroup = get_cgroup_api().get_unit_cgroup(unit_name="extension.service", cgroup_name="extension")

                self.assertIsInstance(unit_cgroup, CgroupV1)
                self.assertEqual(unit_cgroup._cgroup_name, "extension")
                self.assertEqual(unit_cgroup._controller_mountpoints, expected_mountpoints)
                self.assertEqual(unit_cgroup._controller_paths, expected_paths)
def test_get_cgroup_from_relative_path_should_return_the_correct_paths_for_cgroup_v1(self):
    # In the mocked cgroup v1 environment, a cgroup built from a relative path should expose
    # that relative path appended to each controller mountpoint.
    expected_mountpoints = {
        'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct',
        'memory': '/sys/fs/cgroup/memory'
    }
    expected_paths = {
        'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/some/relative/path',
        'memory': '/sys/fs/cgroup/memory/some/relative/path'
    }

    with mock_cgroup_v1_environment(self.tmp_dir):
        api = get_cgroup_api()
        relative_cgroup = api.get_cgroup_from_relative_path(relative_path="some/relative/path", cgroup_name="test_cgroup")

        self.assertIsInstance(relative_cgroup, CgroupV1)
        self.assertEqual(relative_cgroup._cgroup_name, "test_cgroup")
        self.assertEqual(relative_cgroup._controller_mountpoints, expected_mountpoints)
        self.assertEqual(relative_cgroup._controller_paths, expected_paths)
def test_get_cgroup_from_relative_path_should_return_only_mounted_controllers_v1(self):
    # Each scenario is (mountpoints reported by the patched _get_controller_mountpoints,
    # mountpoints expected on the cgroup, controller paths expected on the cgroup).
    # Expected values are separate literals so the comparison never aliases the patched return value.
    scenarios = [
        (
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'},
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'},
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/some/relative/path'}
        ),
        ({}, {}, {})
    ]

    with mock_cgroup_v1_environment(self.tmp_dir):
        for mounted, expected_mountpoints, expected_paths in scenarios:
            with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value=mounted):
                relative_cgroup = get_cgroup_api().get_cgroup_from_relative_path(relative_path="some/relative/path", cgroup_name="test_cgroup")

                self.assertIsInstance(relative_cgroup, CgroupV1)
                self.assertEqual(relative_cgroup._cgroup_name, "test_cgroup")
                self.assertEqual(relative_cgroup._controller_mountpoints, expected_mountpoints)
                self.assertEqual(relative_cgroup._controller_paths, expected_paths)
def test_get_process_cgroup_should_return_the_correct_paths_for_cgroup_v1(self):
    # In the mocked cgroup v1 environment, the cgroup of the "self" process should resolve to
    # walinuxagent.service under both the cpu,cpuacct and memory mountpoints.
    expected_mountpoints = {
        'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct',
        'memory': '/sys/fs/cgroup/memory'
    }
    expected_paths = {
        'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service',
        'memory': '/sys/fs/cgroup/memory/system.slice/walinuxagent.service'
    }

    with mock_cgroup_v1_environment(self.tmp_dir):
        api = get_cgroup_api()
        process_cgroup = api.get_process_cgroup(process_id="self", cgroup_name="walinuxagent")

        self.assertIsInstance(process_cgroup, CgroupV1)
        self.assertEqual(process_cgroup._cgroup_name, "walinuxagent")
        self.assertEqual(process_cgroup._controller_mountpoints, expected_mountpoints)
        self.assertEqual(process_cgroup._controller_paths, expected_paths)
def test_get_process_cgroup_should_return_only_mounted_controllers_v1(self):
    # Each scenario is (mountpoints reported by the patched _get_controller_mountpoints,
    # mountpoints expected on the cgroup, controller paths expected on the cgroup).
    # Expected values are separate literals so the comparison never aliases the patched return value.
    scenarios = [
        (
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'},
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'},
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service'}
        ),
        ({}, {}, {})
    ]

    with mock_cgroup_v1_environment(self.tmp_dir):
        for mounted, expected_mountpoints, expected_paths in scenarios:
            with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value=mounted):
                process_cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")

                self.assertIsInstance(process_cgroup, CgroupV1)
                self.assertEqual(process_cgroup._cgroup_name, "walinuxagent")
                self.assertEqual(process_cgroup._controller_mountpoints, expected_mountpoints)
                self.assertEqual(process_cgroup._controller_paths, expected_paths)
def test_get_process_cgroup_should_return_only_mounted_process_controllers_v1(self):
    # Only the process' relative controller paths are patched here, so the mountpoints are expected
    # to stay the same in every scenario while the controller paths follow the patched value.
    # Each scenario is (patched relative controller paths, controller paths expected on the cgroup).
    expected_mountpoints = {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct', 'memory': '/sys/fs/cgroup/memory'}
    scenarios = [
        (
            {'cpu,cpuacct': 'relative/path'},
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/relative/path'}
        ),
        ({}, {})
    ]

    with mock_cgroup_v1_environment(self.tmp_dir):
        for relative_paths, expected_paths in scenarios:
            with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_process_relative_controller_paths', return_value=relative_paths):
                process_cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")

                self.assertIsInstance(process_cgroup, CgroupV1)
                self.assertEqual(process_cgroup._cgroup_name, "walinuxagent")
                self.assertEqual(process_cgroup._controller_mountpoints, expected_mountpoints)
                self.assertEqual(process_cgroup._controller_paths, expected_paths)
@patch('time.sleep', side_effect=lambda _: mock_sleep()) @patch('time.sleep', side_effect=lambda _: mock_sleep())
def test_start_extension_cgroups_v1_command_should_return_the_command_output(self, _): def test_start_extension_cgroups_v1_command_should_return_the_command_output(self, _):
@ -354,17 +365,6 @@ class SystemdCgroupsApiv1TestCase(AgentTestCase):
class SystemdCgroupsApiv2TestCase(AgentTestCase): class SystemdCgroupsApiv2TestCase(AgentTestCase):
def test_get_controllers_enabled_at_root_should_return_list_of_enabled_controllers(self):
with mock_cgroup_v2_environment(self.tmp_dir):
cgroup_api = get_cgroup_api()
self.assertEqual(cgroup_api._get_controllers_enabled_at_root('/sys/fs/cgroup'), ['cpuset', 'cpu', 'io', 'memory', 'pids'])
def test_get_controllers_enabled_at_root_should_return_empty_list_if_root_cgroup_path_is_None(self):
with mock_cgroup_v2_environment(self.tmp_dir):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=None):
cgroup_api = get_cgroup_api()
self.assertEqual(cgroup_api._controllers_enabled_at_root, [])
def test_get_root_cgroup_path_should_return_v2_cgroup_root(self): def test_get_root_cgroup_path_should_return_v2_cgroup_root(self):
with mock_cgroup_v2_environment(self.tmp_dir): with mock_cgroup_v2_environment(self.tmp_dir):
cgroup_api = get_cgroup_api() cgroup_api = get_cgroup_api()
@ -374,97 +374,113 @@ class SystemdCgroupsApiv2TestCase(AgentTestCase):
with mock_cgroup_v2_environment(self.tmp_dir) as env: with mock_cgroup_v2_environment(self.tmp_dir) as env:
# Mock an environment which has multiple v2 mountpoints # Mock an environment which has multiple v2 mountpoints
env.add_command(MockCommand(r"^findmnt -t cgroup2 --noheadings$", env.add_command(MockCommand(r"^findmnt -t cgroup2 --noheadings$",
'''/custom/mountpoint/path1 cgroup2 cgroup2 rw,relatime '''/custom/mountpoint/path1 cgroup2 cgroup2 rw,relatime
/sys/fs/cgroup cgroup2 cgroup2 rw,nosuid,nodev,noexec,relatime /sys/fs/cgroup cgroup2 cgroup2 rw,nosuid,nodev,noexec,relatime
/custom/mountpoint/path2 none cgroup2 rw,relatime /custom/mountpoint/path2 none cgroup2 rw,relatime
''')) '''))
cgroup_api = get_cgroup_api() cgroup_api = get_cgroup_api()
self.assertEqual(cgroup_api._get_root_cgroup_path(), '/sys/fs/cgroup') self.assertEqual(cgroup_api._get_root_cgroup_path(), '/sys/fs/cgroup')
def test_get_unit_cgroup_paths_should_return_the_cgroup_v2_cgroup_paths(self): def test_get_controllers_enabled_at_root_should_return_list_of_agent_supported_and_enabled_controllers(self):
with mock_cgroup_v2_environment(self.tmp_dir): with mock_cgroup_v2_environment(self.tmp_dir):
cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service") cgroup_api = get_cgroup_api()
self.assertEqual(cpu, '/sys/fs/cgroup/system.slice/extension.service', enabled_controllers = cgroup_api._get_controllers_enabled_at_root('/sys/fs/cgroup')
"The cgroup path for the CPU controller is incorrect") self.assertEqual(len(enabled_controllers), 2)
self.assertEqual(memory, '/sys/fs/cgroup/system.slice/extension.service', self.assertIn('cpu', enabled_controllers)
"The cgroup path for the memory controller is incorrect") self.assertIn('memory', enabled_controllers)
def test_get_unit_cgroup_path_should_return_None_if_either_cgroup_v2_controller_not_enabled(self): def test_get_controllers_enabled_at_root_should_return_empty_list_if_root_cgroup_path_is_empty(self):
with mock_cgroup_v2_environment(self.tmp_dir): with mock_cgroup_v2_environment(self.tmp_dir):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_controller_root_paths', return_value=('/sys/fs/cgroup', None)): with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=""):
cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service") cgroup_api = get_cgroup_api()
self.assertIn(cpu, '/sys/fs/cgroup/system.slice/extension.service', self.assertEqual(cgroup_api._controllers_enabled_at_root, [])
"The cgroup path for the CPU controller is incorrect")
self.assertIsNone(memory,
"The cgroup path for the memory controller is None so unit cgroup should be None")
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_controller_root_paths', return_value=(None, '/sys/fs/cgroup')): def test_get_process_relative_cgroup_path_should_return_relative_path(self):
cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service")
self.assertIsNone(cpu, "The cgroup path for the cpu controller is None so unit cgroup should be None")
self.assertIn(memory, '/sys/fs/cgroup/system.slice/extension.service',
"The cgroup path for the memory controller is incorrect")
def test_get_process_cgroup_paths_should_return_the_cgroup_v2_cgroup_paths(self):
with mock_cgroup_v2_environment(self.tmp_dir): with mock_cgroup_v2_environment(self.tmp_dir):
cpu, memory = get_cgroup_api().get_process_cgroup_paths("self") cgroup_api = get_cgroup_api()
self.assertIn(cpu, '/sys/fs/cgroup/system.slice/walinuxagent.service', self.assertEqual(cgroup_api._get_process_relative_cgroup_path(process_id="self"), "system.slice/walinuxagent.service")
"The cgroup path for the CPU controller is incorrect")
self.assertIn(memory, '/sys/fs/cgroup/system.slice/walinuxagent.service',
"The cgroup path for the memory controller is incorrect")
def test_get_process_cgroup_path_should_return_None_if_either_cgroup_v2_controller_not_enabled(self): def test_get_unit_cgroup_should_return_correct_paths_for_cgroup_v2(self):
with mock_cgroup_v2_environment(self.tmp_dir): with mock_cgroup_v2_environment(self.tmp_dir):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_controller_root_paths', return_value=('/sys/fs/cgroup', None)): cgroup = get_cgroup_api().get_unit_cgroup(unit_name="extension.service", cgroup_name="extension")
cpu, memory = get_cgroup_api().get_process_cgroup_paths("self") self.assertIsInstance(cgroup, CgroupV2)
self.assertIn(cpu, '/sys/fs/cgroup/system.slice/walinuxagent.service', self.assertEqual(cgroup._cgroup_name, "extension")
"The cgroup path for the CPU controller is incorrect") self.assertEqual(cgroup._root_cgroup_path, "/sys/fs/cgroup")
self.assertIsNone(memory, self.assertEqual(cgroup._cgroup_path, "/sys/fs/cgroup/system.slice/extension.service")
"The cgroup path for the memory controller is None so unit cgroup should be None") self.assertEqual(len(cgroup._enabled_controllers), 2)
self.assertIn('cpu', cgroup._enabled_controllers)
self.assertIn('memory', cgroup._enabled_controllers)
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_controller_root_paths', return_value=(None, '/sys/fs/cgroup')): def test_get_unit_cgroup_should_return_empty_paths_if_root_path_empty_v2(self):
cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
self.assertIsNone(cpu, "The cgroup path for the CPU controller is None so unit cgroup should be None")
self.assertIn(memory, '/sys/fs/cgroup/system.slice/walinuxagent.service',
"The cgroup path for the memory controller is incorrect")
def test_get_process_cgroup_v2_path_should_return_None_if_relative_path_is_None(self):
with mock_cgroup_v2_environment(self.tmp_dir): with mock_cgroup_v2_environment(self.tmp_dir):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_process_cgroup_relative_paths', return_value=(None, None)): with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=""):
cpu, memory = get_cgroup_api().get_process_cgroup_paths("self") cgroup = get_cgroup_api().get_unit_cgroup(unit_name="extension.service", cgroup_name="extension")
self.assertIsNone(cpu, "The relative cgroup path for the cpu controller is None so unit cgroup should be None") self.assertIsInstance(cgroup, CgroupV2)
self.assertIsNone(memory, self.assertEqual(cgroup._cgroup_name, "extension")
"The relative cgroup path for the memory controller is None so unit cgroup should be None") self.assertEqual(cgroup._root_cgroup_path, "")
self.assertEqual(cgroup._cgroup_path, "")
self.assertEqual(len(cgroup._enabled_controllers), 0)
def test_get_controller_root_paths_should_return_the_cgroup_v2_root_cgroup_path(self): def test_get_unit_cgroup_should_return_only_enabled_controllers_v2(self):
with mock_cgroup_v2_environment(self.tmp_dir): with mock_cgroup_v2_environment(self.tmp_dir):
cpu, memory = get_cgroup_api().get_controller_root_paths() with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_controllers_enabled_at_root', return_value=['cpu']):
self.assertEqual(cpu, '/sys/fs/cgroup', "The root cgroup for the CPU controller is incorrect") cgroup = get_cgroup_api().get_unit_cgroup(unit_name="extension.service", cgroup_name="extension")
self.assertEqual(memory, '/sys/fs/cgroup', "The root cgroup for the memory controller is incorrect") self.assertIsInstance(cgroup, CgroupV2)
self.assertEqual(cgroup._cgroup_name, "extension")
self.assertEqual(cgroup._root_cgroup_path, "/sys/fs/cgroup")
self.assertEqual(cgroup._cgroup_path, "/sys/fs/cgroup/system.slice/extension.service")
self.assertEqual(len(cgroup._enabled_controllers), 1)
self.assertIn('cpu', cgroup._enabled_controllers)
def test_get_controller_root_paths_should_return_None_if_root_cgroup_path_is_None(self): with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_controllers_enabled_at_root', return_value=[]):
cgroup = get_cgroup_api().get_unit_cgroup(unit_name="extension.service", cgroup_name="extension")
self.assertIsInstance(cgroup, CgroupV2)
self.assertEqual(cgroup._cgroup_name, "extension")
self.assertEqual(cgroup._root_cgroup_path, "/sys/fs/cgroup")
self.assertEqual(cgroup._cgroup_path, "/sys/fs/cgroup/system.slice/extension.service")
self.assertEqual(len(cgroup._enabled_controllers), 0)
def test_get_cgroup_from_relative_path_should_return_the_correct_paths_for_cgroup_v2(self):
with mock_cgroup_v2_environment(self.tmp_dir): with mock_cgroup_v2_environment(self.tmp_dir):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=None): cgroup = get_cgroup_api().get_cgroup_from_relative_path(relative_path="some/relative/path", cgroup_name="test_cgroup")
cpu, memory = get_cgroup_api().get_controller_root_paths() self.assertIsInstance(cgroup, CgroupV2)
self.assertIsNone(cpu, "The root cgroup path is None, so the CPU controller path should be None") self.assertEqual(cgroup._cgroup_name, "test_cgroup")
self.assertIsNone(memory, "The root cgroup path is None, so the memory controller path should be None") self.assertEqual(cgroup._root_cgroup_path, "/sys/fs/cgroup")
self.assertEqual(cgroup._cgroup_path, "/sys/fs/cgroup/some/relative/path")
self.assertEqual(len(cgroup._enabled_controllers), 2)
self.assertIn('cpu', cgroup._enabled_controllers)
self.assertIn('memory', cgroup._enabled_controllers)
def test_get_controller_root_paths_should_return_None_if_either_controller_not_enabled(self): def test_get_cgroup_from_relative_path_should_return_empty_paths_if_root_path_empty_v2(self):
with mock_cgroup_v2_environment(self.tmp_dir): with mock_cgroup_v2_environment(self.tmp_dir):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_controllers_enabled_at_root', return_value=['io', 'memory']): with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=""):
cpu, memory = get_cgroup_api().get_controller_root_paths() cgroup = get_cgroup_api().get_cgroup_from_relative_path(relative_path="some/relative/path", cgroup_name="test_cgroup")
self.assertIsNone(cpu, "The CPU controller is not enabled, so the CPU controller path should be None") self.assertIsInstance(cgroup, CgroupV2)
self.assertEqual(memory, '/sys/fs/cgroup', "The root cgroup for the memory controller is incorrect") self.assertEqual(cgroup._cgroup_name, "test_cgroup")
self.assertEqual(cgroup._root_cgroup_path, "")
self.assertEqual(cgroup._cgroup_path, "")
self.assertEqual(len(cgroup._enabled_controllers), 0)
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_controllers_enabled_at_root', return_value=['cpu', 'io']): def test_get_process_cgroup_should_return_the_correct_paths_for_cgroup_v2(self):
cpu, memory = get_cgroup_api().get_controller_root_paths()
self.assertEqual(cpu, '/sys/fs/cgroup', "The root cgroup for the CPU controller is incorrect")
self.assertIsNone(memory, "The memory controller is not enabled, so the memory controller path should be None")
def test_get_cpu_and_memory_cgroup_relative_paths_for_process_should_return_the_cgroup_v2_relative_paths(self):
with mock_cgroup_v2_environment(self.tmp_dir): with mock_cgroup_v2_environment(self.tmp_dir):
cpu, memory = get_cgroup_api().get_process_cgroup_relative_paths('self') cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
self.assertEqual(cpu, "system.slice/walinuxagent.service", "The relative path for the CPU cgroup is incorrect") self.assertIsInstance(cgroup, CgroupV2)
self.assertEqual(memory, "system.slice/walinuxagent.service", "The relative memory for the CPU cgroup is incorrect") self.assertEqual(cgroup._cgroup_name, "walinuxagent")
self.assertEqual(cgroup._root_cgroup_path, "/sys/fs/cgroup")
self.assertEqual(cgroup._cgroup_path, "/sys/fs/cgroup/system.slice/walinuxagent.service")
self.assertEqual(len(cgroup._enabled_controllers), 2)
self.assertIn('cpu', cgroup._enabled_controllers)
self.assertIn('memory', cgroup._enabled_controllers)
def test_get_process_cgroup_should_return_empty_paths_if_root_path_empty_v2(self):
    """
    With the v2 root cgroup path patched to an empty string, the cgroup returned for the
    current process must be a CgroupV2 with empty root/cgroup paths and no enabled controllers.
    """
    root_path_target = 'azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path'

    with mock_cgroup_v2_environment(self.tmp_dir):
        with patch(root_path_target, return_value=""):
            agent_cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")

            self.assertIsInstance(agent_cgroup, CgroupV2)
            # The name is taken from the request even though no path could be resolved
            self.assertEqual(agent_cgroup._cgroup_name, "walinuxagent")
            self.assertEqual(agent_cgroup._root_cgroup_path, "")
            self.assertEqual(agent_cgroup._cgroup_path, "")
            self.assertEqual(len(agent_cgroup._enabled_controllers), 0)
class SystemdCgroupsApiMockedFileSystemTestCase(_MockedFileSystemTestCase): class SystemdCgroupsApiMockedFileSystemTestCase(_MockedFileSystemTestCase):
@ -483,3 +499,176 @@ class SystemdCgroupsApiMockedFileSystemTestCase(_MockedFileSystemTestCase):
self.assertEqual(legacy_cgroups, 2, "cleanup_legacy_cgroups() did not find all the expected cgroups") self.assertEqual(legacy_cgroups, 2, "cleanup_legacy_cgroups() did not find all the expected cgroups")
self.assertFalse(os.path.exists(legacy_cpu_cgroup), "cleanup_legacy_cgroups() did not remove the CPU legacy cgroup") self.assertFalse(os.path.exists(legacy_cpu_cgroup), "cleanup_legacy_cgroups() did not remove the CPU legacy cgroup")
self.assertFalse(os.path.exists(legacy_memory_cgroup), "cleanup_legacy_cgroups() did not remove the memory legacy cgroup") self.assertFalse(os.path.exists(legacy_memory_cgroup), "cleanup_legacy_cgroups() did not remove the memory legacy cgroup")
class CgroupsApiv1TestCase(AgentTestCase):
    """
    Tests for the cgroup object returned by get_process_cgroup() in a mocked cgroup v1 environment.
    """

    # Patch targets shared by several tests
    _RELATIVE_PATHS_TARGET = 'azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_process_relative_controller_paths'
    _MOUNTPOINTS_TARGET = 'azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints'

    @staticmethod
    def _get_agent_cgroup():
        """
        Returns the cgroup of the current process, named "walinuxagent". Must be called inside the
        mocked environment (and inside any patch the test wants to be reflected in the result).
        """
        return get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")

    def _assert_metrics(self, metrics, expected):
        """
        Asserts that 'metrics' has one entry per item of 'expected' (a list of (type, path) tuples),
        in the same order, each named "walinuxagent".
        """
        self.assertEqual(len(metrics), len(expected))
        for metric, (metric_type, metric_path) in zip(metrics, expected):
            self.assertIsInstance(metric, metric_type)
            self.assertEqual(metric.name, "walinuxagent")
            self.assertEqual(metric.path, metric_path)

    def test_get_supported_controllers_returns_v1_controllers(self):
        """The supported controllers must be exactly 'cpu,cpuacct' and 'memory'."""
        with mock_cgroup_v1_environment(self.tmp_dir):
            supported = self._get_agent_cgroup().get_supported_controllers()
            self.assertEqual(len(supported), 2)
            for controller in ('cpu,cpuacct', 'memory'):
                self.assertIn(controller, supported)

    def test_check_in_expected_slice_returns_True_if_all_paths_in_expected_slice(self):
        """The slice check must pass for the mocked paths, for a single controller path, and for no paths."""
        with mock_cgroup_v1_environment(self.tmp_dir):
            self.assertTrue(self._get_agent_cgroup().check_in_expected_slice(expected_slice='system.slice'))

            for relative_paths in ({'cpu,cpuacct': 'system.slice/walinuxagent.service'}, {}):
                with patch(self._RELATIVE_PATHS_TARGET, return_value=relative_paths):
                    agent_cgroup = self._get_agent_cgroup()
                    self.assertTrue(agent_cgroup.check_in_expected_slice(expected_slice='system.slice'))

    def test_check_in_expected_slice_returns_False_if_any_paths_not_in_expected_slice(self):
        """The slice check must fail when at least one controller path is outside the expected slice."""
        patched_cases = [
            # (relative controller paths, slice to check)
            ({'cpu,cpuacct': 'system.slice/walinuxagent.service', 'memory': 'user.slice/walinuxagent.service'}, 'user.slice'),
            ({'cpu,cpuacct': '', 'memory': ''}, 'system.slice'),
        ]

        with mock_cgroup_v1_environment(self.tmp_dir):
            self.assertFalse(self._get_agent_cgroup().check_in_expected_slice(expected_slice='user.slice'))

            for relative_paths, checked_slice in patched_cases:
                with patch(self._RELATIVE_PATHS_TARGET, return_value=relative_paths):
                    agent_cgroup = self._get_agent_cgroup()
                    self.assertFalse(agent_cgroup.check_in_expected_slice(expected_slice=checked_slice))

    def test_get_controller_metrics_returns_all_supported_controllers_v1(self):
        """Both CPU and memory metrics must be returned, in that order, with the v1 controller paths."""
        with mock_cgroup_v1_environment(self.tmp_dir):
            controller_metrics = self._get_agent_cgroup().get_controller_metrics()
            self._assert_metrics(controller_metrics, [
                (CpuMetrics, "/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service"),
                (MemoryMetrics, "/sys/fs/cgroup/memory/system.slice/walinuxagent.service"),
            ])

    def test_get_controller_metrics_returns_only_mounted_controllers_v1(self):
        """Only the controllers present in the (patched) mountpoints must produce metrics."""
        cases = [
            # (controller mountpoints, expected (type, path) of the returned metrics)
            ({'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'},
             [(CpuMetrics, "/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service")]),
            ({'memory': '/sys/fs/cgroup/memory'},
             [(MemoryMetrics, "/sys/fs/cgroup/memory/system.slice/walinuxagent.service")]),
            ({}, []),
        ]

        with mock_cgroup_v1_environment(self.tmp_dir):
            for mountpoints, expected_metrics in cases:
                with patch(self._MOUNTPOINTS_TARGET, return_value=mountpoints):
                    controller_metrics = self._get_agent_cgroup().get_controller_metrics()
                    self._assert_metrics(controller_metrics, expected_metrics)

    def test_get_controller_metrics_returns_only_controllers_at_expected_path_v1(self):
        """Controllers whose relative path differs from the expected relative path must be left out."""
        cases = [
            # (relative controller paths, expected (type, path) of the returned metrics)
            ({'cpu,cpuacct': 'system.slice/walinuxagent.service', 'memory': 'unexpected/path'},
             [(CpuMetrics, "/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service")]),
            ({'cpu,cpuacct': 'unexpected/path', 'memory': 'unexpected/path'}, []),
        ]

        with mock_cgroup_v1_environment(self.tmp_dir):
            for relative_paths, expected_metrics in cases:
                with patch(self._RELATIVE_PATHS_TARGET, return_value=relative_paths):
                    agent_cgroup = self._get_agent_cgroup()
                    controller_metrics = agent_cgroup.get_controller_metrics(expected_relative_path="system.slice/walinuxagent.service")
                    self._assert_metrics(controller_metrics, expected_metrics)

    def test_get_procs_path_returns_correct_path_v1(self):
        """Each controller must report the cgroup.procs file under its own hierarchy."""
        expected_procs_paths = [
            ('cpu,cpuacct', "/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service/cgroup.procs"),
            ('memory', "/sys/fs/cgroup/memory/system.slice/walinuxagent.service/cgroup.procs"),
        ]

        with mock_cgroup_v1_environment(self.tmp_dir):
            agent_cgroup = self._get_agent_cgroup()
            for controller_name, expected_path in expected_procs_paths:
                actual_path = agent_cgroup.get_controller_procs_path(controller=controller_name)
                self.assertEqual(actual_path, expected_path)

    def test_get_processes_returns_processes_at_all_controller_paths_v1(self):
        """The process list must contain exactly the three pids 123, 234 and 345."""
        with mock_cgroup_v1_environment(self.tmp_dir):
            pids = self._get_agent_cgroup().get_processes()
            self.assertEqual(len(pids), 3)
            for expected_pid in (123, 234, 345):
                self.assertIn(expected_pid, pids)

    def test_get_processes_returns_empty_list_if_no_controllers_mounted_v1(self):
        """With no controller mountpoints, get_processes() must return an empty list."""
        with mock_cgroup_v1_environment(self.tmp_dir):
            with patch(self._MOUNTPOINTS_TARGET, return_value={}):
                pids = self._get_agent_cgroup().get_processes()
                self.assertIsInstance(pids, list)
                self.assertEqual(len(pids), 0)

    def test_get_processes_returns_empty_list_if_procs_path_empty_v1(self):
        """With an empty procs path for every controller, get_processes() must return an empty list."""
        with mock_cgroup_v1_environment(self.tmp_dir):
            with patch('azurelinuxagent.ga.cgroupapi.CgroupV1.get_controller_procs_path', return_value=""):
                pids = self._get_agent_cgroup().get_processes()
                self.assertIsInstance(pids, list)
                self.assertEqual(len(pids), 0)
class CgroupsApiv2TestCase(AgentTestCase):
    """
    Tests for the cgroup object returned by get_process_cgroup() in a mocked cgroup v2 environment.
    """

    # Patch target shared by the "root cgroup empty" tests
    _ROOT_PATH_TARGET = 'azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path'

    @staticmethod
    def _get_agent_cgroup():
        """
        Returns the cgroup of the current process, named "walinuxagent". Must be called inside the
        mocked environment (and inside any patch the test wants to be reflected in the result).
        """
        return get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")

    def test_get_supported_controllers_returns_v2_controllers(self):
        """The supported controllers must be exactly 'cpu' and 'memory'."""
        with mock_cgroup_v2_environment(self.tmp_dir):
            supported = self._get_agent_cgroup().get_supported_controllers()
            self.assertEqual(len(supported), 2)
            for controller in ('cpu', 'memory'):
                self.assertIn(controller, supported)

    def test_check_in_expected_slice_returns_True_if_cgroup_path_in_expected_slice(self):
        """The slice check must pass for 'system.slice' in the mocked environment."""
        with mock_cgroup_v2_environment(self.tmp_dir):
            agent_cgroup = self._get_agent_cgroup()
            self.assertTrue(agent_cgroup.check_in_expected_slice(expected_slice='system.slice'))

    def test_check_in_expected_slice_returns_False_if_cgroup_path_not_in_expected_slice(self):
        """The slice check must fail for a different slice and for an empty relative cgroup path."""
        with mock_cgroup_v2_environment(self.tmp_dir):
            self.assertFalse(self._get_agent_cgroup().check_in_expected_slice(expected_slice='user.slice'))

            with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_process_relative_cgroup_path', return_value=""):
                agent_cgroup = self._get_agent_cgroup()
                self.assertFalse(agent_cgroup.check_in_expected_slice(expected_slice='system.slice'))

    def test_get_procs_path_returns_empty_if_root_cgroup_empty_v2(self):
        """With an empty root cgroup path, the procs path must be an empty string."""
        with mock_cgroup_v2_environment(self.tmp_dir):
            with patch(self._ROOT_PATH_TARGET, return_value=""):
                actual_path = self._get_agent_cgroup().get_procs_path()
                self.assertEqual(actual_path, "")

    def test_get_procs_path_returns_correct_path_v2(self):
        """The procs path must be the cgroup.procs file under the agent's service cgroup."""
        with mock_cgroup_v2_environment(self.tmp_dir):
            actual_path = self._get_agent_cgroup().get_procs_path()
            self.assertEqual(actual_path, "/sys/fs/cgroup/system.slice/walinuxagent.service/cgroup.procs")

    def test_get_processes_returns_processes_at_all_controller_paths_v2(self):
        """The process list must contain exactly the three pids 123, 234 and 345."""
        with mock_cgroup_v2_environment(self.tmp_dir):
            pids = self._get_agent_cgroup().get_processes()
            self.assertEqual(len(pids), 3)
            for expected_pid in (123, 234, 345):
                self.assertIn(expected_pid, pids)

    def test_get_processes_returns_empty_list_if_root_cgroup_empty_v2(self):
        """With an empty root cgroup path, get_processes() must return no processes."""
        with mock_cgroup_v2_environment(self.tmp_dir):
            with patch(self._ROOT_PATH_TARGET, return_value=""):
                pids = self._get_agent_cgroup().get_processes()
                self.assertEqual(len(pids), 0)

Просмотреть файл

@ -27,7 +27,7 @@ import time
import threading import threading
from azurelinuxagent.common import conf from azurelinuxagent.common import conf
from azurelinuxagent.ga.cgroup import AGENT_NAME_TELEMETRY, MetricsCounter, MetricValue, MetricsCategory, CpuCgroup from azurelinuxagent.ga.controllermetrics import AGENT_NAME_TELEMETRY, MetricsCounter, MetricValue, MetricsCategory, CpuMetrics
from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator, DisableCgroups from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator, DisableCgroups
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
from azurelinuxagent.common.event import WALAEventOperation from azurelinuxagent.common.event import WALAEventOperation
@ -272,7 +272,7 @@ class CGroupConfiguratorSystemdTestCase(AgentTestCase):
CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/' \ CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/' \
'azure-vmextensions-Microsoft.CPlat.Extension.slice'] = \ 'azure-vmextensions-Microsoft.CPlat.Extension.slice'] = \
CpuCgroup('Microsoft.CPlat.Extension', CpuMetrics('Microsoft.CPlat.Extension',
'/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/azure-vmextensions-Microsoft.CPlat.Extension.slice') '/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/azure-vmextensions-Microsoft.CPlat.Extension.slice')
configurator.remove_extension_slice(extension_name="Microsoft.CPlat.Extension") configurator.remove_extension_slice(extension_name="Microsoft.CPlat.Extension")
@ -369,10 +369,10 @@ class CGroupConfiguratorSystemdTestCase(AgentTestCase):
configurator.setup_extension_slice(extension_name=extension_name, cpu_quota=5) configurator.setup_extension_slice(extension_name=extension_name, cpu_quota=5)
configurator.set_extension_services_cpu_memory_quota(service_list) configurator.set_extension_services_cpu_memory_quota(service_list)
CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service'] = \ CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service'] = \
CpuCgroup('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service') CpuMetrics('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service')
CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/' \ CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/' \
'azure-vmextensions-Microsoft.CPlat.Extension.slice'] = \ 'azure-vmextensions-Microsoft.CPlat.Extension.slice'] = \
CpuCgroup('Microsoft.CPlat.Extension', CpuMetrics('Microsoft.CPlat.Extension',
'/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/azure-vmextensions-Microsoft.CPlat.Extension.slice') '/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/azure-vmextensions-Microsoft.CPlat.Extension.slice')
configurator.disable("UNIT TEST", DisableCgroups.ALL) configurator.disable("UNIT TEST", DisableCgroups.ALL)
@ -717,7 +717,7 @@ class CGroupConfiguratorSystemdTestCase(AgentTestCase):
with self._get_cgroup_configurator() as configurator: with self._get_cgroup_configurator() as configurator:
with patch("os.path.exists") as mock_path: with patch("os.path.exists") as mock_path:
mock_path.return_value = True mock_path.return_value = True
CGroupsTelemetry.track_cgroup(CpuCgroup('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service')) CGroupsTelemetry.track_cgroup(CpuMetrics('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service'))
configurator.stop_tracking_extension_services_cgroups(service_list) configurator.stop_tracking_extension_services_cgroups(service_list)
tracked = CGroupsTelemetry._tracked tracked = CGroupsTelemetry._tracked
@ -776,7 +776,7 @@ class CGroupConfiguratorSystemdTestCase(AgentTestCase):
with patch("os.path.exists") as mock_path: with patch("os.path.exists") as mock_path:
mock_path.side_effect = side_effect mock_path.side_effect = side_effect
CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service'] = \ CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service'] = \
CpuCgroup('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service') CpuMetrics('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service')
configurator.stop_tracking_unit_cgroups("extension.service") configurator.stop_tracking_unit_cgroups("extension.service")
tracked = CGroupsTelemetry._tracked tracked = CGroupsTelemetry._tracked
@ -911,7 +911,7 @@ exit 0
agent_processes = [os.getppid(), os.getpid()] + agent_command_processes + [start_extension.systemd_run_pid] agent_processes = [os.getppid(), os.getpid()] + agent_command_processes + [start_extension.systemd_run_pid]
other_processes = [1, get_completed_process()] + extension_processes other_processes = [1, get_completed_process()] + extension_processes
with patch("azurelinuxagent.ga.cgroupapi._SystemdCgroupApi.get_processes_in_cgroup", return_value=agent_processes + other_processes): with patch("azurelinuxagent.ga.cgroupapi.CgroupV1.get_processes", return_value=agent_processes + other_processes):
with self.assertRaises(CGroupsException) as context_manager: with self.assertRaises(CGroupsException) as context_manager:
configurator._check_processes_in_agent_cgroup() configurator._check_processes_in_agent_cgroup()
@ -1012,7 +1012,7 @@ exit 0
with self.assertRaises(AgentMemoryExceededException) as context_manager: with self.assertRaises(AgentMemoryExceededException) as context_manager:
with self._get_cgroup_configurator() as configurator: with self._get_cgroup_configurator() as configurator:
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_tracked_metrics") as tracked_metrics: with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_tracked_metrics") as tracked_metrics:
tracked_metrics.return_value = metrics tracked_metrics.return_value = metrics
configurator.check_agent_memory_usage() configurator.check_agent_memory_usage()

Просмотреть файл

@ -19,7 +19,7 @@ import os
import random import random
import time import time
from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
from azurelinuxagent.common.utils import fileutil from azurelinuxagent.common.utils import fileutil
from tests.lib.tools import AgentTestCase, data_dir, patch from tests.lib.tools import AgentTestCase, data_dir, patch
@ -105,10 +105,10 @@ class TestCGroupsTelemetry(AgentTestCase):
@staticmethod @staticmethod
def _track_new_extension_cgroups(num_extensions): def _track_new_extension_cgroups(num_extensions):
for i in range(num_extensions): for i in range(num_extensions):
dummy_cpu_cgroup = CpuCgroup("dummy_extension_{0}".format(i), "dummy_cpu_path_{0}".format(i)) dummy_cpu_cgroup = CpuMetrics("dummy_extension_{0}".format(i), "dummy_cpu_path_{0}".format(i))
CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup) CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)
dummy_memory_cgroup = MemoryCgroup("dummy_extension_{0}".format(i), "dummy_memory_path_{0}".format(i)) dummy_memory_cgroup = MemoryMetrics("dummy_extension_{0}".format(i), "dummy_memory_path_{0}".format(i))
CGroupsTelemetry.track_cgroup(dummy_memory_cgroup) CGroupsTelemetry.track_cgroup(dummy_memory_cgroup)
def _assert_cgroups_are_tracked(self, num_extensions): def _assert_cgroups_are_tracked(self, num_extensions):
@ -136,12 +136,12 @@ class TestCGroupsTelemetry(AgentTestCase):
self._track_new_extension_cgroups(num_extensions) self._track_new_extension_cgroups(num_extensions)
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage") as patch_get_memory_max_usage: with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage") as patch_get_memory_max_usage:
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage: with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage") as patch_get_memory_usage:
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage: with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage") as patch_get_memory_usage:
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.try_swap_memory_usage") as patch_try_swap_memory_usage: with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.try_swap_memory_usage") as patch_try_swap_memory_usage:
with patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage") as patch_get_cpu_usage: with patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage") as patch_get_cpu_usage:
with patch("azurelinuxagent.ga.cgroup.CGroup.is_active") as patch_is_active: with patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") as patch_is_active:
patch_is_active.return_value = True patch_is_active.return_value = True
current_cpu = 30 current_cpu = 30
@ -163,10 +163,10 @@ class TestCGroupsTelemetry(AgentTestCase):
self.assertEqual(len(metrics), num_extensions * num_of_metrics_per_extn_expected) self.assertEqual(len(metrics), num_extensions * num_of_metrics_per_extn_expected)
self._assert_polled_metrics_equal(metrics, current_cpu, current_memory, current_max_memory, current_swap_memory) self._assert_polled_metrics_equal(metrics, current_cpu, current_memory, current_max_memory, current_swap_memory)
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage", side_effect=raise_ioerror) @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage", side_effect=raise_ioerror)
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage", side_effect=raise_ioerror) @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage", side_effect=raise_ioerror)
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage", side_effect=raise_ioerror) @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage", side_effect=raise_ioerror)
@patch("azurelinuxagent.ga.cgroup.CGroup.is_active", return_value=False) @patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active", return_value=False)
def test_telemetry_polling_with_inactive_cgroups(self, *_): def test_telemetry_polling_with_inactive_cgroups(self, *_):
num_extensions = 5 num_extensions = 5
no_extensions_expected = 0 # pylint: disable=unused-variable no_extensions_expected = 0 # pylint: disable=unused-variable
@ -182,10 +182,10 @@ class TestCGroupsTelemetry(AgentTestCase):
self.assertEqual(len(metrics), 0) self.assertEqual(len(metrics), 0)
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage") @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage")
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage")
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage") @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage")
@patch("azurelinuxagent.ga.cgroup.CGroup.is_active") @patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active")
def test_telemetry_polling_with_changing_cgroups_state(self, patch_is_active, patch_get_cpu_usage, # pylint: disable=unused-argument def test_telemetry_polling_with_changing_cgroups_state(self, patch_is_active, patch_get_cpu_usage, # pylint: disable=unused-argument
patch_get_mem, patch_get_max_mem, *args): patch_get_mem, patch_get_max_mem, *args):
num_extensions = 5 num_extensions = 5
@ -274,11 +274,11 @@ class TestCGroupsTelemetry(AgentTestCase):
CGroupsTelemetry.poll_all_tracked() CGroupsTelemetry.poll_all_tracked()
self.assertEqual(expected_call_count, patch_periodic_warn.call_count) self.assertEqual(expected_call_count, patch_periodic_warn.call_count)
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.try_swap_memory_usage") @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.try_swap_memory_usage")
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage") @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage")
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage")
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage") @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage")
@patch("azurelinuxagent.ga.cgroup.CGroup.is_active") @patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active")
def test_telemetry_calculations(self, patch_is_active, patch_get_cpu_usage, patch_get_memory_usage, patch_get_memory_max_usage, patch_try_memory_swap_usage, def test_telemetry_calculations(self, patch_is_active, patch_get_cpu_usage, patch_get_memory_usage, patch_get_memory_max_usage, patch_try_memory_swap_usage,
*args): # pylint: disable=unused-argument *args): # pylint: disable=unused-argument
num_polls = 10 num_polls = 10
@ -321,13 +321,13 @@ class TestCGroupsTelemetry(AgentTestCase):
self.assertFalse(CGroupsTelemetry.is_tracked("not_present_cpu_dummy_path")) self.assertFalse(CGroupsTelemetry.is_tracked("not_present_cpu_dummy_path"))
self.assertFalse(CGroupsTelemetry.is_tracked("not_present_memory_dummy_path")) self.assertFalse(CGroupsTelemetry.is_tracked("not_present_memory_dummy_path"))
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage", side_effect=raise_ioerror) @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage", side_effect=raise_ioerror)
def test_process_cgroup_metric_with_no_memory_cgroup_mounted(self, *args): # pylint: disable=unused-argument def test_process_cgroup_metric_with_no_memory_cgroup_mounted(self, *args): # pylint: disable=unused-argument
num_extensions = 5 num_extensions = 5
self._track_new_extension_cgroups(num_extensions) self._track_new_extension_cgroups(num_extensions)
with patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage") as patch_get_cpu_usage: with patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage") as patch_get_cpu_usage:
with patch("azurelinuxagent.ga.cgroup.CGroup.is_active") as patch_is_active: with patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") as patch_is_active:
patch_is_active.return_value = True patch_is_active.return_value = True
current_cpu = 30 current_cpu = 30
@ -341,16 +341,16 @@ class TestCGroupsTelemetry(AgentTestCase):
self.assertEqual(len(metrics), num_extensions * 1) # Only CPU populated self.assertEqual(len(metrics), num_extensions * 1) # Only CPU populated
self._assert_polled_metrics_equal(metrics, current_cpu, 0, 0, 0) self._assert_polled_metrics_equal(metrics, current_cpu, 0, 0, 0)
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage", side_effect=raise_ioerror) @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage", side_effect=raise_ioerror)
def test_process_cgroup_metric_with_no_cpu_cgroup_mounted(self, *args): # pylint: disable=unused-argument def test_process_cgroup_metric_with_no_cpu_cgroup_mounted(self, *args): # pylint: disable=unused-argument
num_extensions = 5 num_extensions = 5
self._track_new_extension_cgroups(num_extensions) self._track_new_extension_cgroups(num_extensions)
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage") as patch_get_memory_max_usage: with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage") as patch_get_memory_max_usage:
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage: with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage") as patch_get_memory_usage:
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.try_swap_memory_usage") as patch_try_swap_memory_usage: with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.try_swap_memory_usage") as patch_try_swap_memory_usage:
with patch("azurelinuxagent.ga.cgroup.CGroup.is_active") as patch_is_active: with patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") as patch_is_active:
patch_is_active.return_value = True patch_is_active.return_value = True
current_memory = 209715200 current_memory = 209715200
@ -367,14 +367,14 @@ class TestCGroupsTelemetry(AgentTestCase):
self.assertEqual(len(metrics), num_extensions * 3) self.assertEqual(len(metrics), num_extensions * 3)
self._assert_polled_metrics_equal(metrics, 0, current_memory, current_max_memory, current_swap_memory) self._assert_polled_metrics_equal(metrics, 0, current_memory, current_max_memory, current_swap_memory)
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage", side_effect=raise_ioerror) @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage", side_effect=raise_ioerror)
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage", side_effect=raise_ioerror) @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage", side_effect=raise_ioerror)
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage", side_effect=raise_ioerror) @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage", side_effect=raise_ioerror)
def test_extension_telemetry_not_sent_for_empty_perf_metrics(self, *args): # pylint: disable=unused-argument def test_extension_telemetry_not_sent_for_empty_perf_metrics(self, *args): # pylint: disable=unused-argument
num_extensions = 5 num_extensions = 5
self._track_new_extension_cgroups(num_extensions) self._track_new_extension_cgroups(num_extensions)
with patch("azurelinuxagent.ga.cgroup.CGroup.is_active") as patch_is_active: with patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") as patch_is_active:
patch_is_active.return_value = False patch_is_active.return_value = False
poll_count = 1 poll_count = 1
@ -383,9 +383,9 @@ class TestCGroupsTelemetry(AgentTestCase):
metrics = CGroupsTelemetry.poll_all_tracked() metrics = CGroupsTelemetry.poll_all_tracked()
self.assertEqual(0, len(metrics)) self.assertEqual(0, len(metrics))
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage") @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage")
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_throttled_time") @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_throttled_time")
@patch("azurelinuxagent.ga.cgroup.CGroup.is_active") @patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active")
def test_cgroup_telemetry_should_not_report_cpu_negative_value(self, patch_is_active, path_get_throttled_time, patch_get_cpu_usage): def test_cgroup_telemetry_should_not_report_cpu_negative_value(self, patch_is_active, path_get_throttled_time, patch_get_cpu_usage):
num_polls = 5 num_polls = 5
@ -396,7 +396,7 @@ class TestCGroupsTelemetry(AgentTestCase):
cpu_percent_values.append(-1) cpu_percent_values.append(-1)
cpu_throttled_values = [random.randint(0, 60 * 60) for _ in range(num_polls)] cpu_throttled_values = [random.randint(0, 60 * 60) for _ in range(num_polls)]
dummy_cpu_cgroup = CpuCgroup("dummy_extension_name", "dummy_cpu_path") dummy_cpu_cgroup = CpuMetrics("dummy_extension_name", "dummy_cpu_path")
CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup) CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)
self.assertEqual(1, len(CGroupsTelemetry._tracked)) self.assertEqual(1, len(CGroupsTelemetry._tracked))

Просмотреть файл

@ -18,7 +18,7 @@ import contextlib
import os import os
from azurelinuxagent.common import logger, conf from azurelinuxagent.common import logger, conf
from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup, MetricValue from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics, MetricValue
from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator
from azurelinuxagent.common.logger import Logger from azurelinuxagent.common.logger import Logger
from azurelinuxagent.common.protocol.util import ProtocolUtil from azurelinuxagent.common.protocol.util import ProtocolUtil
@ -197,8 +197,8 @@ def _create_log_collector_monitor_handler(iterations=1):
monitor_log_collector.join() monitor_log_collector.join()
cgroups = [ cgroups = [
CpuCgroup("test", "dummy_cpu_path"), CpuMetrics("test", "dummy_cpu_path"),
MemoryCgroup("test", "dummy_memory_path") MemoryMetrics("test", "dummy_memory_path")
] ]
monitor_log_collector = get_log_collector_monitor_handler(cgroups) monitor_log_collector = get_log_collector_monitor_handler(cgroups)
monitor_log_collector.run_and_wait = run_and_wait monitor_log_collector.run_and_wait = run_and_wait

Просмотреть файл

@ -22,7 +22,7 @@ import os
import random import random
import shutil import shutil
from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup, MetricsCounter, CounterNotFound from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics, MetricsCounter, CounterNotFound
from azurelinuxagent.common.exception import CGroupsException from azurelinuxagent.common.exception import CGroupsException
from azurelinuxagent.common.osutil import get_osutil from azurelinuxagent.common.osutil import get_osutil
from azurelinuxagent.common.utils import fileutil from azurelinuxagent.common.utils import fileutil
@ -36,35 +36,35 @@ def consume_cpu_time():
return waste return waste
class TestCGroup(AgentTestCase): class TestControllerMetrics(AgentTestCase):
def test_is_active(self): def test_is_active(self):
test_cgroup = CpuCgroup("test_extension", self.tmp_dir) test_metrics = CpuMetrics("test_extension", self.tmp_dir)
self.assertEqual(False, test_cgroup.is_active()) self.assertEqual(False, test_metrics.is_active())
with open(os.path.join(self.tmp_dir, "tasks"), mode="wb") as tasks: with open(os.path.join(self.tmp_dir, "tasks"), mode="wb") as tasks:
tasks.write(str(1000).encode()) tasks.write(str(1000).encode())
self.assertEqual(True, test_cgroup.is_active()) self.assertEqual(True, test_metrics.is_active())
@patch("azurelinuxagent.common.logger.periodic_warn") @patch("azurelinuxagent.common.logger.periodic_warn")
def test_is_active_file_not_present(self, patch_periodic_warn): def test_is_active_file_not_present(self, patch_periodic_warn):
test_cgroup = CpuCgroup("test_extension", self.tmp_dir) test_metrics = CpuMetrics("test_extension", self.tmp_dir)
self.assertEqual(False, test_cgroup.is_active()) self.assertEqual(False, test_metrics.is_active())
test_cgroup = MemoryCgroup("test_extension", os.path.join(self.tmp_dir, "this_cgroup_does_not_exist")) test_metrics = MemoryMetrics("test_extension", os.path.join(self.tmp_dir, "this_cgroup_does_not_exist"))
self.assertEqual(False, test_cgroup.is_active()) self.assertEqual(False, test_metrics.is_active())
self.assertEqual(0, patch_periodic_warn.call_count) self.assertEqual(0, patch_periodic_warn.call_count)
@patch("azurelinuxagent.common.logger.periodic_warn") @patch("azurelinuxagent.common.logger.periodic_warn")
def test_is_active_incorrect_file(self, patch_periodic_warn): def test_is_active_incorrect_file(self, patch_periodic_warn):
open(os.path.join(self.tmp_dir, "tasks"), mode="wb").close() open(os.path.join(self.tmp_dir, "tasks"), mode="wb").close()
test_cgroup = CpuCgroup("test_extension", os.path.join(self.tmp_dir, "tasks")) test_metrics = CpuMetrics("test_extension", os.path.join(self.tmp_dir, "tasks"))
self.assertEqual(False, test_cgroup.is_active()) self.assertEqual(False, test_metrics.is_active())
self.assertEqual(1, patch_periodic_warn.call_count) self.assertEqual(1, patch_periodic_warn.call_count)
class TestCpuCgroup(AgentTestCase): class TestCpuMetrics(AgentTestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
AgentTestCase.setUpClass() AgentTestCase.setUpClass()
@ -96,147 +96,147 @@ class TestCpuCgroup(AgentTestCase):
def setUp(self): def setUp(self):
AgentTestCase.setUp(self) AgentTestCase.setUp(self)
TestCpuCgroup.mock_read_file_map.clear() TestCpuMetrics.mock_read_file_map.clear()
def test_initialize_cpu_usage_should_set_current_cpu_usage(self): def test_initialize_cpu_usage_should_set_current_cpu_usage(self):
cgroup = CpuCgroup("test", "/sys/fs/cgroup/cpu/system.slice/test") metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test")
TestCpuCgroup.mock_read_file_map = { TestCpuMetrics.mock_read_file_map = {
"/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"), "/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"),
os.path.join(cgroup.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0") os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0")
} }
cgroup.initialize_cpu_usage() metrics.initialize_cpu_usage()
self.assertEqual(cgroup._current_cgroup_cpu, 63763) self.assertEqual(metrics._current_cgroup_cpu, 63763)
self.assertEqual(cgroup._current_system_cpu, 5496872) self.assertEqual(metrics._current_system_cpu, 5496872)
def test_get_cpu_usage_should_return_the_cpu_usage_since_its_last_invocation(self): def test_get_cpu_usage_should_return_the_cpu_usage_since_its_last_invocation(self):
osutil = get_osutil() osutil = get_osutil()
cgroup = CpuCgroup("test", "/sys/fs/cgroup/cpu/system.slice/test") metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test")
TestCpuCgroup.mock_read_file_map = { TestCpuMetrics.mock_read_file_map = {
"/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"), "/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"),
os.path.join(cgroup.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0") os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0")
} }
cgroup.initialize_cpu_usage() metrics.initialize_cpu_usage()
TestCpuCgroup.mock_read_file_map = { TestCpuMetrics.mock_read_file_map = {
"/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t1"), "/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t1"),
os.path.join(cgroup.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t1") os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t1")
} }
cpu_usage = cgroup.get_cpu_usage() cpu_usage = metrics.get_cpu_usage()
self.assertEqual(cpu_usage, round(100.0 * 0.000307697876885 * osutil.get_processor_cores(), 3)) self.assertEqual(cpu_usage, round(100.0 * 0.000307697876885 * osutil.get_processor_cores(), 3))
TestCpuCgroup.mock_read_file_map = { TestCpuMetrics.mock_read_file_map = {
"/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t2"), "/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t2"),
os.path.join(cgroup.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t2") os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t2")
} }
cpu_usage = cgroup.get_cpu_usage() cpu_usage = metrics.get_cpu_usage()
self.assertEqual(cpu_usage, round(100.0 * 0.000445181085968 * osutil.get_processor_cores(), 3)) self.assertEqual(cpu_usage, round(100.0 * 0.000445181085968 * osutil.get_processor_cores(), 3))
def test_initialize_cpu_usage_should_set_the_cgroup_usage_to_0_when_the_cgroup_does_not_exist(self): def test_initialize_cpu_usage_should_set_the_cgroup_usage_to_0_when_the_cgroup_does_not_exist(self):
cgroup = CpuCgroup("test", "/sys/fs/cgroup/cpu/system.slice/test") metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test")
io_error_2 = IOError() io_error_2 = IOError()
io_error_2.errno = errno.ENOENT # "No such directory" io_error_2.errno = errno.ENOENT # "No such directory"
TestCpuCgroup.mock_read_file_map = { TestCpuMetrics.mock_read_file_map = {
"/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"), "/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"),
os.path.join(cgroup.path, "cpuacct.stat"): io_error_2 os.path.join(metrics.path, "cpuacct.stat"): io_error_2
} }
cgroup.initialize_cpu_usage() metrics.initialize_cpu_usage()
self.assertEqual(cgroup._current_cgroup_cpu, 0) self.assertEqual(metrics._current_cgroup_cpu, 0)
self.assertEqual(cgroup._current_system_cpu, 5496872) # check the system usage just for test sanity self.assertEqual(metrics._current_system_cpu, 5496872) # check the system usage just for test sanity
def test_initialize_cpu_usage_should_raise_an_exception_when_called_more_than_once(self): def test_initialize_cpu_usage_should_raise_an_exception_when_called_more_than_once(self):
cgroup = CpuCgroup("test", "/sys/fs/cgroup/cpu/system.slice/test") metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test")
TestCpuCgroup.mock_read_file_map = { TestCpuMetrics.mock_read_file_map = {
"/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"), "/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"),
os.path.join(cgroup.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0") os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0")
} }
cgroup.initialize_cpu_usage() metrics.initialize_cpu_usage()
with self.assertRaises(CGroupsException): with self.assertRaises(CGroupsException):
cgroup.initialize_cpu_usage() metrics.initialize_cpu_usage()
def test_get_cpu_usage_should_raise_an_exception_when_initialize_cpu_usage_has_not_been_invoked(self): def test_get_cpu_usage_should_raise_an_exception_when_initialize_cpu_usage_has_not_been_invoked(self):
cgroup = CpuCgroup("test", "/sys/fs/cgroup/cpu/system.slice/test") metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test")
with self.assertRaises(CGroupsException): with self.assertRaises(CGroupsException):
cpu_usage = cgroup.get_cpu_usage() # pylint: disable=unused-variable cpu_usage = metrics.get_cpu_usage() # pylint: disable=unused-variable
def test_get_throttled_time_should_return_the_value_since_its_last_invocation(self): def test_get_throttled_time_should_return_the_value_since_its_last_invocation(self):
test_file = os.path.join(self.tmp_dir, "cpu.stat") test_file = os.path.join(self.tmp_dir, "cpu.stat")
shutil.copyfile(os.path.join(data_dir, "cgroups", "cpu.stat_t0"), test_file) # throttled_time = 50 shutil.copyfile(os.path.join(data_dir, "cgroups", "cpu.stat_t0"), test_file) # throttled_time = 50
cgroup = CpuCgroup("test", self.tmp_dir) metrics = CpuMetrics("test", self.tmp_dir)
cgroup.initialize_cpu_usage() metrics.initialize_cpu_usage()
shutil.copyfile(os.path.join(data_dir, "cgroups", "cpu.stat_t1"), test_file) # throttled_time = 2075541442327 shutil.copyfile(os.path.join(data_dir, "cgroups", "cpu.stat_t1"), test_file) # throttled_time = 2075541442327
throttled_time = cgroup.get_cpu_throttled_time() throttled_time = metrics.get_cpu_throttled_time()
self.assertEqual(throttled_time, float(2075541442327 - 50) / 1E9, "The value of throttled_time is incorrect") self.assertEqual(throttled_time, float(2075541442327 - 50) / 1E9, "The value of throttled_time is incorrect")
def test_get_tracked_metrics_should_return_the_throttled_time(self): def test_get_tracked_metrics_should_return_the_throttled_time(self):
cgroup = CpuCgroup("test", os.path.join(data_dir, "cgroups")) metrics = CpuMetrics("test", os.path.join(data_dir, "cgroups"))
cgroup.initialize_cpu_usage() metrics.initialize_cpu_usage()
def find_throttled_time(metrics): def find_throttled_time(metrics):
return [m for m in metrics if m.counter == MetricsCounter.THROTTLED_TIME] return [m for m in metrics if m.counter == MetricsCounter.THROTTLED_TIME]
found = find_throttled_time(cgroup.get_tracked_metrics()) found = find_throttled_time(metrics.get_tracked_metrics())
self.assertTrue(len(found) == 0, "get_tracked_metrics should not fetch the throttled time by default. Found: {0}".format(found)) self.assertTrue(len(found) == 0, "get_tracked_metrics should not fetch the throttled time by default. Found: {0}".format(found))
found = find_throttled_time(cgroup.get_tracked_metrics(track_throttled_time=True)) found = find_throttled_time(metrics.get_tracked_metrics(track_throttled_time=True))
self.assertTrue(len(found) == 1, "get_tracked_metrics should have fetched the throttled time by default. Found: {0}".format(found)) self.assertTrue(len(found) == 1, "get_tracked_metrics should have fetched the throttled time by default. Found: {0}".format(found))
class TestMemoryCgroup(AgentTestCase): class TestMemoryMetrics(AgentTestCase):
def test_get_metrics(self): def test_get_metrics(self):
test_mem_cg = MemoryCgroup("test_extension", os.path.join(data_dir, "cgroups", "memory_mount")) test_mem_metrics = MemoryMetrics("test_extension", os.path.join(data_dir, "cgroups", "memory_mount"))
memory_usage = test_mem_cg.get_memory_usage() memory_usage = test_mem_metrics.get_memory_usage()
self.assertEqual(150000, memory_usage) self.assertEqual(150000, memory_usage)
max_memory_usage = test_mem_cg.get_max_memory_usage() max_memory_usage = test_mem_metrics.get_max_memory_usage()
self.assertEqual(1000000, max_memory_usage) self.assertEqual(1000000, max_memory_usage)
swap_memory_usage = test_mem_cg.try_swap_memory_usage() swap_memory_usage = test_mem_metrics.try_swap_memory_usage()
self.assertEqual(20000, swap_memory_usage) self.assertEqual(20000, swap_memory_usage)
def test_get_metrics_when_files_not_present(self): def test_get_metrics_when_files_not_present(self):
test_mem_cg = MemoryCgroup("test_extension", os.path.join(data_dir, "cgroups")) test_mem_metrics = MemoryMetrics("test_extension", os.path.join(data_dir, "cgroups"))
with self.assertRaises(IOError) as e: with self.assertRaises(IOError) as e:
test_mem_cg.get_memory_usage() test_mem_metrics.get_memory_usage()
self.assertEqual(e.exception.errno, errno.ENOENT) self.assertEqual(e.exception.errno, errno.ENOENT)
with self.assertRaises(IOError) as e: with self.assertRaises(IOError) as e:
test_mem_cg.get_max_memory_usage() test_mem_metrics.get_max_memory_usage()
self.assertEqual(e.exception.errno, errno.ENOENT) self.assertEqual(e.exception.errno, errno.ENOENT)
with self.assertRaises(IOError) as e: with self.assertRaises(IOError) as e:
test_mem_cg.try_swap_memory_usage() test_mem_metrics.try_swap_memory_usage()
self.assertEqual(e.exception.errno, errno.ENOENT) self.assertEqual(e.exception.errno, errno.ENOENT)
def test_get_memory_usage_counters_not_found(self): def test_get_memory_usage_counters_not_found(self):
test_mem_cg = MemoryCgroup("test_extension", os.path.join(data_dir, "cgroups", "missing_memory_counters")) test_mem_metrics = MemoryMetrics("test_extension", os.path.join(data_dir, "cgroups", "missing_memory_counters"))
with self.assertRaises(CounterNotFound): with self.assertRaises(CounterNotFound):
test_mem_cg.get_memory_usage() test_mem_metrics.get_memory_usage()
swap_memory_usage = test_mem_cg.try_swap_memory_usage() swap_memory_usage = test_mem_metrics.try_swap_memory_usage()
self.assertEqual(0, swap_memory_usage) self.assertEqual(0, swap_memory_usage)

Просмотреть файл

@ -21,7 +21,7 @@ import random
import string import string
from azurelinuxagent.common import event, logger from azurelinuxagent.common import event, logger
from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup, MetricValue, _REPORT_EVERY_HOUR from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics, MetricValue, _REPORT_EVERY_HOUR
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
from azurelinuxagent.common.event import EVENTS_DIRECTORY from azurelinuxagent.common.event import EVENTS_DIRECTORY
from azurelinuxagent.common.protocol.healthservice import HealthService from azurelinuxagent.common.protocol.healthservice import HealthService
@ -222,7 +222,7 @@ class TestExtensionMetricsDataTelemetry(AgentTestCase):
self.assertEqual(0, patch_add_metric.call_count) self.assertEqual(0, patch_add_metric.call_count)
@patch('azurelinuxagent.common.event.EventLogger.add_metric') @patch('azurelinuxagent.common.event.EventLogger.add_metric')
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") @patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage")
@patch('azurelinuxagent.common.logger.Logger.periodic_warn') @patch('azurelinuxagent.common.logger.Logger.periodic_warn')
def test_send_extension_metrics_telemetry_handling_memory_cgroup_exceptions_errno2(self, patch_periodic_warn, # pylint: disable=unused-argument def test_send_extension_metrics_telemetry_handling_memory_cgroup_exceptions_errno2(self, patch_periodic_warn, # pylint: disable=unused-argument
patch_get_memory_usage, patch_get_memory_usage,
@ -231,14 +231,14 @@ class TestExtensionMetricsDataTelemetry(AgentTestCase):
ioerror.errno = 2 ioerror.errno = 2
patch_get_memory_usage.side_effect = ioerror patch_get_memory_usage.side_effect = ioerror
CGroupsTelemetry._tracked["/test/path"] = MemoryCgroup("cgroup_name", "/test/path") CGroupsTelemetry._tracked["/test/path"] = MemoryMetrics("_cgroup_name", "/test/path")
PollResourceUsage().run() PollResourceUsage().run()
self.assertEqual(0, patch_periodic_warn.call_count) self.assertEqual(0, patch_periodic_warn.call_count)
self.assertEqual(0, patch_add_metric.call_count) # No metrics should be sent. self.assertEqual(0, patch_add_metric.call_count) # No metrics should be sent.
@patch('azurelinuxagent.common.event.EventLogger.add_metric') @patch('azurelinuxagent.common.event.EventLogger.add_metric')
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage") @patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage")
@patch('azurelinuxagent.common.logger.Logger.periodic_warn') @patch('azurelinuxagent.common.logger.Logger.periodic_warn')
def test_send_extension_metrics_telemetry_handling_cpu_cgroup_exceptions_errno2(self, patch_periodic_warn, # pylint: disable=unused-argument def test_send_extension_metrics_telemetry_handling_cpu_cgroup_exceptions_errno2(self, patch_periodic_warn, # pylint: disable=unused-argument
patch_cpu_usage, patch_add_metric, patch_cpu_usage, patch_add_metric,
@ -247,7 +247,7 @@ class TestExtensionMetricsDataTelemetry(AgentTestCase):
ioerror.errno = 2 ioerror.errno = 2
patch_cpu_usage.side_effect = ioerror patch_cpu_usage.side_effect = ioerror
CGroupsTelemetry._tracked["/test/path"] = CpuCgroup("cgroup_name", "/test/path") CGroupsTelemetry._tracked["/test/path"] = CpuMetrics("_cgroup_name", "/test/path")
PollResourceUsage().run() PollResourceUsage().run()
self.assertEqual(0, patch_periodic_warn.call_count) self.assertEqual(0, patch_periodic_warn.call_count)

Просмотреть файл

@ -122,7 +122,9 @@ _MOCKED_COMMANDS_HYBRID = [
_MOCKED_FILES_V1 = [ _MOCKED_FILES_V1 = [
("/proc/self/cgroup", os.path.join(data_dir, 'cgroups', 'v1', 'proc_self_cgroup')), ("/proc/self/cgroup", os.path.join(data_dir, 'cgroups', 'v1', 'proc_self_cgroup')),
(r"/proc/[0-9]+/cgroup", os.path.join(data_dir, 'cgroups', 'v1', 'proc_pid_cgroup')) (r"/proc/[0-9]+/cgroup", os.path.join(data_dir, 'cgroups', 'v1', 'proc_pid_cgroup')),
(r"/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service/cgroup.procs", os.path.join(data_dir, 'cgroups', 'cgroup.procs')),
(r"/sys/fs/cgroup/memory/system.slice/walinuxagent.service/cgroup.procs", os.path.join(data_dir, 'cgroups', 'cgroup.procs'))
] ]
_MOCKED_FILES_V2 = [ _MOCKED_FILES_V2 = [
@ -130,7 +132,8 @@ _MOCKED_FILES_V2 = [
(r"/proc/[0-9]+/cgroup", os.path.join(data_dir, 'cgroups', 'v2', 'proc_pid_cgroup')), (r"/proc/[0-9]+/cgroup", os.path.join(data_dir, 'cgroups', 'v2', 'proc_pid_cgroup')),
("/sys/fs/cgroup/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control')), ("/sys/fs/cgroup/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control')),
("/sys/fs/cgroup/azure.slice/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control')), ("/sys/fs/cgroup/azure.slice/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control')),
("/sys/fs/cgroup/azure.slice/walinuxagent.service/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control_empty')) ("/sys/fs/cgroup/azure.slice/walinuxagent.service/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control_empty')),
(r"/sys/fs/cgroup/system.slice/walinuxagent.service/cgroup.procs", os.path.join(data_dir, 'cgroups', 'cgroup.procs'))
] ]
_MOCKED_FILES_HYBRID = [ _MOCKED_FILES_HYBRID = [

Просмотреть файл

@ -24,8 +24,9 @@ from azurelinuxagent.common import conf
from azurelinuxagent.common.exception import CGroupsException from azurelinuxagent.common.exception import CGroupsException
from azurelinuxagent.ga import logcollector, cgroupconfigurator from azurelinuxagent.ga import logcollector, cgroupconfigurator
from azurelinuxagent.common.utils import fileutil from azurelinuxagent.common.utils import fileutil
from azurelinuxagent.ga.cgroupapi import get_cgroup_api, InvalidCgroupMountpointException from azurelinuxagent.ga.cgroupapi import InvalidCgroupMountpointException, CgroupV1
from azurelinuxagent.ga.collect_logs import CollectLogsHandler from azurelinuxagent.ga.collect_logs import CollectLogsHandler
from azurelinuxagent.ga.controllermetrics import AGENT_LOG_COLLECTOR
from tests.lib.mock_cgroup_environment import mock_cgroup_v1_environment from tests.lib.mock_cgroup_environment import mock_cgroup_v1_environment
from tests.lib.tools import AgentTestCase, data_dir, Mock, patch from tests.lib.tools import AgentTestCase, data_dir, Mock, patch
@ -247,16 +248,24 @@ class TestAgent(AgentTestCase):
CollectLogsHandler.enable_monitor_cgroups_check() CollectLogsHandler.enable_monitor_cgroups_check()
mock_log_collector.run = Mock() mock_log_collector.run = Mock()
# Mock cgroup paths so process is in the log collector slice # Mock cgroup so process is in the log collector slice
def mock_cgroup_paths(*args, **kwargs): def mock_cgroup(*args, **kwargs): # pylint: disable=W0613
if args and args[0] == "self": relative_path = "{0}/{1}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE, logcollector.CGROUPS_UNIT)
relative_path = "{0}/{1}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE, logcollector.CGROUPS_UNIT) return CgroupV1(
return (relative_path, relative_path) cgroup_name=AGENT_LOG_COLLECTOR,
return get_cgroup_api().get_process_cgroup_relative_paths(*args, **kwargs) controller_mountpoints={
'cpu,cpuacct':"/sys/fs/cgroup/cpu,cpuacct",
'memory':"/sys/fs/cgroup/memory"
},
controller_paths={
'cpu,cpuacct':"/sys/fs/cgroup/cpu,cpuacct/{0}".format(relative_path),
'memory':"/sys/fs/cgroup/memory/{0}".format(relative_path)
}
)
with mock_cgroup_v1_environment(self.tmp_dir): with mock_cgroup_v1_environment(self.tmp_dir):
with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_paths", with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup",
side_effect=mock_cgroup_paths): side_effect=mock_cgroup):
agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf")) agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf"))
agent.collect_logs(is_full_mode=True) agent.collect_logs(is_full_mode=True)
@ -296,17 +305,26 @@ class TestAgent(AgentTestCase):
CollectLogsHandler.enable_monitor_cgroups_check() CollectLogsHandler.enable_monitor_cgroups_check()
mock_log_collector.run = Mock() mock_log_collector.run = Mock()
# Mock cgroup paths so process is in incorrect slice # Mock cgroup so process is in incorrect slice
def mock_cgroup_paths(*args, **kwargs): def mock_cgroup(*args, **kwargs): # pylint: disable=W0613
if args and args[0] == "self": relative_path = "NOT_THE_CORRECT_PATH"
return ("NOT_THE_CORRECT_PATH", "NOT_THE_CORRECT_PATH") return CgroupV1(
return get_cgroup_api().get_process_cgroup_relative_paths(*args, **kwargs) cgroup_name=AGENT_LOG_COLLECTOR,
controller_mountpoints={
'cpu,cpuacct': "/sys/fs/cgroup/cpu,cpuacct",
'memory': "/sys/fs/cgroup/memory"
},
controller_paths={
'cpu,cpuacct': "/sys/fs/cgroup/cpu,cpuacct/{0}".format(relative_path),
'memory': "/sys/fs/cgroup/memory/{0}".format(relative_path)
}
)
def raise_on_sys_exit(*args): def raise_on_sys_exit(*args):
raise RuntimeError(args[0] if args else "Exiting") raise RuntimeError(args[0] if args else "Exiting")
with mock_cgroup_v1_environment(self.tmp_dir): with mock_cgroup_v1_environment(self.tmp_dir):
with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_paths", side_effect=mock_cgroup_paths): with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup", side_effect=mock_cgroup):
agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf")) agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf"))
with patch("sys.exit", side_effect=raise_on_sys_exit) as mock_exit: with patch("sys.exit", side_effect=raise_on_sys_exit) as mock_exit:
@ -346,19 +364,25 @@ class TestAgent(AgentTestCase):
CollectLogsHandler.enable_monitor_cgroups_check() CollectLogsHandler.enable_monitor_cgroups_check()
mock_log_collector.run = Mock() mock_log_collector.run = Mock()
# Mock cgroup paths so process is in the log collector slice and cpu is not mounted # Mock cgroup so process is in the log collector slice and cpu is not mounted
def mock_cgroup_paths(*args, **kwargs): def mock_cgroup(*args, **kwargs): # pylint: disable=W0613
if args and args[0] == "self": relative_path = "{0}/{1}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE, logcollector.CGROUPS_UNIT)
relative_path = "{0}/{1}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE, logcollector.CGROUPS_UNIT) return CgroupV1(
return (None, relative_path) cgroup_name=AGENT_LOG_COLLECTOR,
return get_cgroup_api().get_process_cgroup_relative_paths(*args, **kwargs) controller_mountpoints={
'memory': "/sys/fs/cgroup/memory"
},
controller_paths={
'memory': "/sys/fs/cgroup/memory/{0}".format(relative_path)
}
)
def raise_on_sys_exit(*args): def raise_on_sys_exit(*args):
raise RuntimeError(args[0] if args else "Exiting") raise RuntimeError(args[0] if args else "Exiting")
with mock_cgroup_v1_environment(self.tmp_dir): with mock_cgroup_v1_environment(self.tmp_dir):
with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_paths", with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup",
side_effect=mock_cgroup_paths): side_effect=mock_cgroup):
agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf")) agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf"))
with patch("sys.exit", side_effect=raise_on_sys_exit) as mock_exit: with patch("sys.exit", side_effect=raise_on_sys_exit) as mock_exit:

Просмотреть файл

@ -7,7 +7,7 @@ from assertpy import assert_that, fail
from azurelinuxagent.common.osutil import systemd from azurelinuxagent.common.osutil import systemd
from azurelinuxagent.common.utils import shellutil from azurelinuxagent.common.utils import shellutil
from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION
from azurelinuxagent.ga.cgroupapi import get_cgroup_api from azurelinuxagent.ga.cgroupapi import get_cgroup_api, SystemdCgroupApiv1
from tests_e2e.tests.lib.agent_log import AgentLog from tests_e2e.tests.lib.agent_log import AgentLog
from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.logging import log
from tests_e2e.tests.lib.retry import retry_if_false from tests_e2e.tests.lib.retry import retry_if_false
@ -164,9 +164,14 @@ def check_log_message(message, after_timestamp=datetime.datetime.min):
return False return False
def get_unit_cgroup_paths(unit_name): def get_unit_cgroup_proc_path(unit_name, controller):
""" """
Returns the cgroup paths for the given unit Returns the cgroup.procs path for the given unit and controller.
""" """
cgroups_api = get_cgroup_api() cgroups_api = get_cgroup_api()
return cgroups_api.get_unit_cgroup_paths(unit_name) unit_cgroup = cgroups_api.get_unit_cgroup(unit_name=unit_name, cgroup_name="test cgroup")
if isinstance(cgroups_api, SystemdCgroupApiv1):
return unit_cgroup.get_controller_procs_path(controller=controller)
else:
return unit_cgroup.get_procs_path()

Просмотреть файл

@ -18,14 +18,13 @@
# This script forces the process check by putting unknown process in the agent's cgroup # This script forces the process check by putting unknown process in the agent's cgroup
import os
import subprocess import subprocess
import datetime import datetime
from assertpy import fail from assertpy import fail
from azurelinuxagent.common.utils import shellutil from azurelinuxagent.common.utils import shellutil
from tests_e2e.tests.lib.cgroup_helpers import check_agent_quota_disabled, check_log_message, get_unit_cgroup_paths, AGENT_SERVICE_NAME from tests_e2e.tests.lib.cgroup_helpers import check_agent_quota_disabled, check_log_message, get_unit_cgroup_proc_path, AGENT_SERVICE_NAME
from tests_e2e.tests.lib.logging import log from tests_e2e.tests.lib.logging import log
from tests_e2e.tests.lib.retry import retry_if_false from tests_e2e.tests.lib.retry import retry_if_false
@ -62,8 +61,8 @@ def disable_agent_cgroups_with_unknown_process(pid):
Note: System may kick the added process out of the cgroups, keeps adding until agent detect that process Note: System may kick the added process out of the cgroups, keeps adding until agent detect that process
""" """
def unknown_process_found(cpu_cgroup): def unknown_process_found():
cgroup_procs_path = os.path.join(cpu_cgroup, "cgroup.procs") cgroup_procs_path = get_unit_cgroup_proc_path(AGENT_SERVICE_NAME, 'cpu,cpuacct')
log.info("Adding dummy process %s to cgroup.procs file %s", pid, cgroup_procs_path) log.info("Adding dummy process %s to cgroup.procs file %s", pid, cgroup_procs_path)
try: try:
with open(cgroup_procs_path, 'a') as f: with open(cgroup_procs_path, 'a') as f:
@ -81,9 +80,7 @@ def disable_agent_cgroups_with_unknown_process(pid):
pid)), attempts=3) pid)), attempts=3)
return found and retry_if_false(check_agent_quota_disabled, attempts=3) return found and retry_if_false(check_agent_quota_disabled, attempts=3)
cpu_cgroup, _ = get_unit_cgroup_paths(AGENT_SERVICE_NAME) found: bool = retry_if_false(unknown_process_found, attempts=3)
found: bool = retry_if_false(lambda: unknown_process_found(cpu_cgroup), attempts=3)
if not found: if not found:
fail("The agent did not detect unknown process: {0}".format(pid)) fail("The agent did not detect unknown process: {0}".format(pid))