* refactor cgroup controllers (#13)

* Refactor Cgroup, CpuCgroup, MemoryCgroup to ControllerMetrics, CpuMetrics, MemoryMetrics

* Create methods to get unit/process cgroup representation

* Refactoring changes

* Refactoring changes

* Fix e2e test

* Fix unintentional comment change

* Remove unneeded comments

* Clean up comments and make code more readable

* Simplify get controller metrics

* Clean up cgroupapi

* Cleanup cgroup -> controllermetrics changes

* Clean up cgroup configurator

* Fix unit tests for agent.py

* Fix cgroupapi tests

* Fix cgroupconfigurator and tests

* Rename controller metrics tests

* Ignore pylint issues

* Improve test coverage for cgroupapi

* Rename cgroup to metrics

* Update cgroup.procs to accurately represent file

* Do not track metrics if controller is not mounted

* We should set cpu quota before tracking cpu metrics

* Pylint

* address pr comments (#14)

* Address Nag's comments

* pylint

* pylint

* remove lambda (#15)
This commit is contained in:
maddieford 2024-06-16 11:41:57 -07:00 коммит произвёл GitHub
Родитель cc6501d6dd
Коммит 610e12b3f1
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
20 изменённых файлов: 983 добавлений и 663 удалений

Просмотреть файл

@ -31,7 +31,7 @@ import threading
from azurelinuxagent.common.exception import CGroupsException
from azurelinuxagent.ga import logcollector, cgroupconfigurator
from azurelinuxagent.ga.cgroup import AGENT_LOG_COLLECTOR, CpuCgroup, MemoryCgroup
from azurelinuxagent.ga.controllermetrics import AGENT_LOG_COLLECTOR, CpuMetrics
from azurelinuxagent.ga.cgroupapi import get_cgroup_api, log_cgroup_warning, InvalidCgroupMountpointException
import azurelinuxagent.common.conf as conf
@ -208,8 +208,7 @@ class Agent(object):
# Check the cgroups unit
log_collector_monitor = None
cpu_cgroup_path = None
memory_cgroup_path = None
tracked_metrics = []
if CollectLogsHandler.is_enabled_monitor_cgroups_check():
try:
cgroup_api = get_cgroup_api()
@ -220,40 +219,27 @@ class Agent(object):
log_cgroup_warning("Unable to determine which cgroup version to use: {0}".format(ustr(e)), send_event=True)
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)
cpu_cgroup_path, memory_cgroup_path = cgroup_api.get_process_cgroup_paths("self")
cpu_slice_matches = False
memory_slice_matches = False
if cpu_cgroup_path is not None:
cpu_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in cpu_cgroup_path)
if memory_cgroup_path is not None:
memory_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in memory_cgroup_path)
if not cpu_slice_matches or not memory_slice_matches:
log_cgroup_warning("The Log Collector process is not in the proper cgroups:", send_event=False)
if not cpu_slice_matches:
log_cgroup_warning("\tunexpected cpu slice: {0}".format(cpu_cgroup_path), send_event=False)
if not memory_slice_matches:
log_cgroup_warning("\tunexpected memory slice: {0}".format(memory_cgroup_path), send_event=False)
log_collector_cgroup = cgroup_api.get_process_cgroup(process_id="self", cgroup_name=AGENT_LOG_COLLECTOR)
tracked_metrics = log_collector_cgroup.get_controller_metrics()
if len(tracked_metrics) != len(log_collector_cgroup.get_supported_controllers()):
log_cgroup_warning("At least one required controller is missing. The following controllers are required for the log collector to run: {0}".format(log_collector_cgroup.get_supported_controllers()))
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)
def initialize_cgroups_tracking(cpu_cgroup_path, memory_cgroup_path):
cpu_cgroup = CpuCgroup(AGENT_LOG_COLLECTOR, cpu_cgroup_path)
msg = "Started tracking cpu cgroup {0}".format(cpu_cgroup)
logger.info(msg)
cpu_cgroup.initialize_cpu_usage()
memory_cgroup = MemoryCgroup(AGENT_LOG_COLLECTOR, memory_cgroup_path)
msg = "Started tracking memory cgroup {0}".format(memory_cgroup)
logger.info(msg)
return [cpu_cgroup, memory_cgroup]
if not log_collector_cgroup.check_in_expected_slice(cgroupconfigurator.LOGCOLLECTOR_SLICE):
log_cgroup_warning("The Log Collector process is not in the proper cgroups", send_event=False)
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)
try:
log_collector = LogCollector(is_full_mode)
# Running log collector resource(CPU, Memory) monitoring only if agent starts the log collector.
# Running log collector resource monitoring only if agent starts the log collector.
# If Log collector start by any other means, then it will not be monitored.
if CollectLogsHandler.is_enabled_monitor_cgroups_check():
tracked_cgroups = initialize_cgroups_tracking(cpu_cgroup_path, memory_cgroup_path)
log_collector_monitor = get_log_collector_monitor_handler(tracked_cgroups)
for metric in tracked_metrics:
if isinstance(metric, CpuMetrics):
metric.initialize_cpu_usage()
break
log_collector_monitor = get_log_collector_monitor_handler(tracked_metrics)
log_collector_monitor.run()
archive = log_collector.collect_logs_and_get_archive()
logger.info("Log collection successfully completed. Archive can be found at {0} "

Просмотреть файл

@ -24,7 +24,7 @@ import uuid
from azurelinuxagent.common import logger
from azurelinuxagent.common.event import WALAEventOperation, add_event
from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup
from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
from azurelinuxagent.common.conf import get_agent_pid_file_path
from azurelinuxagent.common.exception import CGroupsException, ExtensionErrorCodes, ExtensionError, \
@ -185,14 +185,14 @@ def get_cgroup_api():
if available_unified_controllers != "":
raise CGroupsException("Detected hybrid cgroup mode, but there are controllers available to be enabled in unified hierarchy: {0}".format(available_unified_controllers))
cgroup_api = SystemdCgroupApiv1()
cgroup_api_v1 = SystemdCgroupApiv1()
# Previously the agent supported users mounting cgroup v1 controllers in locations other than the systemd
# default ('/sys/fs/cgroup'). The agent no longer supports this scenario. If either the cpu or memory
# controller is mounted in a location other than the systemd default, raise Exception.
if not cgroup_api.are_mountpoints_systemd_created():
raise InvalidCgroupMountpointException("Expected cgroup controllers to be mounted at '{0}', but at least one is not. v1 mount points: \n{1}".format(CGROUP_FILE_SYSTEM_ROOT, json.dumps(cgroup_api.get_controller_root_paths())))
# default ('/sys/fs/cgroup'). The agent no longer supports this scenario. If any agent supported controller is
# mounted in a location other than the systemd default, raise Exception.
if not cgroup_api_v1.are_mountpoints_systemd_created():
raise InvalidCgroupMountpointException("Expected cgroup controllers to be mounted at '{0}', but at least one is not. v1 mount points: \n{1}".format(CGROUP_FILE_SYSTEM_ROOT, json.dumps(cgroup_api_v1.get_controller_mountpoints())))
log_cgroup_info("Using cgroup v1 for resource enforcement and monitoring")
return cgroup_api
return cgroup_api_v1
raise CGroupsException("{0} has an unexpected file type: {1}".format(CGROUP_FILE_SYSTEM_ROOT, root_hierarchy_mode))
@ -202,7 +202,6 @@ class _SystemdCgroupApi(object):
Cgroup interface via systemd. Contains common api implementations between cgroup v1 and v2.
"""
def __init__(self):
self._agent_unit_name = None
self._systemd_run_commands = []
self._systemd_run_commands_lock = threading.RLock()
@ -213,55 +212,36 @@ class _SystemdCgroupApi(object):
with self._systemd_run_commands_lock:
return self._systemd_run_commands[:]
def get_controller_root_paths(self):
def get_unit_cgroup(self, unit_name, cgroup_name):
"""
Cgroup version specific. Returns a tuple with the root paths for the cpu and memory controllers; the values can
be None if the corresponding controller is not mounted or enabled at the root cgroup.
Cgroup version specific. Returns a representation of the unit cgroup.
:param unit_name: The unit to return the cgroup of.
:param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes.
"""
raise NotImplementedError()
def get_unit_cgroup_paths(self, unit_name):
def get_cgroup_from_relative_path(self, relative_path, cgroup_name):
"""
Returns a tuple with the path of the cpu and memory cgroups for the given unit.
The values returned can be None if the controller is not mounted or enabled.
Cgroup version specific. Returns a representation of the cgroup at the provided relative path.
:param relative_path: The relative path to return the cgroup of.
:param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes.
"""
# Ex: ControlGroup=/azure.slice/walinuxagent.service
# controlgroup_path[1:] = azure.slice/walinuxagent.service
controlgroup_path = systemd.get_unit_property(unit_name, "ControlGroup")
cpu_root_path, memory_root_path = self.get_controller_root_paths()
raise NotImplementedError()
cpu_cgroup_path = os.path.join(cpu_root_path, controlgroup_path[1:]) \
if cpu_root_path is not None else None
memory_cgroup_path = os.path.join(memory_root_path, controlgroup_path[1:]) \
if memory_root_path is not None else None
return cpu_cgroup_path, memory_cgroup_path
def get_process_cgroup_paths(self, process_id):
def get_process_cgroup(self, process_id, cgroup_name):
"""
Returns a tuple with the path of the cpu and memory cgroups for the given process.
The 'process_id' can be a numeric PID or the string "self" for the current process.
The values returned can be None if the controller is not mounted or enabled.
Cgroup version specific. Returns a representation of the process' cgroup.
:param process_id: A numeric PID to return the cgroup of, or the string "self" to return the cgroup of the current process.
:param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes.
"""
cpu_cgroup_relative_path, memory_cgroup_relative_path = self.get_process_cgroup_relative_paths(process_id)
raise NotImplementedError()
cpu_root_path, memory_root_path = self.get_controller_root_paths()
cpu_cgroup_path = os.path.join(cpu_root_path, cpu_cgroup_relative_path) \
if cpu_root_path is not None and cpu_cgroup_relative_path is not None else None
memory_cgroup_path = os.path.join(memory_root_path, memory_cgroup_relative_path) \
if memory_root_path is not None and memory_cgroup_relative_path is not None else None
return cpu_cgroup_path, memory_cgroup_path
def get_process_cgroup_relative_paths(self, process_id):
def log_root_paths(self):
"""
Cgroup version specific. Returns a tuple with the path of the cpu and memory cgroups for the given process
(relative to the root path of the corresponding controller).
The 'process_id' can be a numeric PID or the string "self" for the current process.
The values returned can be None if the controller is not mounted or enabled.
Cgroup version specific. Logs the root paths of the cgroup filesystem/controllers.
"""
raise NotImplementedError()
@ -279,11 +259,6 @@ class _SystemdCgroupApi(object):
unit_not_found = "Unit {0} not found.".format(scope_name)
return unit_not_found in stderr or scope_name not in stderr
@staticmethod
def get_processes_in_cgroup(cgroup_path):
with open(os.path.join(cgroup_path, "cgroup.procs"), "r") as cgroup_procs:
return [int(pid) for pid in cgroup_procs.read().split()]
class SystemdCgroupApiv1(_SystemdCgroupApi):
"""
@ -293,7 +268,8 @@ class SystemdCgroupApiv1(_SystemdCgroupApi):
super(SystemdCgroupApiv1, self).__init__()
self._cgroup_mountpoints = self._get_controller_mountpoints()
def _get_controller_mountpoints(self):
@staticmethod
def _get_controller_mountpoints():
"""
In v1, each controller is mounted at a different path. Use findmnt to get each path.
@ -304,7 +280,8 @@ class SystemdCgroupApiv1(_SystemdCgroupApi):
/sys/fs/cgroup/cpu,cpuacct cgroup cgroup rw,nosuid,nodev,noexec,relatime,cpu,cpuacct
etc
Returns a dictionary of the controller-path mappings.
Returns a dictionary of the controller-path mappings. The dictionary only includes the controllers which are
supported by the agent.
"""
mount_points = {}
for line in shellutil.run_command(['findmnt', '-t', 'cgroup', '--noheadings']).splitlines():
@ -315,51 +292,91 @@ class SystemdCgroupApiv1(_SystemdCgroupApi):
if match is not None:
path = match.group('path')
controller = match.group('controller')
if controller is not None and path is not None:
if controller is not None and path is not None and controller in CgroupV1.get_supported_controllers():
mount_points[controller] = path
return mount_points
def get_controller_mountpoints(self):
"""
Returns a dictionary of controller-mountpoint mappings.
"""
return self._cgroup_mountpoints
def are_mountpoints_systemd_created(self):
"""
Systemd mounts each controller at '/sys/fs/cgroup/<controller>'. Returns True if both cpu and memory
mountpoints match this pattern, False otherwise.
Systemd mounts each controller at '/sys/fs/cgroup/<controller>'. Returns True if all mounted controllers which
are supported by the agent have mountpoints which match this pattern, False otherwise.
The agent does not support cgroup usage if the default root systemd mountpoint (/sys/fs/cgroup) is not used.
This method is used to check if any users are using non-systemd mountpoints. If they are, the agent drop-in
files will be cleaned up in cgroupconfigurator.
"""
cpu_mountpoint = self._cgroup_mountpoints.get('cpu,cpuacct')
memory_mountpoint = self._cgroup_mountpoints.get('memory')
if cpu_mountpoint is not None and cpu_mountpoint != os.path.join(CGROUP_FILE_SYSTEM_ROOT, 'cpu,cpuacct'):
return False
if memory_mountpoint is not None and memory_mountpoint != os.path.join(CGROUP_FILE_SYSTEM_ROOT, 'memory'):
return False
for controller, mount_point in self._cgroup_mountpoints.items():
if mount_point != os.path.join(CGROUP_FILE_SYSTEM_ROOT, controller):
return False
return True
def get_controller_root_paths(self):
# Return a tuple representing the mountpoints for cpu and memory. Either should be None if the corresponding
# controller is not mounted.
return self._cgroup_mountpoints.get('cpu,cpuacct'), self._cgroup_mountpoints.get('memory')
@staticmethod
def _get_process_relative_controller_paths(process_id):
"""
Returns the relative paths of the cgroup for the given process as a dict of controller-path mappings. The result
only includes controllers which are supported.
The contents of the /proc/{process_id}/cgroup file are similar to
# cat /proc/1218/cgroup
10:memory:/system.slice/walinuxagent.service
3:cpu,cpuacct:/system.slice/walinuxagent.service
etc
def get_process_cgroup_relative_paths(self, process_id):
# The contents of the file are similar to
# # cat /proc/1218/cgroup
# 10:memory:/system.slice/walinuxagent.service
# 3:cpu,cpuacct:/system.slice/walinuxagent.service
# etc
cpu_path = None
memory_path = None
:param process_id: A numeric PID to return the relative paths of, or the string "self" to return the relative paths of the current process.
"""
conroller_relative_paths = {}
for line in fileutil.read_file("/proc/{0}/cgroup".format(process_id)).splitlines():
match = re.match(r'\d+:(?P<controller>(memory|.*cpuacct.*)):(?P<path>.+)', line)
match = re.match(r'\d+:(?P<controller>.+):(?P<path>.+)', line)
if match is not None:
controller = match.group('controller')
path = match.group('path').lstrip('/') if match.group('path') != '/' else None
if controller == 'memory':
memory_path = path
else:
cpu_path = path
if path is not None and controller in CgroupV1.get_supported_controllers():
conroller_relative_paths[controller] = path
return cpu_path, memory_path
return conroller_relative_paths
def get_unit_cgroup(self, unit_name, cgroup_name):
    """
    Builds the cgroup v1 representation of a systemd unit.

    The unit's "ControlGroup" property is queried from systemd and, for every mounted controller known to this
    api, appended to that controller's mountpoint after dropping the first character of the property value.

    :param unit_name: The unit to return the cgroup of.
    :param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes.
    :return: A CgroupV1 holding one absolute path per mounted controller.
    """
    control_group = systemd.get_unit_property(unit_name, "ControlGroup")
    # The slice stays inside the expression so it is only evaluated when at least one controller is mounted.
    paths_by_controller = dict(
        (controller, os.path.join(mountpoint, control_group[1:]))
        for controller, mountpoint in self._cgroup_mountpoints.items()
    )
    return CgroupV1(cgroup_name=cgroup_name, controller_mountpoints=self._cgroup_mountpoints,
                    controller_paths=paths_by_controller)
def get_cgroup_from_relative_path(self, relative_path, cgroup_name):
    """
    Builds the cgroup v1 representation of the cgroup located at the given relative path.

    :param relative_path: The path of the cgroup, relative to each controller mountpoint.
    :param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes.
    :return: A CgroupV1 holding, for every mounted controller, the mountpoint joined with relative_path.
    """
    paths_by_controller = dict(
        (controller, os.path.join(mountpoint, relative_path))
        for controller, mountpoint in self._cgroup_mountpoints.items()
    )
    return CgroupV1(cgroup_name=cgroup_name, controller_mountpoints=self._cgroup_mountpoints,
                    controller_paths=paths_by_controller)
def get_process_cgroup(self, process_id, cgroup_name):
    """
    Builds the cgroup v1 representation of the cgroup a process belongs to.

    :param process_id: A numeric PID, or the string "self" for the current process.
    :param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes.
    :return: A CgroupV1 whose controller paths only include the mounted controllers for which the process
             reported a relative path.
    """
    relative_paths = self._get_process_relative_controller_paths(process_id)

    absolute_paths = {}
    for controller, mountpoint in self._cgroup_mountpoints.items():
        relative_path = relative_paths.get(controller)
        if relative_path is None:
            # No relative path was reported for this controller; leave it out of the result.
            continue
        absolute_paths[controller] = os.path.join(mountpoint, relative_path)

    return CgroupV1(cgroup_name=cgroup_name, controller_mountpoints=self._cgroup_mountpoints,
                    controller_paths=absolute_paths)
def log_root_paths(self):
    """
    Logs, for each controller supported by the agent, either its mountpoint or the fact that it is not mounted.
    The messages are logged with send_event=False.
    """
    for controller in CgroupV1.get_supported_controllers():
        mount_point = self._cgroup_mountpoints.get(controller)
        if mount_point is not None:
            message = "The {0} controller is mounted at {1}".format(controller, mount_point)
        else:
            message = "The {0} controller is not mounted".format(controller)
        log_cgroup_info(message, send_event=False)
def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr,
error_code=ExtensionErrorCodes.PluginUnknownFailure):
@ -385,25 +402,14 @@ class SystemdCgroupApiv1(_SystemdCgroupApi):
log_cgroup_info("Started extension in unit '{0}'".format(scope_name), send_event=False)
cpu_cgroup = None
cpu_metrics = None
try:
cgroup_relative_path = os.path.join('azure.slice/azure-vmextensions.slice', extension_slice_name)
cpu_cgroup_mountpoint, memory_cgroup_mountpoint = self.get_controller_root_paths()
if cpu_cgroup_mountpoint is None:
log_cgroup_info("The CPU controller is not mounted; will not track resource usage", send_event=False)
else:
cpu_cgroup_path = os.path.join(cpu_cgroup_mountpoint, cgroup_relative_path)
cpu_cgroup = CpuCgroup(extension_name, cpu_cgroup_path)
CGroupsTelemetry.track_cgroup(cpu_cgroup)
if memory_cgroup_mountpoint is None:
log_cgroup_info("The Memory controller is not mounted; will not track resource usage", send_event=False)
else:
memory_cgroup_path = os.path.join(memory_cgroup_mountpoint, cgroup_relative_path)
memory_cgroup = MemoryCgroup(extension_name, memory_cgroup_path)
CGroupsTelemetry.track_cgroup(memory_cgroup)
cgroup = self.get_cgroup_from_relative_path(cgroup_relative_path, extension_name)
for metrics in cgroup.get_controller_metrics():
if isinstance(metrics, CpuMetrics):
cpu_metrics = metrics
CGroupsTelemetry.track_cgroup(metrics)
except IOError as e:
if e.errno == 2: # 'No such file or directory'
@ -415,7 +421,7 @@ class SystemdCgroupApiv1(_SystemdCgroupApi):
# Wait for process completion or timeout
try:
return handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout,
stderr=stderr, error_code=error_code, cpu_cgroup=cpu_cgroup)
stderr=stderr, error_code=error_code, cpu_metrics=cpu_metrics)
except ExtensionError as e:
# The extension didn't terminate successfully. Determine whether it was due to systemd errors or
# extension errors.
@ -448,7 +454,7 @@ class SystemdCgroupApiv2(_SystemdCgroupApi):
def __init__(self):
super(SystemdCgroupApiv2, self).__init__()
self._root_cgroup_path = self._get_root_cgroup_path()
self._controllers_enabled_at_root = self._get_controllers_enabled_at_root(self._root_cgroup_path) if self._root_cgroup_path is not None else []
self._controllers_enabled_at_root = self._get_controllers_enabled_at_root(self._root_cgroup_path) if self._root_cgroup_path != "" else []
@staticmethod
def _get_root_cgroup_path():
@ -459,7 +465,7 @@ class SystemdCgroupApiv2(_SystemdCgroupApi):
$ findmnt -t cgroup2 --noheadings
/sys/fs/cgroup cgroup2 cgroup2 rw,nosuid,nodev,noexec,relatime,nsdelegate,memory_recursiveprot
Returns None if the root cgroup cannot be determined from the output above.
Returns empty string if the root cgroup cannot be determined from the output above.
"""
#
for line in shellutil.run_command(['findmnt', '-t', 'cgroup2', '--noheadings']).splitlines():
@ -470,7 +476,13 @@ class SystemdCgroupApiv2(_SystemdCgroupApi):
root_cgroup_path = match.group('path')
if root_cgroup_path is not None:
return root_cgroup_path
return None
return ""
def get_root_cgroup_path(self):
"""
Returns the unified cgroup mountpoint.
"""
return self._root_cgroup_path
@staticmethod
def _get_controllers_enabled_at_root(root_cgroup_path):
@ -478,47 +490,229 @@ class SystemdCgroupApiv2(_SystemdCgroupApi):
Returns a list of the controllers enabled at the root cgroup. The cgroup.subtree_control file at the root shows
a space separated list of the controllers which are enabled to control resource distribution from the root
cgroup to its children. If a controller is listed here, then that controller is available to enable in children
cgroups.
cgroups. Returns only the enabled controllers which are supported by the agent.
$ cat /sys/fs/cgroup/cgroup.subtree_control
cpuset cpu io memory hugetlb pids rdma misc
"""
controllers_enabled_at_root = []
enabled_controllers_file = os.path.join(root_cgroup_path, 'cgroup.subtree_control')
if os.path.exists(enabled_controllers_file):
controllers_enabled_at_root = fileutil.read_file(enabled_controllers_file).rstrip().split()
return controllers_enabled_at_root
return list(set(controllers_enabled_at_root) & set(CgroupV2.get_supported_controllers()))
return []
def get_controller_root_paths(self):
# Return a tuple representing the root cgroups for cpu and memory. Either should be None if the corresponding
# controller is not enabled at the root. This check is necessary because all non-root "cgroup.subtree_control"
# files can only contain controllers which are enabled in the parent's "cgroup.subtree_control" file.
@staticmethod
def _get_process_relative_cgroup_path(process_id):
"""
Returns the relative path of the cgroup for the given process.
The contents of the /proc/{process_id}/cgroup file are similar to
# cat /proc/1218/cgroup
0::/azure.slice/walinuxagent.service
root_cpu_path = None
root_memory_path = None
if self._root_cgroup_path is not None:
if 'cpu' in self._controllers_enabled_at_root:
root_cpu_path = self._root_cgroup_path
if 'memory' in self._controllers_enabled_at_root:
root_memory_path = self._root_cgroup_path
return root_cpu_path, root_memory_path
def get_process_cgroup_relative_paths(self, process_id):
# The contents of the file are similar to
# # cat /proc/1218/cgroup
# 0::/azure.slice/walinuxagent.service
cpu_path = None
memory_path = None
:param process_id: A numeric PID to return the relative path of, or the string "self" to return the relative path of the current process.
"""
relative_path = ""
for line in fileutil.read_file("/proc/{0}/cgroup".format(process_id)).splitlines():
match = re.match(r'0::(?P<path>\S+)', line)
if match is not None:
path = match.group('path').lstrip('/') if match.group('path') != '/' else None
memory_path = path
cpu_path = path
relative_path = match.group('path').lstrip('/') if match.group('path') != '/' else ""
return cpu_path, memory_path
return relative_path
def get_unit_cgroup(self, unit_name, cgroup_name):
    """
    Builds the cgroup v2 representation of a systemd unit.

    :param unit_name: The unit to return the cgroup of.
    :param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes.
    :return: A CgroupV2 whose cgroup path is the root cgroup path joined with the unit's "ControlGroup"
             property (minus its first character), or an empty string when the root cgroup path is empty.
    """
    # The property is always queried, even when the root path is empty and the value ends up unused.
    control_group = systemd.get_unit_property(unit_name, "ControlGroup")
    root_path = self._root_cgroup_path
    absolute_path = os.path.join(root_path, control_group[1:]) if root_path != "" else ""
    return CgroupV2(cgroup_name=cgroup_name, root_cgroup_path=self._root_cgroup_path, cgroup_path=absolute_path, enabled_controllers=self._controllers_enabled_at_root)
def get_cgroup_from_relative_path(self, relative_path, cgroup_name):
    """
    Builds the cgroup v2 representation of the cgroup located at the given relative path.

    :param relative_path: The path of the cgroup, relative to the root cgroup path.
    :param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes.
    :return: A CgroupV2 whose cgroup path is the root cgroup path joined with relative_path, or an empty
             string when the root cgroup path is empty.
    """
    root_path = self._root_cgroup_path
    absolute_path = os.path.join(root_path, relative_path) if root_path != "" else ""
    return CgroupV2(cgroup_name=cgroup_name, root_cgroup_path=self._root_cgroup_path, cgroup_path=absolute_path, enabled_controllers=self._controllers_enabled_at_root)
def get_process_cgroup(self, process_id, cgroup_name):
    """
    Builds the cgroup v2 representation of the cgroup a process belongs to.

    :param process_id: A numeric PID, or the string "self" for the current process.
    :param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes.
    :return: A CgroupV2 whose cgroup path is the root cgroup path joined with the process' relative cgroup
             path, or an empty string when the root cgroup path is empty.
    """
    # The relative path is always looked up first, even when the root path is empty.
    process_relative_path = self._get_process_relative_cgroup_path(process_id)
    root_path = self._root_cgroup_path
    absolute_path = os.path.join(root_path, process_relative_path) if root_path != "" else ""
    return CgroupV2(cgroup_name=cgroup_name, root_cgroup_path=self._root_cgroup_path, cgroup_path=absolute_path, enabled_controllers=self._controllers_enabled_at_root)
def log_root_paths(self):
    """
    Logs the root cgroup path and then, for each controller supported by the agent, whether it is enabled at
    the root cgroup. The messages are logged with send_event=False.
    """
    log_cgroup_info("The root cgroup path is {0}".format(self._root_cgroup_path), send_event=False)
    for controller in CgroupV2.get_supported_controllers():
        if controller not in self._controllers_enabled_at_root:
            message = "The {0} controller is not enabled at the root cgroup".format(controller)
        else:
            message = "The {0} controller is enabled at the root cgroup".format(controller)
        log_cgroup_info(message, send_event=False)
def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr,
                            error_code=ExtensionErrorCodes.PluginUnknownFailure):
    """
    Not implemented for this api; always raises NotImplementedError.
    """
    raise NotImplementedError()
class Cgroup(object):
    """
    Base representation of a cgroup. It only stores the cgroup's name; every query method is cgroup version
    specific and raises NotImplementedError here.
    """
    MEMORY_CONTROLLER = "memory"

    def __init__(self, cgroup_name):
        """
        :param cgroup_name: The name of the cgroup. Stored as self._cgroup_name.
        """
        self._cgroup_name = cgroup_name

    @staticmethod
    def get_supported_controllers():
        """
        Cgroup version specific. Expected to return a list of the controllers which the agent supports.
        """
        raise NotImplementedError()

    def check_in_expected_slice(self, expected_slice):
        """
        Cgroup version specific. Expected to return True if the cgroup is in the expected slice, False otherwise.

        :param expected_slice: The slice the cgroup is expected to be in.
        """
        raise NotImplementedError()

    def get_controller_metrics(self, expected_relative_path=None):
        """
        Cgroup version specific. Expected to return a list of the metrics for the agent supported controllers
        which are mounted/enabled for the cgroup.

        :param expected_relative_path: The expected relative path of the cgroup. If provided, only metrics for
                                       controllers at this expected path will be returned.
        """
        raise NotImplementedError()

    def get_processes(self):
        """
        Cgroup version specific. Expected to return a list of all the process ids in the cgroup.
        """
        raise NotImplementedError()
class CgroupV1(Cgroup):
    """
    Cgroup v1 representation of a cgroup: one mountpoint and one absolute path per controller.
    """
    CPU_CONTROLLER = "cpu,cpuacct"

    def __init__(self, cgroup_name, controller_mountpoints, controller_paths):
        """
        :param cgroup_name: The name of the cgroup. Used for logging/tracking purposes.
        :param controller_mountpoints: A dictionary of controller-mountpoint mappings for each agent supported
                                       controller which is mounted.
        :param controller_paths: A dictionary of controller-path mappings for each agent supported controller
                                 which is mounted. The path represents the absolute path of the controller.
        """
        super(CgroupV1, self).__init__(cgroup_name=cgroup_name)
        self._controller_mountpoints = controller_mountpoints
        self._controller_paths = controller_paths

    @staticmethod
    def get_supported_controllers():
        """
        Returns the list of controller names the agent supports under cgroup v1 (cpu first, then memory).
        """
        return [CgroupV1.CPU_CONTROLLER, CgroupV1.MEMORY_CONTROLLER]

    def check_in_expected_slice(self, expected_slice):
        """
        Returns True when expected_slice is a substring of every controller path of this cgroup (trivially True
        when there are no controller paths). A warning is logged for each controller path that does not match.

        :param expected_slice: The slice the cgroup is expected to be in.
        """
        all_match = True
        for controller, path in self._controller_paths.items():
            if expected_slice in path:
                continue
            log_cgroup_warning("The {0} controller for the {1} cgroup is not mounted in the expected slice. Expected slice: {2}. Actual controller path: {3}".format(controller, self._cgroup_name, expected_slice, path), send_event=False)
            all_match = False
        return all_match

    def get_controller_metrics(self, expected_relative_path=None):
        """
        Returns a list with one metrics object per supported controller which has both a mountpoint and a path
        for this cgroup. Controllers that are skipped are reported with a warning.

        :param expected_relative_path: When provided, a controller is only tracked if its path equals its
                                       mountpoint joined with this relative path.
        """
        tracked = []
        for controller in self.get_supported_controllers():
            path = self._controller_paths.get(controller)
            mountpoint = self._controller_mountpoints.get(controller)

            if mountpoint is None:
                log_cgroup_warning("{0} controller is not mounted; will not track metrics".format(controller), send_event=False)
                continue

            if path is None:
                log_cgroup_warning("{0} is not mounted for the {1} cgroup; will not track metrics".format(controller, self._cgroup_name), send_event=False)
                continue

            if expected_relative_path is not None:
                expected_path = os.path.join(mountpoint, expected_relative_path)
                if path != expected_path:
                    log_cgroup_warning("The {0} controller is not mounted at the expected path for the {1} cgroup; will not track metrics. Actual cgroup path:[{2}] Expected:[{3}]".format(controller, self._cgroup_name, path, expected_path), send_event=False)
                    continue

            if controller == self.CPU_CONTROLLER:
                controller_metrics = CpuMetrics(self._cgroup_name, path)
            elif controller == self.MEMORY_CONTROLLER:
                controller_metrics = MemoryMetrics(self._cgroup_name, path)
            else:
                # No metrics class is associated with this controller.
                continue

            log_cgroup_info("{0} metrics for cgroup: {1}".format(controller, controller_metrics), send_event=False)
            tracked.append(controller_metrics)

        return tracked

    def get_controller_procs_path(self, controller):
        """
        Returns the path of the 'cgroup.procs' file under the given controller's path, or an empty string when
        this cgroup has no (or an empty) path for that controller.

        :param controller: The name of the controller to look up.
        """
        path = self._controller_paths.get(controller)
        if path is None or path == "":
            return ""
        return os.path.join(path, "cgroup.procs")

    def get_processes(self):
        """
        Returns a list of the distinct process ids read from the 'cgroup.procs' file of every controller path.
        Files which do not exist are skipped.
        """
        pids = set()
        for controller in self._controller_paths.keys():
            procs_path = self.get_controller_procs_path(controller)
            if not os.path.exists(procs_path):
                continue
            with open(procs_path, "r") as procs_file:
                pids.update(int(pid) for pid in procs_file.read().split())
        return list(pids)
class CgroupV2(Cgroup):
    """
    Cgroup v2 representation of a cgroup: a single absolute path plus the controllers enabled at the root.
    """
    CPU_CONTROLLER = "cpu"

    def __init__(self, cgroup_name, root_cgroup_path, cgroup_path, enabled_controllers):
        """
        :param cgroup_name: The name of the cgroup. Used for logging/tracking purposes.
        :param root_cgroup_path: A string representing the root cgroup path. String can be empty.
        :param cgroup_path: A string representing the absolute cgroup path. String can be empty.
        :param enabled_controllers: A list of strings representing the agent supported controllers enabled at
                                    the root cgroup.
        """
        super(CgroupV2, self).__init__(cgroup_name)
        self._root_cgroup_path = root_cgroup_path
        self._cgroup_path = cgroup_path
        self._enabled_controllers = enabled_controllers

    @staticmethod
    def get_supported_controllers():
        """
        Returns the list of controller names the agent supports under cgroup v2 (cpu first, then memory).
        """
        return [CgroupV2.CPU_CONTROLLER, CgroupV2.MEMORY_CONTROLLER]

    def check_in_expected_slice(self, expected_slice):
        """
        Returns True when expected_slice is a substring of this cgroup's path; otherwise logs a warning and
        returns False.

        :param expected_slice: The slice the cgroup is expected to be in.
        """
        if expected_slice in self._cgroup_path:
            return True
        log_cgroup_warning("The {0} cgroup is not in the expected slice. Expected slice: {1}. Actual cgroup path: {2}".format(self._cgroup_name, expected_slice, self._cgroup_path), send_event=False)
        return False

    def get_controller_metrics(self, expected_relative_path=None):
        """
        Not implemented yet for cgroup v2; always raises NotImplementedError.
        """
        # TODO - Implement controller metrics for cgroup v2
        raise NotImplementedError()

    def get_procs_path(self):
        """
        Returns the path of the 'cgroup.procs' file under this cgroup's path, or an empty string when the cgroup
        path is empty.
        """
        if self._cgroup_path == "":
            return ""
        return os.path.join(self._cgroup_path, "cgroup.procs")

    def get_processes(self):
        """
        Returns a list of the distinct process ids read from this cgroup's 'cgroup.procs' file, or an empty
        list when that file does not exist.
        """
        pids = set()
        procs_path = self.get_procs_path()
        if os.path.exists(procs_path):
            with open(procs_path, "r") as procs_file:
                pids.update(int(pid) for pid in procs_file.read().split())
        return list(pids)

Просмотреть файл

@ -23,7 +23,7 @@ import threading
from azurelinuxagent.common import conf
from azurelinuxagent.common import logger
from azurelinuxagent.ga.cgroup import CpuCgroup, AGENT_NAME_TELEMETRY, MetricsCounter, MemoryCgroup
from azurelinuxagent.ga.controllermetrics import CpuMetrics, AGENT_NAME_TELEMETRY, MetricsCounter, MemoryMetrics
from azurelinuxagent.ga.cgroupapi import SystemdRunError, EXTENSION_SLICE_PREFIX, CGroupUtil, SystemdCgroupApiv2, \
log_cgroup_info, log_cgroup_warning, get_cgroup_api, InvalidCgroupMountpointException
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
@ -130,9 +130,8 @@ class CGroupConfigurator(object):
self._agent_cgroups_enabled = False
self._extensions_cgroups_enabled = False
self._cgroups_api = None
self._agent_cpu_cgroup_path = None
self._agent_memory_cgroup_path = None
self._agent_memory_cgroup = None
self._agent_cgroup = None
self._agent_memory_metrics = None
self._check_cgroups_lock = threading.RLock() # Protect the check_cgroups which is called from Monitor thread and main loop.
def initialize(self):
@ -189,28 +188,30 @@ class CGroupConfigurator(object):
self.__setup_azure_slice()
cpu_controller_root, memory_controller_root = self.__get_cgroup_controller_roots()
self._agent_cpu_cgroup_path, self._agent_memory_cgroup_path = self.__get_agent_cgroup_paths(agent_slice,
cpu_controller_root,
memory_controller_root)
# Log mount points/root paths for cgroup controllers
self._cgroups_api.log_root_paths()
# Get agent cgroup
self._agent_cgroup = self._cgroups_api.get_process_cgroup(process_id="self", cgroup_name=AGENT_NAME_TELEMETRY)
if conf.get_cgroup_disable_on_process_check_failure() and self._check_fails_if_processes_found_in_agent_cgroup_before_enable(agent_slice):
reason = "Found unexpected processes in the agent cgroup before agent enable cgroups."
self.disable(reason, DisableCgroups.ALL)
return
if self._agent_cpu_cgroup_path is not None or self._agent_memory_cgroup_path is not None:
# Get metrics to track
metrics = self._agent_cgroup.get_controller_metrics(expected_relative_path=os.path.join(agent_slice, systemd.get_agent_unit_name()))
if len(metrics) > 0:
self.enable()
if self._agent_cpu_cgroup_path is not None:
log_cgroup_info("Agent CPU cgroup: {0}".format(self._agent_cpu_cgroup_path))
self.__set_cpu_quota(conf.get_agent_cpu_quota())
CGroupsTelemetry.track_cgroup(CpuCgroup(AGENT_NAME_TELEMETRY, self._agent_cpu_cgroup_path))
if self._agent_memory_cgroup_path is not None:
log_cgroup_info("Agent Memory cgroup: {0}".format(self._agent_memory_cgroup_path))
self._agent_memory_cgroup = MemoryCgroup(AGENT_NAME_TELEMETRY, self._agent_memory_cgroup_path)
CGroupsTelemetry.track_cgroup(self._agent_memory_cgroup)
for metric in metrics:
for prop in metric.get_unit_properties():
log_cgroup_info('{0}: {1}'.format(prop, systemd.get_unit_property(systemd.get_agent_unit_name(), prop)))
if isinstance(metric, CpuMetrics):
self.__set_cpu_quota(conf.get_agent_cpu_quota())
elif isinstance(metric, MemoryMetrics):
self._agent_memory_metrics = metric
CGroupsTelemetry.track_cgroup(metric)
except Exception as exception:
log_cgroup_warning("Error initializing cgroups: {0}".format(ustr(exception)))
@ -229,21 +230,6 @@ class CGroupConfigurator(object):
return False
return True
def __get_cgroup_controller_roots(self):
    """
    Retrieves the root paths of the CPU and memory controllers from the cgroups API. For each controller, the
    path is logged (with send_event=False) when it is not None; otherwise a warning is logged.

    :return: A tuple (cpu_controller_root, memory_controller_root); either item can be None.
    """
    cpu_root, memory_root = self._cgroups_api.get_controller_root_paths()

    if cpu_root is None:
        log_cgroup_warning("The CPU cgroup controller is not mounted or enabled")
    else:
        log_cgroup_info("The CPU cgroup controller root path is {0}".format(cpu_root), send_event=False)

    if memory_root is None:
        log_cgroup_warning("The memory cgroup controller is not mounted or enabled")
    else:
        log_cgroup_info("The memory cgroup controller root path is {0}".format(memory_root), send_event=False)

    return cpu_root, memory_root
@staticmethod
def __setup_azure_slice():
"""
@ -416,47 +402,6 @@ class CGroupConfigurator(object):
return True
return False
def __get_agent_cgroup_paths(self, agent_slice, cpu_controller_root, memory_controller_root):
    """
    Computes the paths of the agent's CPU and memory cgroups.

    The relative cgroup paths of the current process ("self") are compared against the expected path, which is
    the agent unit name joined to 'agent_slice'. A controller whose relative path is missing or differs from
    the expected one is reported with a warning and its resulting path is None.

    :param agent_slice: Slice the agent unit is expected to be in.
    :param cpu_controller_root: Root path of the CPU controller, or None.
    :param memory_controller_root: Root path of the memory controller, or None.
    :return: A tuple (agent_cpu_cgroup_path, agent_memory_cgroup_path); either item can be None.
    """
    unit_name = systemd.get_agent_unit_name()
    expected_path = os.path.join(agent_slice, unit_name)
    cpu_relative, memory_relative = self._cgroups_api.get_process_cgroup_relative_paths("self")

    if cpu_relative is None:
        log_cgroup_warning("The agent's process is not within a CPU cgroup")
    elif cpu_relative == expected_path:
        log_cgroup_info('CPUAccounting: {0}'.format(systemd.get_unit_property(unit_name, "CPUAccounting")))
        log_cgroup_info('CPUQuota: {0}'.format(systemd.get_unit_property(unit_name, "CPUQuotaPerSecUSec")))
    else:
        log_cgroup_warning(
            "The Agent is not in the expected CPU cgroup; will not enable monitoring. Cgroup:[{0}] Expected:[{1}]".format(cpu_relative, expected_path))
        cpu_relative = None  # Discard the path to prevent monitoring

    if memory_relative is None:
        log_cgroup_warning("The agent's process is not within a memory cgroup")
    elif memory_relative == expected_path:
        accounting = systemd.get_unit_property(unit_name, "MemoryAccounting")
        log_cgroup_info('MemoryAccounting: {0}'.format(accounting))
    else:
        log_cgroup_warning(
            "The Agent is not in the expected memory cgroup; will not enable monitoring. CGroup:[{0}] Expected:[{1}]".format(memory_relative, expected_path))
        memory_relative = None  # Discard the path to prevent monitoring

    # A full path is produced only when both the controller root and the relative path are available
    cpu_path = None
    if cpu_controller_root is not None and cpu_relative is not None:
        cpu_path = os.path.join(cpu_controller_root, cpu_relative)

    memory_path = None
    if memory_controller_root is not None and memory_relative is not None:
        memory_path = os.path.join(memory_controller_root, memory_relative)

    return cpu_path, memory_path
def supported(self):
    """
    Returns the current value of the _cgroups_supported attribute.
    """
    is_supported = self._cgroups_supported
    return is_supported
@ -496,7 +441,11 @@ class CGroupConfigurator(object):
elif disable_cgroups == DisableCgroups.AGENT: # disable agent
self._agent_cgroups_enabled = False
self.__reset_agent_cpu_quota()
CGroupsTelemetry.stop_tracking(CpuCgroup(AGENT_NAME_TELEMETRY, self._agent_cpu_cgroup_path))
agent_metrics = self._agent_cgroup.get_controller_metrics()
for metric in agent_metrics:
if isinstance(metric, CpuMetrics):
CGroupsTelemetry.stop_tracking(metric)
break
log_cgroup_warning("Disabling resource usage monitoring. Reason: {0}".format(reason), op=WALAEventOperation.CGroupsDisabled)
@ -612,11 +561,7 @@ class CGroupConfigurator(object):
"""
unexpected = []
agent_cgroup_proc_names = []
# Now we call _check_processes_in_agent_cgroup before we enable the cgroups or any one of the controller is not mounted, agent cgroup paths can be None.
# so we need to check both.
cgroup_path = self._agent_cpu_cgroup_path if self._agent_cpu_cgroup_path is not None else self._agent_memory_cgroup_path
if cgroup_path is None:
return
try:
daemon = os.getppid()
extension_handler = os.getpid()
@ -624,12 +569,12 @@ class CGroupConfigurator(object):
agent_commands.update(shellutil.get_running_commands())
systemd_run_commands = set()
systemd_run_commands.update(self._cgroups_api.get_systemd_run_commands())
agent_cgroup = self._cgroups_api.get_processes_in_cgroup(cgroup_path)
agent_cgroup_proccesses = self._agent_cgroup.get_processes()
# get the running commands again in case new commands started or completed while we were fetching the processes in the cgroup;
agent_commands.update(shellutil.get_running_commands())
systemd_run_commands.update(self._cgroups_api.get_systemd_run_commands())
for process in agent_cgroup:
for process in agent_cgroup_proccesses:
agent_cgroup_proc_names.append(self.__format_process(process))
# Note that the agent uses systemd-run to start extensions; systemd-run belongs to the agent cgroup, though the extensions don't.
if process in (daemon, extension_handler) or process in systemd_run_commands:
@ -753,8 +698,8 @@ class CGroupConfigurator(object):
raise CGroupsException("The agent has been throttled for {0} seconds".format(metric.value))
def check_agent_memory_usage(self):
if self.enabled() and self._agent_memory_cgroup:
metrics = self._agent_memory_cgroup.get_tracked_metrics()
if self.enabled() and self._agent_memory_metrics is not None:
metrics = self._agent_memory_metrics.get_tracked_metrics()
current_usage = 0
for metric in metrics:
if metric.counter == MetricsCounter.TOTAL_MEM_USAGE:
@ -780,59 +725,37 @@ class CGroupConfigurator(object):
return 0
def start_tracking_unit_cgroups(self, unit_name):
    """
    Starts tracking every controller metrics object returned for the cgroup of the given unit.

    :param unit_name: Name of the unit; it is also passed as the name of the cgroup.

    Any exception is logged (with send_event=False) and is not propagated to the caller.
    """
    try:
        cgroup = self._cgroups_api.get_unit_cgroup(unit_name, unit_name)
        metrics = cgroup.get_controller_metrics()

        for metric in metrics:
            CGroupsTelemetry.track_cgroup(metric)

    except Exception as exception:
        log_cgroup_info("Failed to start tracking resource usage for the extension: {0}".format(ustr(exception)), send_event=False)
def stop_tracking_unit_cgroups(self, unit_name):
    """
    Stops tracking every controller metrics object returned for the cgroup of the given unit.

    :param unit_name: Name of the unit; it is also passed as the name of the cgroup.

    Any exception is logged (with send_event=False) and is not propagated to the caller.
    """
    try:
        cgroup = self._cgroups_api.get_unit_cgroup(unit_name, unit_name)
        metrics = cgroup.get_controller_metrics()

        for metric in metrics:
            CGroupsTelemetry.stop_tracking(metric)

    except Exception as exception:
        log_cgroup_info("Failed to stop tracking resource usage for the extension service: {0}".format(ustr(exception)), send_event=False)
def stop_tracking_extension_cgroups(self, extension_name):
    """
    Stops tracking every controller metrics object returned for the cgroup of the given extension's slice.

    The cgroup is looked up by its relative path: the extension's slice name joined to _AZURE_VMEXTENSIONS_SLICE.

    :param extension_name: Name of the extension; it is also passed as the name of the cgroup.

    Any exception is logged (with send_event=False) and is not propagated to the caller.
    """
    try:
        extension_slice_name = CGroupUtil.get_extension_slice_name(extension_name)
        cgroup_relative_path = os.path.join(_AZURE_VMEXTENSIONS_SLICE, extension_slice_name)

        cgroup = self._cgroups_api.get_cgroup_from_relative_path(relative_path=cgroup_relative_path,
                                                                 cgroup_name=extension_name)
        metrics = cgroup.get_controller_metrics()

        for metric in metrics:
            CGroupsTelemetry.stop_tracking(metric)

    except Exception as exception:
        log_cgroup_info("Failed to stop tracking resource usage for the extension service: {0}".format(ustr(exception)), send_event=False)

Просмотреть файл

@ -17,7 +17,7 @@ import errno
import threading
from azurelinuxagent.common import logger
from azurelinuxagent.ga.cgroup import CpuCgroup
from azurelinuxagent.ga.controllermetrics import CpuMetrics
from azurelinuxagent.common.future import ustr
@ -41,7 +41,7 @@ class CGroupsTelemetry(object):
"""
Adds the given item to the dictionary of tracked cgroups
"""
if isinstance(cgroup, CpuCgroup):
if isinstance(cgroup, CpuMetrics):
# set the current cpu usage
cgroup.initialize_cpu_usage()

Просмотреть файл

@ -25,7 +25,7 @@ from azurelinuxagent.ga import logcollector, cgroupconfigurator
import azurelinuxagent.common.conf as conf
from azurelinuxagent.common import logger
from azurelinuxagent.ga.cgroup import MetricsCounter
from azurelinuxagent.ga.controllermetrics import MetricsCounter
from azurelinuxagent.common.event import elapsed_milliseconds, add_event, WALAEventOperation, report_metric
from azurelinuxagent.common.future import ustr
from azurelinuxagent.ga.interfaces import ThreadHandlerInterface

Просмотреть файл

@ -88,7 +88,7 @@ class MetricsCounter(object):
re_user_system_times = re.compile(r'user (\d+)\nsystem (\d+)\n')
class CGroup(object):
class ControllerMetrics(object):
def __init__(self, name, cgroup_path):
"""
Initialize _data collection for the Memory controller
@ -169,10 +169,16 @@ class CGroup(object):
"""
raise NotImplementedError()
def get_unit_properties(self):
    """
    Returns the list of unit properties that should be collected for the controller. This implementation
    always raises NotImplementedError.
    """
    raise NotImplementedError()
class CpuCgroup(CGroup):
class CpuMetrics(ControllerMetrics):
def __init__(self, name, cgroup_path):
super(CpuCgroup, self).__init__(name, cgroup_path)
super(CpuMetrics, self).__init__(name, cgroup_path)
self._osutil = get_osutil()
self._previous_cgroup_cpu = None
@ -306,10 +312,13 @@ class CpuCgroup(CGroup):
return tracked
def get_unit_properties(self):
    """
    Returns the names of the unit properties to collect for the CPU controller.
    """
    cpu_properties = ["CPUAccounting", "CPUQuotaPerSecUSec"]
    return cpu_properties
class MemoryCgroup(CGroup):
class MemoryMetrics(ControllerMetrics):
def __init__(self, name, cgroup_path):
super(MemoryCgroup, self).__init__(name, cgroup_path)
super(MemoryMetrics, self).__init__(name, cgroup_path)
self._counter_not_found_error_count = 0
@ -390,3 +399,6 @@ class MemoryCgroup(CGroup):
MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.SWAP_MEM_USAGE, self.name,
self.try_swap_memory_usage(), _REPORT_EVERY_HOUR)
]
def get_unit_properties(self):
    """
    Returns the names of the unit properties to collect for the memory controller.
    """
    memory_properties = ["MemoryAccounting"]
    return memory_properties

Просмотреть файл

@ -31,7 +31,7 @@ from azurelinuxagent.common.future import ustr
TELEMETRY_MESSAGE_MAX_LEN = 3200
def wait_for_process_completion_or_timeout(process, timeout, cpu_cgroup):
def wait_for_process_completion_or_timeout(process, timeout, cpu_metrics):
"""
Utility function that waits for the process to complete within the given time frame. This function will terminate
the process if when the given time frame elapses.
@ -47,7 +47,7 @@ def wait_for_process_completion_or_timeout(process, timeout, cpu_cgroup):
throttled_time = 0
if timeout == 0:
throttled_time = get_cpu_throttled_time(cpu_cgroup)
throttled_time = get_cpu_throttled_time(cpu_metrics)
os.killpg(os.getpgid(process.pid), signal.SIGKILL)
else:
# process completed or forked; sleep 1 sec to give the child process (if any) a chance to start
@ -57,7 +57,7 @@ def wait_for_process_completion_or_timeout(process, timeout, cpu_cgroup):
return timeout == 0, return_code, throttled_time
def handle_process_completion(process, command, timeout, stdout, stderr, error_code, cpu_cgroup=None):
def handle_process_completion(process, command, timeout, stdout, stderr, error_code, cpu_metrics=None):
"""
Utility function that waits for process completion and retrieves its output (stdout and stderr) if it completed
before the timeout period. Otherwise, the process will get killed and an ExtensionError will be raised.
@ -68,15 +68,15 @@ def handle_process_completion(process, command, timeout, stdout, stderr, error_c
:param stdout: Must be a file since we seek on it when parsing the subprocess output
:param stderr: Must be a file since we seek on it when parsing the subprocess outputs
:param error_code: The error code to set if we raise an ExtensionError
:param cpu_cgroup: Reference the cpu cgroup name and path
:param cpu_metrics: References the cpu metrics for the cgroup
:return:
"""
# Wait for process completion or timeout
timed_out, return_code, throttled_time = wait_for_process_completion_or_timeout(process, timeout, cpu_cgroup)
timed_out, return_code, throttled_time = wait_for_process_completion_or_timeout(process, timeout, cpu_metrics)
process_output = read_output(stdout, stderr)
if timed_out:
if cpu_cgroup is not None: # Report CPUThrottledTime when timeout happens
if cpu_metrics is not None: # Report CPUThrottledTime when timeout happens
raise ExtensionError("Timeout({0});CPUThrottledTime({1}secs): {2}\n{3}".format(timeout, throttled_time, command, process_output),
code=ExtensionErrorCodes.PluginHandlerScriptTimedout)
@ -211,14 +211,14 @@ def format_stdout_stderr(stdout, stderr):
return to_s(stdout, -1*max_len_each, stderr, -1*max_len_each)
def get_cpu_throttled_time(cpu_cgroup):
def get_cpu_throttled_time(cpu_metrics):
"""
return the throttled time for the given cgroup.
"""
throttled_time = 0
if cpu_cgroup is not None:
if cpu_metrics is not None:
try:
throttled_time = cpu_cgroup.get_cpu_throttled_time(read_previous_throttled_time=False)
throttled_time = cpu_metrics.get_cpu_throttled_time(read_previous_throttled_time=False)
except Exception as e:
logger.warn("Failed to get cpu throttled time for the extension: {0}", ustr(e))

Просмотреть файл

@ -22,7 +22,7 @@ import threading
import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.networkutil as networkutil
from azurelinuxagent.ga.cgroup import MetricValue, MetricsCategory, MetricsCounter
from azurelinuxagent.ga.controllermetrics import MetricValue, MetricsCategory, MetricsCounter
from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
from azurelinuxagent.common.errorstate import ErrorState

Просмотреть файл

@ -19,7 +19,7 @@ import shutil
import subprocess
import tempfile
from azurelinuxagent.ga.cgroup import CpuCgroup
from azurelinuxagent.ga.controllermetrics import CpuMetrics
from azurelinuxagent.common.exception import ExtensionError, ExtensionErrorCodes
from azurelinuxagent.common.future import ustr
from azurelinuxagent.ga.extensionprocessutil import format_stdout_stderr, read_output, \
@ -52,7 +52,7 @@ class TestProcessUtils(AgentTestCase):
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=5, cpu_cgroup=None)
timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=5, cpu_metrics=None)
self.assertEqual(timed_out, False)
self.assertEqual(ret, 0)
@ -70,7 +70,8 @@ class TestProcessUtils(AgentTestCase):
# We don't actually mock the kill, just wrap it so we can assert its call count
with patch('azurelinuxagent.ga.extensionprocessutil.os.killpg', wraps=os.killpg) as patch_kill:
with patch('time.sleep') as mock_sleep:
timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=timeout, cpu_cgroup=None)
timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=timeout,
cpu_metrics=None)
# We're mocking sleep to avoid prolonging the test execution time, but we still want to make sure
# we're "waiting" the correct amount of time before killing the process
@ -89,7 +90,7 @@ class TestProcessUtils(AgentTestCase):
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=5, cpu_cgroup=None)
timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=5, cpu_metrics=None)
self.assertEqual(timed_out, False)
self.assertEqual(ret, 2)
@ -105,12 +106,8 @@ class TestProcessUtils(AgentTestCase):
stderr=stderr,
preexec_fn=os.setsid)
process_output = handle_process_completion(process=process,
command=command,
timeout=5,
stdout=stdout,
stderr=stderr,
error_code=42)
process_output = handle_process_completion(process=process, command=command, timeout=5, stdout=stdout,
stderr=stderr, error_code=42)
expected_output = "[stdout]\ndummy stdout\n\n\n[stderr]\ndummy stderr\n"
self.assertEqual(process_output, expected_output)
@ -130,12 +127,8 @@ class TestProcessUtils(AgentTestCase):
stderr=stderr,
preexec_fn=os.setsid)
handle_process_completion(process=process,
command=command,
timeout=timeout,
stdout=stdout,
stderr=stderr,
error_code=42)
handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout,
stderr=stderr, error_code=42)
# We're mocking sleep to avoid prolonging the test execution time, but we still want to make sure
# we're "waiting" the correct amount of time before killing the process and raising an exception
@ -158,7 +151,7 @@ class TestProcessUtils(AgentTestCase):
test_file = os.path.join(self.tmp_dir, "cpu.stat")
shutil.copyfile(os.path.join(data_dir, "cgroups", "cpu.stat_t0"),
test_file) # throttled_time = 50
cgroup = CpuCgroup("test", self.tmp_dir)
cgroup = CpuMetrics("test", self.tmp_dir)
process = subprocess.Popen(command, # pylint: disable=subprocess-popen-preexec-fn
shell=True,
cwd=self.tmp_dir,
@ -167,13 +160,8 @@ class TestProcessUtils(AgentTestCase):
stderr=stderr,
preexec_fn=os.setsid)
handle_process_completion(process=process,
command=command,
timeout=timeout,
stdout=stdout,
stderr=stderr,
error_code=42,
cpu_cgroup=cgroup)
handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout,
stderr=stderr, error_code=42, cpu_metrics=cgroup)
# We're mocking sleep to avoid prolonging the test execution time, but we still want to make sure
# we're "waiting" the correct amount of time before killing the process and raising an exception
@ -200,11 +188,7 @@ class TestProcessUtils(AgentTestCase):
stderr=stderr,
preexec_fn=os.setsid)
handle_process_completion(process=process,
command=command,
timeout=4,
stdout=stdout,
stderr=stderr,
handle_process_completion(process=process, command=command, timeout=4, stdout=stdout, stderr=stderr,
error_code=error_code)
self.assertEqual(context_manager.exception.code, error_code)

Просмотреть файл

@ -0,0 +1,3 @@
123
234
345

Просмотреть файл

@ -24,10 +24,11 @@ import tempfile
from azurelinuxagent.common.exception import CGroupsException
from azurelinuxagent.ga.cgroupapi import SystemdCgroupApiv1, SystemdCgroupApiv2, CGroupUtil, get_cgroup_api, \
InvalidCgroupMountpointException
InvalidCgroupMountpointException, CgroupV1, CgroupV2
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
from azurelinuxagent.common.osutil import systemd
from azurelinuxagent.common.utils import fileutil
from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics
from tests.lib.mock_cgroup_environment import mock_cgroup_v1_environment, mock_cgroup_v2_environment, \
mock_cgroup_hybrid_environment
from tests.lib.mock_environment import MockCommand
@ -85,7 +86,7 @@ class CGroupUtilTestCase(AgentTestCase):
class SystemdCgroupsApiTestCase(AgentTestCase):
def test_get_cgroup_api_raises_exception_when_systemd_mount_point_does_not_exist(self):
def test_get_cgroup_api_raises_exception_when_systemd_mountpoint_does_not_exist(self):
with mock_cgroup_v1_environment(self.tmp_dir):
# Mock os.path.exists to return False for the os.path.exists(CGROUP_FILE_SYSTEM_ROOT) check
with patch("os.path.exists", return_value=False):
@ -151,106 +152,16 @@ class SystemdCgroupsApiTestCase(AgentTestCase):
class SystemdCgroupsApiv1TestCase(AgentTestCase):
def test_get_unit_cgroup_paths_should_return_the_cgroup_v1_mount_points(self):
    # The returned paths are compared for equality: assertIn(actual, expected) would only verify that the
    # actual value is a substring of the expected path, so a truncated or empty path would also pass.
    with mock_cgroup_v1_environment(self.tmp_dir):
        cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service")
        self.assertEqual(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service',
                         "The mount point for the CPU controller is incorrect")
        self.assertEqual(memory, '/sys/fs/cgroup/memory/system.slice/extension.service',
                         "The mount point for the memory controller is incorrect")
def test_get_unit_cgroup_path_should_return_None_if_either_cgroup_v1_controller_not_mounted(self):
    # Paths are compared for equality; assertIn(actual, expected) would accept any substring of the expected path.
    with mock_cgroup_v1_environment(self.tmp_dir):
        # Memory controller not mounted
        with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_controller_root_paths', return_value=('/sys/fs/cgroup/cpu,cpuacct', None)):
            cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service")
            self.assertEqual(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service',
                             "The mount point for the CPU controller is incorrect")
            self.assertIsNone(memory,
                              "The mount point for the memory controller is None so unit cgroup should be None")

        # CPU controller not mounted
        with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_controller_root_paths', return_value=(None, '/sys/fs/cgroup/memory')):
            cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service")
            self.assertIsNone(cpu, "The mount point for the cpu controller is None so unit cgroup should be None")
            self.assertEqual(memory, '/sys/fs/cgroup/memory/system.slice/extension.service',
                             "The mount point for the memory controller is incorrect")
def test_get_process_cgroup_paths_should_return_the_cgroup_v1_mount_points(self):
    # Paths are compared for equality; assertIn(actual, expected) would accept any substring of the expected path.
    with mock_cgroup_v1_environment(self.tmp_dir):
        cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
        self.assertEqual(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service',
                         "The mount point for the CPU controller is incorrect")
        self.assertEqual(memory, '/sys/fs/cgroup/memory/system.slice/walinuxagent.service',
                         "The mount point for the memory controller is incorrect")
def test_get_process_cgroup_path_should_return_None_if_either_cgroup_v1_controller_not_mounted(self):
    # Paths are compared for equality; assertIn(actual, expected) would accept any substring of the expected path.
    with mock_cgroup_v1_environment(self.tmp_dir):
        # Memory controller not mounted
        with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_controller_root_paths', return_value=('/sys/fs/cgroup/cpu,cpuacct', None)):
            cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
            self.assertEqual(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service',
                             "The mount point for the CPU controller is incorrect")
            self.assertIsNone(memory,
                              "The mount point for the memory controller is None so unit cgroup should be None")

        # CPU controller not mounted
        with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_controller_root_paths', return_value=(None, '/sys/fs/cgroup/memory')):
            cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
            self.assertIsNone(cpu, "The mount point for the CPU controller is None so unit cgroup should be None")
            self.assertEqual(memory, '/sys/fs/cgroup/memory/system.slice/walinuxagent.service',
                             "The mount point for the memory controller is incorrect")
def test_get_process_cgroup_v1_path_should_return_None_if_either_relative_path_is_None(self):
    # Paths are compared for equality; assertIn(actual, expected) would accept any substring of the expected path.
    with mock_cgroup_v1_environment(self.tmp_dir):
        # No relative path for the memory controller
        with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_relative_paths', return_value=('system.slice/walinuxagent.service', None)):
            cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
            self.assertEqual(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service',
                             "The mount point for the CPU controller is incorrect")
            self.assertIsNone(memory,
                              "The relative cgroup path for the memory controller is None so unit cgroup should be None")

        # No relative path for the CPU controller
        with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_relative_paths', return_value=(None, 'system.slice/walinuxagent.service')):
            cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
            self.assertIsNone(cpu, "The relative cgroup path for the cpu controller is None so unit cgroup should be None")
            self.assertEqual(memory, '/sys/fs/cgroup/memory/system.slice/walinuxagent.service',
                             "The mount point for the memory controller is incorrect")
def test_get_controller_root_paths_should_return_the_cgroup_v1_controller_mount_points(self):
    # In the mocked cgroup v1 environment both controller roots are expected to be reported
    with mock_cgroup_v1_environment(self.tmp_dir):
        cpu_root, memory_root = get_cgroup_api().get_controller_root_paths()
        self.assertEqual(cpu_root, '/sys/fs/cgroup/cpu,cpuacct', "The root cgroup for the CPU controller is incorrect")
        self.assertEqual(memory_root, '/sys/fs/cgroup/memory', "The root cgroup for the memory controller is incorrect")
def test_get_controller_root_paths_should_return_None_if_either_controller_not_mounted(self):
    with mock_cgroup_v1_environment(self.tmp_dir):
        # Only the memory controller (plus an unrelated one) is mounted
        with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/sys/fs/cgroup/memory', 'io': '/sys/fs/cgroup/io'}):
            cpu, memory = get_cgroup_api().get_controller_root_paths()
            self.assertIsNone(cpu, "The CPU controller is not mounted, so the cpu controller path should be None")
            self.assertEqual(memory, '/sys/fs/cgroup/memory', "The root cgroup for the memory controller is incorrect")

        # Only the CPU controller (plus an unrelated one) is mounted
        with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct', 'io': '/sys/fs/cgroup/io'}):
            cpu, memory = get_cgroup_api().get_controller_root_paths()
            self.assertIsNone(memory, "The memory controller is not mounted, so the memory controller path should be None")
            self.assertEqual(cpu, '/sys/fs/cgroup/cpu,cpuacct', "The root cgroup for the cpu controller is incorrect")
def test_get_controller_mountpoints_should_return_only_supported_controllers(self):
    with mock_cgroup_v1_environment(self.tmp_dir):
        cgroup_api = get_cgroup_api()
        # Expected value comes from findmnt output in the mocked environment
        self.assertEqual(cgroup_api._get_controller_mountpoints(), {
            'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct',
            'memory': '/sys/fs/cgroup/memory'
        }, "The controller mountpoints are not correct")
def test_are_mountpoints_systemd_created_should_return_False_if_cpu_or_memory_are_not_systemd_mountpoints(self):
def test_are_mountpoints_systemd_created_should_return_False_if_mountpoints_are_not_systemd(self):
with mock_cgroup_v1_environment(self.tmp_dir):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/custom/mountpoint/path', 'memory': '/custom/mountpoint/path'}):
self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created())
@ -261,23 +172,123 @@ class SystemdCgroupsApiv1TestCase(AgentTestCase):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/custom/mountpoint/path'}):
self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created())
def test_are_mountpoints_systemd_created_should_return_True_if_mountpoints_are_systemd(self):
    with mock_cgroup_v1_environment(self.tmp_dir):
        with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup', 'memory': '/sys/fs/cgroup'}):
            self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created())

        with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct', 'memory': '/sys/fs/cgroup/memory'}):
            self.assertTrue(SystemdCgroupApiv1().are_mountpoints_systemd_created())

        # are_mountpoints_systemd_created should only check controllers which are mounted
        with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup'}):
            self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created())

        with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'}):
            self.assertTrue(SystemdCgroupApiv1().are_mountpoints_systemd_created())

        with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/sys/fs/cgroup'}):
            self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created())

        with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/sys/fs/cgroup/memory'}):
            self.assertTrue(SystemdCgroupApiv1().are_mountpoints_systemd_created())

        # No controller mounted at all
        with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={}):
            self.assertTrue(SystemdCgroupApiv1().are_mountpoints_systemd_created())
def test_get_relative_paths_for_process_should_return_the_cgroup_v1_relative_paths(self):
    # The relative paths are returned as a mapping keyed by controller name
    with mock_cgroup_v1_environment(self.tmp_dir):
        relative_paths = get_cgroup_api()._get_process_relative_controller_paths('self')
        self.assertEqual(len(relative_paths), 2)
        self.assertEqual(relative_paths.get('cpu,cpuacct'), "system.slice/walinuxagent.service", "The relative path for the CPU cgroup is incorrect")
        self.assertEqual(relative_paths.get('memory'), "system.slice/walinuxagent.service", "The relative path for the memory cgroup is incorrect")
def test_get_unit_cgroup_should_return_correct_paths_for_cgroup_v1(self):
    """
    In the mocked cgroup v1 environment, get_unit_cgroup() should return a CgroupV1 named after the
    requested cgroup, with the cpu,cpuacct and memory mountpoints and the unit's path under each of them.
    """
    expected_mountpoints = {
        'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct',
        'memory': '/sys/fs/cgroup/memory'
    }
    expected_controller_paths = {
        'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service',
        'memory': '/sys/fs/cgroup/memory/system.slice/extension.service'
    }

    with mock_cgroup_v1_environment(self.tmp_dir):
        api = get_cgroup_api()
        unit_cgroup = api.get_unit_cgroup(unit_name="extension.service", cgroup_name="extension")

        self.assertIsInstance(unit_cgroup, CgroupV1)
        self.assertEqual(unit_cgroup._cgroup_name, "extension")
        self.assertEqual(unit_cgroup._controller_mountpoints, expected_mountpoints)
        self.assertEqual(unit_cgroup._controller_paths, expected_controller_paths)
def test_get_unit_cgroup_should_return_only_mounted_controllers_v1(self):
    """
    When _get_controller_mountpoints is patched to report a subset of controllers (only cpu,cpuacct,
    then none), the CgroupV1 returned by get_unit_cgroup() should contain mountpoints and paths
    only for those controllers.
    """
    # Each scenario: (mountpoints expected on the cgroup, controller paths expected on the cgroup)
    scenarios = (
        (
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'},
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service'}
        ),
        ({}, {}),
    )

    with mock_cgroup_v1_environment(self.tmp_dir):
        for expected_mountpoints, expected_controller_paths in scenarios:
            # The mock gets its own copy so the expectation stays independent of the mocked value
            mocked_mountpoints = dict(expected_mountpoints)
            with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value=mocked_mountpoints):
                unit_cgroup = get_cgroup_api().get_unit_cgroup(unit_name="extension.service", cgroup_name="extension")

                self.assertIsInstance(unit_cgroup, CgroupV1)
                self.assertEqual(unit_cgroup._cgroup_name, "extension")
                self.assertEqual(unit_cgroup._controller_mountpoints, expected_mountpoints)
                self.assertEqual(unit_cgroup._controller_paths, expected_controller_paths)
def test_get_cgroup_from_relative_path_should_return_the_correct_paths_for_cgroup_v1(self):
    """
    In the mocked cgroup v1 environment, get_cgroup_from_relative_path() should return a CgroupV1
    whose controller paths are the given relative path appended to each controller mountpoint.
    """
    expected_mountpoints = {
        'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct',
        'memory': '/sys/fs/cgroup/memory'
    }
    expected_controller_paths = {
        'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/some/relative/path',
        'memory': '/sys/fs/cgroup/memory/some/relative/path'
    }

    with mock_cgroup_v1_environment(self.tmp_dir):
        api = get_cgroup_api()
        result = api.get_cgroup_from_relative_path(relative_path="some/relative/path", cgroup_name="test_cgroup")

        self.assertIsInstance(result, CgroupV1)
        self.assertEqual(result._cgroup_name, "test_cgroup")
        self.assertEqual(result._controller_mountpoints, expected_mountpoints)
        self.assertEqual(result._controller_paths, expected_controller_paths)
def test_get_cgroup_from_relative_path_should_return_only_mounted_controllers_v1(self):
    """
    When _get_controller_mountpoints is patched to report a subset of controllers (only cpu,cpuacct,
    then none), the CgroupV1 returned by get_cgroup_from_relative_path() should contain mountpoints
    and paths only for those controllers.
    """
    # Each scenario: (mountpoints expected on the cgroup, controller paths expected on the cgroup)
    scenarios = (
        (
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'},
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/some/relative/path'}
        ),
        ({}, {}),
    )

    with mock_cgroup_v1_environment(self.tmp_dir):
        for expected_mountpoints, expected_controller_paths in scenarios:
            # The mock gets its own copy so the expectation stays independent of the mocked value
            mocked_mountpoints = dict(expected_mountpoints)
            with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value=mocked_mountpoints):
                result = get_cgroup_api().get_cgroup_from_relative_path(relative_path="some/relative/path", cgroup_name="test_cgroup")

                self.assertIsInstance(result, CgroupV1)
                self.assertEqual(result._cgroup_name, "test_cgroup")
                self.assertEqual(result._controller_mountpoints, expected_mountpoints)
                self.assertEqual(result._controller_paths, expected_controller_paths)
def test_get_process_cgroup_should_return_the_correct_paths_for_cgroup_v1(self):
    """
    In the mocked cgroup v1 environment, get_process_cgroup() for "self" should return a CgroupV1
    whose controller paths point at system.slice/walinuxagent.service under each mountpoint.
    """
    expected_mountpoints = {
        'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct',
        'memory': '/sys/fs/cgroup/memory'
    }
    expected_controller_paths = {
        'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service',
        'memory': '/sys/fs/cgroup/memory/system.slice/walinuxagent.service'
    }

    with mock_cgroup_v1_environment(self.tmp_dir):
        api = get_cgroup_api()
        process_cgroup = api.get_process_cgroup(process_id="self", cgroup_name="walinuxagent")

        self.assertIsInstance(process_cgroup, CgroupV1)
        self.assertEqual(process_cgroup._cgroup_name, "walinuxagent")
        self.assertEqual(process_cgroup._controller_mountpoints, expected_mountpoints)
        self.assertEqual(process_cgroup._controller_paths, expected_controller_paths)
def test_get_process_cgroup_should_return_only_mounted_controllers_v1(self):
    """
    When _get_controller_mountpoints is patched to report a subset of controllers (only cpu,cpuacct,
    then none), the CgroupV1 returned by get_process_cgroup() should contain mountpoints and paths
    only for those controllers.
    """
    # Each scenario: (mountpoints expected on the cgroup, controller paths expected on the cgroup)
    scenarios = (
        (
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'},
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service'}
        ),
        ({}, {}),
    )

    with mock_cgroup_v1_environment(self.tmp_dir):
        for expected_mountpoints, expected_controller_paths in scenarios:
            # The mock gets its own copy so the expectation stays independent of the mocked value
            mocked_mountpoints = dict(expected_mountpoints)
            with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value=mocked_mountpoints):
                process_cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")

                self.assertIsInstance(process_cgroup, CgroupV1)
                self.assertEqual(process_cgroup._cgroup_name, "walinuxagent")
                self.assertEqual(process_cgroup._controller_mountpoints, expected_mountpoints)
                self.assertEqual(process_cgroup._controller_paths, expected_controller_paths)
def test_get_process_cgroup_should_return_only_mounted_process_controllers_v1(self):
    """
    When _get_process_relative_controller_paths is patched to report relative paths for a subset of
    controllers (only cpu,cpuacct, then none), the CgroupV1 returned by get_process_cgroup() should
    still list both mountpoints but only have controller paths for the reported controllers.
    """
    expected_mountpoints = {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct', 'memory': '/sys/fs/cgroup/memory'}

    # Each scenario: (relative paths returned by the mock, controller paths expected on the cgroup)
    scenarios = (
        ({'cpu,cpuacct': 'relative/path'}, {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/relative/path'}),
        ({}, {}),
    )

    with mock_cgroup_v1_environment(self.tmp_dir):
        for mocked_relative_paths, expected_controller_paths in scenarios:
            with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_process_relative_controller_paths', return_value=mocked_relative_paths):
                process_cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")

                self.assertIsInstance(process_cgroup, CgroupV1)
                self.assertEqual(process_cgroup._cgroup_name, "walinuxagent")
                self.assertEqual(process_cgroup._controller_mountpoints, expected_mountpoints)
                self.assertEqual(process_cgroup._controller_paths, expected_controller_paths)
@patch('time.sleep', side_effect=lambda _: mock_sleep())
def test_start_extension_cgroups_v1_command_should_return_the_command_output(self, _):
@ -354,17 +365,6 @@ class SystemdCgroupsApiv1TestCase(AgentTestCase):
class SystemdCgroupsApiv2TestCase(AgentTestCase):
def test_get_controllers_enabled_at_root_should_return_list_of_enabled_controllers(self):
with mock_cgroup_v2_environment(self.tmp_dir):
cgroup_api = get_cgroup_api()
self.assertEqual(cgroup_api._get_controllers_enabled_at_root('/sys/fs/cgroup'), ['cpuset', 'cpu', 'io', 'memory', 'pids'])
def test_get_controllers_enabled_at_root_should_return_empty_list_if_root_cgroup_path_is_None(self):
with mock_cgroup_v2_environment(self.tmp_dir):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=None):
cgroup_api = get_cgroup_api()
self.assertEqual(cgroup_api._controllers_enabled_at_root, [])
def test_get_root_cgroup_path_should_return_v2_cgroup_root(self):
with mock_cgroup_v2_environment(self.tmp_dir):
cgroup_api = get_cgroup_api()
@ -374,97 +374,113 @@ class SystemdCgroupsApiv2TestCase(AgentTestCase):
with mock_cgroup_v2_environment(self.tmp_dir) as env:
# Mock an environment which has multiple v2 mountpoints
env.add_command(MockCommand(r"^findmnt -t cgroup2 --noheadings$",
'''/custom/mountpoint/path1 cgroup2 cgroup2 rw,relatime
/sys/fs/cgroup cgroup2 cgroup2 rw,nosuid,nodev,noexec,relatime
/custom/mountpoint/path2 none cgroup2 rw,relatime
'''))
'''/custom/mountpoint/path1 cgroup2 cgroup2 rw,relatime
/sys/fs/cgroup cgroup2 cgroup2 rw,nosuid,nodev,noexec,relatime
/custom/mountpoint/path2 none cgroup2 rw,relatime
'''))
cgroup_api = get_cgroup_api()
self.assertEqual(cgroup_api._get_root_cgroup_path(), '/sys/fs/cgroup')
def test_get_unit_cgroup_paths_should_return_the_cgroup_v2_cgroup_paths(self):
def test_get_controllers_enabled_at_root_should_return_list_of_agent_supported_and_enabled_controllers(self):
with mock_cgroup_v2_environment(self.tmp_dir):
cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service")
self.assertEqual(cpu, '/sys/fs/cgroup/system.slice/extension.service',
"The cgroup path for the CPU controller is incorrect")
self.assertEqual(memory, '/sys/fs/cgroup/system.slice/extension.service',
"The cgroup path for the memory controller is incorrect")
cgroup_api = get_cgroup_api()
enabled_controllers = cgroup_api._get_controllers_enabled_at_root('/sys/fs/cgroup')
self.assertEqual(len(enabled_controllers), 2)
self.assertIn('cpu', enabled_controllers)
self.assertIn('memory', enabled_controllers)
def test_get_unit_cgroup_path_should_return_None_if_either_cgroup_v2_controller_not_enabled(self):
def test_get_controllers_enabled_at_root_should_return_empty_list_if_root_cgroup_path_is_empty(self):
with mock_cgroup_v2_environment(self.tmp_dir):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_controller_root_paths', return_value=('/sys/fs/cgroup', None)):
cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service")
self.assertIn(cpu, '/sys/fs/cgroup/system.slice/extension.service',
"The cgroup path for the CPU controller is incorrect")
self.assertIsNone(memory,
"The cgroup path for the memory controller is None so unit cgroup should be None")
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=""):
cgroup_api = get_cgroup_api()
self.assertEqual(cgroup_api._controllers_enabled_at_root, [])
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_controller_root_paths', return_value=(None, '/sys/fs/cgroup')):
cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service")
self.assertIsNone(cpu, "The cgroup path for the cpu controller is None so unit cgroup should be None")
self.assertIn(memory, '/sys/fs/cgroup/system.slice/extension.service',
"The cgroup path for the memory controller is incorrect")
def test_get_process_cgroup_paths_should_return_the_cgroup_v2_cgroup_paths(self):
def test_get_process_relative_cgroup_path_should_return_relative_path(self):
with mock_cgroup_v2_environment(self.tmp_dir):
cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
self.assertIn(cpu, '/sys/fs/cgroup/system.slice/walinuxagent.service',
"The cgroup path for the CPU controller is incorrect")
self.assertIn(memory, '/sys/fs/cgroup/system.slice/walinuxagent.service',
"The cgroup path for the memory controller is incorrect")
cgroup_api = get_cgroup_api()
self.assertEqual(cgroup_api._get_process_relative_cgroup_path(process_id="self"), "system.slice/walinuxagent.service")
def test_get_process_cgroup_path_should_return_None_if_either_cgroup_v2_controller_not_enabled(self):
def test_get_unit_cgroup_should_return_correct_paths_for_cgroup_v2(self):
with mock_cgroup_v2_environment(self.tmp_dir):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_controller_root_paths', return_value=('/sys/fs/cgroup', None)):
cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
self.assertIn(cpu, '/sys/fs/cgroup/system.slice/walinuxagent.service',
"The cgroup path for the CPU controller is incorrect")
self.assertIsNone(memory,
"The cgroup path for the memory controller is None so unit cgroup should be None")
cgroup = get_cgroup_api().get_unit_cgroup(unit_name="extension.service", cgroup_name="extension")
self.assertIsInstance(cgroup, CgroupV2)
self.assertEqual(cgroup._cgroup_name, "extension")
self.assertEqual(cgroup._root_cgroup_path, "/sys/fs/cgroup")
self.assertEqual(cgroup._cgroup_path, "/sys/fs/cgroup/system.slice/extension.service")
self.assertEqual(len(cgroup._enabled_controllers), 2)
self.assertIn('cpu', cgroup._enabled_controllers)
self.assertIn('memory', cgroup._enabled_controllers)
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_controller_root_paths', return_value=(None, '/sys/fs/cgroup')):
cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
self.assertIsNone(cpu, "The cgroup path for the CPU controller is None so unit cgroup should be None")
self.assertIn(memory, '/sys/fs/cgroup/system.slice/walinuxagent.service',
"The cgroup path for the memory controller is incorrect")
def test_get_process_cgroup_v2_path_should_return_None_if_relative_path_is_None(self):
def test_get_unit_cgroup_should_return_empty_paths_if_root_path_empty_v2(self):
with mock_cgroup_v2_environment(self.tmp_dir):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_process_cgroup_relative_paths', return_value=(None, None)):
cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
self.assertIsNone(cpu, "The relative cgroup path for the cpu controller is None so unit cgroup should be None")
self.assertIsNone(memory,
"The relative cgroup path for the memory controller is None so unit cgroup should be None")
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=""):
cgroup = get_cgroup_api().get_unit_cgroup(unit_name="extension.service", cgroup_name="extension")
self.assertIsInstance(cgroup, CgroupV2)
self.assertEqual(cgroup._cgroup_name, "extension")
self.assertEqual(cgroup._root_cgroup_path, "")
self.assertEqual(cgroup._cgroup_path, "")
self.assertEqual(len(cgroup._enabled_controllers), 0)
def test_get_controller_root_paths_should_return_the_cgroup_v2_root_cgroup_path(self):
def test_get_unit_cgroup_should_return_only_enabled_controllers_v2(self):
with mock_cgroup_v2_environment(self.tmp_dir):
cpu, memory = get_cgroup_api().get_controller_root_paths()
self.assertEqual(cpu, '/sys/fs/cgroup', "The root cgroup for the CPU controller is incorrect")
self.assertEqual(memory, '/sys/fs/cgroup', "The root cgroup for the memory controller is incorrect")
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_controllers_enabled_at_root', return_value=['cpu']):
cgroup = get_cgroup_api().get_unit_cgroup(unit_name="extension.service", cgroup_name="extension")
self.assertIsInstance(cgroup, CgroupV2)
self.assertEqual(cgroup._cgroup_name, "extension")
self.assertEqual(cgroup._root_cgroup_path, "/sys/fs/cgroup")
self.assertEqual(cgroup._cgroup_path, "/sys/fs/cgroup/system.slice/extension.service")
self.assertEqual(len(cgroup._enabled_controllers), 1)
self.assertIn('cpu', cgroup._enabled_controllers)
def test_get_controller_root_paths_should_return_None_if_root_cgroup_path_is_None(self):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_controllers_enabled_at_root', return_value=[]):
cgroup = get_cgroup_api().get_unit_cgroup(unit_name="extension.service", cgroup_name="extension")
self.assertIsInstance(cgroup, CgroupV2)
self.assertEqual(cgroup._cgroup_name, "extension")
self.assertEqual(cgroup._root_cgroup_path, "/sys/fs/cgroup")
self.assertEqual(cgroup._cgroup_path, "/sys/fs/cgroup/system.slice/extension.service")
self.assertEqual(len(cgroup._enabled_controllers), 0)
def test_get_cgroup_from_relative_path_should_return_the_correct_paths_for_cgroup_v2(self):
with mock_cgroup_v2_environment(self.tmp_dir):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=None):
cpu, memory = get_cgroup_api().get_controller_root_paths()
self.assertIsNone(cpu, "The root cgroup path is None, so the CPU controller path should be None")
self.assertIsNone(memory, "The root cgroup path is None, so the memory controller path should be None")
cgroup = get_cgroup_api().get_cgroup_from_relative_path(relative_path="some/relative/path", cgroup_name="test_cgroup")
self.assertIsInstance(cgroup, CgroupV2)
self.assertEqual(cgroup._cgroup_name, "test_cgroup")
self.assertEqual(cgroup._root_cgroup_path, "/sys/fs/cgroup")
self.assertEqual(cgroup._cgroup_path, "/sys/fs/cgroup/some/relative/path")
self.assertEqual(len(cgroup._enabled_controllers), 2)
self.assertIn('cpu', cgroup._enabled_controllers)
self.assertIn('memory', cgroup._enabled_controllers)
def test_get_controller_root_paths_should_return_None_if_either_controller_not_enabled(self):
def test_get_cgroup_from_relative_path_should_return_empty_paths_if_root_path_empty_v2(self):
with mock_cgroup_v2_environment(self.tmp_dir):
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_controllers_enabled_at_root', return_value=['io', 'memory']):
cpu, memory = get_cgroup_api().get_controller_root_paths()
self.assertIsNone(cpu, "The CPU controller is not enabled, so the CPU controller path should be None")
self.assertEqual(memory, '/sys/fs/cgroup', "The root cgroup for the memory controller is incorrect")
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=""):
cgroup = get_cgroup_api().get_cgroup_from_relative_path(relative_path="some/relative/path", cgroup_name="test_cgroup")
self.assertIsInstance(cgroup, CgroupV2)
self.assertEqual(cgroup._cgroup_name, "test_cgroup")
self.assertEqual(cgroup._root_cgroup_path, "")
self.assertEqual(cgroup._cgroup_path, "")
self.assertEqual(len(cgroup._enabled_controllers), 0)
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_controllers_enabled_at_root', return_value=['cpu', 'io']):
cpu, memory = get_cgroup_api().get_controller_root_paths()
self.assertEqual(cpu, '/sys/fs/cgroup', "The root cgroup for the CPU controller is incorrect")
self.assertIsNone(memory, "The memory controller is not enabled, so the memory controller path should be None")
def test_get_cpu_and_memory_cgroup_relative_paths_for_process_should_return_the_cgroup_v2_relative_paths(self):
def test_get_process_cgroup_should_return_the_correct_paths_for_cgroup_v2(self):
with mock_cgroup_v2_environment(self.tmp_dir):
cpu, memory = get_cgroup_api().get_process_cgroup_relative_paths('self')
self.assertEqual(cpu, "system.slice/walinuxagent.service", "The relative path for the CPU cgroup is incorrect")
self.assertEqual(memory, "system.slice/walinuxagent.service", "The relative memory for the CPU cgroup is incorrect")
cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
self.assertIsInstance(cgroup, CgroupV2)
self.assertEqual(cgroup._cgroup_name, "walinuxagent")
self.assertEqual(cgroup._root_cgroup_path, "/sys/fs/cgroup")
self.assertEqual(cgroup._cgroup_path, "/sys/fs/cgroup/system.slice/walinuxagent.service")
self.assertEqual(len(cgroup._enabled_controllers), 2)
self.assertIn('cpu', cgroup._enabled_controllers)
self.assertIn('memory', cgroup._enabled_controllers)
def test_get_process_cgroup_should_return_empty_paths_if_root_path_empty_v2(self):
    """
    When _get_root_cgroup_path is patched to return an empty string, get_process_cgroup() should
    still return a CgroupV2, but with empty root and cgroup paths and no enabled controllers.
    """
    root_path_target = 'azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path'

    with mock_cgroup_v2_environment(self.tmp_dir):
        with patch(root_path_target, return_value=""):
            api = get_cgroup_api()
            process_cgroup = api.get_process_cgroup(process_id="self", cgroup_name="walinuxagent")

            self.assertIsInstance(process_cgroup, CgroupV2)
            self.assertEqual(process_cgroup._cgroup_name, "walinuxagent")
            self.assertEqual(process_cgroup._root_cgroup_path, "")
            self.assertEqual(process_cgroup._cgroup_path, "")
            self.assertEqual(len(process_cgroup._enabled_controllers), 0)
class SystemdCgroupsApiMockedFileSystemTestCase(_MockedFileSystemTestCase):
@ -483,3 +499,176 @@ class SystemdCgroupsApiMockedFileSystemTestCase(_MockedFileSystemTestCase):
self.assertEqual(legacy_cgroups, 2, "cleanup_legacy_cgroups() did not find all the expected cgroups")
self.assertFalse(os.path.exists(legacy_cpu_cgroup), "cleanup_legacy_cgroups() did not remove the CPU legacy cgroup")
self.assertFalse(os.path.exists(legacy_memory_cgroup), "cleanup_legacy_cgroups() did not remove the memory legacy cgroup")
class CgroupsApiv1TestCase(AgentTestCase):
    """
    Tests for the cgroup object returned by get_cgroup_api().get_process_cgroup() in a mocked cgroup v1 environment.
    """
    # Targets patched by the tests below
    _MOUNTPOINTS_TARGET = 'azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints'
    _RELATIVE_PATHS_TARGET = 'azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_process_relative_controller_paths'
    _PROCS_PATH_TARGET = 'azurelinuxagent.ga.cgroupapi.CgroupV1.get_controller_procs_path'

    # Controller paths the tests expect for the agent's cgroup
    _CPU_PATH = "/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service"
    _MEMORY_PATH = "/sys/fs/cgroup/memory/system.slice/walinuxagent.service"

    @staticmethod
    def _get_agent_cgroup():
        """
        Returns the cgroup of the current process, named "walinuxagent". Must be called inside the mocked
        environment (and inside any patch that should affect how the cgroup is built).
        """
        return get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")

    def _assert_controller_metrics(self, metrics, expected):
        """
        Verifies that 'metrics' has one entry per item in 'expected', a sequence of (type, path) tuples,
        in the same order, and that each entry is named "walinuxagent".
        """
        self.assertEqual(len(metrics), len(expected))
        for actual, (expected_type, expected_path) in zip(metrics, expected):
            self.assertIsInstance(actual, expected_type)
            self.assertEqual(actual.name, "walinuxagent")
            self.assertEqual(actual.path, expected_path)

    def test_get_supported_controllers_returns_v1_controllers(self):
        with mock_cgroup_v1_environment(self.tmp_dir):
            supported = self._get_agent_cgroup().get_supported_controllers()

            self.assertEqual(len(supported), 2)
            for controller in ('cpu,cpuacct', 'memory'):
                self.assertIn(controller, supported)

    def test_check_in_expected_slice_returns_True_if_all_paths_in_expected_slice(self):
        with mock_cgroup_v1_environment(self.tmp_dir):
            # Unpatched environment
            agent_cgroup = self._get_agent_cgroup()
            self.assertTrue(agent_cgroup.check_in_expected_slice(expected_slice='system.slice'))

            # Only one controller reported for the process, then none at all
            for relative_paths in ({'cpu,cpuacct': 'system.slice/walinuxagent.service'}, {}):
                with patch(self._RELATIVE_PATHS_TARGET, return_value=relative_paths):
                    agent_cgroup = self._get_agent_cgroup()
                    self.assertTrue(agent_cgroup.check_in_expected_slice(expected_slice='system.slice'))

    def test_check_in_expected_slice_returns_False_if_any_paths_not_in_expected_slice(self):
        # Each scenario: (relative paths returned by the mock, slice passed to check_in_expected_slice)
        patched_scenarios = (
            ({'cpu,cpuacct': 'system.slice/walinuxagent.service', 'memory': 'user.slice/walinuxagent.service'}, 'user.slice'),
            ({'cpu,cpuacct': '', 'memory': ''}, 'system.slice'),
        )

        with mock_cgroup_v1_environment(self.tmp_dir):
            # Unpatched environment
            agent_cgroup = self._get_agent_cgroup()
            self.assertFalse(agent_cgroup.check_in_expected_slice(expected_slice='user.slice'))

            for relative_paths, slice_name in patched_scenarios:
                with patch(self._RELATIVE_PATHS_TARGET, return_value=relative_paths):
                    agent_cgroup = self._get_agent_cgroup()
                    self.assertFalse(agent_cgroup.check_in_expected_slice(expected_slice=slice_name))

    def test_get_controller_metrics_returns_all_supported_controllers_v1(self):
        with mock_cgroup_v1_environment(self.tmp_dir):
            metrics = self._get_agent_cgroup().get_controller_metrics()

            self._assert_controller_metrics(metrics, [(CpuMetrics, self._CPU_PATH), (MemoryMetrics, self._MEMORY_PATH)])

    def test_get_controller_metrics_returns_only_mounted_controllers_v1(self):
        # Each scenario: (mountpoints returned by the mock, (type, path) of the metrics expected)
        scenarios = (
            ({'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'}, [(CpuMetrics, self._CPU_PATH)]),
            ({'memory': '/sys/fs/cgroup/memory'}, [(MemoryMetrics, self._MEMORY_PATH)]),
            ({}, []),
        )

        with mock_cgroup_v1_environment(self.tmp_dir):
            for mountpoints, expected_metrics in scenarios:
                with patch(self._MOUNTPOINTS_TARGET, return_value=mountpoints):
                    metrics = self._get_agent_cgroup().get_controller_metrics()

                    self._assert_controller_metrics(metrics, expected_metrics)

    def test_get_controller_metrics_returns_only_controllers_at_expected_path_v1(self):
        # Each scenario: (relative paths returned by the mock, (type, path) of the metrics expected)
        scenarios = (
            ({'cpu,cpuacct': 'system.slice/walinuxagent.service', 'memory': 'unexpected/path'}, [(CpuMetrics, self._CPU_PATH)]),
            ({'cpu,cpuacct': 'unexpected/path', 'memory': 'unexpected/path'}, []),
        )

        with mock_cgroup_v1_environment(self.tmp_dir):
            for relative_paths, expected_metrics in scenarios:
                with patch(self._RELATIVE_PATHS_TARGET, return_value=relative_paths):
                    agent_cgroup = self._get_agent_cgroup()
                    metrics = agent_cgroup.get_controller_metrics(expected_relative_path="system.slice/walinuxagent.service")

                    self._assert_controller_metrics(metrics, expected_metrics)

    def test_get_procs_path_returns_correct_path_v1(self):
        expected_procs_paths = (
            ('cpu,cpuacct', "/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service/cgroup.procs"),
            ('memory', "/sys/fs/cgroup/memory/system.slice/walinuxagent.service/cgroup.procs"),
        )

        with mock_cgroup_v1_environment(self.tmp_dir):
            agent_cgroup = self._get_agent_cgroup()

            for controller_name, expected_path in expected_procs_paths:
                self.assertEqual(agent_cgroup.get_controller_procs_path(controller=controller_name), expected_path)

    def test_get_processes_returns_processes_at_all_controller_paths_v1(self):
        with mock_cgroup_v1_environment(self.tmp_dir):
            process_ids = self._get_agent_cgroup().get_processes()

            self.assertEqual(len(process_ids), 3)
            for expected_pid in (123, 234, 345):
                self.assertIn(expected_pid, process_ids)

    def test_get_processes_returns_empty_list_if_no_controllers_mounted_v1(self):
        with mock_cgroup_v1_environment(self.tmp_dir):
            with patch(self._MOUNTPOINTS_TARGET, return_value={}):
                process_ids = self._get_agent_cgroup().get_processes()

                self.assertIsInstance(process_ids, list)
                self.assertEqual(len(process_ids), 0)

    def test_get_processes_returns_empty_list_if_procs_path_empty_v1(self):
        with mock_cgroup_v1_environment(self.tmp_dir):
            with patch(self._PROCS_PATH_TARGET, return_value=""):
                process_ids = self._get_agent_cgroup().get_processes()

                self.assertIsInstance(process_ids, list)
                self.assertEqual(len(process_ids), 0)
class CgroupsApiv2TestCase(AgentTestCase):
    """
    Tests for the cgroup object returned by get_cgroup_api().get_process_cgroup() in a mocked cgroup v2 environment.
    """
    # Targets patched by the tests below
    _ROOT_PATH_TARGET = 'azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path'
    _RELATIVE_PATH_TARGET = 'azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_process_relative_cgroup_path'

    @staticmethod
    def _get_agent_cgroup():
        """
        Returns the cgroup of the current process, named "walinuxagent". Must be called inside the mocked
        environment (and inside any patch that should affect how the cgroup is built).
        """
        return get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")

    def test_get_supported_controllers_returns_v2_controllers(self):
        with mock_cgroup_v2_environment(self.tmp_dir):
            supported = self._get_agent_cgroup().get_supported_controllers()

            self.assertEqual(len(supported), 2)
            for controller in ('cpu', 'memory'):
                self.assertIn(controller, supported)

    def test_check_in_expected_slice_returns_True_if_cgroup_path_in_expected_slice(self):
        with mock_cgroup_v2_environment(self.tmp_dir):
            agent_cgroup = self._get_agent_cgroup()
            self.assertTrue(agent_cgroup.check_in_expected_slice(expected_slice='system.slice'))

    def test_check_in_expected_slice_returns_False_if_cgroup_path_not_in_expected_slice(self):
        with mock_cgroup_v2_environment(self.tmp_dir):
            # Unpatched environment, checked against a different slice
            agent_cgroup = self._get_agent_cgroup()
            self.assertFalse(agent_cgroup.check_in_expected_slice(expected_slice='user.slice'))

            # Empty relative path for the process
            with patch(self._RELATIVE_PATH_TARGET, return_value=""):
                agent_cgroup = self._get_agent_cgroup()
                self.assertFalse(agent_cgroup.check_in_expected_slice(expected_slice='system.slice'))

    def test_get_procs_path_returns_empty_if_root_cgroup_empty_v2(self):
        with mock_cgroup_v2_environment(self.tmp_dir):
            with patch(self._ROOT_PATH_TARGET, return_value=""):
                actual_path = self._get_agent_cgroup().get_procs_path()

                self.assertEqual(actual_path, "")

    def test_get_procs_path_returns_correct_path_v2(self):
        with mock_cgroup_v2_environment(self.tmp_dir):
            actual_path = self._get_agent_cgroup().get_procs_path()

            self.assertEqual(actual_path, "/sys/fs/cgroup/system.slice/walinuxagent.service/cgroup.procs")

    def test_get_processes_returns_processes_at_all_controller_paths_v2(self):
        with mock_cgroup_v2_environment(self.tmp_dir):
            process_ids = self._get_agent_cgroup().get_processes()

            self.assertEqual(len(process_ids), 3)
            for expected_pid in (123, 234, 345):
                self.assertIn(expected_pid, process_ids)

    def test_get_processes_returns_empty_list_if_root_cgroup_empty_v2(self):
        with mock_cgroup_v2_environment(self.tmp_dir):
            with patch(self._ROOT_PATH_TARGET, return_value=""):
                process_ids = self._get_agent_cgroup().get_processes()

                self.assertEqual(len(process_ids), 0)

Просмотреть файл

@ -27,7 +27,7 @@ import time
import threading
from azurelinuxagent.common import conf
from azurelinuxagent.ga.cgroup import AGENT_NAME_TELEMETRY, MetricsCounter, MetricValue, MetricsCategory, CpuCgroup
from azurelinuxagent.ga.controllermetrics import AGENT_NAME_TELEMETRY, MetricsCounter, MetricValue, MetricsCategory, CpuMetrics
from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator, DisableCgroups
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
from azurelinuxagent.common.event import WALAEventOperation
@ -272,7 +272,7 @@ class CGroupConfiguratorSystemdTestCase(AgentTestCase):
CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/' \
'azure-vmextensions-Microsoft.CPlat.Extension.slice'] = \
CpuCgroup('Microsoft.CPlat.Extension',
CpuMetrics('Microsoft.CPlat.Extension',
'/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/azure-vmextensions-Microsoft.CPlat.Extension.slice')
configurator.remove_extension_slice(extension_name="Microsoft.CPlat.Extension")
@ -369,10 +369,10 @@ class CGroupConfiguratorSystemdTestCase(AgentTestCase):
configurator.setup_extension_slice(extension_name=extension_name, cpu_quota=5)
configurator.set_extension_services_cpu_memory_quota(service_list)
CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service'] = \
CpuCgroup('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service')
CpuMetrics('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service')
CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/' \
'azure-vmextensions-Microsoft.CPlat.Extension.slice'] = \
CpuCgroup('Microsoft.CPlat.Extension',
CpuMetrics('Microsoft.CPlat.Extension',
'/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/azure-vmextensions-Microsoft.CPlat.Extension.slice')
configurator.disable("UNIT TEST", DisableCgroups.ALL)
@ -717,7 +717,7 @@ class CGroupConfiguratorSystemdTestCase(AgentTestCase):
with self._get_cgroup_configurator() as configurator:
with patch("os.path.exists") as mock_path:
mock_path.return_value = True
CGroupsTelemetry.track_cgroup(CpuCgroup('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service'))
CGroupsTelemetry.track_cgroup(CpuMetrics('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service'))
configurator.stop_tracking_extension_services_cgroups(service_list)
tracked = CGroupsTelemetry._tracked
@ -776,7 +776,7 @@ class CGroupConfiguratorSystemdTestCase(AgentTestCase):
with patch("os.path.exists") as mock_path:
mock_path.side_effect = side_effect
CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service'] = \
CpuCgroup('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service')
CpuMetrics('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service')
configurator.stop_tracking_unit_cgroups("extension.service")
tracked = CGroupsTelemetry._tracked
@ -911,7 +911,7 @@ exit 0
agent_processes = [os.getppid(), os.getpid()] + agent_command_processes + [start_extension.systemd_run_pid]
other_processes = [1, get_completed_process()] + extension_processes
with patch("azurelinuxagent.ga.cgroupapi._SystemdCgroupApi.get_processes_in_cgroup", return_value=agent_processes + other_processes):
with patch("azurelinuxagent.ga.cgroupapi.CgroupV1.get_processes", return_value=agent_processes + other_processes):
with self.assertRaises(CGroupsException) as context_manager:
configurator._check_processes_in_agent_cgroup()
@ -1012,7 +1012,7 @@ exit 0
with self.assertRaises(AgentMemoryExceededException) as context_manager:
with self._get_cgroup_configurator() as configurator:
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_tracked_metrics") as tracked_metrics:
with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_tracked_metrics") as tracked_metrics:
tracked_metrics.return_value = metrics
configurator.check_agent_memory_usage()

Просмотреть файл

@ -19,7 +19,7 @@ import os
import random
import time
from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup
from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
from azurelinuxagent.common.utils import fileutil
from tests.lib.tools import AgentTestCase, data_dir, patch
@ -105,10 +105,10 @@ class TestCGroupsTelemetry(AgentTestCase):
@staticmethod
def _track_new_extension_cgroups(num_extensions):
for i in range(num_extensions):
dummy_cpu_cgroup = CpuCgroup("dummy_extension_{0}".format(i), "dummy_cpu_path_{0}".format(i))
dummy_cpu_cgroup = CpuMetrics("dummy_extension_{0}".format(i), "dummy_cpu_path_{0}".format(i))
CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)
dummy_memory_cgroup = MemoryCgroup("dummy_extension_{0}".format(i), "dummy_memory_path_{0}".format(i))
dummy_memory_cgroup = MemoryMetrics("dummy_extension_{0}".format(i), "dummy_memory_path_{0}".format(i))
CGroupsTelemetry.track_cgroup(dummy_memory_cgroup)
def _assert_cgroups_are_tracked(self, num_extensions):
@ -136,12 +136,12 @@ class TestCGroupsTelemetry(AgentTestCase):
self._track_new_extension_cgroups(num_extensions)
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage") as patch_get_memory_max_usage:
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage:
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage:
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.try_swap_memory_usage") as patch_try_swap_memory_usage:
with patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage") as patch_get_cpu_usage:
with patch("azurelinuxagent.ga.cgroup.CGroup.is_active") as patch_is_active:
with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage") as patch_get_memory_max_usage:
with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage") as patch_get_memory_usage:
with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage") as patch_get_memory_usage:
with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.try_swap_memory_usage") as patch_try_swap_memory_usage:
with patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage") as patch_get_cpu_usage:
with patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") as patch_is_active:
patch_is_active.return_value = True
current_cpu = 30
@ -163,10 +163,10 @@ class TestCGroupsTelemetry(AgentTestCase):
self.assertEqual(len(metrics), num_extensions * num_of_metrics_per_extn_expected)
self._assert_polled_metrics_equal(metrics, current_cpu, current_memory, current_max_memory, current_swap_memory)
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage", side_effect=raise_ioerror)
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage", side_effect=raise_ioerror)
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage", side_effect=raise_ioerror)
@patch("azurelinuxagent.ga.cgroup.CGroup.is_active", return_value=False)
@patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage", side_effect=raise_ioerror)
@patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage", side_effect=raise_ioerror)
@patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage", side_effect=raise_ioerror)
@patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active", return_value=False)
def test_telemetry_polling_with_inactive_cgroups(self, *_):
num_extensions = 5
no_extensions_expected = 0 # pylint: disable=unused-variable
@ -182,10 +182,10 @@ class TestCGroupsTelemetry(AgentTestCase):
self.assertEqual(len(metrics), 0)
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage")
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage")
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage")
@patch("azurelinuxagent.ga.cgroup.CGroup.is_active")
@patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage")
@patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage")
@patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage")
@patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active")
def test_telemetry_polling_with_changing_cgroups_state(self, patch_is_active, patch_get_cpu_usage, # pylint: disable=unused-argument
patch_get_mem, patch_get_max_mem, *args):
num_extensions = 5
@ -274,11 +274,11 @@ class TestCGroupsTelemetry(AgentTestCase):
CGroupsTelemetry.poll_all_tracked()
self.assertEqual(expected_call_count, patch_periodic_warn.call_count)
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.try_swap_memory_usage")
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage")
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage")
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage")
@patch("azurelinuxagent.ga.cgroup.CGroup.is_active")
@patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.try_swap_memory_usage")
@patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage")
@patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage")
@patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage")
@patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active")
def test_telemetry_calculations(self, patch_is_active, patch_get_cpu_usage, patch_get_memory_usage, patch_get_memory_max_usage, patch_try_memory_swap_usage,
*args): # pylint: disable=unused-argument
num_polls = 10
@ -321,13 +321,13 @@ class TestCGroupsTelemetry(AgentTestCase):
self.assertFalse(CGroupsTelemetry.is_tracked("not_present_cpu_dummy_path"))
self.assertFalse(CGroupsTelemetry.is_tracked("not_present_memory_dummy_path"))
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage", side_effect=raise_ioerror)
@patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage", side_effect=raise_ioerror)
def test_process_cgroup_metric_with_no_memory_cgroup_mounted(self, *args): # pylint: disable=unused-argument
num_extensions = 5
self._track_new_extension_cgroups(num_extensions)
with patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage") as patch_get_cpu_usage:
with patch("azurelinuxagent.ga.cgroup.CGroup.is_active") as patch_is_active:
with patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage") as patch_get_cpu_usage:
with patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") as patch_is_active:
patch_is_active.return_value = True
current_cpu = 30
@ -341,16 +341,16 @@ class TestCGroupsTelemetry(AgentTestCase):
self.assertEqual(len(metrics), num_extensions * 1) # Only CPU populated
self._assert_polled_metrics_equal(metrics, current_cpu, 0, 0, 0)
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage", side_effect=raise_ioerror)
@patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage", side_effect=raise_ioerror)
def test_process_cgroup_metric_with_no_cpu_cgroup_mounted(self, *args): # pylint: disable=unused-argument
num_extensions = 5
self._track_new_extension_cgroups(num_extensions)
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage") as patch_get_memory_max_usage:
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage:
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.try_swap_memory_usage") as patch_try_swap_memory_usage:
with patch("azurelinuxagent.ga.cgroup.CGroup.is_active") as patch_is_active:
with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage") as patch_get_memory_max_usage:
with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage") as patch_get_memory_usage:
with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.try_swap_memory_usage") as patch_try_swap_memory_usage:
with patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") as patch_is_active:
patch_is_active.return_value = True
current_memory = 209715200
@ -367,14 +367,14 @@ class TestCGroupsTelemetry(AgentTestCase):
self.assertEqual(len(metrics), num_extensions * 3)
self._assert_polled_metrics_equal(metrics, 0, current_memory, current_max_memory, current_swap_memory)
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage", side_effect=raise_ioerror)
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage", side_effect=raise_ioerror)
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage", side_effect=raise_ioerror)
@patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage", side_effect=raise_ioerror)
@patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage", side_effect=raise_ioerror)
@patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage", side_effect=raise_ioerror)
def test_extension_telemetry_not_sent_for_empty_perf_metrics(self, *args): # pylint: disable=unused-argument
num_extensions = 5
self._track_new_extension_cgroups(num_extensions)
with patch("azurelinuxagent.ga.cgroup.CGroup.is_active") as patch_is_active:
with patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") as patch_is_active:
patch_is_active.return_value = False
poll_count = 1
@ -383,9 +383,9 @@ class TestCGroupsTelemetry(AgentTestCase):
metrics = CGroupsTelemetry.poll_all_tracked()
self.assertEqual(0, len(metrics))
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage")
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_throttled_time")
@patch("azurelinuxagent.ga.cgroup.CGroup.is_active")
@patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage")
@patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_throttled_time")
@patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active")
def test_cgroup_telemetry_should_not_report_cpu_negative_value(self, patch_is_active, path_get_throttled_time, patch_get_cpu_usage):
num_polls = 5
@ -396,7 +396,7 @@ class TestCGroupsTelemetry(AgentTestCase):
cpu_percent_values.append(-1)
cpu_throttled_values = [random.randint(0, 60 * 60) for _ in range(num_polls)]
dummy_cpu_cgroup = CpuCgroup("dummy_extension_name", "dummy_cpu_path")
dummy_cpu_cgroup = CpuMetrics("dummy_extension_name", "dummy_cpu_path")
CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)
self.assertEqual(1, len(CGroupsTelemetry._tracked))

Просмотреть файл

@ -18,7 +18,7 @@ import contextlib
import os
from azurelinuxagent.common import logger, conf
from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup, MetricValue
from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics, MetricValue
from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator
from azurelinuxagent.common.logger import Logger
from azurelinuxagent.common.protocol.util import ProtocolUtil
@ -197,8 +197,8 @@ def _create_log_collector_monitor_handler(iterations=1):
monitor_log_collector.join()
cgroups = [
CpuCgroup("test", "dummy_cpu_path"),
MemoryCgroup("test", "dummy_memory_path")
CpuMetrics("test", "dummy_cpu_path"),
MemoryMetrics("test", "dummy_memory_path")
]
monitor_log_collector = get_log_collector_monitor_handler(cgroups)
monitor_log_collector.run_and_wait = run_and_wait

Просмотреть файл

@ -22,7 +22,7 @@ import os
import random
import shutil
from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup, MetricsCounter, CounterNotFound
from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics, MetricsCounter, CounterNotFound
from azurelinuxagent.common.exception import CGroupsException
from azurelinuxagent.common.osutil import get_osutil
from azurelinuxagent.common.utils import fileutil
@ -36,35 +36,35 @@ def consume_cpu_time():
return waste
class TestCGroup(AgentTestCase):
class TestControllerMetrics(AgentTestCase):
def test_is_active(self):
test_cgroup = CpuCgroup("test_extension", self.tmp_dir)
self.assertEqual(False, test_cgroup.is_active())
test_metrics = CpuMetrics("test_extension", self.tmp_dir)
self.assertEqual(False, test_metrics.is_active())
with open(os.path.join(self.tmp_dir, "tasks"), mode="wb") as tasks:
tasks.write(str(1000).encode())
self.assertEqual(True, test_cgroup.is_active())
self.assertEqual(True, test_metrics.is_active())
@patch("azurelinuxagent.common.logger.periodic_warn")
def test_is_active_file_not_present(self, patch_periodic_warn):
test_cgroup = CpuCgroup("test_extension", self.tmp_dir)
self.assertEqual(False, test_cgroup.is_active())
test_metrics = CpuMetrics("test_extension", self.tmp_dir)
self.assertEqual(False, test_metrics.is_active())
test_cgroup = MemoryCgroup("test_extension", os.path.join(self.tmp_dir, "this_cgroup_does_not_exist"))
self.assertEqual(False, test_cgroup.is_active())
test_metrics = MemoryMetrics("test_extension", os.path.join(self.tmp_dir, "this_cgroup_does_not_exist"))
self.assertEqual(False, test_metrics.is_active())
self.assertEqual(0, patch_periodic_warn.call_count)
@patch("azurelinuxagent.common.logger.periodic_warn")
def test_is_active_incorrect_file(self, patch_periodic_warn):
open(os.path.join(self.tmp_dir, "tasks"), mode="wb").close()
test_cgroup = CpuCgroup("test_extension", os.path.join(self.tmp_dir, "tasks"))
self.assertEqual(False, test_cgroup.is_active())
test_metrics = CpuMetrics("test_extension", os.path.join(self.tmp_dir, "tasks"))
self.assertEqual(False, test_metrics.is_active())
self.assertEqual(1, patch_periodic_warn.call_count)
class TestCpuCgroup(AgentTestCase):
class TestCpuMetrics(AgentTestCase):
@classmethod
def setUpClass(cls):
AgentTestCase.setUpClass()
@ -96,147 +96,147 @@ class TestCpuCgroup(AgentTestCase):
def setUp(self):
AgentTestCase.setUp(self)
TestCpuCgroup.mock_read_file_map.clear()
TestCpuMetrics.mock_read_file_map.clear()
def test_initialize_cpu_usage_should_set_current_cpu_usage(self):
cgroup = CpuCgroup("test", "/sys/fs/cgroup/cpu/system.slice/test")
metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test")
TestCpuCgroup.mock_read_file_map = {
TestCpuMetrics.mock_read_file_map = {
"/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"),
os.path.join(cgroup.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0")
os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0")
}
cgroup.initialize_cpu_usage()
metrics.initialize_cpu_usage()
self.assertEqual(cgroup._current_cgroup_cpu, 63763)
self.assertEqual(cgroup._current_system_cpu, 5496872)
self.assertEqual(metrics._current_cgroup_cpu, 63763)
self.assertEqual(metrics._current_system_cpu, 5496872)
def test_get_cpu_usage_should_return_the_cpu_usage_since_its_last_invocation(self):
osutil = get_osutil()
cgroup = CpuCgroup("test", "/sys/fs/cgroup/cpu/system.slice/test")
metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test")
TestCpuCgroup.mock_read_file_map = {
TestCpuMetrics.mock_read_file_map = {
"/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"),
os.path.join(cgroup.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0")
os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0")
}
cgroup.initialize_cpu_usage()
metrics.initialize_cpu_usage()
TestCpuCgroup.mock_read_file_map = {
TestCpuMetrics.mock_read_file_map = {
"/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t1"),
os.path.join(cgroup.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t1")
os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t1")
}
cpu_usage = cgroup.get_cpu_usage()
cpu_usage = metrics.get_cpu_usage()
self.assertEqual(cpu_usage, round(100.0 * 0.000307697876885 * osutil.get_processor_cores(), 3))
TestCpuCgroup.mock_read_file_map = {
TestCpuMetrics.mock_read_file_map = {
"/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t2"),
os.path.join(cgroup.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t2")
os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t2")
}
cpu_usage = cgroup.get_cpu_usage()
cpu_usage = metrics.get_cpu_usage()
self.assertEqual(cpu_usage, round(100.0 * 0.000445181085968 * osutil.get_processor_cores(), 3))
def test_initialize_cpu_usage_should_set_the_cgroup_usage_to_0_when_the_cgroup_does_not_exist(self):
cgroup = CpuCgroup("test", "/sys/fs/cgroup/cpu/system.slice/test")
metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test")
io_error_2 = IOError()
io_error_2.errno = errno.ENOENT # "No such directory"
TestCpuCgroup.mock_read_file_map = {
TestCpuMetrics.mock_read_file_map = {
"/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"),
os.path.join(cgroup.path, "cpuacct.stat"): io_error_2
os.path.join(metrics.path, "cpuacct.stat"): io_error_2
}
cgroup.initialize_cpu_usage()
metrics.initialize_cpu_usage()
self.assertEqual(cgroup._current_cgroup_cpu, 0)
self.assertEqual(cgroup._current_system_cpu, 5496872) # check the system usage just for test sanity
self.assertEqual(metrics._current_cgroup_cpu, 0)
self.assertEqual(metrics._current_system_cpu, 5496872) # check the system usage just for test sanity
def test_initialize_cpu_usage_should_raise_an_exception_when_called_more_than_once(self):
cgroup = CpuCgroup("test", "/sys/fs/cgroup/cpu/system.slice/test")
metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test")
TestCpuCgroup.mock_read_file_map = {
TestCpuMetrics.mock_read_file_map = {
"/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"),
os.path.join(cgroup.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0")
os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0")
}
cgroup.initialize_cpu_usage()
metrics.initialize_cpu_usage()
with self.assertRaises(CGroupsException):
cgroup.initialize_cpu_usage()
metrics.initialize_cpu_usage()
def test_get_cpu_usage_should_raise_an_exception_when_initialize_cpu_usage_has_not_been_invoked(self):
cgroup = CpuCgroup("test", "/sys/fs/cgroup/cpu/system.slice/test")
metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test")
with self.assertRaises(CGroupsException):
cpu_usage = cgroup.get_cpu_usage() # pylint: disable=unused-variable
cpu_usage = metrics.get_cpu_usage() # pylint: disable=unused-variable
def test_get_throttled_time_should_return_the_value_since_its_last_invocation(self):
test_file = os.path.join(self.tmp_dir, "cpu.stat")
shutil.copyfile(os.path.join(data_dir, "cgroups", "cpu.stat_t0"), test_file) # throttled_time = 50
cgroup = CpuCgroup("test", self.tmp_dir)
cgroup.initialize_cpu_usage()
metrics = CpuMetrics("test", self.tmp_dir)
metrics.initialize_cpu_usage()
shutil.copyfile(os.path.join(data_dir, "cgroups", "cpu.stat_t1"), test_file) # throttled_time = 2075541442327
throttled_time = cgroup.get_cpu_throttled_time()
throttled_time = metrics.get_cpu_throttled_time()
self.assertEqual(throttled_time, float(2075541442327 - 50) / 1E9, "The value of throttled_time is incorrect")
def test_get_tracked_metrics_should_return_the_throttled_time(self):
cgroup = CpuCgroup("test", os.path.join(data_dir, "cgroups"))
cgroup.initialize_cpu_usage()
metrics = CpuMetrics("test", os.path.join(data_dir, "cgroups"))
metrics.initialize_cpu_usage()
def find_throttled_time(metrics):
return [m for m in metrics if m.counter == MetricsCounter.THROTTLED_TIME]
found = find_throttled_time(cgroup.get_tracked_metrics())
found = find_throttled_time(metrics.get_tracked_metrics())
self.assertTrue(len(found) == 0, "get_tracked_metrics should not fetch the throttled time by default. Found: {0}".format(found))
found = find_throttled_time(cgroup.get_tracked_metrics(track_throttled_time=True))
found = find_throttled_time(metrics.get_tracked_metrics(track_throttled_time=True))
self.assertTrue(len(found) == 1, "get_tracked_metrics should have fetched the throttled time by default. Found: {0}".format(found))
class TestMemoryCgroup(AgentTestCase):
class TestMemoryMetrics(AgentTestCase):
def test_get_metrics(self):
test_mem_cg = MemoryCgroup("test_extension", os.path.join(data_dir, "cgroups", "memory_mount"))
test_mem_metrics = MemoryMetrics("test_extension", os.path.join(data_dir, "cgroups", "memory_mount"))
memory_usage = test_mem_cg.get_memory_usage()
memory_usage = test_mem_metrics.get_memory_usage()
self.assertEqual(150000, memory_usage)
max_memory_usage = test_mem_cg.get_max_memory_usage()
max_memory_usage = test_mem_metrics.get_max_memory_usage()
self.assertEqual(1000000, max_memory_usage)
swap_memory_usage = test_mem_cg.try_swap_memory_usage()
swap_memory_usage = test_mem_metrics.try_swap_memory_usage()
self.assertEqual(20000, swap_memory_usage)
def test_get_metrics_when_files_not_present(self):
test_mem_cg = MemoryCgroup("test_extension", os.path.join(data_dir, "cgroups"))
test_mem_metrics = MemoryMetrics("test_extension", os.path.join(data_dir, "cgroups"))
with self.assertRaises(IOError) as e:
test_mem_cg.get_memory_usage()
test_mem_metrics.get_memory_usage()
self.assertEqual(e.exception.errno, errno.ENOENT)
with self.assertRaises(IOError) as e:
test_mem_cg.get_max_memory_usage()
test_mem_metrics.get_max_memory_usage()
self.assertEqual(e.exception.errno, errno.ENOENT)
with self.assertRaises(IOError) as e:
test_mem_cg.try_swap_memory_usage()
test_mem_metrics.try_swap_memory_usage()
self.assertEqual(e.exception.errno, errno.ENOENT)
def test_get_memory_usage_counters_not_found(self):
test_mem_cg = MemoryCgroup("test_extension", os.path.join(data_dir, "cgroups", "missing_memory_counters"))
test_mem_metrics = MemoryMetrics("test_extension", os.path.join(data_dir, "cgroups", "missing_memory_counters"))
with self.assertRaises(CounterNotFound):
test_mem_cg.get_memory_usage()
test_mem_metrics.get_memory_usage()
swap_memory_usage = test_mem_cg.try_swap_memory_usage()
swap_memory_usage = test_mem_metrics.try_swap_memory_usage()
self.assertEqual(0, swap_memory_usage)

Просмотреть файл

@ -21,7 +21,7 @@ import random
import string
from azurelinuxagent.common import event, logger
from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup, MetricValue, _REPORT_EVERY_HOUR
from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics, MetricValue, _REPORT_EVERY_HOUR
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
from azurelinuxagent.common.event import EVENTS_DIRECTORY
from azurelinuxagent.common.protocol.healthservice import HealthService
@ -222,7 +222,7 @@ class TestExtensionMetricsDataTelemetry(AgentTestCase):
self.assertEqual(0, patch_add_metric.call_count)
@patch('azurelinuxagent.common.event.EventLogger.add_metric')
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage")
@patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage")
@patch('azurelinuxagent.common.logger.Logger.periodic_warn')
def test_send_extension_metrics_telemetry_handling_memory_cgroup_exceptions_errno2(self, patch_periodic_warn, # pylint: disable=unused-argument
patch_get_memory_usage,
@ -231,14 +231,14 @@ class TestExtensionMetricsDataTelemetry(AgentTestCase):
ioerror.errno = 2
patch_get_memory_usage.side_effect = ioerror
CGroupsTelemetry._tracked["/test/path"] = MemoryCgroup("cgroup_name", "/test/path")
CGroupsTelemetry._tracked["/test/path"] = MemoryMetrics("_cgroup_name", "/test/path")
PollResourceUsage().run()
self.assertEqual(0, patch_periodic_warn.call_count)
self.assertEqual(0, patch_add_metric.call_count) # No metrics should be sent.
@patch('azurelinuxagent.common.event.EventLogger.add_metric')
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage")
@patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage")
@patch('azurelinuxagent.common.logger.Logger.periodic_warn')
def test_send_extension_metrics_telemetry_handling_cpu_cgroup_exceptions_errno2(self, patch_periodic_warn, # pylint: disable=unused-argument
patch_cpu_usage, patch_add_metric,
@ -247,7 +247,7 @@ class TestExtensionMetricsDataTelemetry(AgentTestCase):
ioerror.errno = 2
patch_cpu_usage.side_effect = ioerror
CGroupsTelemetry._tracked["/test/path"] = CpuCgroup("cgroup_name", "/test/path")
CGroupsTelemetry._tracked["/test/path"] = CpuMetrics("_cgroup_name", "/test/path")
PollResourceUsage().run()
self.assertEqual(0, patch_periodic_warn.call_count)

Просмотреть файл

@ -122,7 +122,9 @@ _MOCKED_COMMANDS_HYBRID = [
_MOCKED_FILES_V1 = [
("/proc/self/cgroup", os.path.join(data_dir, 'cgroups', 'v1', 'proc_self_cgroup')),
(r"/proc/[0-9]+/cgroup", os.path.join(data_dir, 'cgroups', 'v1', 'proc_pid_cgroup'))
(r"/proc/[0-9]+/cgroup", os.path.join(data_dir, 'cgroups', 'v1', 'proc_pid_cgroup')),
(r"/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service/cgroup.procs", os.path.join(data_dir, 'cgroups', 'cgroup.procs')),
(r"/sys/fs/cgroup/memory/system.slice/walinuxagent.service/cgroup.procs", os.path.join(data_dir, 'cgroups', 'cgroup.procs'))
]
_MOCKED_FILES_V2 = [
@ -130,7 +132,8 @@ _MOCKED_FILES_V2 = [
(r"/proc/[0-9]+/cgroup", os.path.join(data_dir, 'cgroups', 'v2', 'proc_pid_cgroup')),
("/sys/fs/cgroup/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control')),
("/sys/fs/cgroup/azure.slice/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control')),
("/sys/fs/cgroup/azure.slice/walinuxagent.service/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control_empty'))
("/sys/fs/cgroup/azure.slice/walinuxagent.service/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control_empty')),
(r"/sys/fs/cgroup/system.slice/walinuxagent.service/cgroup.procs", os.path.join(data_dir, 'cgroups', 'cgroup.procs'))
]
_MOCKED_FILES_HYBRID = [

Просмотреть файл

@ -24,8 +24,9 @@ from azurelinuxagent.common import conf
from azurelinuxagent.common.exception import CGroupsException
from azurelinuxagent.ga import logcollector, cgroupconfigurator
from azurelinuxagent.common.utils import fileutil
from azurelinuxagent.ga.cgroupapi import get_cgroup_api, InvalidCgroupMountpointException
from azurelinuxagent.ga.cgroupapi import InvalidCgroupMountpointException, CgroupV1
from azurelinuxagent.ga.collect_logs import CollectLogsHandler
from azurelinuxagent.ga.controllermetrics import AGENT_LOG_COLLECTOR
from tests.lib.mock_cgroup_environment import mock_cgroup_v1_environment
from tests.lib.tools import AgentTestCase, data_dir, Mock, patch
@ -247,16 +248,24 @@ class TestAgent(AgentTestCase):
CollectLogsHandler.enable_monitor_cgroups_check()
mock_log_collector.run = Mock()
# Mock cgroup paths so process is in the log collector slice
def mock_cgroup_paths(*args, **kwargs):
if args and args[0] == "self":
relative_path = "{0}/{1}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE, logcollector.CGROUPS_UNIT)
return (relative_path, relative_path)
return get_cgroup_api().get_process_cgroup_relative_paths(*args, **kwargs)
# Mock cgroup so process is in the log collector slice
def mock_cgroup(*args, **kwargs):  # pylint: disable=W0613
    """
    Replacement for SystemdCgroupApiv1.get_process_cgroup used by this test.

    Ignores its arguments and always returns a CgroupV1 named AGENT_LOG_COLLECTOR whose
    cpu,cpuacct and memory controller paths both sit under the log collector slice/unit.
    """
    unit_relative_path = "{0}/{1}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE, logcollector.CGROUPS_UNIT)
    # Both controllers are reported as mounted at their usual v1 locations.
    mountpoints = {
        'cpu,cpuacct': "/sys/fs/cgroup/cpu,cpuacct",
        'memory': "/sys/fs/cgroup/memory"
    }
    # Each controller path is the mountpoint followed by the log collector unit path.
    paths = {
        'cpu,cpuacct': "/sys/fs/cgroup/cpu,cpuacct/{0}".format(unit_relative_path),
        'memory': "/sys/fs/cgroup/memory/{0}".format(unit_relative_path)
    }
    return CgroupV1(cgroup_name=AGENT_LOG_COLLECTOR, controller_mountpoints=mountpoints, controller_paths=paths)
with mock_cgroup_v1_environment(self.tmp_dir):
with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_paths",
side_effect=mock_cgroup_paths):
with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup",
side_effect=mock_cgroup):
agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf"))
agent.collect_logs(is_full_mode=True)
@ -296,17 +305,26 @@ class TestAgent(AgentTestCase):
CollectLogsHandler.enable_monitor_cgroups_check()
mock_log_collector.run = Mock()
# Mock cgroup paths so process is in incorrect slice
def mock_cgroup_paths(*args, **kwargs):
if args and args[0] == "self":
return ("NOT_THE_CORRECT_PATH", "NOT_THE_CORRECT_PATH")
return get_cgroup_api().get_process_cgroup_relative_paths(*args, **kwargs)
# Mock cgroup so process is in incorrect slice
def mock_cgroup(*args, **kwargs):  # pylint: disable=W0613
    """
    Replacement for SystemdCgroupApiv1.get_process_cgroup used by this test.

    Ignores its arguments and always returns a CgroupV1 named AGENT_LOG_COLLECTOR whose
    cpu,cpuacct and memory controller paths end in "NOT_THE_CORRECT_PATH", i.e. a location
    that is not the log collector slice.
    """
    wrong_relative_path = "NOT_THE_CORRECT_PATH"
    # Both controllers are reported as mounted at their usual v1 locations.
    mountpoints = {
        'cpu,cpuacct': "/sys/fs/cgroup/cpu,cpuacct",
        'memory': "/sys/fs/cgroup/memory"
    }
    # Each controller path is the mountpoint followed by the bogus relative path.
    paths = {
        'cpu,cpuacct': "/sys/fs/cgroup/cpu,cpuacct/{0}".format(wrong_relative_path),
        'memory': "/sys/fs/cgroup/memory/{0}".format(wrong_relative_path)
    }
    return CgroupV1(cgroup_name=AGENT_LOG_COLLECTOR, controller_mountpoints=mountpoints, controller_paths=paths)
def raise_on_sys_exit(*args):
    """
    Raise a RuntimeError instead of exiting; the test installs this as the side effect of sys.exit.

    The error carries the first positional argument when one is given, otherwise the text "Exiting".
    """
    if args:
        payload = args[0]
    else:
        payload = "Exiting"
    raise RuntimeError(payload)
with mock_cgroup_v1_environment(self.tmp_dir):
with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_paths", side_effect=mock_cgroup_paths):
with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup", side_effect=mock_cgroup):
agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf"))
with patch("sys.exit", side_effect=raise_on_sys_exit) as mock_exit:
@ -346,19 +364,25 @@ class TestAgent(AgentTestCase):
CollectLogsHandler.enable_monitor_cgroups_check()
mock_log_collector.run = Mock()
# Mock cgroup paths so process is in the log collector slice and cpu is not mounted
def mock_cgroup_paths(*args, **kwargs):
if args and args[0] == "self":
relative_path = "{0}/{1}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE, logcollector.CGROUPS_UNIT)
return (None, relative_path)
return get_cgroup_api().get_process_cgroup_relative_paths(*args, **kwargs)
# Mock cgroup so process is in the log collector slice and cpu is not mounted
def mock_cgroup(*args, **kwargs):  # pylint: disable=W0613
    """
    Replacement for SystemdCgroupApiv1.get_process_cgroup used by this test.

    Ignores its arguments and always returns a CgroupV1 named AGENT_LOG_COLLECTOR that lists
    only the memory controller, with its path under the log collector slice/unit; no
    cpu,cpuacct entry is provided in either mapping.
    """
    unit_relative_path = "{0}/{1}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE, logcollector.CGROUPS_UNIT)
    # Only the memory controller is present in the mountpoints and paths.
    mountpoints = {
        'memory': "/sys/fs/cgroup/memory"
    }
    paths = {
        'memory': "/sys/fs/cgroup/memory/{0}".format(unit_relative_path)
    }
    return CgroupV1(cgroup_name=AGENT_LOG_COLLECTOR, controller_mountpoints=mountpoints, controller_paths=paths)
def raise_on_sys_exit(*args):
    """
    Replacement for sys.exit used as a mock side effect: raises RuntimeError instead of exiting.
    The exception carries the first positional argument when one is given, otherwise "Exiting".
    """
    if not args:
        raise RuntimeError("Exiting")
    raise RuntimeError(args[0])
with mock_cgroup_v1_environment(self.tmp_dir):
with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_paths",
side_effect=mock_cgroup_paths):
with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup",
side_effect=mock_cgroup):
agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf"))
with patch("sys.exit", side_effect=raise_on_sys_exit) as mock_exit:

Просмотреть файл

@ -7,7 +7,7 @@ from assertpy import assert_that, fail
from azurelinuxagent.common.osutil import systemd
from azurelinuxagent.common.utils import shellutil
from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION
from azurelinuxagent.ga.cgroupapi import get_cgroup_api
from azurelinuxagent.ga.cgroupapi import get_cgroup_api, SystemdCgroupApiv1
from tests_e2e.tests.lib.agent_log import AgentLog
from tests_e2e.tests.lib.logging import log
from tests_e2e.tests.lib.retry import retry_if_false
@ -164,9 +164,14 @@ def check_log_message(message, after_timestamp=datetime.datetime.min):
return False
def get_unit_cgroup_paths(unit_name):
def get_unit_cgroup_proc_path(unit_name, controller):
"""
Returns the cgroup paths for the given unit
Returns the cgroup.procs path for the given unit and controller.
"""
cgroups_api = get_cgroup_api()
return cgroups_api.get_unit_cgroup_paths(unit_name)
unit_cgroup = cgroups_api.get_unit_cgroup(unit_name=unit_name, cgroup_name="test cgroup")
if isinstance(cgroups_api, SystemdCgroupApiv1):
return unit_cgroup.get_controller_procs_path(controller=controller)
else:
return unit_cgroup.get_procs_path()

Просмотреть файл

@ -18,14 +18,13 @@
# This script forces the process check by putting an unknown process in the agent's cgroup
import os
import subprocess
import datetime
from assertpy import fail
from azurelinuxagent.common.utils import shellutil
from tests_e2e.tests.lib.cgroup_helpers import check_agent_quota_disabled, check_log_message, get_unit_cgroup_paths, AGENT_SERVICE_NAME
from tests_e2e.tests.lib.cgroup_helpers import check_agent_quota_disabled, check_log_message, get_unit_cgroup_proc_path, AGENT_SERVICE_NAME
from tests_e2e.tests.lib.logging import log
from tests_e2e.tests.lib.retry import retry_if_false
@ -62,8 +61,8 @@ def disable_agent_cgroups_with_unknown_process(pid):
Note: The system may kick the added process out of the cgroups; keep adding it until the agent detects that process
"""
def unknown_process_found(cpu_cgroup):
cgroup_procs_path = os.path.join(cpu_cgroup, "cgroup.procs")
def unknown_process_found():
cgroup_procs_path = get_unit_cgroup_proc_path(AGENT_SERVICE_NAME, 'cpu,cpuacct')
log.info("Adding dummy process %s to cgroup.procs file %s", pid, cgroup_procs_path)
try:
with open(cgroup_procs_path, 'a') as f:
@ -81,9 +80,7 @@ def disable_agent_cgroups_with_unknown_process(pid):
pid)), attempts=3)
return found and retry_if_false(check_agent_quota_disabled, attempts=3)
cpu_cgroup, _ = get_unit_cgroup_paths(AGENT_SERVICE_NAME)
found: bool = retry_if_false(lambda: unknown_process_found(cpu_cgroup), attempts=3)
found: bool = retry_if_false(unknown_process_found, attempts=3)
if not found:
fail("The agent did not detect unknown process: {0}".format(pid))