зеркало из https://github.com/Azure/WALinuxAgent.git
refactor cgroup controllers (#3135)
* refactor cgroup controllers (#13) * Refactor Cgroup, CpuCgroup, MemoryCgroup to ControllerMetrics, CpuMetrics, MemoryMetrics * Create methods to get unit/process cgroup representation * Refactoring changes * Refactoring changes * Fix e2e test * Fix unintentional comment change * Remove unneeded comments * Clean up comments and make code more readable * Simplify get controller metrics * Clean up cgroupapi * Cleanup cgroup -> controllermetrics changes * Clean up cgroup configurator * Fix unit tests for agent.py * Fix cgroupapi tests * Fix cgroupconfigurator and tests * Rename controller metrics tests * Ignore pylint issues * Improve test coverage for cgroupapi * Rename cgroup to metrics * Update cgroup.procs to accurately represent file * Do not track metrics if controller is not mounted * We should set cpu quota before tracking cpu metrics * Pylint * address pr comments (#14) * Address Nag's comments * pyling * pylint * remove lambda (#15)
This commit is contained in:
Родитель
cc6501d6dd
Коммит
610e12b3f1
|
@ -31,7 +31,7 @@ import threading
|
|||
|
||||
from azurelinuxagent.common.exception import CGroupsException
|
||||
from azurelinuxagent.ga import logcollector, cgroupconfigurator
|
||||
from azurelinuxagent.ga.cgroup import AGENT_LOG_COLLECTOR, CpuCgroup, MemoryCgroup
|
||||
from azurelinuxagent.ga.controllermetrics import AGENT_LOG_COLLECTOR, CpuMetrics
|
||||
from azurelinuxagent.ga.cgroupapi import get_cgroup_api, log_cgroup_warning, InvalidCgroupMountpointException
|
||||
|
||||
import azurelinuxagent.common.conf as conf
|
||||
|
@ -208,8 +208,7 @@ class Agent(object):
|
|||
|
||||
# Check the cgroups unit
|
||||
log_collector_monitor = None
|
||||
cpu_cgroup_path = None
|
||||
memory_cgroup_path = None
|
||||
tracked_metrics = []
|
||||
if CollectLogsHandler.is_enabled_monitor_cgroups_check():
|
||||
try:
|
||||
cgroup_api = get_cgroup_api()
|
||||
|
@ -220,40 +219,27 @@ class Agent(object):
|
|||
log_cgroup_warning("Unable to determine which cgroup version to use: {0}".format(ustr(e)), send_event=True)
|
||||
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)
|
||||
|
||||
cpu_cgroup_path, memory_cgroup_path = cgroup_api.get_process_cgroup_paths("self")
|
||||
cpu_slice_matches = False
|
||||
memory_slice_matches = False
|
||||
if cpu_cgroup_path is not None:
|
||||
cpu_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in cpu_cgroup_path)
|
||||
if memory_cgroup_path is not None:
|
||||
memory_slice_matches = (cgroupconfigurator.LOGCOLLECTOR_SLICE in memory_cgroup_path)
|
||||
|
||||
if not cpu_slice_matches or not memory_slice_matches:
|
||||
log_cgroup_warning("The Log Collector process is not in the proper cgroups:", send_event=False)
|
||||
if not cpu_slice_matches:
|
||||
log_cgroup_warning("\tunexpected cpu slice: {0}".format(cpu_cgroup_path), send_event=False)
|
||||
if not memory_slice_matches:
|
||||
log_cgroup_warning("\tunexpected memory slice: {0}".format(memory_cgroup_path), send_event=False)
|
||||
log_collector_cgroup = cgroup_api.get_process_cgroup(process_id="self", cgroup_name=AGENT_LOG_COLLECTOR)
|
||||
tracked_metrics = log_collector_cgroup.get_controller_metrics()
|
||||
|
||||
if len(tracked_metrics) != len(log_collector_cgroup.get_supported_controllers()):
|
||||
log_cgroup_warning("At least one required controller is missing. The following controllers are required for the log collector to run: {0}".format(log_collector_cgroup.get_supported_controllers()))
|
||||
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)
|
||||
|
||||
def initialize_cgroups_tracking(cpu_cgroup_path, memory_cgroup_path):
|
||||
cpu_cgroup = CpuCgroup(AGENT_LOG_COLLECTOR, cpu_cgroup_path)
|
||||
msg = "Started tracking cpu cgroup {0}".format(cpu_cgroup)
|
||||
logger.info(msg)
|
||||
cpu_cgroup.initialize_cpu_usage()
|
||||
memory_cgroup = MemoryCgroup(AGENT_LOG_COLLECTOR, memory_cgroup_path)
|
||||
msg = "Started tracking memory cgroup {0}".format(memory_cgroup)
|
||||
logger.info(msg)
|
||||
return [cpu_cgroup, memory_cgroup]
|
||||
if not log_collector_cgroup.check_in_expected_slice(cgroupconfigurator.LOGCOLLECTOR_SLICE):
|
||||
log_cgroup_warning("The Log Collector process is not in the proper cgroups", send_event=False)
|
||||
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)
|
||||
|
||||
try:
|
||||
log_collector = LogCollector(is_full_mode)
|
||||
# Running log collector resource(CPU, Memory) monitoring only if agent starts the log collector.
|
||||
# Running log collector resource monitoring only if agent starts the log collector.
|
||||
# If Log collector start by any other means, then it will not be monitored.
|
||||
if CollectLogsHandler.is_enabled_monitor_cgroups_check():
|
||||
tracked_cgroups = initialize_cgroups_tracking(cpu_cgroup_path, memory_cgroup_path)
|
||||
log_collector_monitor = get_log_collector_monitor_handler(tracked_cgroups)
|
||||
for metric in tracked_metrics:
|
||||
if isinstance(metric, CpuMetrics):
|
||||
metric.initialize_cpu_usage()
|
||||
break
|
||||
log_collector_monitor = get_log_collector_monitor_handler(tracked_metrics)
|
||||
log_collector_monitor.run()
|
||||
archive = log_collector.collect_logs_and_get_archive()
|
||||
logger.info("Log collection successfully completed. Archive can be found at {0} "
|
||||
|
|
|
@ -24,7 +24,7 @@ import uuid
|
|||
|
||||
from azurelinuxagent.common import logger
|
||||
from azurelinuxagent.common.event import WALAEventOperation, add_event
|
||||
from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup
|
||||
from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics
|
||||
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
|
||||
from azurelinuxagent.common.conf import get_agent_pid_file_path
|
||||
from azurelinuxagent.common.exception import CGroupsException, ExtensionErrorCodes, ExtensionError, \
|
||||
|
@ -185,14 +185,14 @@ def get_cgroup_api():
|
|||
if available_unified_controllers != "":
|
||||
raise CGroupsException("Detected hybrid cgroup mode, but there are controllers available to be enabled in unified hierarchy: {0}".format(available_unified_controllers))
|
||||
|
||||
cgroup_api = SystemdCgroupApiv1()
|
||||
cgroup_api_v1 = SystemdCgroupApiv1()
|
||||
# Previously the agent supported users mounting cgroup v1 controllers in locations other than the systemd
|
||||
# default ('/sys/fs/cgroup'). The agent no longer supports this scenario. If either the cpu or memory
|
||||
# controller is mounted in a location other than the systemd default, raise Exception.
|
||||
if not cgroup_api.are_mountpoints_systemd_created():
|
||||
raise InvalidCgroupMountpointException("Expected cgroup controllers to be mounted at '{0}', but at least one is not. v1 mount points: \n{1}".format(CGROUP_FILE_SYSTEM_ROOT, json.dumps(cgroup_api.get_controller_root_paths())))
|
||||
# default ('/sys/fs/cgroup'). The agent no longer supports this scenario. If any agent supported controller is
|
||||
# mounted in a location other than the systemd default, raise Exception.
|
||||
if not cgroup_api_v1.are_mountpoints_systemd_created():
|
||||
raise InvalidCgroupMountpointException("Expected cgroup controllers to be mounted at '{0}', but at least one is not. v1 mount points: \n{1}".format(CGROUP_FILE_SYSTEM_ROOT, json.dumps(cgroup_api_v1.get_controller_mountpoints())))
|
||||
log_cgroup_info("Using cgroup v1 for resource enforcement and monitoring")
|
||||
return cgroup_api
|
||||
return cgroup_api_v1
|
||||
|
||||
raise CGroupsException("{0} has an unexpected file type: {1}".format(CGROUP_FILE_SYSTEM_ROOT, root_hierarchy_mode))
|
||||
|
||||
|
@ -202,7 +202,6 @@ class _SystemdCgroupApi(object):
|
|||
Cgroup interface via systemd. Contains common api implementations between cgroup v1 and v2.
|
||||
"""
|
||||
def __init__(self):
|
||||
self._agent_unit_name = None
|
||||
self._systemd_run_commands = []
|
||||
self._systemd_run_commands_lock = threading.RLock()
|
||||
|
||||
|
@ -213,55 +212,36 @@ class _SystemdCgroupApi(object):
|
|||
with self._systemd_run_commands_lock:
|
||||
return self._systemd_run_commands[:]
|
||||
|
||||
def get_unit_cgroup(self, unit_name, cgroup_name):
    """
    Cgroup version specific. Returns a representation of the unit cgroup.

    This base implementation always raises NotImplementedError.

    :param unit_name: The unit to return the cgroup of.
    :param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes.
    """
    raise NotImplementedError()
|
||||
|
||||
def get_cgroup_from_relative_path(self, relative_path, cgroup_name):
    """
    Cgroup version specific. Returns a representation of the cgroup at the provided relative path.

    This base implementation always raises NotImplementedError.

    :param relative_path: The relative path to return the cgroup of.
    :param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes.
    """
    raise NotImplementedError()
|
||||
|
||||
def get_process_cgroup(self, process_id, cgroup_name):
    """
    Cgroup version specific. Returns a representation of the process' cgroup.

    This base implementation always raises NotImplementedError.

    :param process_id: A numeric PID to return the cgroup of, or the string "self" to return the cgroup of the current process.
    :param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes.
    """
    raise NotImplementedError()
|
||||
|
||||
def log_root_paths(self):
    """
    Cgroup version specific. Logs the root paths of the cgroup filesystem/controllers.

    This base implementation always raises NotImplementedError.
    """
    raise NotImplementedError()
|
||||
|
||||
|
@ -279,11 +259,6 @@ class _SystemdCgroupApi(object):
|
|||
unit_not_found = "Unit {0} not found.".format(scope_name)
|
||||
return unit_not_found in stderr or scope_name not in stderr
|
||||
|
||||
@staticmethod
|
||||
def get_processes_in_cgroup(cgroup_path):
|
||||
with open(os.path.join(cgroup_path, "cgroup.procs"), "r") as cgroup_procs:
|
||||
return [int(pid) for pid in cgroup_procs.read().split()]
|
||||
|
||||
|
||||
class SystemdCgroupApiv1(_SystemdCgroupApi):
|
||||
"""
|
||||
|
@ -293,7 +268,8 @@ class SystemdCgroupApiv1(_SystemdCgroupApi):
|
|||
super(SystemdCgroupApiv1, self).__init__()
|
||||
self._cgroup_mountpoints = self._get_controller_mountpoints()
|
||||
|
||||
def _get_controller_mountpoints(self):
|
||||
@staticmethod
|
||||
def _get_controller_mountpoints():
|
||||
"""
|
||||
In v1, each controller is mounted at a different path. Use findmnt to get each path.
|
||||
|
||||
|
@ -304,7 +280,8 @@ class SystemdCgroupApiv1(_SystemdCgroupApi):
|
|||
/sys/fs/cgroup/cpu,cpuacct cgroup cgroup rw,nosuid,nodev,noexec,relatime,cpu,cpuacct
|
||||
etc
|
||||
|
||||
Returns a dictionary of the controller-path mappings.
|
||||
Returns a dictionary of the controller-path mappings. The dictionary only includes the controllers which are
|
||||
supported by the agent.
|
||||
"""
|
||||
mount_points = {}
|
||||
for line in shellutil.run_command(['findmnt', '-t', 'cgroup', '--noheadings']).splitlines():
|
||||
|
@ -315,51 +292,91 @@ class SystemdCgroupApiv1(_SystemdCgroupApi):
|
|||
if match is not None:
|
||||
path = match.group('path')
|
||||
controller = match.group('controller')
|
||||
if controller is not None and path is not None:
|
||||
if controller is not None and path is not None and controller in CgroupV1.get_supported_controllers():
|
||||
mount_points[controller] = path
|
||||
return mount_points
|
||||
|
||||
def get_controller_mountpoints(self):
    """
    Returns a dictionary of controller-mountpoint mappings.

    The object returned is the instance's own mapping, not a copy.
    """
    return self._cgroup_mountpoints
|
||||
|
||||
def are_mountpoints_systemd_created(self):
    """
    Systemd mounts each controller at '/sys/fs/cgroup/<controller>'. Returns True if all mounted controllers which
    are supported by the agent have mountpoints which match this pattern, False otherwise.

    The agent does not support cgroup usage if the default root systemd mountpoint (/sys/fs/cgroup) is not used.
    This method is used to check if any users are using non-systemd mountpoints. If they are, the agent drop-in
    files will be cleaned up in cgroupconfigurator.
    """
    # An empty mapping (no supported controller mounted) yields True: there is nothing that violates the pattern.
    for controller, mount_point in self._cgroup_mountpoints.items():
        if mount_point != os.path.join(CGROUP_FILE_SYSTEM_ROOT, controller):
            return False
    return True
|
||||
|
||||
@staticmethod
def _get_process_relative_controller_paths(process_id):
    """
    Returns the relative paths of the cgroup for the given process as a dict of controller-path mappings. The result
    only includes controllers which are supported.
    The contents of the /proc/{process_id}/cgroup file are similar to
        # cat /proc/1218/cgroup
        10:memory:/system.slice/walinuxagent.service
        3:cpu,cpuacct:/system.slice/walinuxagent.service
        etc

    :param process_id: A numeric PID to return the relative paths of, or the string "self" to return the relative paths of the current process.
    """
    controller_relative_paths = {}
    for line in fileutil.read_file("/proc/{0}/cgroup".format(process_id)).splitlines():
        match = re.match(r'\d+:(?P<controller>.+):(?P<path>.+)', line)
        if match is not None:
            controller = match.group('controller')
            # A path of exactly '/' is the controller's root cgroup; it is treated as "no relative path" and skipped.
            path = match.group('path').lstrip('/') if match.group('path') != '/' else None
            if path is not None and controller in CgroupV1.get_supported_controllers():
                controller_relative_paths[controller] = path

    return controller_relative_paths
|
||||
|
||||
def get_unit_cgroup(self, unit_name, cgroup_name):
    """
    Returns a CgroupV1 for the given systemd unit, built from the unit's "ControlGroup" property.

    :param unit_name: The unit to return the cgroup of.
    :param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes.
    """
    unit_cgroup_relative_path = systemd.get_unit_property(unit_name, "ControlGroup")
    unit_controller_paths = {}

    for controller, mountpoint in self._cgroup_mountpoints.items():
        # Drop the first character of the ControlGroup value (e.g. '/azure.slice/walinuxagent.service'); with a
        # leading '/' os.path.join would discard the mountpoint.
        unit_controller_paths[controller] = os.path.join(mountpoint, unit_cgroup_relative_path[1:])

    return CgroupV1(cgroup_name=cgroup_name, controller_mountpoints=self._cgroup_mountpoints,
                    controller_paths=unit_controller_paths)
|
||||
|
||||
def get_cgroup_from_relative_path(self, relative_path, cgroup_name):
    """
    Returns a CgroupV1 whose controller paths are the given relative path joined to each controller mountpoint.

    :param relative_path: The relative path to return the cgroup of.
    :param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes.
    """
    controller_paths = {}
    for controller, mountpoint in self._cgroup_mountpoints.items():
        controller_paths[controller] = os.path.join(mountpoint, relative_path)

    return CgroupV1(cgroup_name=cgroup_name, controller_mountpoints=self._cgroup_mountpoints,
                    controller_paths=controller_paths)
|
||||
|
||||
def get_process_cgroup(self, process_id, cgroup_name):
    """
    Returns a CgroupV1 for the given process.

    Only controllers that are both present in the mountpoint mapping and reported with a relative path for the
    process get an entry in the resulting controller paths.

    :param process_id: A numeric PID to return the cgroup of, or the string "self" to return the cgroup of the current process.
    :param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes.
    """
    relative_controller_paths = self._get_process_relative_controller_paths(process_id)
    process_controller_paths = {}

    for controller, mountpoint in self._cgroup_mountpoints.items():
        relative_controller_path = relative_controller_paths.get(controller)
        if relative_controller_path is not None:
            process_controller_paths[controller] = os.path.join(mountpoint, relative_controller_path)

    return CgroupV1(cgroup_name=cgroup_name, controller_mountpoints=self._cgroup_mountpoints,
                    controller_paths=process_controller_paths)
|
||||
|
||||
def log_root_paths(self):
    """
    Logs, for each controller supported by the agent, either its mountpoint or the fact that it is not mounted.
    """
    for controller in CgroupV1.get_supported_controllers():
        mount_point = self._cgroup_mountpoints.get(controller)
        if mount_point is None:
            log_cgroup_info("The {0} controller is not mounted".format(controller), send_event=False)
        else:
            log_cgroup_info("The {0} controller is mounted at {1}".format(controller, mount_point), send_event=False)
|
||||
|
||||
def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr,
|
||||
error_code=ExtensionErrorCodes.PluginUnknownFailure):
|
||||
|
@ -385,25 +402,14 @@ class SystemdCgroupApiv1(_SystemdCgroupApi):
|
|||
|
||||
log_cgroup_info("Started extension in unit '{0}'".format(scope_name), send_event=False)
|
||||
|
||||
cpu_cgroup = None
|
||||
cpu_metrics = None
|
||||
try:
|
||||
cgroup_relative_path = os.path.join('azure.slice/azure-vmextensions.slice', extension_slice_name)
|
||||
|
||||
cpu_cgroup_mountpoint, memory_cgroup_mountpoint = self.get_controller_root_paths()
|
||||
|
||||
if cpu_cgroup_mountpoint is None:
|
||||
log_cgroup_info("The CPU controller is not mounted; will not track resource usage", send_event=False)
|
||||
else:
|
||||
cpu_cgroup_path = os.path.join(cpu_cgroup_mountpoint, cgroup_relative_path)
|
||||
cpu_cgroup = CpuCgroup(extension_name, cpu_cgroup_path)
|
||||
CGroupsTelemetry.track_cgroup(cpu_cgroup)
|
||||
|
||||
if memory_cgroup_mountpoint is None:
|
||||
log_cgroup_info("The Memory controller is not mounted; will not track resource usage", send_event=False)
|
||||
else:
|
||||
memory_cgroup_path = os.path.join(memory_cgroup_mountpoint, cgroup_relative_path)
|
||||
memory_cgroup = MemoryCgroup(extension_name, memory_cgroup_path)
|
||||
CGroupsTelemetry.track_cgroup(memory_cgroup)
|
||||
cgroup = self.get_cgroup_from_relative_path(cgroup_relative_path, extension_name)
|
||||
for metrics in cgroup.get_controller_metrics():
|
||||
if isinstance(metrics, CpuMetrics):
|
||||
cpu_metrics = metrics
|
||||
CGroupsTelemetry.track_cgroup(metrics)
|
||||
|
||||
except IOError as e:
|
||||
if e.errno == 2: # 'No such file or directory'
|
||||
|
@ -415,7 +421,7 @@ class SystemdCgroupApiv1(_SystemdCgroupApi):
|
|||
# Wait for process completion or timeout
|
||||
try:
|
||||
return handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout,
|
||||
stderr=stderr, error_code=error_code, cpu_cgroup=cpu_cgroup)
|
||||
stderr=stderr, error_code=error_code, cpu_metrics=cpu_metrics)
|
||||
except ExtensionError as e:
|
||||
# The extension didn't terminate successfully. Determine whether it was due to systemd errors or
|
||||
# extension errors.
|
||||
|
@ -448,7 +454,7 @@ class SystemdCgroupApiv2(_SystemdCgroupApi):
|
|||
def __init__(self):
    super(SystemdCgroupApiv2, self).__init__()
    self._root_cgroup_path = self._get_root_cgroup_path()
    # An empty root path means the unified mountpoint could not be determined; no controllers are looked up then.
    self._controllers_enabled_at_root = self._get_controllers_enabled_at_root(self._root_cgroup_path) if self._root_cgroup_path != "" else []
|
||||
|
||||
@staticmethod
|
||||
def _get_root_cgroup_path():
|
||||
|
@ -459,7 +465,7 @@ class SystemdCgroupApiv2(_SystemdCgroupApi):
|
|||
$ findmnt -t cgroup2 --noheadings
|
||||
/sys/fs/cgroup cgroup2 cgroup2 rw,nosuid,nodev,noexec,relatime,nsdelegate,memory_recursiveprot
|
||||
|
||||
Returns None if the root cgroup cannot be determined from the output above.
|
||||
Returns empty string if the root cgroup cannot be determined from the output above.
|
||||
"""
|
||||
#
|
||||
for line in shellutil.run_command(['findmnt', '-t', 'cgroup2', '--noheadings']).splitlines():
|
||||
|
@ -470,7 +476,13 @@ class SystemdCgroupApiv2(_SystemdCgroupApi):
|
|||
root_cgroup_path = match.group('path')
|
||||
if root_cgroup_path is not None:
|
||||
return root_cgroup_path
|
||||
return None
|
||||
return ""
|
||||
|
||||
def get_root_cgroup_path(self):
    """
    Returns the unified cgroup mountpoint.
    """
    return self._root_cgroup_path
|
||||
|
||||
@staticmethod
|
||||
def _get_controllers_enabled_at_root(root_cgroup_path):
|
||||
|
@ -478,47 +490,229 @@ class SystemdCgroupApiv2(_SystemdCgroupApi):
|
|||
Returns a list of the controllers enabled at the root cgroup. The cgroup.subtree_control file at the root shows
|
||||
a space separated list of the controllers which are enabled to control resource distribution from the root
|
||||
cgroup to its children. If a controller is listed here, then that controller is available to enable in children
|
||||
cgroups.
|
||||
cgroups. Returns only the enabled controllers which are supported by the agent.
|
||||
|
||||
$ cat /sys/fs/cgroup/cgroup.subtree_control
|
||||
cpuset cpu io memory hugetlb pids rdma misc
|
||||
"""
|
||||
controllers_enabled_at_root = []
|
||||
enabled_controllers_file = os.path.join(root_cgroup_path, 'cgroup.subtree_control')
|
||||
if os.path.exists(enabled_controllers_file):
|
||||
controllers_enabled_at_root = fileutil.read_file(enabled_controllers_file).rstrip().split()
|
||||
return controllers_enabled_at_root
|
||||
return list(set(controllers_enabled_at_root) & set(CgroupV2.get_supported_controllers()))
|
||||
return []
|
||||
|
||||
@staticmethod
def _get_process_relative_cgroup_path(process_id):
    """
    Returns the relative path of the cgroup for the given process.
    The contents of the /proc/{process_id}/cgroup file are similar to
        # cat /proc/1218/cgroup
        0::/azure.slice/walinuxagent.service

    Returns an empty string if no '0::' line is found or if the path is exactly '/'.

    :param process_id: A numeric PID to return the relative path of, or the string "self" to return the relative path of the current process.
    """
    relative_path = ""
    for line in fileutil.read_file("/proc/{0}/cgroup".format(process_id)).splitlines():
        match = re.match(r'0::(?P<path>\S+)', line)
        if match is not None:
            relative_path = match.group('path').lstrip('/') if match.group('path') != '/' else ""

    return relative_path
|
||||
|
||||
def get_unit_cgroup(self, unit_name, cgroup_name):
    """
    Returns a CgroupV2 for the given systemd unit, built from the unit's "ControlGroup" property.

    The cgroup path is left as an empty string when the root cgroup path is empty.

    :param unit_name: The unit to return the cgroup of.
    :param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes.
    """
    unit_cgroup_relative_path = systemd.get_unit_property(unit_name, "ControlGroup")
    unit_cgroup_path = ""

    if self._root_cgroup_path != "":
        # Drop the first character of the ControlGroup value; with a leading '/' os.path.join would discard the root.
        unit_cgroup_path = os.path.join(self._root_cgroup_path, unit_cgroup_relative_path[1:])

    return CgroupV2(cgroup_name=cgroup_name, root_cgroup_path=self._root_cgroup_path,
                    cgroup_path=unit_cgroup_path, enabled_controllers=self._controllers_enabled_at_root)
|
||||
|
||||
def get_cgroup_from_relative_path(self, relative_path, cgroup_name):
    """
    Returns a CgroupV2 for the cgroup at the given path relative to the root cgroup path.

    The cgroup path is left as an empty string when the root cgroup path is empty.

    :param relative_path: The relative path to return the cgroup of.
    :param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes.
    """
    cgroup_path = ""
    if self._root_cgroup_path != "":
        cgroup_path = os.path.join(self._root_cgroup_path, relative_path)

    return CgroupV2(cgroup_name=cgroup_name, root_cgroup_path=self._root_cgroup_path,
                    cgroup_path=cgroup_path, enabled_controllers=self._controllers_enabled_at_root)
|
||||
|
||||
def get_process_cgroup(self, process_id, cgroup_name):
    """
    Returns a CgroupV2 for the given process.

    The cgroup path is left as an empty string when the root cgroup path is empty.

    :param process_id: A numeric PID to return the cgroup of, or the string "self" to return the cgroup of the current process.
    :param cgroup_name: A name to represent the cgroup. Used for logging/tracking purposes.
    """
    relative_path = self._get_process_relative_cgroup_path(process_id)
    cgroup_path = ""

    if self._root_cgroup_path != "":
        cgroup_path = os.path.join(self._root_cgroup_path, relative_path)

    return CgroupV2(cgroup_name=cgroup_name, root_cgroup_path=self._root_cgroup_path,
                    cgroup_path=cgroup_path, enabled_controllers=self._controllers_enabled_at_root)
|
||||
|
||||
def log_root_paths(self):
    """
    Logs the root cgroup path and, for each controller supported by the agent, whether it is enabled at the root
    cgroup.
    """
    log_cgroup_info("The root cgroup path is {0}".format(self._root_cgroup_path), send_event=False)
    for controller in CgroupV2.get_supported_controllers():
        if controller in self._controllers_enabled_at_root:
            log_cgroup_info("The {0} controller is enabled at the root cgroup".format(controller), send_event=False)
        else:
            log_cgroup_info("The {0} controller is not enabled at the root cgroup".format(controller), send_event=False)
|
||||
|
||||
def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr,
                            error_code=ExtensionErrorCodes.PluginUnknownFailure):
    # This implementation always raises NotImplementedError.
    raise NotImplementedError()
|
||||
|
||||
|
||||
class Cgroup(object):
    """
    Base representation of a cgroup. The version-specific operations below raise NotImplementedError here and are
    expected to be provided by subclasses.
    """
    MEMORY_CONTROLLER = "memory"

    def __init__(self, cgroup_name):
        """
        :param cgroup_name: The name of the cgroup. Used for logging/tracking purposes.
        """
        self._cgroup_name = cgroup_name

    @staticmethod
    def get_supported_controllers():
        """
        Cgroup version specific. Returns a list of the controllers which the agent supports.
        """
        raise NotImplementedError()

    def check_in_expected_slice(self, expected_slice):
        """
        Cgroup version specific. Returns True if the cgroup is in the expected slice, False otherwise.

        :param expected_slice: The slice the cgroup is expected to be in.
        """
        raise NotImplementedError()

    def get_controller_metrics(self, expected_relative_path=None):
        """
        Cgroup version specific. Returns a list of the metrics for the agent supported controllers which are
        mounted/enabled for the cgroup.

        :param expected_relative_path: The expected relative path of the cgroup. If provided, only metrics for controllers at this expected path will be returned.
        """
        raise NotImplementedError()

    def get_processes(self):
        """
        Cgroup version specific. Returns a list of all the process ids in the cgroup.
        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class CgroupV1(Cgroup):
|
||||
CPU_CONTROLLER = "cpu,cpuacct"
|
||||
|
||||
def __init__(self, cgroup_name, controller_mountpoints, controller_paths):
|
||||
"""
|
||||
:param cgroup_name: The name of the cgroup. Used for logging/tracking purposes.
|
||||
:param controller_mountpoints: A dictionary of controller-mountpoint mappings for each agent supported controller which is mounted.
|
||||
:param controller_paths: A dictionary of controller-path mappings for each agent supported controller which is mounted. The path represents the absolute path of the controller.
|
||||
"""
|
||||
super(CgroupV1, self).__init__(cgroup_name=cgroup_name)
|
||||
self._controller_mountpoints = controller_mountpoints
|
||||
self._controller_paths = controller_paths
|
||||
|
||||
@staticmethod
|
||||
def get_supported_controllers():
|
||||
return [CgroupV1.CPU_CONTROLLER, CgroupV1.MEMORY_CONTROLLER]
|
||||
|
||||
def check_in_expected_slice(self, expected_slice):
|
||||
in_expected_slice = True
|
||||
for controller, path in self._controller_paths.items():
|
||||
if expected_slice not in path:
|
||||
log_cgroup_warning("The {0} controller for the {1} cgroup is not mounted in the expected slice. Expected slice: {2}. Actual controller path: {3}".format(controller, self._cgroup_name, expected_slice, path), send_event=False)
|
||||
in_expected_slice = False
|
||||
|
||||
return in_expected_slice
|
||||
|
||||
def get_controller_metrics(self, expected_relative_path=None):
|
||||
metrics = []
|
||||
|
||||
for controller in self.get_supported_controllers():
|
||||
controller_metrics = None
|
||||
controller_path = self._controller_paths.get(controller)
|
||||
controller_mountpoint = self._controller_mountpoints.get(controller)
|
||||
|
||||
if controller_mountpoint is None:
|
||||
log_cgroup_warning("{0} controller is not mounted; will not track metrics".format(controller), send_event=False)
|
||||
continue
|
||||
|
||||
if controller_path is None:
|
||||
log_cgroup_warning("{0} is not mounted for the {1} cgroup; will not track metrics".format(controller, self._cgroup_name), send_event=False)
|
||||
continue
|
||||
|
||||
if expected_relative_path is not None:
|
||||
expected_path = os.path.join(controller_mountpoint, expected_relative_path)
|
||||
if controller_path != expected_path:
|
||||
log_cgroup_warning("The {0} controller is not mounted at the expected path for the {1} cgroup; will not track metrics. Actual cgroup path:[{2}] Expected:[{3}]".format(controller, self._cgroup_name, controller_path, expected_path), send_event=False)
|
||||
continue
|
||||
|
||||
if controller == self.CPU_CONTROLLER:
|
||||
controller_metrics = CpuMetrics(self._cgroup_name, controller_path)
|
||||
elif controller == self.MEMORY_CONTROLLER:
|
||||
controller_metrics = MemoryMetrics(self._cgroup_name, controller_path)
|
||||
|
||||
if controller_metrics is not None:
|
||||
msg = "{0} metrics for cgroup: {1}".format(controller, controller_metrics)
|
||||
log_cgroup_info(msg, send_event=False)
|
||||
metrics.append(controller_metrics)
|
||||
|
||||
return metrics
|
||||
|
||||
def get_controller_procs_path(self, controller):
    """
    Return the path of the cgroup.procs file of this cgroup for the given controller.

    :param controller: The controller name, used as key to look up this cgroup's controller path.
    :return: <controller path>/cgroup.procs, or an empty string when the path recorded for the
        controller is missing (None) or empty.
    """
    base_path = self._controller_paths.get(controller)
    if base_path is None or base_path == "":
        return ""
    return os.path.join(base_path, "cgroup.procs")
|
||||
|
||||
def get_processes(self):
    """
    Collect the ids of the processes listed in the cgroup.procs file of each controller path of this cgroup.

    Controllers whose cgroup.procs file does not exist contribute nothing to the result.

    :return: A list of the distinct pids (as int) found; the order is unspecified.
    """
    unique_pids = set()

    for controller in self._controller_paths:
        procs_file = self.get_controller_procs_path(controller)
        if not os.path.exists(procs_file):
            continue
        with open(procs_file, "r") as handle:
            # The file contents are split on whitespace; each token is parsed as a pid
            unique_pids.update(int(token) for token in handle.read().split())

    return list(unique_pids)
|
||||
|
||||
|
||||
class CgroupV2(Cgroup):
    """
    Cgroup representation for cgroup v2, identified by a single cgroup path.

    Controller metrics are not implemented yet for this class (see get_controller_metrics).
    """
    CPU_CONTROLLER = "cpu"

    def __init__(self, cgroup_name, root_cgroup_path, cgroup_path, enabled_controllers):
        """
        :param cgroup_name: The name of the cgroup. Used for logging/tracking purposes.
        :param root_cgroup_path: A string representing the root cgroup path. String can be empty.
        :param cgroup_path: A string representing the absolute cgroup path. String can be empty.
        :param enabled_controllers: A list of strings representing the agent supported controllers enabled at the root cgroup.
        """
        super(CgroupV2, self).__init__(cgroup_name)
        self._root_cgroup_path = root_cgroup_path
        self._cgroup_path = cgroup_path
        self._enabled_controllers = enabled_controllers

    @staticmethod
    def get_supported_controllers():
        """
        :return: A list with the names of the CPU and memory controllers.
        """
        return [CgroupV2.CPU_CONTROLLER, CgroupV2.MEMORY_CONTROLLER]

    def check_in_expected_slice(self, expected_slice):
        """
        Check whether the given slice appears in this cgroup's path; a warning is logged when it does not.

        :param expected_slice: The slice expected to be contained in the cgroup path.
        :return: True if expected_slice is contained in the cgroup path, False otherwise.
        """
        in_expected_slice = expected_slice in self._cgroup_path
        if not in_expected_slice:
            log_cgroup_warning("The {0} cgroup is not in the expected slice. Expected slice: {1}. Actual cgroup path: {2}".format(self._cgroup_name, expected_slice, self._cgroup_path), send_event=False)
        return in_expected_slice

    def get_controller_metrics(self, expected_relative_path=None):
        """
        Not implemented; always raises NotImplementedError.
        """
        # TODO - Implement controller metrics for cgroup v2
        raise NotImplementedError()

    def get_procs_path(self):
        """
        :return: <cgroup path>/cgroup.procs, or an empty string when the cgroup path is empty.
        """
        if self._cgroup_path == "":
            return ""
        return os.path.join(self._cgroup_path, "cgroup.procs")

    def get_processes(self):
        """
        Read the ids of the processes listed in this cgroup's cgroup.procs file.

        :return: A list of the distinct pids (as int) found; empty when the file does not exist.
            The order is unspecified.
        """
        unique_pids = set()
        procs_file = self.get_procs_path()
        if os.path.exists(procs_file):
            with open(procs_file, "r") as handle:
                # The file contents are split on whitespace; each token is parsed as a pid
                unique_pids.update(int(token) for token in handle.read().split())
        return list(unique_pids)
|
||||
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@ import threading
|
|||
|
||||
from azurelinuxagent.common import conf
|
||||
from azurelinuxagent.common import logger
|
||||
from azurelinuxagent.ga.cgroup import CpuCgroup, AGENT_NAME_TELEMETRY, MetricsCounter, MemoryCgroup
|
||||
from azurelinuxagent.ga.controllermetrics import CpuMetrics, AGENT_NAME_TELEMETRY, MetricsCounter, MemoryMetrics
|
||||
from azurelinuxagent.ga.cgroupapi import SystemdRunError, EXTENSION_SLICE_PREFIX, CGroupUtil, SystemdCgroupApiv2, \
|
||||
log_cgroup_info, log_cgroup_warning, get_cgroup_api, InvalidCgroupMountpointException
|
||||
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
|
||||
|
@ -130,9 +130,8 @@ class CGroupConfigurator(object):
|
|||
self._agent_cgroups_enabled = False
|
||||
self._extensions_cgroups_enabled = False
|
||||
self._cgroups_api = None
|
||||
self._agent_cpu_cgroup_path = None
|
||||
self._agent_memory_cgroup_path = None
|
||||
self._agent_memory_cgroup = None
|
||||
self._agent_cgroup = None
|
||||
self._agent_memory_metrics = None
|
||||
self._check_cgroups_lock = threading.RLock() # Protect the check_cgroups which is called from Monitor thread and main loop.
|
||||
|
||||
def initialize(self):
|
||||
|
@ -189,28 +188,30 @@ class CGroupConfigurator(object):
|
|||
|
||||
self.__setup_azure_slice()
|
||||
|
||||
cpu_controller_root, memory_controller_root = self.__get_cgroup_controller_roots()
|
||||
self._agent_cpu_cgroup_path, self._agent_memory_cgroup_path = self.__get_agent_cgroup_paths(agent_slice,
|
||||
cpu_controller_root,
|
||||
memory_controller_root)
|
||||
# Log mount points/root paths for cgroup controllers
|
||||
self._cgroups_api.log_root_paths()
|
||||
|
||||
# Get agent cgroup
|
||||
self._agent_cgroup = self._cgroups_api.get_process_cgroup(process_id="self", cgroup_name=AGENT_NAME_TELEMETRY)
|
||||
|
||||
if conf.get_cgroup_disable_on_process_check_failure() and self._check_fails_if_processes_found_in_agent_cgroup_before_enable(agent_slice):
|
||||
reason = "Found unexpected processes in the agent cgroup before agent enable cgroups."
|
||||
self.disable(reason, DisableCgroups.ALL)
|
||||
return
|
||||
|
||||
if self._agent_cpu_cgroup_path is not None or self._agent_memory_cgroup_path is not None:
|
||||
# Get metrics to track
|
||||
metrics = self._agent_cgroup.get_controller_metrics(expected_relative_path=os.path.join(agent_slice, systemd.get_agent_unit_name()))
|
||||
if len(metrics) > 0:
|
||||
self.enable()
|
||||
|
||||
if self._agent_cpu_cgroup_path is not None:
|
||||
log_cgroup_info("Agent CPU cgroup: {0}".format(self._agent_cpu_cgroup_path))
|
||||
self.__set_cpu_quota(conf.get_agent_cpu_quota())
|
||||
CGroupsTelemetry.track_cgroup(CpuCgroup(AGENT_NAME_TELEMETRY, self._agent_cpu_cgroup_path))
|
||||
|
||||
if self._agent_memory_cgroup_path is not None:
|
||||
log_cgroup_info("Agent Memory cgroup: {0}".format(self._agent_memory_cgroup_path))
|
||||
self._agent_memory_cgroup = MemoryCgroup(AGENT_NAME_TELEMETRY, self._agent_memory_cgroup_path)
|
||||
CGroupsTelemetry.track_cgroup(self._agent_memory_cgroup)
|
||||
for metric in metrics:
|
||||
for prop in metric.get_unit_properties():
|
||||
log_cgroup_info('{0}: {1}'.format(prop, systemd.get_unit_property(systemd.get_agent_unit_name(), prop)))
|
||||
if isinstance(metric, CpuMetrics):
|
||||
self.__set_cpu_quota(conf.get_agent_cpu_quota())
|
||||
elif isinstance(metric, MemoryMetrics):
|
||||
self._agent_memory_metrics = metric
|
||||
CGroupsTelemetry.track_cgroup(metric)
|
||||
|
||||
except Exception as exception:
|
||||
log_cgroup_warning("Error initializing cgroups: {0}".format(ustr(exception)))
|
||||
|
@ -229,21 +230,6 @@ class CGroupConfigurator(object):
|
|||
return False
|
||||
return True
|
||||
|
||||
def __get_cgroup_controller_roots(self):
|
||||
cpu_controller_root, memory_controller_root = self._cgroups_api.get_controller_root_paths()
|
||||
|
||||
if cpu_controller_root is not None:
|
||||
log_cgroup_info("The CPU cgroup controller root path is {0}".format(cpu_controller_root), send_event=False)
|
||||
else:
|
||||
log_cgroup_warning("The CPU cgroup controller is not mounted or enabled")
|
||||
|
||||
if memory_controller_root is not None:
|
||||
log_cgroup_info("The memory cgroup controller root path is {0}".format(memory_controller_root), send_event=False)
|
||||
else:
|
||||
log_cgroup_warning("The memory cgroup controller is not mounted or enabled")
|
||||
|
||||
return cpu_controller_root, memory_controller_root
|
||||
|
||||
@staticmethod
|
||||
def __setup_azure_slice():
|
||||
"""
|
||||
|
@ -416,47 +402,6 @@ class CGroupConfigurator(object):
|
|||
return True
|
||||
return False
|
||||
|
||||
def __get_agent_cgroup_paths(self, agent_slice, cpu_controller_root, memory_controller_root):
|
||||
agent_unit_name = systemd.get_agent_unit_name()
|
||||
|
||||
expected_relative_path = os.path.join(agent_slice, agent_unit_name)
|
||||
cpu_cgroup_relative_path, memory_cgroup_relative_path = self._cgroups_api.get_process_cgroup_relative_paths(
|
||||
"self")
|
||||
|
||||
if cpu_cgroup_relative_path is None:
|
||||
log_cgroup_warning("The agent's process is not within a CPU cgroup")
|
||||
else:
|
||||
if cpu_cgroup_relative_path == expected_relative_path:
|
||||
log_cgroup_info('CPUAccounting: {0}'.format(systemd.get_unit_property(agent_unit_name, "CPUAccounting")))
|
||||
log_cgroup_info('CPUQuota: {0}'.format(systemd.get_unit_property(agent_unit_name, "CPUQuotaPerSecUSec")))
|
||||
else:
|
||||
log_cgroup_warning(
|
||||
"The Agent is not in the expected CPU cgroup; will not enable monitoring. Cgroup:[{0}] Expected:[{1}]".format(cpu_cgroup_relative_path, expected_relative_path))
|
||||
cpu_cgroup_relative_path = None # Set the path to None to prevent monitoring
|
||||
|
||||
if memory_cgroup_relative_path is None:
|
||||
log_cgroup_warning("The agent's process is not within a memory cgroup")
|
||||
else:
|
||||
if memory_cgroup_relative_path == expected_relative_path:
|
||||
memory_accounting = systemd.get_unit_property(agent_unit_name, "MemoryAccounting")
|
||||
log_cgroup_info('MemoryAccounting: {0}'.format(memory_accounting))
|
||||
else:
|
||||
log_cgroup_warning(
|
||||
"The Agent is not in the expected memory cgroup; will not enable monitoring. CGroup:[{0}] Expected:[{1}]".format(memory_cgroup_relative_path, expected_relative_path))
|
||||
memory_cgroup_relative_path = None # Set the path to None to prevent monitoring
|
||||
|
||||
if cpu_controller_root is not None and cpu_cgroup_relative_path is not None:
|
||||
agent_cpu_cgroup_path = os.path.join(cpu_controller_root, cpu_cgroup_relative_path)
|
||||
else:
|
||||
agent_cpu_cgroup_path = None
|
||||
|
||||
if memory_controller_root is not None and memory_cgroup_relative_path is not None:
|
||||
agent_memory_cgroup_path = os.path.join(memory_controller_root, memory_cgroup_relative_path)
|
||||
else:
|
||||
agent_memory_cgroup_path = None
|
||||
|
||||
return agent_cpu_cgroup_path, agent_memory_cgroup_path
|
||||
|
||||
def supported(self):
|
||||
return self._cgroups_supported
|
||||
|
||||
|
@ -496,7 +441,11 @@ class CGroupConfigurator(object):
|
|||
elif disable_cgroups == DisableCgroups.AGENT: # disable agent
|
||||
self._agent_cgroups_enabled = False
|
||||
self.__reset_agent_cpu_quota()
|
||||
CGroupsTelemetry.stop_tracking(CpuCgroup(AGENT_NAME_TELEMETRY, self._agent_cpu_cgroup_path))
|
||||
agent_metrics = self._agent_cgroup.get_controller_metrics()
|
||||
for metric in agent_metrics:
|
||||
if isinstance(metric, CpuMetrics):
|
||||
CGroupsTelemetry.stop_tracking(metric)
|
||||
break
|
||||
|
||||
log_cgroup_warning("Disabling resource usage monitoring. Reason: {0}".format(reason), op=WALAEventOperation.CGroupsDisabled)
|
||||
|
||||
|
@ -612,11 +561,7 @@ class CGroupConfigurator(object):
|
|||
"""
|
||||
unexpected = []
|
||||
agent_cgroup_proc_names = []
|
||||
# Now we call _check_processes_in_agent_cgroup before we enable the cgroups or any one of the controller is not mounted, agent cgroup paths can be None.
|
||||
# so we need to check both.
|
||||
cgroup_path = self._agent_cpu_cgroup_path if self._agent_cpu_cgroup_path is not None else self._agent_memory_cgroup_path
|
||||
if cgroup_path is None:
|
||||
return
|
||||
|
||||
try:
|
||||
daemon = os.getppid()
|
||||
extension_handler = os.getpid()
|
||||
|
@ -624,12 +569,12 @@ class CGroupConfigurator(object):
|
|||
agent_commands.update(shellutil.get_running_commands())
|
||||
systemd_run_commands = set()
|
||||
systemd_run_commands.update(self._cgroups_api.get_systemd_run_commands())
|
||||
agent_cgroup = self._cgroups_api.get_processes_in_cgroup(cgroup_path)
|
||||
agent_cgroup_proccesses = self._agent_cgroup.get_processes()
|
||||
# get the running commands again in case new commands started or completed while we were fetching the processes in the cgroup;
|
||||
agent_commands.update(shellutil.get_running_commands())
|
||||
systemd_run_commands.update(self._cgroups_api.get_systemd_run_commands())
|
||||
|
||||
for process in agent_cgroup:
|
||||
for process in agent_cgroup_proccesses:
|
||||
agent_cgroup_proc_names.append(self.__format_process(process))
|
||||
# Note that the agent uses systemd-run to start extensions; systemd-run belongs to the agent cgroup, though the extensions don't.
|
||||
if process in (daemon, extension_handler) or process in systemd_run_commands:
|
||||
|
@ -753,8 +698,8 @@ class CGroupConfigurator(object):
|
|||
raise CGroupsException("The agent has been throttled for {0} seconds".format(metric.value))
|
||||
|
||||
def check_agent_memory_usage(self):
|
||||
if self.enabled() and self._agent_memory_cgroup:
|
||||
metrics = self._agent_memory_cgroup.get_tracked_metrics()
|
||||
if self.enabled() and self._agent_memory_metrics is not None:
|
||||
metrics = self._agent_memory_metrics.get_tracked_metrics()
|
||||
current_usage = 0
|
||||
for metric in metrics:
|
||||
if metric.counter == MetricsCounter.TOTAL_MEM_USAGE:
|
||||
|
@ -780,59 +725,37 @@ class CGroupConfigurator(object):
|
|||
return 0
|
||||
|
||||
def start_tracking_unit_cgroups(self, unit_name):
|
||||
"""
|
||||
TODO: Start tracking Memory Cgroups
|
||||
"""
|
||||
try:
|
||||
cpu_cgroup_path, memory_cgroup_path = self._cgroups_api.get_unit_cgroup_paths(unit_name)
|
||||
cgroup = self._cgroups_api.get_unit_cgroup(unit_name, unit_name)
|
||||
metrics = cgroup.get_controller_metrics()
|
||||
|
||||
if cpu_cgroup_path is None:
|
||||
log_cgroup_info("The CPU controller is not mounted or enabled; will not track resource usage", send_event=False)
|
||||
else:
|
||||
CGroupsTelemetry.track_cgroup(CpuCgroup(unit_name, cpu_cgroup_path))
|
||||
|
||||
if memory_cgroup_path is None:
|
||||
log_cgroup_info("The Memory controller is not mounted or enabled; will not track resource usage", send_event=False)
|
||||
else:
|
||||
CGroupsTelemetry.track_cgroup(MemoryCgroup(unit_name, memory_cgroup_path))
|
||||
for metric in metrics:
|
||||
CGroupsTelemetry.track_cgroup(metric)
|
||||
|
||||
except Exception as exception:
|
||||
log_cgroup_info("Failed to start tracking resource usage for the extension: {0}".format(ustr(exception)), send_event=False)
|
||||
|
||||
def stop_tracking_unit_cgroups(self, unit_name):
|
||||
"""
|
||||
TODO: remove Memory cgroups from tracked list.
|
||||
"""
|
||||
try:
|
||||
cpu_cgroup_path, memory_cgroup_path = self._cgroups_api.get_unit_cgroup_paths(unit_name)
|
||||
cgroup = self._cgroups_api.get_unit_cgroup(unit_name, unit_name)
|
||||
metrics = cgroup.get_controller_metrics()
|
||||
|
||||
if cpu_cgroup_path is not None:
|
||||
CGroupsTelemetry.stop_tracking(CpuCgroup(unit_name, cpu_cgroup_path))
|
||||
|
||||
if memory_cgroup_path is not None:
|
||||
CGroupsTelemetry.stop_tracking(MemoryCgroup(unit_name, memory_cgroup_path))
|
||||
for metric in metrics:
|
||||
CGroupsTelemetry.stop_tracking(metric)
|
||||
|
||||
except Exception as exception:
|
||||
log_cgroup_info("Failed to stop tracking resource usage for the extension service: {0}".format(ustr(exception)), send_event=False)
|
||||
|
||||
def stop_tracking_extension_cgroups(self, extension_name):
|
||||
"""
|
||||
TODO: remove extension Memory cgroups from tracked list
|
||||
"""
|
||||
try:
|
||||
extension_slice_name = CGroupUtil.get_extension_slice_name(extension_name)
|
||||
cgroup_relative_path = os.path.join(_AZURE_VMEXTENSIONS_SLICE,
|
||||
extension_slice_name)
|
||||
cgroup_relative_path = os.path.join(_AZURE_VMEXTENSIONS_SLICE, extension_slice_name)
|
||||
|
||||
cpu_root_path, memory_root_path = self._cgroups_api.get_controller_root_paths()
|
||||
cpu_cgroup_path = os.path.join(cpu_root_path, cgroup_relative_path)
|
||||
memory_cgroup_path = os.path.join(memory_root_path, cgroup_relative_path)
|
||||
|
||||
if cpu_cgroup_path is not None:
|
||||
CGroupsTelemetry.stop_tracking(CpuCgroup(extension_name, cpu_cgroup_path))
|
||||
|
||||
if memory_cgroup_path is not None:
|
||||
CGroupsTelemetry.stop_tracking(MemoryCgroup(extension_name, memory_cgroup_path))
|
||||
cgroup = self._cgroups_api.get_cgroup_from_relative_path(relative_path=cgroup_relative_path,
|
||||
cgroup_name=extension_name)
|
||||
metrics = cgroup.get_controller_metrics()
|
||||
for metric in metrics:
|
||||
CGroupsTelemetry.stop_tracking(metric)
|
||||
|
||||
except Exception as exception:
|
||||
log_cgroup_info("Failed to stop tracking resource usage for the extension service: {0}".format(ustr(exception)), send_event=False)
|
||||
|
|
|
@ -17,7 +17,7 @@ import errno
|
|||
import threading
|
||||
|
||||
from azurelinuxagent.common import logger
|
||||
from azurelinuxagent.ga.cgroup import CpuCgroup
|
||||
from azurelinuxagent.ga.controllermetrics import CpuMetrics
|
||||
from azurelinuxagent.common.future import ustr
|
||||
|
||||
|
||||
|
@ -41,7 +41,7 @@ class CGroupsTelemetry(object):
|
|||
"""
|
||||
Adds the given item to the dictionary of tracked cgroups
|
||||
"""
|
||||
if isinstance(cgroup, CpuCgroup):
|
||||
if isinstance(cgroup, CpuMetrics):
|
||||
# set the current cpu usage
|
||||
cgroup.initialize_cpu_usage()
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ from azurelinuxagent.ga import logcollector, cgroupconfigurator
|
|||
|
||||
import azurelinuxagent.common.conf as conf
|
||||
from azurelinuxagent.common import logger
|
||||
from azurelinuxagent.ga.cgroup import MetricsCounter
|
||||
from azurelinuxagent.ga.controllermetrics import MetricsCounter
|
||||
from azurelinuxagent.common.event import elapsed_milliseconds, add_event, WALAEventOperation, report_metric
|
||||
from azurelinuxagent.common.future import ustr
|
||||
from azurelinuxagent.ga.interfaces import ThreadHandlerInterface
|
||||
|
|
|
@ -88,7 +88,7 @@ class MetricsCounter(object):
|
|||
re_user_system_times = re.compile(r'user (\d+)\nsystem (\d+)\n')
|
||||
|
||||
|
||||
class CGroup(object):
|
||||
class ControllerMetrics(object):
|
||||
def __init__(self, name, cgroup_path):
|
||||
"""
|
||||
Initialize _data collection for the Memory controller
|
||||
|
@ -169,10 +169,16 @@ class CGroup(object):
|
|||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_unit_properties(self):
|
||||
"""
|
||||
Returns a list of the unit properties to collect for the controller.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
class CpuCgroup(CGroup):
|
||||
|
||||
class CpuMetrics(ControllerMetrics):
|
||||
def __init__(self, name, cgroup_path):
|
||||
super(CpuCgroup, self).__init__(name, cgroup_path)
|
||||
super(CpuMetrics, self).__init__(name, cgroup_path)
|
||||
|
||||
self._osutil = get_osutil()
|
||||
self._previous_cgroup_cpu = None
|
||||
|
@ -306,10 +312,13 @@ class CpuCgroup(CGroup):
|
|||
|
||||
return tracked
|
||||
|
||||
def get_unit_properties(self):
|
||||
return ["CPUAccounting", "CPUQuotaPerSecUSec"]
|
||||
|
||||
class MemoryCgroup(CGroup):
|
||||
|
||||
class MemoryMetrics(ControllerMetrics):
|
||||
def __init__(self, name, cgroup_path):
|
||||
super(MemoryCgroup, self).__init__(name, cgroup_path)
|
||||
super(MemoryMetrics, self).__init__(name, cgroup_path)
|
||||
|
||||
self._counter_not_found_error_count = 0
|
||||
|
||||
|
@ -390,3 +399,6 @@ class MemoryCgroup(CGroup):
|
|||
MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.SWAP_MEM_USAGE, self.name,
|
||||
self.try_swap_memory_usage(), _REPORT_EVERY_HOUR)
|
||||
]
|
||||
|
||||
def get_unit_properties(self):
|
||||
return["MemoryAccounting"]
|
|
@ -31,7 +31,7 @@ from azurelinuxagent.common.future import ustr
|
|||
TELEMETRY_MESSAGE_MAX_LEN = 3200
|
||||
|
||||
|
||||
def wait_for_process_completion_or_timeout(process, timeout, cpu_cgroup):
|
||||
def wait_for_process_completion_or_timeout(process, timeout, cpu_metrics):
|
||||
"""
|
||||
Utility function that waits for the process to complete within the given time frame. This function will terminate
|
||||
the process if when the given time frame elapses.
|
||||
|
@ -47,7 +47,7 @@ def wait_for_process_completion_or_timeout(process, timeout, cpu_cgroup):
|
|||
throttled_time = 0
|
||||
|
||||
if timeout == 0:
|
||||
throttled_time = get_cpu_throttled_time(cpu_cgroup)
|
||||
throttled_time = get_cpu_throttled_time(cpu_metrics)
|
||||
os.killpg(os.getpgid(process.pid), signal.SIGKILL)
|
||||
else:
|
||||
# process completed or forked; sleep 1 sec to give the child process (if any) a chance to start
|
||||
|
@ -57,7 +57,7 @@ def wait_for_process_completion_or_timeout(process, timeout, cpu_cgroup):
|
|||
return timeout == 0, return_code, throttled_time
|
||||
|
||||
|
||||
def handle_process_completion(process, command, timeout, stdout, stderr, error_code, cpu_cgroup=None):
|
||||
def handle_process_completion(process, command, timeout, stdout, stderr, error_code, cpu_metrics=None):
|
||||
"""
|
||||
Utility function that waits for process completion and retrieves its output (stdout and stderr) if it completed
|
||||
before the timeout period. Otherwise, the process will get killed and an ExtensionError will be raised.
|
||||
|
@ -68,15 +68,15 @@ def handle_process_completion(process, command, timeout, stdout, stderr, error_c
|
|||
:param stdout: Must be a file since we seek on it when parsing the subprocess output
|
||||
:param stderr: Must be a file since we seek on it when parsing the subprocess outputs
|
||||
:param error_code: The error code to set if we raise an ExtensionError
|
||||
:param cpu_cgroup: Reference the cpu cgroup name and path
|
||||
:param cpu_metrics: References the cpu metrics for the cgroup
|
||||
:return:
|
||||
"""
|
||||
# Wait for process completion or timeout
|
||||
timed_out, return_code, throttled_time = wait_for_process_completion_or_timeout(process, timeout, cpu_cgroup)
|
||||
timed_out, return_code, throttled_time = wait_for_process_completion_or_timeout(process, timeout, cpu_metrics)
|
||||
process_output = read_output(stdout, stderr)
|
||||
|
||||
if timed_out:
|
||||
if cpu_cgroup is not None: # Report CPUThrottledTime when timeout happens
|
||||
if cpu_metrics is not None: # Report CPUThrottledTime when timeout happens
|
||||
raise ExtensionError("Timeout({0});CPUThrottledTime({1}secs): {2}\n{3}".format(timeout, throttled_time, command, process_output),
|
||||
code=ExtensionErrorCodes.PluginHandlerScriptTimedout)
|
||||
|
||||
|
@ -211,14 +211,14 @@ def format_stdout_stderr(stdout, stderr):
|
|||
return to_s(stdout, -1*max_len_each, stderr, -1*max_len_each)
|
||||
|
||||
|
||||
def get_cpu_throttled_time(cpu_cgroup):
|
||||
def get_cpu_throttled_time(cpu_metrics):
|
||||
"""
|
||||
return the throttled time for the given cgroup.
|
||||
"""
|
||||
throttled_time = 0
|
||||
if cpu_cgroup is not None:
|
||||
if cpu_metrics is not None:
|
||||
try:
|
||||
throttled_time = cpu_cgroup.get_cpu_throttled_time(read_previous_throttled_time=False)
|
||||
throttled_time = cpu_metrics.get_cpu_throttled_time(read_previous_throttled_time=False)
|
||||
except Exception as e:
|
||||
logger.warn("Failed to get cpu throttled time for the extension: {0}", ustr(e))
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ import threading
|
|||
import azurelinuxagent.common.conf as conf
|
||||
import azurelinuxagent.common.logger as logger
|
||||
import azurelinuxagent.common.utils.networkutil as networkutil
|
||||
from azurelinuxagent.ga.cgroup import MetricValue, MetricsCategory, MetricsCounter
|
||||
from azurelinuxagent.ga.controllermetrics import MetricValue, MetricsCategory, MetricsCounter
|
||||
from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator
|
||||
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
|
||||
from azurelinuxagent.common.errorstate import ErrorState
|
||||
|
|
|
@ -19,7 +19,7 @@ import shutil
|
|||
import subprocess
|
||||
import tempfile
|
||||
|
||||
from azurelinuxagent.ga.cgroup import CpuCgroup
|
||||
from azurelinuxagent.ga.controllermetrics import CpuMetrics
|
||||
from azurelinuxagent.common.exception import ExtensionError, ExtensionErrorCodes
|
||||
from azurelinuxagent.common.future import ustr
|
||||
from azurelinuxagent.ga.extensionprocessutil import format_stdout_stderr, read_output, \
|
||||
|
@ -52,7 +52,7 @@ class TestProcessUtils(AgentTestCase):
|
|||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
|
||||
timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=5, cpu_cgroup=None)
|
||||
timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=5, cpu_metrics=None)
|
||||
self.assertEqual(timed_out, False)
|
||||
self.assertEqual(ret, 0)
|
||||
|
||||
|
@ -70,7 +70,8 @@ class TestProcessUtils(AgentTestCase):
|
|||
# We don't actually mock the kill, just wrap it so we can assert its call count
|
||||
with patch('azurelinuxagent.ga.extensionprocessutil.os.killpg', wraps=os.killpg) as patch_kill:
|
||||
with patch('time.sleep') as mock_sleep:
|
||||
timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=timeout, cpu_cgroup=None)
|
||||
timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=timeout,
|
||||
cpu_metrics=None)
|
||||
|
||||
# We're mocking sleep to avoid prolonging the test execution time, but we still want to make sure
|
||||
# we're "waiting" the correct amount of time before killing the process
|
||||
|
@ -89,7 +90,7 @@ class TestProcessUtils(AgentTestCase):
|
|||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
|
||||
timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=5, cpu_cgroup=None)
|
||||
timed_out, ret, _ = wait_for_process_completion_or_timeout(process=process, timeout=5, cpu_metrics=None)
|
||||
self.assertEqual(timed_out, False)
|
||||
self.assertEqual(ret, 2)
|
||||
|
||||
|
@ -105,12 +106,8 @@ class TestProcessUtils(AgentTestCase):
|
|||
stderr=stderr,
|
||||
preexec_fn=os.setsid)
|
||||
|
||||
process_output = handle_process_completion(process=process,
|
||||
command=command,
|
||||
timeout=5,
|
||||
stdout=stdout,
|
||||
stderr=stderr,
|
||||
error_code=42)
|
||||
process_output = handle_process_completion(process=process, command=command, timeout=5, stdout=stdout,
|
||||
stderr=stderr, error_code=42)
|
||||
|
||||
expected_output = "[stdout]\ndummy stdout\n\n\n[stderr]\ndummy stderr\n"
|
||||
self.assertEqual(process_output, expected_output)
|
||||
|
@ -130,12 +127,8 @@ class TestProcessUtils(AgentTestCase):
|
|||
stderr=stderr,
|
||||
preexec_fn=os.setsid)
|
||||
|
||||
handle_process_completion(process=process,
|
||||
command=command,
|
||||
timeout=timeout,
|
||||
stdout=stdout,
|
||||
stderr=stderr,
|
||||
error_code=42)
|
||||
handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout,
|
||||
stderr=stderr, error_code=42)
|
||||
|
||||
# We're mocking sleep to avoid prolonging the test execution time, but we still want to make sure
|
||||
# we're "waiting" the correct amount of time before killing the process and raising an exception
|
||||
|
@ -158,7 +151,7 @@ class TestProcessUtils(AgentTestCase):
|
|||
test_file = os.path.join(self.tmp_dir, "cpu.stat")
|
||||
shutil.copyfile(os.path.join(data_dir, "cgroups", "cpu.stat_t0"),
|
||||
test_file) # throttled_time = 50
|
||||
cgroup = CpuCgroup("test", self.tmp_dir)
|
||||
cgroup = CpuMetrics("test", self.tmp_dir)
|
||||
process = subprocess.Popen(command, # pylint: disable=subprocess-popen-preexec-fn
|
||||
shell=True,
|
||||
cwd=self.tmp_dir,
|
||||
|
@ -167,13 +160,8 @@ class TestProcessUtils(AgentTestCase):
|
|||
stderr=stderr,
|
||||
preexec_fn=os.setsid)
|
||||
|
||||
handle_process_completion(process=process,
|
||||
command=command,
|
||||
timeout=timeout,
|
||||
stdout=stdout,
|
||||
stderr=stderr,
|
||||
error_code=42,
|
||||
cpu_cgroup=cgroup)
|
||||
handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout,
|
||||
stderr=stderr, error_code=42, cpu_metrics=cgroup)
|
||||
|
||||
# We're mocking sleep to avoid prolonging the test execution time, but we still want to make sure
|
||||
# we're "waiting" the correct amount of time before killing the process and raising an exception
|
||||
|
@ -200,11 +188,7 @@ class TestProcessUtils(AgentTestCase):
|
|||
stderr=stderr,
|
||||
preexec_fn=os.setsid)
|
||||
|
||||
handle_process_completion(process=process,
|
||||
command=command,
|
||||
timeout=4,
|
||||
stdout=stdout,
|
||||
stderr=stderr,
|
||||
handle_process_completion(process=process, command=command, timeout=4, stdout=stdout, stderr=stderr,
|
||||
error_code=error_code)
|
||||
|
||||
self.assertEqual(context_manager.exception.code, error_code)
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
123
|
||||
234
|
||||
345
|
|
@ -24,10 +24,11 @@ import tempfile
|
|||
|
||||
from azurelinuxagent.common.exception import CGroupsException
|
||||
from azurelinuxagent.ga.cgroupapi import SystemdCgroupApiv1, SystemdCgroupApiv2, CGroupUtil, get_cgroup_api, \
|
||||
InvalidCgroupMountpointException
|
||||
InvalidCgroupMountpointException, CgroupV1, CgroupV2
|
||||
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
|
||||
from azurelinuxagent.common.osutil import systemd
|
||||
from azurelinuxagent.common.utils import fileutil
|
||||
from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics
|
||||
from tests.lib.mock_cgroup_environment import mock_cgroup_v1_environment, mock_cgroup_v2_environment, \
|
||||
mock_cgroup_hybrid_environment
|
||||
from tests.lib.mock_environment import MockCommand
|
||||
|
@ -85,7 +86,7 @@ class CGroupUtilTestCase(AgentTestCase):
|
|||
|
||||
|
||||
class SystemdCgroupsApiTestCase(AgentTestCase):
|
||||
def test_get_cgroup_api_raises_exception_when_systemd_mount_point_does_not_exist(self):
|
||||
def test_get_cgroup_api_raises_exception_when_systemd_mountpoint_does_not_exist(self):
|
||||
with mock_cgroup_v1_environment(self.tmp_dir):
|
||||
# Mock os.path.exists to return False for the os.path.exists(CGROUP_FILE_SYSTEM_ROOT) check
|
||||
with patch("os.path.exists", return_value=False):
|
||||
|
@ -151,106 +152,16 @@ class SystemdCgroupsApiTestCase(AgentTestCase):
|
|||
|
||||
|
||||
class SystemdCgroupsApiv1TestCase(AgentTestCase):
|
||||
def test_get_unit_cgroup_paths_should_return_the_cgroup_v1_mount_points(self):
|
||||
with mock_cgroup_v1_environment(self.tmp_dir):
|
||||
cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service")
|
||||
self.assertIn(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service',
|
||||
"The mount point for the CPU controller is incorrect")
|
||||
self.assertIn(memory, '/sys/fs/cgroup/memory/system.slice/extension.service',
|
||||
"The mount point for the memory controller is incorrect")
|
||||
|
||||
def test_get_unit_cgroup_path_should_return_None_if_either_cgroup_v1_controller_not_mounted(self):
|
||||
with mock_cgroup_v1_environment(self.tmp_dir):
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_controller_root_paths', return_value=('/sys/fs/cgroup/cpu,cpuacct', None)):
|
||||
cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service")
|
||||
self.assertIn(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service',
|
||||
"The mount point for the CPU controller is incorrect")
|
||||
self.assertIsNone(memory,
|
||||
"The mount point for the memory controller is None so unit cgroup should be None")
|
||||
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_controller_root_paths', return_value=(None, '/sys/fs/cgroup/memory')):
|
||||
cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service")
|
||||
self.assertIsNone(cpu, "The mount point for the cpu controller is None so unit cgroup should be None")
|
||||
self.assertIn(memory, '/sys/fs/cgroup/memory/system.slice/extension.service',
|
||||
"The mount point for the memory controller is incorrect")
|
||||
|
||||
def test_get_process_cgroup_paths_should_return_the_cgroup_v1_mount_points(self):
|
||||
with mock_cgroup_v1_environment(self.tmp_dir):
|
||||
cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
|
||||
self.assertIn(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service',
|
||||
"The mount point for the CPU controller is incorrect")
|
||||
self.assertIn(memory, '/sys/fs/cgroup/memory/system.slice/walinuxagent.service',
|
||||
"The mount point for the memory controller is incorrect")
|
||||
|
||||
def test_get_process_cgroup_path_should_return_None_if_either_cgroup_v1_controller_not_mounted(self):
    """
    get_process_cgroup_paths() should return None for a controller whose root path is None,
    and the full process cgroup path for the other controller.
    """
    with mock_cgroup_v1_environment(self.tmp_dir):
        # Memory controller root path is reported as None
        with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_controller_root_paths', return_value=('/sys/fs/cgroup/cpu,cpuacct', None)):
            cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
            # assertEqual (not assertIn): assertIn against a string is only a substring check,
            # so an empty or truncated path would be accepted.
            self.assertEqual(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service',
                             "The mount point for the CPU controller is incorrect")
            self.assertIsNone(memory,
                              "The mount point for the memory controller is None so unit cgroup should be None")

        # CPU controller root path is reported as None
        with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_controller_root_paths', return_value=(None, '/sys/fs/cgroup/memory')):
            cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
            self.assertIsNone(cpu, "The mount point for the CPU controller is None so unit cgroup should be None")
            self.assertEqual(memory, '/sys/fs/cgroup/memory/system.slice/walinuxagent.service',
                             "The mount point for the memory controller is incorrect")
|
||||
|
||||
def test_get_process_cgroup_v1_path_should_return_None_if_either_relative_path_is_None(self):
    """
    get_process_cgroup_paths() should return None for a controller whose relative path is None,
    and the full process cgroup path for the other controller.
    """
    with mock_cgroup_v1_environment(self.tmp_dir):
        # Relative path for the memory controller is reported as None
        with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_relative_paths', return_value=('system.slice/walinuxagent.service', None)):
            cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
            # assertEqual (not assertIn): assertIn against a string is only a substring check,
            # so an empty or truncated path would be accepted.
            self.assertEqual(cpu, '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service',
                             "The mount point for the CPU controller is incorrect")
            self.assertIsNone(memory,
                              "The relative cgroup path for the memory controller is None so unit cgroup should be None")

        # Relative path for the CPU controller is reported as None
        with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_relative_paths', return_value=(None, 'system.slice/walinuxagent.service')):
            cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
            self.assertIsNone(cpu, "The relative cgroup path for the cpu controller is None so unit cgroup should be None")
            self.assertEqual(memory, '/sys/fs/cgroup/memory/system.slice/walinuxagent.service',
                             "The mount point for the memory controller is incorrect")
|
||||
|
||||
def test_get_controller_root_paths_should_return_the_cgroup_v1_controller_mount_points(self):
    """
    get_controller_root_paths() should return the CPU and memory controller roots
    of the mocked cgroup v1 environment.
    """
    with mock_cgroup_v1_environment(self.tmp_dir):
        api = get_cgroup_api()
        cpu_root, memory_root = api.get_controller_root_paths()
        self.assertEqual(cpu_root, '/sys/fs/cgroup/cpu,cpuacct', "The root cgroup for the CPU controller is incorrect")
        self.assertEqual(memory_root, '/sys/fs/cgroup/memory', "The root cgroup for the memory controller is incorrect")
|
||||
|
||||
def test_get_controller_root_paths_should_return_None_if_either_controller_not_mounted(self):
    """
    get_controller_root_paths() should return None for a controller that is missing from
    the mountpoints reported by _get_controller_mountpoints().
    """
    with mock_cgroup_v1_environment(self.tmp_dir):
        # Only memory (and io) are reported as mounted
        with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/sys/fs/cgroup/memory', 'io': '/sys/fs/cgroup/io'}):
            cpu, memory = get_cgroup_api().get_controller_root_paths()
            self.assertIsNone(cpu, "The CPU controller is not mounted, so the cpu controller path should be None")
            self.assertEqual(memory, '/sys/fs/cgroup/memory', "The root cgroup for the memory controller is incorrect")

        # Only cpu,cpuacct (and io) are reported as mounted
        with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct', 'io': '/sys/fs/cgroup/io'}):
            cpu, memory = get_cgroup_api().get_controller_root_paths()
            self.assertIsNone(memory, "The memory controller is not mounted, so the memory controller path should be None")
            self.assertEqual(cpu, '/sys/fs/cgroup/cpu,cpuacct', "The root cgroup for the cpu controller is incorrect")
|
||||
|
||||
def test_get_controller_mountpoints_should_return_all_controller_mount_points(self):
|
||||
def test_get_controller_mountpoints_should_return_only_supported_controllers(self):
|
||||
with mock_cgroup_v1_environment(self.tmp_dir):
|
||||
cgroup_api = get_cgroup_api()
|
||||
# Expected value comes from findmnt output in the mocked environment
|
||||
self.assertEqual(cgroup_api._get_controller_mountpoints(), {
|
||||
'systemd': '/sys/fs/cgroup/systemd',
|
||||
'devices': '/sys/fs/cgroup/devices',
|
||||
'rdma': '/sys/fs/cgroup/rdma',
|
||||
'perf_event': '/sys/fs/cgroup/perf_event',
|
||||
'net_cls,net_prio': '/sys/fs/cgroup/net_cls,net_prio',
|
||||
'blkio': '/sys/fs/cgroup/blkio',
|
||||
'cpuset': '/sys/fs/cgroup/cpuset',
|
||||
'misc': '/sys/fs/cgroup/misc',
|
||||
'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct',
|
||||
'memory': '/sys/fs/cgroup/memory',
|
||||
'freezer': '/sys/fs/cgroup/freezer',
|
||||
'hugetlb': '/sys/fs/cgroup/hugetlb',
|
||||
'pids': '/sys/fs/cgroup/pids',
|
||||
'memory': '/sys/fs/cgroup/memory'
|
||||
}, "The controller mountpoints are not correct")
|
||||
|
||||
def test_are_mountpoints_systemd_created_should_return_False_if_cpu_or_memory_are_not_systemd_mountpoints(self):
|
||||
def test_are_mountpoints_systemd_created_should_return_False_if_mountpoints_are_not_systemd(self):
|
||||
with mock_cgroup_v1_environment(self.tmp_dir):
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/custom/mountpoint/path', 'memory': '/custom/mountpoint/path'}):
|
||||
self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created())
|
||||
|
@ -261,23 +172,123 @@ class SystemdCgroupsApiv1TestCase(AgentTestCase):
|
|||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/custom/mountpoint/path'}):
|
||||
self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created())
|
||||
|
||||
def test_are_mountpoints_systemd_created_should_return_True_if_cpu_and_memory_are_systemd_mountpoints(self):
|
||||
def test_are_mountpoints_systemd_created_should_return_True_if_mountpoints_are_systemd(self):
|
||||
with mock_cgroup_v1_environment(self.tmp_dir):
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup', 'memory': '/sys/fs/cgroup'}):
|
||||
self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created())
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct', 'memory': '/sys/fs/cgroup/memory'}):
|
||||
self.assertTrue(SystemdCgroupApiv1().are_mountpoints_systemd_created())
|
||||
|
||||
# are_mountpoints_systemd_created should only check controllers which are mounted
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup'}):
|
||||
self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created())
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'}):
|
||||
self.assertTrue(SystemdCgroupApiv1().are_mountpoints_systemd_created())
|
||||
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/sys/fs/cgroup'}):
|
||||
self.assertFalse(SystemdCgroupApiv1().are_mountpoints_systemd_created())
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/sys/fs/cgroup/memory'}):
|
||||
self.assertTrue(SystemdCgroupApiv1().are_mountpoints_systemd_created())
|
||||
|
||||
def test_get_cpu_and_memory_cgroup_relative_paths_for_process_should_return_the_cgroup_v1_relative_paths(self):
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={}):
|
||||
self.assertTrue(SystemdCgroupApiv1().are_mountpoints_systemd_created())
|
||||
|
||||
def test_get_relative_paths_for_process_should_return_the_cgroup_v1_relative_paths(self):
|
||||
with mock_cgroup_v1_environment(self.tmp_dir):
|
||||
cpu, memory = get_cgroup_api().get_process_cgroup_relative_paths('self')
|
||||
self.assertEqual(cpu, "system.slice/walinuxagent.service", "The relative path for the CPU cgroup is incorrect")
|
||||
self.assertEqual(memory, "system.slice/walinuxagent.service", "The relative memory for the CPU cgroup is incorrect")
|
||||
relative_paths = get_cgroup_api()._get_process_relative_controller_paths('self')
|
||||
self.assertEqual(len(relative_paths), 2)
|
||||
self.assertEqual(relative_paths.get('cpu,cpuacct'), "system.slice/walinuxagent.service", "The relative path for the CPU cgroup is incorrect")
|
||||
self.assertEqual(relative_paths.get('memory'), "system.slice/walinuxagent.service", "The relative memory for the memory cgroup is incorrect")
|
||||
|
||||
def test_get_unit_cgroup_should_return_correct_paths_for_cgroup_v1(self):
    """
    get_unit_cgroup() should return a CgroupV1 carrying the given name, the controller
    mountpoints, and the per-controller paths of the unit.
    """
    expected_mountpoints = {
        'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct',
        'memory': '/sys/fs/cgroup/memory'
    }
    expected_paths = {
        'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service',
        'memory': '/sys/fs/cgroup/memory/system.slice/extension.service'
    }
    with mock_cgroup_v1_environment(self.tmp_dir):
        api = get_cgroup_api()
        unit_cgroup = api.get_unit_cgroup(unit_name="extension.service", cgroup_name="extension")
        self.assertIsInstance(unit_cgroup, CgroupV1)
        self.assertEqual(unit_cgroup._cgroup_name, "extension")
        self.assertEqual(unit_cgroup._controller_mountpoints, expected_mountpoints)
        self.assertEqual(unit_cgroup._controller_paths, expected_paths)
|
||||
|
||||
def test_get_unit_cgroup_should_return_only_mounted_controllers_v1(self):
    """
    The CgroupV1 returned by get_unit_cgroup() should only contain the controllers
    reported by _get_controller_mountpoints().
    """
    mountpoints_target = 'azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints'
    # Each case: (mocked mountpoints, expected mountpoints, expected controller paths)
    cases = [
        (
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'},
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'},
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service'}
        ),
        ({}, {}, {})
    ]
    with mock_cgroup_v1_environment(self.tmp_dir):
        for mocked_mountpoints, expected_mountpoints, expected_paths in cases:
            with patch(mountpoints_target, return_value=mocked_mountpoints):
                unit_cgroup = get_cgroup_api().get_unit_cgroup(unit_name="extension.service", cgroup_name="extension")
                self.assertIsInstance(unit_cgroup, CgroupV1)
                self.assertEqual(unit_cgroup._cgroup_name, "extension")
                self.assertEqual(unit_cgroup._controller_mountpoints, expected_mountpoints)
                self.assertEqual(unit_cgroup._controller_paths, expected_paths)
|
||||
|
||||
def test_get_cgroup_from_relative_path_should_return_the_correct_paths_for_cgroup_v1(self):
    """
    get_cgroup_from_relative_path() should return a CgroupV1 whose controller paths are
    the controller mountpoints joined with the given relative path.
    """
    expected_mountpoints = {
        'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct',
        'memory': '/sys/fs/cgroup/memory'
    }
    expected_paths = {
        'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/some/relative/path',
        'memory': '/sys/fs/cgroup/memory/some/relative/path'
    }
    with mock_cgroup_v1_environment(self.tmp_dir):
        api = get_cgroup_api()
        result = api.get_cgroup_from_relative_path(relative_path="some/relative/path", cgroup_name="test_cgroup")
        self.assertIsInstance(result, CgroupV1)
        self.assertEqual(result._cgroup_name, "test_cgroup")
        self.assertEqual(result._controller_mountpoints, expected_mountpoints)
        self.assertEqual(result._controller_paths, expected_paths)
|
||||
|
||||
def test_get_cgroup_from_relative_path_should_return_only_mounted_controllers_v1(self):
    """
    The CgroupV1 returned by get_cgroup_from_relative_path() should only contain the
    controllers reported by _get_controller_mountpoints().
    """
    mountpoints_target = 'azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints'
    # Each case: (mocked mountpoints, expected mountpoints, expected controller paths)
    cases = [
        (
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'},
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'},
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/some/relative/path'}
        ),
        ({}, {}, {})
    ]
    with mock_cgroup_v1_environment(self.tmp_dir):
        for mocked_mountpoints, expected_mountpoints, expected_paths in cases:
            with patch(mountpoints_target, return_value=mocked_mountpoints):
                result = get_cgroup_api().get_cgroup_from_relative_path(relative_path="some/relative/path", cgroup_name="test_cgroup")
                self.assertIsInstance(result, CgroupV1)
                self.assertEqual(result._cgroup_name, "test_cgroup")
                self.assertEqual(result._controller_mountpoints, expected_mountpoints)
                self.assertEqual(result._controller_paths, expected_paths)
|
||||
|
||||
def test_get_process_cgroup_should_return_the_correct_paths_for_cgroup_v1(self):
    """
    get_process_cgroup() should return a CgroupV1 carrying the given name, the controller
    mountpoints, and the per-controller paths of the process.
    """
    expected_mountpoints = {
        'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct',
        'memory': '/sys/fs/cgroup/memory'
    }
    expected_paths = {
        'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service',
        'memory': '/sys/fs/cgroup/memory/system.slice/walinuxagent.service'
    }
    with mock_cgroup_v1_environment(self.tmp_dir):
        api = get_cgroup_api()
        process_cgroup = api.get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
        self.assertIsInstance(process_cgroup, CgroupV1)
        self.assertEqual(process_cgroup._cgroup_name, "walinuxagent")
        self.assertEqual(process_cgroup._controller_mountpoints, expected_mountpoints)
        self.assertEqual(process_cgroup._controller_paths, expected_paths)
|
||||
|
||||
def test_get_process_cgroup_should_return_only_mounted_controllers_v1(self):
    """
    The CgroupV1 returned by get_process_cgroup() should only contain the controllers
    reported by _get_controller_mountpoints().
    """
    mountpoints_target = 'azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints'
    # Each case: (mocked mountpoints, expected mountpoints, expected controller paths)
    cases = [
        (
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'},
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'},
            {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service'}
        ),
        ({}, {}, {})
    ]
    with mock_cgroup_v1_environment(self.tmp_dir):
        for mocked_mountpoints, expected_mountpoints, expected_paths in cases:
            with patch(mountpoints_target, return_value=mocked_mountpoints):
                process_cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
                self.assertIsInstance(process_cgroup, CgroupV1)
                self.assertEqual(process_cgroup._cgroup_name, "walinuxagent")
                self.assertEqual(process_cgroup._controller_mountpoints, expected_mountpoints)
                self.assertEqual(process_cgroup._controller_paths, expected_paths)
|
||||
|
||||
def test_get_process_cgroup_should_return_only_mounted_process_controllers_v1(self):
    """
    The controller paths of the CgroupV1 returned by get_process_cgroup() should only cover
    the controllers reported by _get_process_relative_controller_paths(); the controller
    mountpoints are expected to stay unchanged.
    """
    relative_paths_target = 'azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_process_relative_controller_paths'
    # Each case: (mocked relative paths, expected controller paths)
    cases = [
        ({'cpu,cpuacct': 'relative/path'}, {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct/relative/path'}),
        ({}, {})
    ]
    with mock_cgroup_v1_environment(self.tmp_dir):
        for mocked_relative_paths, expected_paths in cases:
            with patch(relative_paths_target, return_value=mocked_relative_paths):
                process_cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
                self.assertIsInstance(process_cgroup, CgroupV1)
                self.assertEqual(process_cgroup._cgroup_name, "walinuxagent")
                self.assertEqual(process_cgroup._controller_mountpoints, {'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct', 'memory':'/sys/fs/cgroup/memory'})
                self.assertEqual(process_cgroup._controller_paths, expected_paths)
|
||||
|
||||
@patch('time.sleep', side_effect=lambda _: mock_sleep())
|
||||
def test_start_extension_cgroups_v1_command_should_return_the_command_output(self, _):
|
||||
|
@ -354,17 +365,6 @@ class SystemdCgroupsApiv1TestCase(AgentTestCase):
|
|||
|
||||
|
||||
class SystemdCgroupsApiv2TestCase(AgentTestCase):
|
||||
def test_get_controllers_enabled_at_root_should_return_list_of_enabled_controllers(self):
    """
    _get_controllers_enabled_at_root() should return the controllers enabled at the root
    cgroup of the mocked cgroup v2 environment.
    """
    expected_controllers = ['cpuset', 'cpu', 'io', 'memory', 'pids']
    with mock_cgroup_v2_environment(self.tmp_dir):
        api = get_cgroup_api()
        enabled = api._get_controllers_enabled_at_root('/sys/fs/cgroup')
        self.assertEqual(enabled, expected_controllers)
|
||||
|
||||
def test_get_controllers_enabled_at_root_should_return_empty_list_if_root_cgroup_path_is_None(self):
    """
    When _get_root_cgroup_path() returns None, the API object should report no
    controllers enabled at the root.
    """
    root_path_target = 'azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path'
    with mock_cgroup_v2_environment(self.tmp_dir):
        with patch(root_path_target, return_value=None):
            api = get_cgroup_api()
            self.assertEqual(api._controllers_enabled_at_root, [])
|
||||
|
||||
def test_get_root_cgroup_path_should_return_v2_cgroup_root(self):
|
||||
with mock_cgroup_v2_environment(self.tmp_dir):
|
||||
cgroup_api = get_cgroup_api()
|
||||
|
@ -374,97 +374,113 @@ class SystemdCgroupsApiv2TestCase(AgentTestCase):
|
|||
with mock_cgroup_v2_environment(self.tmp_dir) as env:
|
||||
# Mock an environment which has multiple v2 mountpoints
|
||||
env.add_command(MockCommand(r"^findmnt -t cgroup2 --noheadings$",
|
||||
'''/custom/mountpoint/path1 cgroup2 cgroup2 rw,relatime
|
||||
/sys/fs/cgroup cgroup2 cgroup2 rw,nosuid,nodev,noexec,relatime
|
||||
/custom/mountpoint/path2 none cgroup2 rw,relatime
|
||||
'''))
|
||||
'''/custom/mountpoint/path1 cgroup2 cgroup2 rw,relatime
|
||||
/sys/fs/cgroup cgroup2 cgroup2 rw,nosuid,nodev,noexec,relatime
|
||||
/custom/mountpoint/path2 none cgroup2 rw,relatime
|
||||
'''))
|
||||
cgroup_api = get_cgroup_api()
|
||||
self.assertEqual(cgroup_api._get_root_cgroup_path(), '/sys/fs/cgroup')
|
||||
|
||||
def test_get_unit_cgroup_paths_should_return_the_cgroup_v2_cgroup_paths(self):
|
||||
def test_get_controllers_enabled_at_root_should_return_list_of_agent_supported_and_enabled_controllers(self):
|
||||
with mock_cgroup_v2_environment(self.tmp_dir):
|
||||
cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service")
|
||||
self.assertEqual(cpu, '/sys/fs/cgroup/system.slice/extension.service',
|
||||
"The cgroup path for the CPU controller is incorrect")
|
||||
self.assertEqual(memory, '/sys/fs/cgroup/system.slice/extension.service',
|
||||
"The cgroup path for the memory controller is incorrect")
|
||||
cgroup_api = get_cgroup_api()
|
||||
enabled_controllers = cgroup_api._get_controllers_enabled_at_root('/sys/fs/cgroup')
|
||||
self.assertEqual(len(enabled_controllers), 2)
|
||||
self.assertIn('cpu', enabled_controllers)
|
||||
self.assertIn('memory', enabled_controllers)
|
||||
|
||||
def test_get_unit_cgroup_path_should_return_None_if_either_cgroup_v2_controller_not_enabled(self):
|
||||
def test_get_controllers_enabled_at_root_should_return_empty_list_if_root_cgroup_path_is_empty(self):
|
||||
with mock_cgroup_v2_environment(self.tmp_dir):
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_controller_root_paths', return_value=('/sys/fs/cgroup', None)):
|
||||
cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service")
|
||||
self.assertIn(cpu, '/sys/fs/cgroup/system.slice/extension.service',
|
||||
"The cgroup path for the CPU controller is incorrect")
|
||||
self.assertIsNone(memory,
|
||||
"The cgroup path for the memory controller is None so unit cgroup should be None")
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=""):
|
||||
cgroup_api = get_cgroup_api()
|
||||
self.assertEqual(cgroup_api._controllers_enabled_at_root, [])
|
||||
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_controller_root_paths', return_value=(None, '/sys/fs/cgroup')):
|
||||
cpu, memory = get_cgroup_api().get_unit_cgroup_paths("extension.service")
|
||||
self.assertIsNone(cpu, "The cgroup path for the cpu controller is None so unit cgroup should be None")
|
||||
self.assertIn(memory, '/sys/fs/cgroup/system.slice/extension.service',
|
||||
"The cgroup path for the memory controller is incorrect")
|
||||
|
||||
def test_get_process_cgroup_paths_should_return_the_cgroup_v2_cgroup_paths(self):
|
||||
def test_get_process_relative_cgroup_path_should_return_relative_path(self):
|
||||
with mock_cgroup_v2_environment(self.tmp_dir):
|
||||
cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
|
||||
self.assertIn(cpu, '/sys/fs/cgroup/system.slice/walinuxagent.service',
|
||||
"The cgroup path for the CPU controller is incorrect")
|
||||
self.assertIn(memory, '/sys/fs/cgroup/system.slice/walinuxagent.service',
|
||||
"The cgroup path for the memory controller is incorrect")
|
||||
cgroup_api = get_cgroup_api()
|
||||
self.assertEqual(cgroup_api._get_process_relative_cgroup_path(process_id="self"), "system.slice/walinuxagent.service")
|
||||
|
||||
def test_get_process_cgroup_path_should_return_None_if_either_cgroup_v2_controller_not_enabled(self):
|
||||
def test_get_unit_cgroup_should_return_correct_paths_for_cgroup_v2(self):
|
||||
with mock_cgroup_v2_environment(self.tmp_dir):
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_controller_root_paths', return_value=('/sys/fs/cgroup', None)):
|
||||
cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
|
||||
self.assertIn(cpu, '/sys/fs/cgroup/system.slice/walinuxagent.service',
|
||||
"The cgroup path for the CPU controller is incorrect")
|
||||
self.assertIsNone(memory,
|
||||
"The cgroup path for the memory controller is None so unit cgroup should be None")
|
||||
cgroup = get_cgroup_api().get_unit_cgroup(unit_name="extension.service", cgroup_name="extension")
|
||||
self.assertIsInstance(cgroup, CgroupV2)
|
||||
self.assertEqual(cgroup._cgroup_name, "extension")
|
||||
self.assertEqual(cgroup._root_cgroup_path, "/sys/fs/cgroup")
|
||||
self.assertEqual(cgroup._cgroup_path, "/sys/fs/cgroup/system.slice/extension.service")
|
||||
self.assertEqual(len(cgroup._enabled_controllers), 2)
|
||||
self.assertIn('cpu', cgroup._enabled_controllers)
|
||||
self.assertIn('memory', cgroup._enabled_controllers)
|
||||
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_controller_root_paths', return_value=(None, '/sys/fs/cgroup')):
|
||||
cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
|
||||
self.assertIsNone(cpu, "The cgroup path for the CPU controller is None so unit cgroup should be None")
|
||||
self.assertIn(memory, '/sys/fs/cgroup/system.slice/walinuxagent.service',
|
||||
"The cgroup path for the memory controller is incorrect")
|
||||
|
||||
def test_get_process_cgroup_v2_path_should_return_None_if_relative_path_is_None(self):
|
||||
def test_get_unit_cgroup_should_return_empty_paths_if_root_path_empty_v2(self):
|
||||
with mock_cgroup_v2_environment(self.tmp_dir):
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2.get_process_cgroup_relative_paths', return_value=(None, None)):
|
||||
cpu, memory = get_cgroup_api().get_process_cgroup_paths("self")
|
||||
self.assertIsNone(cpu, "The relative cgroup path for the cpu controller is None so unit cgroup should be None")
|
||||
self.assertIsNone(memory,
|
||||
"The relative cgroup path for the memory controller is None so unit cgroup should be None")
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=""):
|
||||
cgroup = get_cgroup_api().get_unit_cgroup(unit_name="extension.service", cgroup_name="extension")
|
||||
self.assertIsInstance(cgroup, CgroupV2)
|
||||
self.assertEqual(cgroup._cgroup_name, "extension")
|
||||
self.assertEqual(cgroup._root_cgroup_path, "")
|
||||
self.assertEqual(cgroup._cgroup_path, "")
|
||||
self.assertEqual(len(cgroup._enabled_controllers), 0)
|
||||
|
||||
def test_get_controller_root_paths_should_return_the_cgroup_v2_root_cgroup_path(self):
|
||||
def test_get_unit_cgroup_should_return_only_enabled_controllers_v2(self):
|
||||
with mock_cgroup_v2_environment(self.tmp_dir):
|
||||
cpu, memory = get_cgroup_api().get_controller_root_paths()
|
||||
self.assertEqual(cpu, '/sys/fs/cgroup', "The root cgroup for the CPU controller is incorrect")
|
||||
self.assertEqual(memory, '/sys/fs/cgroup', "The root cgroup for the memory controller is incorrect")
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_controllers_enabled_at_root', return_value=['cpu']):
|
||||
cgroup = get_cgroup_api().get_unit_cgroup(unit_name="extension.service", cgroup_name="extension")
|
||||
self.assertIsInstance(cgroup, CgroupV2)
|
||||
self.assertEqual(cgroup._cgroup_name, "extension")
|
||||
self.assertEqual(cgroup._root_cgroup_path, "/sys/fs/cgroup")
|
||||
self.assertEqual(cgroup._cgroup_path, "/sys/fs/cgroup/system.slice/extension.service")
|
||||
self.assertEqual(len(cgroup._enabled_controllers), 1)
|
||||
self.assertIn('cpu', cgroup._enabled_controllers)
|
||||
|
||||
def test_get_controller_root_paths_should_return_None_if_root_cgroup_path_is_None(self):
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_controllers_enabled_at_root', return_value=[]):
|
||||
cgroup = get_cgroup_api().get_unit_cgroup(unit_name="extension.service", cgroup_name="extension")
|
||||
self.assertIsInstance(cgroup, CgroupV2)
|
||||
self.assertEqual(cgroup._cgroup_name, "extension")
|
||||
self.assertEqual(cgroup._root_cgroup_path, "/sys/fs/cgroup")
|
||||
self.assertEqual(cgroup._cgroup_path, "/sys/fs/cgroup/system.slice/extension.service")
|
||||
self.assertEqual(len(cgroup._enabled_controllers), 0)
|
||||
|
||||
def test_get_cgroup_from_relative_path_should_return_the_correct_paths_for_cgroup_v2(self):
|
||||
with mock_cgroup_v2_environment(self.tmp_dir):
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=None):
|
||||
cpu, memory = get_cgroup_api().get_controller_root_paths()
|
||||
self.assertIsNone(cpu, "The root cgroup path is None, so the CPU controller path should be None")
|
||||
self.assertIsNone(memory, "The root cgroup path is None, so the memory controller path should be None")
|
||||
cgroup = get_cgroup_api().get_cgroup_from_relative_path(relative_path="some/relative/path", cgroup_name="test_cgroup")
|
||||
self.assertIsInstance(cgroup, CgroupV2)
|
||||
self.assertEqual(cgroup._cgroup_name, "test_cgroup")
|
||||
self.assertEqual(cgroup._root_cgroup_path, "/sys/fs/cgroup")
|
||||
self.assertEqual(cgroup._cgroup_path, "/sys/fs/cgroup/some/relative/path")
|
||||
self.assertEqual(len(cgroup._enabled_controllers), 2)
|
||||
self.assertIn('cpu', cgroup._enabled_controllers)
|
||||
self.assertIn('memory', cgroup._enabled_controllers)
|
||||
|
||||
def test_get_controller_root_paths_should_return_None_if_either_controller_not_enabled(self):
|
||||
def test_get_cgroup_from_relative_path_should_return_empty_paths_if_root_path_empty_v2(self):
|
||||
with mock_cgroup_v2_environment(self.tmp_dir):
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_controllers_enabled_at_root', return_value=['io', 'memory']):
|
||||
cpu, memory = get_cgroup_api().get_controller_root_paths()
|
||||
self.assertIsNone(cpu, "The CPU controller is not enabled, so the CPU controller path should be None")
|
||||
self.assertEqual(memory, '/sys/fs/cgroup', "The root cgroup for the memory controller is incorrect")
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=""):
|
||||
cgroup = get_cgroup_api().get_cgroup_from_relative_path(relative_path="some/relative/path", cgroup_name="test_cgroup")
|
||||
self.assertIsInstance(cgroup, CgroupV2)
|
||||
self.assertEqual(cgroup._cgroup_name, "test_cgroup")
|
||||
self.assertEqual(cgroup._root_cgroup_path, "")
|
||||
self.assertEqual(cgroup._cgroup_path, "")
|
||||
self.assertEqual(len(cgroup._enabled_controllers), 0)
|
||||
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_controllers_enabled_at_root', return_value=['cpu', 'io']):
|
||||
cpu, memory = get_cgroup_api().get_controller_root_paths()
|
||||
self.assertEqual(cpu, '/sys/fs/cgroup', "The root cgroup for the CPU controller is incorrect")
|
||||
self.assertIsNone(memory, "The memory controller is not enabled, so the memory controller path should be None")
|
||||
|
||||
def test_get_cpu_and_memory_cgroup_relative_paths_for_process_should_return_the_cgroup_v2_relative_paths(self):
|
||||
def test_get_process_cgroup_should_return_the_correct_paths_for_cgroup_v2(self):
|
||||
with mock_cgroup_v2_environment(self.tmp_dir):
|
||||
cpu, memory = get_cgroup_api().get_process_cgroup_relative_paths('self')
|
||||
self.assertEqual(cpu, "system.slice/walinuxagent.service", "The relative path for the CPU cgroup is incorrect")
|
||||
self.assertEqual(memory, "system.slice/walinuxagent.service", "The relative memory for the CPU cgroup is incorrect")
|
||||
cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
|
||||
self.assertIsInstance(cgroup, CgroupV2)
|
||||
self.assertEqual(cgroup._cgroup_name, "walinuxagent")
|
||||
self.assertEqual(cgroup._root_cgroup_path, "/sys/fs/cgroup")
|
||||
self.assertEqual(cgroup._cgroup_path, "/sys/fs/cgroup/system.slice/walinuxagent.service")
|
||||
self.assertEqual(len(cgroup._enabled_controllers), 2)
|
||||
self.assertIn('cpu', cgroup._enabled_controllers)
|
||||
self.assertIn('memory', cgroup._enabled_controllers)
|
||||
|
||||
def test_get_process_cgroup_should_return_empty_paths_if_root_path_empty_v2(self):
|
||||
with mock_cgroup_v2_environment(self.tmp_dir):
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path', return_value=""):
|
||||
cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
|
||||
self.assertIsInstance(cgroup, CgroupV2)
|
||||
self.assertEqual(cgroup._cgroup_name, "walinuxagent")
|
||||
self.assertEqual(cgroup._root_cgroup_path, "")
|
||||
self.assertEqual(cgroup._cgroup_path, "")
|
||||
self.assertEqual(len(cgroup._enabled_controllers), 0)
|
||||
|
||||
|
||||
class SystemdCgroupsApiMockedFileSystemTestCase(_MockedFileSystemTestCase):
|
||||
|
@ -483,3 +499,176 @@ class SystemdCgroupsApiMockedFileSystemTestCase(_MockedFileSystemTestCase):
|
|||
self.assertEqual(legacy_cgroups, 2, "cleanup_legacy_cgroups() did not find all the expected cgroups")
|
||||
self.assertFalse(os.path.exists(legacy_cpu_cgroup), "cleanup_legacy_cgroups() did not remove the CPU legacy cgroup")
|
||||
self.assertFalse(os.path.exists(legacy_memory_cgroup), "cleanup_legacy_cgroups() did not remove the memory legacy cgroup")
|
||||
|
||||
|
||||
class CgroupsApiv1TestCase(AgentTestCase):
|
||||
def test_get_supported_controllers_returns_v1_controllers(self):
    """
    get_supported_controllers() on a v1 cgroup should return exactly the
    'cpu,cpuacct' and 'memory' controllers.
    """
    with mock_cgroup_v1_environment(self.tmp_dir):
        api = get_cgroup_api()
        process_cgroup = api.get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
        supported = process_cgroup.get_supported_controllers()
        self.assertEqual(len(supported), 2)
        self.assertIn('cpu,cpuacct', supported)
        self.assertIn('memory', supported)
|
||||
|
||||
def test_check_in_expected_slice_returns_True_if_all_paths_in_expected_slice(self):
    """
    check_in_expected_slice() should return True when every controller path of the
    process cgroup is in the expected slice, including when only one controller or
    no controller has a path.
    """
    relative_paths_target = 'azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_process_relative_controller_paths'
    with mock_cgroup_v1_environment(self.tmp_dir):
        # Unpatched mocked environment
        process_cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
        in_slice = process_cgroup.check_in_expected_slice(expected_slice='system.slice')
        self.assertTrue(in_slice)

        # Only one controller has a relative path, then none has
        for mocked_relative_paths in ({'cpu,cpuacct': 'system.slice/walinuxagent.service'}, {}):
            with patch(relative_paths_target, return_value=mocked_relative_paths):
                process_cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
                in_slice = process_cgroup.check_in_expected_slice(expected_slice='system.slice')
                self.assertTrue(in_slice)
|
||||
|
||||
def test_check_in_expected_slice_returns_False_if_any_paths_not_in_expected_slice(self):
|
||||
with mock_cgroup_v1_environment(self.tmp_dir):
|
||||
cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
|
||||
self.assertFalse(cgroup.check_in_expected_slice(expected_slice='user.slice'))
|
||||
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_process_relative_controller_paths', return_value={'cpu,cpuacct': 'system.slice/walinuxagent.service', 'memory': 'user.slice/walinuxagent.service'}):
|
||||
cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
|
||||
self.assertFalse(cgroup.check_in_expected_slice(expected_slice='user.slice'))
|
||||
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_process_relative_controller_paths', return_value={'cpu,cpuacct': '', 'memory': ''}):
|
||||
cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
|
||||
self.assertFalse(cgroup.check_in_expected_slice(expected_slice='system.slice'))
|
||||
|
||||
def test_get_controller_metrics_returns_all_supported_controllers_v1(self):
|
||||
with mock_cgroup_v1_environment(self.tmp_dir):
|
||||
cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
|
||||
metrics = cgroup.get_controller_metrics()
|
||||
self.assertEqual(len(metrics), 2)
|
||||
self.assertIsInstance(metrics[0], CpuMetrics)
|
||||
self.assertEqual(metrics[0].name, "walinuxagent")
|
||||
self.assertEqual(metrics[0].path, "/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service")
|
||||
self.assertIsInstance(metrics[1], MemoryMetrics)
|
||||
self.assertEqual(metrics[1].name, "walinuxagent")
|
||||
self.assertEqual(metrics[1].path, "/sys/fs/cgroup/memory/system.slice/walinuxagent.service")
|
||||
|
||||
def test_get_controller_metrics_returns_only_mounted_controllers_v1(self):
|
||||
with mock_cgroup_v1_environment(self.tmp_dir):
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'cpu,cpuacct': '/sys/fs/cgroup/cpu,cpuacct'}):
|
||||
cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
|
||||
metrics = cgroup.get_controller_metrics()
|
||||
self.assertEqual(len(metrics), 1)
|
||||
self.assertIsInstance(metrics[0], CpuMetrics)
|
||||
self.assertEqual(metrics[0].name, "walinuxagent")
|
||||
self.assertEqual(metrics[0].path, "/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service")
|
||||
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={'memory': '/sys/fs/cgroup/memory'}):
|
||||
cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
|
||||
metrics = cgroup.get_controller_metrics()
|
||||
self.assertEqual(len(metrics), 1)
|
||||
self.assertIsInstance(metrics[0], MemoryMetrics)
|
||||
self.assertEqual(metrics[0].name, "walinuxagent")
|
||||
self.assertEqual(metrics[0].path, "/sys/fs/cgroup/memory/system.slice/walinuxagent.service")
|
||||
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={}):
|
||||
cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
|
||||
metrics = cgroup.get_controller_metrics()
|
||||
self.assertEqual(len(metrics), 0)
|
||||
|
||||
def test_get_controller_metrics_returns_only_controllers_at_expected_path_v1(self):
|
||||
with mock_cgroup_v1_environment(self.tmp_dir):
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_process_relative_controller_paths', return_value={'cpu,cpuacct': 'system.slice/walinuxagent.service', 'memory': 'unexpected/path'}):
|
||||
cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
|
||||
metrics = cgroup.get_controller_metrics(expected_relative_path="system.slice/walinuxagent.service")
|
||||
self.assertEqual(len(metrics), 1)
|
||||
self.assertIsInstance(metrics[0], CpuMetrics)
|
||||
self.assertEqual(metrics[0].name, "walinuxagent")
|
||||
self.assertEqual(metrics[0].path, "/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service")
|
||||
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_process_relative_controller_paths', return_value={'cpu,cpuacct': 'unexpected/path', 'memory': 'unexpected/path'}):
|
||||
cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
|
||||
metrics = cgroup.get_controller_metrics(expected_relative_path="system.slice/walinuxagent.service")
|
||||
self.assertEqual(len(metrics), 0)
|
||||
|
||||
def test_get_procs_path_returns_correct_path_v1(self):
|
||||
with mock_cgroup_v1_environment(self.tmp_dir):
|
||||
cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
|
||||
procs_path = cgroup.get_controller_procs_path(controller='cpu,cpuacct')
|
||||
self.assertEqual(procs_path, "/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service/cgroup.procs")
|
||||
|
||||
procs_path = cgroup.get_controller_procs_path(controller='memory')
|
||||
self.assertEqual(procs_path, "/sys/fs/cgroup/memory/system.slice/walinuxagent.service/cgroup.procs")
|
||||
|
||||
def test_get_processes_returns_processes_at_all_controller_paths_v1(self):
|
||||
with mock_cgroup_v1_environment(self.tmp_dir):
|
||||
cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
|
||||
procs = cgroup.get_processes()
|
||||
self.assertEqual(len(procs), 3)
|
||||
self.assertIn(int(123), procs)
|
||||
self.assertIn(int(234), procs)
|
||||
self.assertIn(int(345), procs)
|
||||
|
||||
def test_get_processes_returns_empty_list_if_no_controllers_mounted_v1(self):
|
||||
with mock_cgroup_v1_environment(self.tmp_dir):
|
||||
with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1._get_controller_mountpoints', return_value={}):
|
||||
cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
|
||||
procs = cgroup.get_processes()
|
||||
self.assertIsInstance(procs, list)
|
||||
self.assertEqual(len(procs), 0)
|
||||
|
||||
def test_get_processes_returns_empty_list_if_procs_path_empty_v1(self):
|
||||
with mock_cgroup_v1_environment(self.tmp_dir):
|
||||
with patch('azurelinuxagent.ga.cgroupapi.CgroupV1.get_controller_procs_path', return_value=""):
|
||||
cgroup = get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")
|
||||
procs = cgroup.get_processes()
|
||||
self.assertIsInstance(procs, list)
|
||||
self.assertEqual(len(procs), 0)
|
||||
|
||||
|
||||
class CgroupsApiv2TestCase(AgentTestCase):
    """
    Tests for the process cgroup object returned by get_cgroup_api() inside mock_cgroup_v2_environment.
    """
    # Method of the v2 API that several tests replace with an empty root cgroup path
    _ROOT_CGROUP_PATH = 'azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_root_cgroup_path'

    @staticmethod
    def _get_agent_cgroup():
        # Must be invoked while the mocked environment (and any patch under test) is active
        return get_cgroup_api().get_process_cgroup(process_id="self", cgroup_name="walinuxagent")

    def test_get_supported_controllers_returns_v2_controllers(self):
        with mock_cgroup_v2_environment(self.tmp_dir):
            supported = self._get_agent_cgroup().get_supported_controllers()
            self.assertEqual(len(supported), 2)
            for controller in ('cpu', 'memory'):
                self.assertIn(controller, supported)

    def test_check_in_expected_slice_returns_True_if_cgroup_path_in_expected_slice(self):
        with mock_cgroup_v2_environment(self.tmp_dir):
            agent_cgroup = self._get_agent_cgroup()
            self.assertTrue(agent_cgroup.check_in_expected_slice(expected_slice='system.slice'))

    def test_check_in_expected_slice_returns_False_if_cgroup_path_not_in_expected_slice(self):
        with mock_cgroup_v2_environment(self.tmp_dir):
            self.assertFalse(self._get_agent_cgroup().check_in_expected_slice(expected_slice='user.slice'))

            # An empty relative cgroup path must not be reported as being in the expected slice
            with patch('azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv2._get_process_relative_cgroup_path', return_value=""):
                agent_cgroup = self._get_agent_cgroup()
                self.assertFalse(agent_cgroup.check_in_expected_slice(expected_slice='system.slice'))

    def test_get_procs_path_returns_empty_if_root_cgroup_empty_v2(self):
        with mock_cgroup_v2_environment(self.tmp_dir):
            with patch(self._ROOT_CGROUP_PATH, return_value=""):
                self.assertEqual(self._get_agent_cgroup().get_procs_path(), "")

    def test_get_procs_path_returns_correct_path_v2(self):
        with mock_cgroup_v2_environment(self.tmp_dir):
            actual_path = self._get_agent_cgroup().get_procs_path()
            self.assertEqual(actual_path, "/sys/fs/cgroup/system.slice/walinuxagent.service/cgroup.procs")

    def test_get_processes_returns_processes_at_all_controller_paths_v2(self):
        with mock_cgroup_v2_environment(self.tmp_dir):
            pids = self._get_agent_cgroup().get_processes()
            self.assertEqual(len(pids), 3)
            for pid in (123, 234, 345):
                self.assertIn(pid, pids)

    def test_get_processes_returns_empty_list_if_root_cgroup_empty_v2(self):
        with mock_cgroup_v2_environment(self.tmp_dir):
            with patch(self._ROOT_CGROUP_PATH, return_value=""):
                pids = self._get_agent_cgroup().get_processes()
                self.assertEqual(len(pids), 0)
|
||||
|
|
|
@ -27,7 +27,7 @@ import time
|
|||
import threading
|
||||
|
||||
from azurelinuxagent.common import conf
|
||||
from azurelinuxagent.ga.cgroup import AGENT_NAME_TELEMETRY, MetricsCounter, MetricValue, MetricsCategory, CpuCgroup
|
||||
from azurelinuxagent.ga.controllermetrics import AGENT_NAME_TELEMETRY, MetricsCounter, MetricValue, MetricsCategory, CpuMetrics
|
||||
from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator, DisableCgroups
|
||||
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
|
||||
from azurelinuxagent.common.event import WALAEventOperation
|
||||
|
@ -272,7 +272,7 @@ class CGroupConfiguratorSystemdTestCase(AgentTestCase):
|
|||
|
||||
CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/' \
|
||||
'azure-vmextensions-Microsoft.CPlat.Extension.slice'] = \
|
||||
CpuCgroup('Microsoft.CPlat.Extension',
|
||||
CpuMetrics('Microsoft.CPlat.Extension',
|
||||
'/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/azure-vmextensions-Microsoft.CPlat.Extension.slice')
|
||||
|
||||
configurator.remove_extension_slice(extension_name="Microsoft.CPlat.Extension")
|
||||
|
@ -369,10 +369,10 @@ class CGroupConfiguratorSystemdTestCase(AgentTestCase):
|
|||
configurator.setup_extension_slice(extension_name=extension_name, cpu_quota=5)
|
||||
configurator.set_extension_services_cpu_memory_quota(service_list)
|
||||
CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service'] = \
|
||||
CpuCgroup('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service')
|
||||
CpuMetrics('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service')
|
||||
CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/' \
|
||||
'azure-vmextensions-Microsoft.CPlat.Extension.slice'] = \
|
||||
CpuCgroup('Microsoft.CPlat.Extension',
|
||||
CpuMetrics('Microsoft.CPlat.Extension',
|
||||
'/sys/fs/cgroup/cpu,cpuacct/azure.slice/azure-vmextensions.slice/azure-vmextensions-Microsoft.CPlat.Extension.slice')
|
||||
|
||||
configurator.disable("UNIT TEST", DisableCgroups.ALL)
|
||||
|
@ -717,7 +717,7 @@ class CGroupConfiguratorSystemdTestCase(AgentTestCase):
|
|||
with self._get_cgroup_configurator() as configurator:
|
||||
with patch("os.path.exists") as mock_path:
|
||||
mock_path.return_value = True
|
||||
CGroupsTelemetry.track_cgroup(CpuCgroup('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service'))
|
||||
CGroupsTelemetry.track_cgroup(CpuMetrics('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service'))
|
||||
configurator.stop_tracking_extension_services_cgroups(service_list)
|
||||
|
||||
tracked = CGroupsTelemetry._tracked
|
||||
|
@ -776,7 +776,7 @@ class CGroupConfiguratorSystemdTestCase(AgentTestCase):
|
|||
with patch("os.path.exists") as mock_path:
|
||||
mock_path.side_effect = side_effect
|
||||
CGroupsTelemetry._tracked['/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service'] = \
|
||||
CpuCgroup('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service')
|
||||
CpuMetrics('extension.service', '/sys/fs/cgroup/cpu,cpuacct/system.slice/extension.service')
|
||||
configurator.stop_tracking_unit_cgroups("extension.service")
|
||||
|
||||
tracked = CGroupsTelemetry._tracked
|
||||
|
@ -911,7 +911,7 @@ exit 0
|
|||
agent_processes = [os.getppid(), os.getpid()] + agent_command_processes + [start_extension.systemd_run_pid]
|
||||
other_processes = [1, get_completed_process()] + extension_processes
|
||||
|
||||
with patch("azurelinuxagent.ga.cgroupapi._SystemdCgroupApi.get_processes_in_cgroup", return_value=agent_processes + other_processes):
|
||||
with patch("azurelinuxagent.ga.cgroupapi.CgroupV1.get_processes", return_value=agent_processes + other_processes):
|
||||
with self.assertRaises(CGroupsException) as context_manager:
|
||||
configurator._check_processes_in_agent_cgroup()
|
||||
|
||||
|
@ -1012,7 +1012,7 @@ exit 0
|
|||
|
||||
with self.assertRaises(AgentMemoryExceededException) as context_manager:
|
||||
with self._get_cgroup_configurator() as configurator:
|
||||
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_tracked_metrics") as tracked_metrics:
|
||||
with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_tracked_metrics") as tracked_metrics:
|
||||
tracked_metrics.return_value = metrics
|
||||
configurator.check_agent_memory_usage()
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@ import os
|
|||
import random
|
||||
import time
|
||||
|
||||
from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup
|
||||
from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics
|
||||
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
|
||||
from azurelinuxagent.common.utils import fileutil
|
||||
from tests.lib.tools import AgentTestCase, data_dir, patch
|
||||
|
@ -105,10 +105,10 @@ class TestCGroupsTelemetry(AgentTestCase):
|
|||
@staticmethod
|
||||
def _track_new_extension_cgroups(num_extensions):
|
||||
for i in range(num_extensions):
|
||||
dummy_cpu_cgroup = CpuCgroup("dummy_extension_{0}".format(i), "dummy_cpu_path_{0}".format(i))
|
||||
dummy_cpu_cgroup = CpuMetrics("dummy_extension_{0}".format(i), "dummy_cpu_path_{0}".format(i))
|
||||
CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)
|
||||
|
||||
dummy_memory_cgroup = MemoryCgroup("dummy_extension_{0}".format(i), "dummy_memory_path_{0}".format(i))
|
||||
dummy_memory_cgroup = MemoryMetrics("dummy_extension_{0}".format(i), "dummy_memory_path_{0}".format(i))
|
||||
CGroupsTelemetry.track_cgroup(dummy_memory_cgroup)
|
||||
|
||||
def _assert_cgroups_are_tracked(self, num_extensions):
|
||||
|
@ -136,12 +136,12 @@ class TestCGroupsTelemetry(AgentTestCase):
|
|||
|
||||
self._track_new_extension_cgroups(num_extensions)
|
||||
|
||||
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage") as patch_get_memory_max_usage:
|
||||
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage:
|
||||
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage:
|
||||
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.try_swap_memory_usage") as patch_try_swap_memory_usage:
|
||||
with patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage") as patch_get_cpu_usage:
|
||||
with patch("azurelinuxagent.ga.cgroup.CGroup.is_active") as patch_is_active:
|
||||
with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage") as patch_get_memory_max_usage:
|
||||
with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage") as patch_get_memory_usage:
|
||||
with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage") as patch_get_memory_usage:
|
||||
with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.try_swap_memory_usage") as patch_try_swap_memory_usage:
|
||||
with patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage") as patch_get_cpu_usage:
|
||||
with patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") as patch_is_active:
|
||||
patch_is_active.return_value = True
|
||||
|
||||
current_cpu = 30
|
||||
|
@ -163,10 +163,10 @@ class TestCGroupsTelemetry(AgentTestCase):
|
|||
self.assertEqual(len(metrics), num_extensions * num_of_metrics_per_extn_expected)
|
||||
self._assert_polled_metrics_equal(metrics, current_cpu, current_memory, current_max_memory, current_swap_memory)
|
||||
|
||||
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage", side_effect=raise_ioerror)
|
||||
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage", side_effect=raise_ioerror)
|
||||
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage", side_effect=raise_ioerror)
|
||||
@patch("azurelinuxagent.ga.cgroup.CGroup.is_active", return_value=False)
|
||||
@patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage", side_effect=raise_ioerror)
|
||||
@patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage", side_effect=raise_ioerror)
|
||||
@patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage", side_effect=raise_ioerror)
|
||||
@patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active", return_value=False)
|
||||
def test_telemetry_polling_with_inactive_cgroups(self, *_):
|
||||
num_extensions = 5
|
||||
no_extensions_expected = 0 # pylint: disable=unused-variable
|
||||
|
@ -182,10 +182,10 @@ class TestCGroupsTelemetry(AgentTestCase):
|
|||
|
||||
self.assertEqual(len(metrics), 0)
|
||||
|
||||
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage")
|
||||
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage")
|
||||
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage")
|
||||
@patch("azurelinuxagent.ga.cgroup.CGroup.is_active")
|
||||
@patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage")
|
||||
@patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage")
|
||||
@patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage")
|
||||
@patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active")
|
||||
def test_telemetry_polling_with_changing_cgroups_state(self, patch_is_active, patch_get_cpu_usage, # pylint: disable=unused-argument
|
||||
patch_get_mem, patch_get_max_mem, *args):
|
||||
num_extensions = 5
|
||||
|
@ -274,11 +274,11 @@ class TestCGroupsTelemetry(AgentTestCase):
|
|||
CGroupsTelemetry.poll_all_tracked()
|
||||
self.assertEqual(expected_call_count, patch_periodic_warn.call_count)
|
||||
|
||||
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.try_swap_memory_usage")
|
||||
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage")
|
||||
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage")
|
||||
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage")
|
||||
@patch("azurelinuxagent.ga.cgroup.CGroup.is_active")
|
||||
@patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.try_swap_memory_usage")
|
||||
@patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage")
|
||||
@patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage")
|
||||
@patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage")
|
||||
@patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active")
|
||||
def test_telemetry_calculations(self, patch_is_active, patch_get_cpu_usage, patch_get_memory_usage, patch_get_memory_max_usage, patch_try_memory_swap_usage,
|
||||
*args): # pylint: disable=unused-argument
|
||||
num_polls = 10
|
||||
|
@ -321,13 +321,13 @@ class TestCGroupsTelemetry(AgentTestCase):
|
|||
self.assertFalse(CGroupsTelemetry.is_tracked("not_present_cpu_dummy_path"))
|
||||
self.assertFalse(CGroupsTelemetry.is_tracked("not_present_memory_dummy_path"))
|
||||
|
||||
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage", side_effect=raise_ioerror)
|
||||
@patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage", side_effect=raise_ioerror)
|
||||
def test_process_cgroup_metric_with_no_memory_cgroup_mounted(self, *args): # pylint: disable=unused-argument
|
||||
num_extensions = 5
|
||||
self._track_new_extension_cgroups(num_extensions)
|
||||
|
||||
with patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage") as patch_get_cpu_usage:
|
||||
with patch("azurelinuxagent.ga.cgroup.CGroup.is_active") as patch_is_active:
|
||||
with patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage") as patch_get_cpu_usage:
|
||||
with patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") as patch_is_active:
|
||||
patch_is_active.return_value = True
|
||||
|
||||
current_cpu = 30
|
||||
|
@ -341,16 +341,16 @@ class TestCGroupsTelemetry(AgentTestCase):
|
|||
self.assertEqual(len(metrics), num_extensions * 1) # Only CPU populated
|
||||
self._assert_polled_metrics_equal(metrics, current_cpu, 0, 0, 0)
|
||||
|
||||
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage", side_effect=raise_ioerror)
|
||||
@patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage", side_effect=raise_ioerror)
|
||||
def test_process_cgroup_metric_with_no_cpu_cgroup_mounted(self, *args): # pylint: disable=unused-argument
|
||||
num_extensions = 5
|
||||
|
||||
self._track_new_extension_cgroups(num_extensions)
|
||||
|
||||
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage") as patch_get_memory_max_usage:
|
||||
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage:
|
||||
with patch("azurelinuxagent.ga.cgroup.MemoryCgroup.try_swap_memory_usage") as patch_try_swap_memory_usage:
|
||||
with patch("azurelinuxagent.ga.cgroup.CGroup.is_active") as patch_is_active:
|
||||
with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage") as patch_get_memory_max_usage:
|
||||
with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage") as patch_get_memory_usage:
|
||||
with patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.try_swap_memory_usage") as patch_try_swap_memory_usage:
|
||||
with patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") as patch_is_active:
|
||||
patch_is_active.return_value = True
|
||||
|
||||
current_memory = 209715200
|
||||
|
@ -367,14 +367,14 @@ class TestCGroupsTelemetry(AgentTestCase):
|
|||
self.assertEqual(len(metrics), num_extensions * 3)
|
||||
self._assert_polled_metrics_equal(metrics, 0, current_memory, current_max_memory, current_swap_memory)
|
||||
|
||||
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage", side_effect=raise_ioerror)
|
||||
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_max_memory_usage", side_effect=raise_ioerror)
|
||||
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage", side_effect=raise_ioerror)
|
||||
@patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage", side_effect=raise_ioerror)
|
||||
@patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_max_memory_usage", side_effect=raise_ioerror)
|
||||
@patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage", side_effect=raise_ioerror)
|
||||
def test_extension_telemetry_not_sent_for_empty_perf_metrics(self, *args): # pylint: disable=unused-argument
|
||||
num_extensions = 5
|
||||
self._track_new_extension_cgroups(num_extensions)
|
||||
|
||||
with patch("azurelinuxagent.ga.cgroup.CGroup.is_active") as patch_is_active:
|
||||
with patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active") as patch_is_active:
|
||||
|
||||
patch_is_active.return_value = False
|
||||
poll_count = 1
|
||||
|
@ -383,9 +383,9 @@ class TestCGroupsTelemetry(AgentTestCase):
|
|||
metrics = CGroupsTelemetry.poll_all_tracked()
|
||||
self.assertEqual(0, len(metrics))
|
||||
|
||||
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage")
|
||||
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_throttled_time")
|
||||
@patch("azurelinuxagent.ga.cgroup.CGroup.is_active")
|
||||
@patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage")
|
||||
@patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_throttled_time")
|
||||
@patch("azurelinuxagent.ga.controllermetrics.ControllerMetrics.is_active")
|
||||
def test_cgroup_telemetry_should_not_report_cpu_negative_value(self, patch_is_active, path_get_throttled_time, patch_get_cpu_usage):
|
||||
|
||||
num_polls = 5
|
||||
|
@ -396,7 +396,7 @@ class TestCGroupsTelemetry(AgentTestCase):
|
|||
cpu_percent_values.append(-1)
|
||||
cpu_throttled_values = [random.randint(0, 60 * 60) for _ in range(num_polls)]
|
||||
|
||||
dummy_cpu_cgroup = CpuCgroup("dummy_extension_name", "dummy_cpu_path")
|
||||
dummy_cpu_cgroup = CpuMetrics("dummy_extension_name", "dummy_cpu_path")
|
||||
CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)
|
||||
self.assertEqual(1, len(CGroupsTelemetry._tracked))
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@ import contextlib
|
|||
import os
|
||||
|
||||
from azurelinuxagent.common import logger, conf
|
||||
from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup, MetricValue
|
||||
from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics, MetricValue
|
||||
from azurelinuxagent.ga.cgroupconfigurator import CGroupConfigurator
|
||||
from azurelinuxagent.common.logger import Logger
|
||||
from azurelinuxagent.common.protocol.util import ProtocolUtil
|
||||
|
@ -197,8 +197,8 @@ def _create_log_collector_monitor_handler(iterations=1):
|
|||
monitor_log_collector.join()
|
||||
|
||||
cgroups = [
|
||||
CpuCgroup("test", "dummy_cpu_path"),
|
||||
MemoryCgroup("test", "dummy_memory_path")
|
||||
CpuMetrics("test", "dummy_cpu_path"),
|
||||
MemoryMetrics("test", "dummy_memory_path")
|
||||
]
|
||||
monitor_log_collector = get_log_collector_monitor_handler(cgroups)
|
||||
monitor_log_collector.run_and_wait = run_and_wait
|
||||
|
|
|
@ -22,7 +22,7 @@ import os
|
|||
import random
|
||||
import shutil
|
||||
|
||||
from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup, MetricsCounter, CounterNotFound
|
||||
from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics, MetricsCounter, CounterNotFound
|
||||
from azurelinuxagent.common.exception import CGroupsException
|
||||
from azurelinuxagent.common.osutil import get_osutil
|
||||
from azurelinuxagent.common.utils import fileutil
|
||||
|
@ -36,35 +36,35 @@ def consume_cpu_time():
|
|||
return waste
|
||||
|
||||
|
||||
class TestCGroup(AgentTestCase):
|
||||
class TestControllerMetrics(AgentTestCase):
|
||||
def test_is_active(self):
|
||||
test_cgroup = CpuCgroup("test_extension", self.tmp_dir)
|
||||
self.assertEqual(False, test_cgroup.is_active())
|
||||
test_metrics = CpuMetrics("test_extension", self.tmp_dir)
|
||||
self.assertEqual(False, test_metrics.is_active())
|
||||
|
||||
with open(os.path.join(self.tmp_dir, "tasks"), mode="wb") as tasks:
|
||||
tasks.write(str(1000).encode())
|
||||
|
||||
self.assertEqual(True, test_cgroup.is_active())
|
||||
self.assertEqual(True, test_metrics.is_active())
|
||||
|
||||
@patch("azurelinuxagent.common.logger.periodic_warn")
|
||||
def test_is_active_file_not_present(self, patch_periodic_warn):
|
||||
test_cgroup = CpuCgroup("test_extension", self.tmp_dir)
|
||||
self.assertEqual(False, test_cgroup.is_active())
|
||||
test_metrics = CpuMetrics("test_extension", self.tmp_dir)
|
||||
self.assertEqual(False, test_metrics.is_active())
|
||||
|
||||
test_cgroup = MemoryCgroup("test_extension", os.path.join(self.tmp_dir, "this_cgroup_does_not_exist"))
|
||||
self.assertEqual(False, test_cgroup.is_active())
|
||||
test_metrics = MemoryMetrics("test_extension", os.path.join(self.tmp_dir, "this_cgroup_does_not_exist"))
|
||||
self.assertEqual(False, test_metrics.is_active())
|
||||
|
||||
self.assertEqual(0, patch_periodic_warn.call_count)
|
||||
|
||||
@patch("azurelinuxagent.common.logger.periodic_warn")
|
||||
def test_is_active_incorrect_file(self, patch_periodic_warn):
|
||||
open(os.path.join(self.tmp_dir, "tasks"), mode="wb").close()
|
||||
test_cgroup = CpuCgroup("test_extension", os.path.join(self.tmp_dir, "tasks"))
|
||||
self.assertEqual(False, test_cgroup.is_active())
|
||||
test_metrics = CpuMetrics("test_extension", os.path.join(self.tmp_dir, "tasks"))
|
||||
self.assertEqual(False, test_metrics.is_active())
|
||||
self.assertEqual(1, patch_periodic_warn.call_count)
|
||||
|
||||
|
||||
class TestCpuCgroup(AgentTestCase):
|
||||
class TestCpuMetrics(AgentTestCase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
AgentTestCase.setUpClass()
|
||||
|
@ -96,147 +96,147 @@ class TestCpuCgroup(AgentTestCase):
|
|||
|
||||
def setUp(self):
|
||||
AgentTestCase.setUp(self)
|
||||
TestCpuCgroup.mock_read_file_map.clear()
|
||||
TestCpuMetrics.mock_read_file_map.clear()
|
||||
|
||||
def test_initialize_cpu_usage_should_set_current_cpu_usage(self):
|
||||
cgroup = CpuCgroup("test", "/sys/fs/cgroup/cpu/system.slice/test")
|
||||
metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test")
|
||||
|
||||
TestCpuCgroup.mock_read_file_map = {
|
||||
TestCpuMetrics.mock_read_file_map = {
|
||||
"/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"),
|
||||
os.path.join(cgroup.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0")
|
||||
os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0")
|
||||
}
|
||||
|
||||
cgroup.initialize_cpu_usage()
|
||||
metrics.initialize_cpu_usage()
|
||||
|
||||
self.assertEqual(cgroup._current_cgroup_cpu, 63763)
|
||||
self.assertEqual(cgroup._current_system_cpu, 5496872)
|
||||
self.assertEqual(metrics._current_cgroup_cpu, 63763)
|
||||
self.assertEqual(metrics._current_system_cpu, 5496872)
|
||||
|
||||
def test_get_cpu_usage_should_return_the_cpu_usage_since_its_last_invocation(self):
|
||||
osutil = get_osutil()
|
||||
|
||||
cgroup = CpuCgroup("test", "/sys/fs/cgroup/cpu/system.slice/test")
|
||||
metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test")
|
||||
|
||||
TestCpuCgroup.mock_read_file_map = {
|
||||
TestCpuMetrics.mock_read_file_map = {
|
||||
"/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"),
|
||||
os.path.join(cgroup.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0")
|
||||
os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0")
|
||||
}
|
||||
|
||||
cgroup.initialize_cpu_usage()
|
||||
metrics.initialize_cpu_usage()
|
||||
|
||||
TestCpuCgroup.mock_read_file_map = {
|
||||
TestCpuMetrics.mock_read_file_map = {
|
||||
"/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t1"),
|
||||
os.path.join(cgroup.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t1")
|
||||
os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t1")
|
||||
}
|
||||
|
||||
cpu_usage = cgroup.get_cpu_usage()
|
||||
cpu_usage = metrics.get_cpu_usage()
|
||||
|
||||
self.assertEqual(cpu_usage, round(100.0 * 0.000307697876885 * osutil.get_processor_cores(), 3))
|
||||
|
||||
TestCpuCgroup.mock_read_file_map = {
|
||||
TestCpuMetrics.mock_read_file_map = {
|
||||
"/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t2"),
|
||||
os.path.join(cgroup.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t2")
|
||||
os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t2")
|
||||
}
|
||||
|
||||
cpu_usage = cgroup.get_cpu_usage()
|
||||
cpu_usage = metrics.get_cpu_usage()
|
||||
|
||||
self.assertEqual(cpu_usage, round(100.0 * 0.000445181085968 * osutil.get_processor_cores(), 3))
|
||||
|
||||
def test_initialize_cpu_usage_should_set_the_cgroup_usage_to_0_when_the_cgroup_does_not_exist(self):
|
||||
cgroup = CpuCgroup("test", "/sys/fs/cgroup/cpu/system.slice/test")
|
||||
metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test")
|
||||
|
||||
io_error_2 = IOError()
|
||||
io_error_2.errno = errno.ENOENT # "No such directory"
|
||||
|
||||
TestCpuCgroup.mock_read_file_map = {
|
||||
TestCpuMetrics.mock_read_file_map = {
|
||||
"/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"),
|
||||
os.path.join(cgroup.path, "cpuacct.stat"): io_error_2
|
||||
os.path.join(metrics.path, "cpuacct.stat"): io_error_2
|
||||
}
|
||||
|
||||
cgroup.initialize_cpu_usage()
|
||||
metrics.initialize_cpu_usage()
|
||||
|
||||
self.assertEqual(cgroup._current_cgroup_cpu, 0)
|
||||
self.assertEqual(cgroup._current_system_cpu, 5496872) # check the system usage just for test sanity
|
||||
self.assertEqual(metrics._current_cgroup_cpu, 0)
|
||||
self.assertEqual(metrics._current_system_cpu, 5496872) # check the system usage just for test sanity
|
||||
|
||||
def test_initialize_cpu_usage_should_raise_an_exception_when_called_more_than_once(self):
|
||||
cgroup = CpuCgroup("test", "/sys/fs/cgroup/cpu/system.slice/test")
|
||||
metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test")
|
||||
|
||||
TestCpuCgroup.mock_read_file_map = {
|
||||
TestCpuMetrics.mock_read_file_map = {
|
||||
"/proc/stat": os.path.join(data_dir, "cgroups", "proc_stat_t0"),
|
||||
os.path.join(cgroup.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0")
|
||||
os.path.join(metrics.path, "cpuacct.stat"): os.path.join(data_dir, "cgroups", "cpuacct.stat_t0")
|
||||
}
|
||||
|
||||
cgroup.initialize_cpu_usage()
|
||||
metrics.initialize_cpu_usage()
|
||||
|
||||
with self.assertRaises(CGroupsException):
|
||||
cgroup.initialize_cpu_usage()
|
||||
metrics.initialize_cpu_usage()
|
||||
|
||||
def test_get_cpu_usage_should_raise_an_exception_when_initialize_cpu_usage_has_not_been_invoked(self):
|
||||
cgroup = CpuCgroup("test", "/sys/fs/cgroup/cpu/system.slice/test")
|
||||
metrics = CpuMetrics("test", "/sys/fs/cgroup/cpu/system.slice/test")
|
||||
|
||||
with self.assertRaises(CGroupsException):
|
||||
cpu_usage = cgroup.get_cpu_usage() # pylint: disable=unused-variable
|
||||
cpu_usage = metrics.get_cpu_usage() # pylint: disable=unused-variable
|
||||
|
||||
def test_get_throttled_time_should_return_the_value_since_its_last_invocation(self):
|
||||
test_file = os.path.join(self.tmp_dir, "cpu.stat")
|
||||
shutil.copyfile(os.path.join(data_dir, "cgroups", "cpu.stat_t0"), test_file) # throttled_time = 50
|
||||
cgroup = CpuCgroup("test", self.tmp_dir)
|
||||
cgroup.initialize_cpu_usage()
|
||||
metrics = CpuMetrics("test", self.tmp_dir)
|
||||
metrics.initialize_cpu_usage()
|
||||
shutil.copyfile(os.path.join(data_dir, "cgroups", "cpu.stat_t1"), test_file) # throttled_time = 2075541442327
|
||||
|
||||
throttled_time = cgroup.get_cpu_throttled_time()
|
||||
throttled_time = metrics.get_cpu_throttled_time()
|
||||
|
||||
self.assertEqual(throttled_time, float(2075541442327 - 50) / 1E9, "The value of throttled_time is incorrect")
|
||||
|
||||
def test_get_tracked_metrics_should_return_the_throttled_time(self):
|
||||
cgroup = CpuCgroup("test", os.path.join(data_dir, "cgroups"))
|
||||
cgroup.initialize_cpu_usage()
|
||||
metrics = CpuMetrics("test", os.path.join(data_dir, "cgroups"))
|
||||
metrics.initialize_cpu_usage()
|
||||
|
||||
def find_throttled_time(metrics):
|
||||
return [m for m in metrics if m.counter == MetricsCounter.THROTTLED_TIME]
|
||||
|
||||
found = find_throttled_time(cgroup.get_tracked_metrics())
|
||||
found = find_throttled_time(metrics.get_tracked_metrics())
|
||||
self.assertTrue(len(found) == 0, "get_tracked_metrics should not fetch the throttled time by default. Found: {0}".format(found))
|
||||
|
||||
found = find_throttled_time(cgroup.get_tracked_metrics(track_throttled_time=True))
|
||||
found = find_throttled_time(metrics.get_tracked_metrics(track_throttled_time=True))
|
||||
self.assertTrue(len(found) == 1, "get_tracked_metrics should have fetched the throttled time by default. Found: {0}".format(found))
|
||||
|
||||
|
||||
class TestMemoryCgroup(AgentTestCase):
|
||||
class TestMemoryMetrics(AgentTestCase):
|
||||
def test_get_metrics(self):
|
||||
test_mem_cg = MemoryCgroup("test_extension", os.path.join(data_dir, "cgroups", "memory_mount"))
|
||||
test_mem_metrics = MemoryMetrics("test_extension", os.path.join(data_dir, "cgroups", "memory_mount"))
|
||||
|
||||
memory_usage = test_mem_cg.get_memory_usage()
|
||||
memory_usage = test_mem_metrics.get_memory_usage()
|
||||
self.assertEqual(150000, memory_usage)
|
||||
|
||||
max_memory_usage = test_mem_cg.get_max_memory_usage()
|
||||
max_memory_usage = test_mem_metrics.get_max_memory_usage()
|
||||
self.assertEqual(1000000, max_memory_usage)
|
||||
|
||||
swap_memory_usage = test_mem_cg.try_swap_memory_usage()
|
||||
swap_memory_usage = test_mem_metrics.try_swap_memory_usage()
|
||||
self.assertEqual(20000, swap_memory_usage)
|
||||
|
||||
def test_get_metrics_when_files_not_present(self):
|
||||
test_mem_cg = MemoryCgroup("test_extension", os.path.join(data_dir, "cgroups"))
|
||||
test_mem_metrics = MemoryMetrics("test_extension", os.path.join(data_dir, "cgroups"))
|
||||
|
||||
with self.assertRaises(IOError) as e:
|
||||
test_mem_cg.get_memory_usage()
|
||||
test_mem_metrics.get_memory_usage()
|
||||
|
||||
self.assertEqual(e.exception.errno, errno.ENOENT)
|
||||
|
||||
with self.assertRaises(IOError) as e:
|
||||
test_mem_cg.get_max_memory_usage()
|
||||
test_mem_metrics.get_max_memory_usage()
|
||||
|
||||
self.assertEqual(e.exception.errno, errno.ENOENT)
|
||||
|
||||
with self.assertRaises(IOError) as e:
|
||||
test_mem_cg.try_swap_memory_usage()
|
||||
test_mem_metrics.try_swap_memory_usage()
|
||||
|
||||
self.assertEqual(e.exception.errno, errno.ENOENT)
|
||||
|
||||
def test_get_memory_usage_counters_not_found(self):
|
||||
test_mem_cg = MemoryCgroup("test_extension", os.path.join(data_dir, "cgroups", "missing_memory_counters"))
|
||||
test_mem_metrics = MemoryMetrics("test_extension", os.path.join(data_dir, "cgroups", "missing_memory_counters"))
|
||||
|
||||
with self.assertRaises(CounterNotFound):
|
||||
test_mem_cg.get_memory_usage()
|
||||
test_mem_metrics.get_memory_usage()
|
||||
|
||||
swap_memory_usage = test_mem_cg.try_swap_memory_usage()
|
||||
swap_memory_usage = test_mem_metrics.try_swap_memory_usage()
|
||||
self.assertEqual(0, swap_memory_usage)
|
|
@ -21,7 +21,7 @@ import random
|
|||
import string
|
||||
|
||||
from azurelinuxagent.common import event, logger
|
||||
from azurelinuxagent.ga.cgroup import CpuCgroup, MemoryCgroup, MetricValue, _REPORT_EVERY_HOUR
|
||||
from azurelinuxagent.ga.controllermetrics import CpuMetrics, MemoryMetrics, MetricValue, _REPORT_EVERY_HOUR
|
||||
from azurelinuxagent.ga.cgroupstelemetry import CGroupsTelemetry
|
||||
from azurelinuxagent.common.event import EVENTS_DIRECTORY
|
||||
from azurelinuxagent.common.protocol.healthservice import HealthService
|
||||
|
@ -222,7 +222,7 @@ class TestExtensionMetricsDataTelemetry(AgentTestCase):
|
|||
self.assertEqual(0, patch_add_metric.call_count)
|
||||
|
||||
@patch('azurelinuxagent.common.event.EventLogger.add_metric')
|
||||
@patch("azurelinuxagent.ga.cgroup.MemoryCgroup.get_memory_usage")
|
||||
@patch("azurelinuxagent.ga.controllermetrics.MemoryMetrics.get_memory_usage")
|
||||
@patch('azurelinuxagent.common.logger.Logger.periodic_warn')
|
||||
def test_send_extension_metrics_telemetry_handling_memory_cgroup_exceptions_errno2(self, patch_periodic_warn, # pylint: disable=unused-argument
|
||||
patch_get_memory_usage,
|
||||
|
@ -231,14 +231,14 @@ class TestExtensionMetricsDataTelemetry(AgentTestCase):
|
|||
ioerror.errno = 2
|
||||
patch_get_memory_usage.side_effect = ioerror
|
||||
|
||||
CGroupsTelemetry._tracked["/test/path"] = MemoryCgroup("cgroup_name", "/test/path")
|
||||
CGroupsTelemetry._tracked["/test/path"] = MemoryMetrics("_cgroup_name", "/test/path")
|
||||
|
||||
PollResourceUsage().run()
|
||||
self.assertEqual(0, patch_periodic_warn.call_count)
|
||||
self.assertEqual(0, patch_add_metric.call_count) # No metrics should be sent.
|
||||
|
||||
@patch('azurelinuxagent.common.event.EventLogger.add_metric')
|
||||
@patch("azurelinuxagent.ga.cgroup.CpuCgroup.get_cpu_usage")
|
||||
@patch("azurelinuxagent.ga.controllermetrics.CpuMetrics.get_cpu_usage")
|
||||
@patch('azurelinuxagent.common.logger.Logger.periodic_warn')
|
||||
def test_send_extension_metrics_telemetry_handling_cpu_cgroup_exceptions_errno2(self, patch_periodic_warn, # pylint: disable=unused-argument
|
||||
patch_cpu_usage, patch_add_metric,
|
||||
|
@ -247,7 +247,7 @@ class TestExtensionMetricsDataTelemetry(AgentTestCase):
|
|||
ioerror.errno = 2
|
||||
patch_cpu_usage.side_effect = ioerror
|
||||
|
||||
CGroupsTelemetry._tracked["/test/path"] = CpuCgroup("cgroup_name", "/test/path")
|
||||
CGroupsTelemetry._tracked["/test/path"] = CpuMetrics("_cgroup_name", "/test/path")
|
||||
|
||||
PollResourceUsage().run()
|
||||
self.assertEqual(0, patch_periodic_warn.call_count)
|
||||
|
|
|
@ -122,7 +122,9 @@ _MOCKED_COMMANDS_HYBRID = [
|
|||
|
||||
_MOCKED_FILES_V1 = [
|
||||
("/proc/self/cgroup", os.path.join(data_dir, 'cgroups', 'v1', 'proc_self_cgroup')),
|
||||
(r"/proc/[0-9]+/cgroup", os.path.join(data_dir, 'cgroups', 'v1', 'proc_pid_cgroup'))
|
||||
(r"/proc/[0-9]+/cgroup", os.path.join(data_dir, 'cgroups', 'v1', 'proc_pid_cgroup')),
|
||||
(r"/sys/fs/cgroup/cpu,cpuacct/system.slice/walinuxagent.service/cgroup.procs", os.path.join(data_dir, 'cgroups', 'cgroup.procs')),
|
||||
(r"/sys/fs/cgroup/memory/system.slice/walinuxagent.service/cgroup.procs", os.path.join(data_dir, 'cgroups', 'cgroup.procs'))
|
||||
]
|
||||
|
||||
_MOCKED_FILES_V2 = [
|
||||
|
@ -130,7 +132,8 @@ _MOCKED_FILES_V2 = [
|
|||
(r"/proc/[0-9]+/cgroup", os.path.join(data_dir, 'cgroups', 'v2', 'proc_pid_cgroup')),
|
||||
("/sys/fs/cgroup/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control')),
|
||||
("/sys/fs/cgroup/azure.slice/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control')),
|
||||
("/sys/fs/cgroup/azure.slice/walinuxagent.service/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control_empty'))
|
||||
("/sys/fs/cgroup/azure.slice/walinuxagent.service/cgroup.subtree_control", os.path.join(data_dir, 'cgroups', 'v2', 'sys_fs_cgroup_cgroup.subtree_control_empty')),
|
||||
(r"/sys/fs/cgroup/system.slice/walinuxagent.service/cgroup.procs", os.path.join(data_dir, 'cgroups', 'cgroup.procs'))
|
||||
]
|
||||
|
||||
_MOCKED_FILES_HYBRID = [
|
||||
|
|
|
@ -24,8 +24,9 @@ from azurelinuxagent.common import conf
|
|||
from azurelinuxagent.common.exception import CGroupsException
|
||||
from azurelinuxagent.ga import logcollector, cgroupconfigurator
|
||||
from azurelinuxagent.common.utils import fileutil
|
||||
from azurelinuxagent.ga.cgroupapi import get_cgroup_api, InvalidCgroupMountpointException
|
||||
from azurelinuxagent.ga.cgroupapi import InvalidCgroupMountpointException, CgroupV1
|
||||
from azurelinuxagent.ga.collect_logs import CollectLogsHandler
|
||||
from azurelinuxagent.ga.controllermetrics import AGENT_LOG_COLLECTOR
|
||||
from tests.lib.mock_cgroup_environment import mock_cgroup_v1_environment
|
||||
from tests.lib.tools import AgentTestCase, data_dir, Mock, patch
|
||||
|
||||
|
@ -247,16 +248,24 @@ class TestAgent(AgentTestCase):
|
|||
CollectLogsHandler.enable_monitor_cgroups_check()
|
||||
mock_log_collector.run = Mock()
|
||||
|
||||
# Mock cgroup paths so process is in the log collector slice
|
||||
def mock_cgroup_paths(*args, **kwargs):
|
||||
if args and args[0] == "self":
|
||||
relative_path = "{0}/{1}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE, logcollector.CGROUPS_UNIT)
|
||||
return (relative_path, relative_path)
|
||||
return get_cgroup_api().get_process_cgroup_relative_paths(*args, **kwargs)
|
||||
# Mock cgroup so process is in the log collector slice
|
||||
def mock_cgroup(*args, **kwargs): # pylint: disable=W0613
|
||||
relative_path = "{0}/{1}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE, logcollector.CGROUPS_UNIT)
|
||||
return CgroupV1(
|
||||
cgroup_name=AGENT_LOG_COLLECTOR,
|
||||
controller_mountpoints={
|
||||
'cpu,cpuacct':"/sys/fs/cgroup/cpu,cpuacct",
|
||||
'memory':"/sys/fs/cgroup/memory"
|
||||
},
|
||||
controller_paths={
|
||||
'cpu,cpuacct':"/sys/fs/cgroup/cpu,cpuacct/{0}".format(relative_path),
|
||||
'memory':"/sys/fs/cgroup/memory/{0}".format(relative_path)
|
||||
}
|
||||
)
|
||||
|
||||
with mock_cgroup_v1_environment(self.tmp_dir):
|
||||
with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_paths",
|
||||
side_effect=mock_cgroup_paths):
|
||||
with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup",
|
||||
side_effect=mock_cgroup):
|
||||
agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf"))
|
||||
agent.collect_logs(is_full_mode=True)
|
||||
|
||||
|
@ -296,17 +305,26 @@ class TestAgent(AgentTestCase):
|
|||
CollectLogsHandler.enable_monitor_cgroups_check()
|
||||
mock_log_collector.run = Mock()
|
||||
|
||||
# Mock cgroup paths so process is in incorrect slice
|
||||
def mock_cgroup_paths(*args, **kwargs):
|
||||
if args and args[0] == "self":
|
||||
return ("NOT_THE_CORRECT_PATH", "NOT_THE_CORRECT_PATH")
|
||||
return get_cgroup_api().get_process_cgroup_relative_paths(*args, **kwargs)
|
||||
# Mock cgroup so process is in incorrect slice
|
||||
def mock_cgroup(*args, **kwargs): # pylint: disable=W0613
|
||||
relative_path = "NOT_THE_CORRECT_PATH"
|
||||
return CgroupV1(
|
||||
cgroup_name=AGENT_LOG_COLLECTOR,
|
||||
controller_mountpoints={
|
||||
'cpu,cpuacct': "/sys/fs/cgroup/cpu,cpuacct",
|
||||
'memory': "/sys/fs/cgroup/memory"
|
||||
},
|
||||
controller_paths={
|
||||
'cpu,cpuacct': "/sys/fs/cgroup/cpu,cpuacct/{0}".format(relative_path),
|
||||
'memory': "/sys/fs/cgroup/memory/{0}".format(relative_path)
|
||||
}
|
||||
)
|
||||
|
||||
def raise_on_sys_exit(*args):
|
||||
raise RuntimeError(args[0] if args else "Exiting")
|
||||
|
||||
with mock_cgroup_v1_environment(self.tmp_dir):
|
||||
with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_paths", side_effect=mock_cgroup_paths):
|
||||
with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup", side_effect=mock_cgroup):
|
||||
agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf"))
|
||||
|
||||
with patch("sys.exit", side_effect=raise_on_sys_exit) as mock_exit:
|
||||
|
@ -346,19 +364,25 @@ class TestAgent(AgentTestCase):
|
|||
CollectLogsHandler.enable_monitor_cgroups_check()
|
||||
mock_log_collector.run = Mock()
|
||||
|
||||
# Mock cgroup paths so process is in the log collector slice and cpu is not mounted
|
||||
def mock_cgroup_paths(*args, **kwargs):
|
||||
if args and args[0] == "self":
|
||||
relative_path = "{0}/{1}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE, logcollector.CGROUPS_UNIT)
|
||||
return (None, relative_path)
|
||||
return get_cgroup_api().get_process_cgroup_relative_paths(*args, **kwargs)
|
||||
# Mock cgroup so process is in the log collector slice and cpu is not mounted
|
||||
def mock_cgroup(*args, **kwargs): # pylint: disable=W0613
|
||||
relative_path = "{0}/{1}".format(cgroupconfigurator.LOGCOLLECTOR_SLICE, logcollector.CGROUPS_UNIT)
|
||||
return CgroupV1(
|
||||
cgroup_name=AGENT_LOG_COLLECTOR,
|
||||
controller_mountpoints={
|
||||
'memory': "/sys/fs/cgroup/memory"
|
||||
},
|
||||
controller_paths={
|
||||
'memory': "/sys/fs/cgroup/memory/{0}".format(relative_path)
|
||||
}
|
||||
)
|
||||
|
||||
def raise_on_sys_exit(*args):
|
||||
raise RuntimeError(args[0] if args else "Exiting")
|
||||
|
||||
with mock_cgroup_v1_environment(self.tmp_dir):
|
||||
with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup_paths",
|
||||
side_effect=mock_cgroup_paths):
|
||||
with patch("azurelinuxagent.ga.cgroupapi.SystemdCgroupApiv1.get_process_cgroup",
|
||||
side_effect=mock_cgroup):
|
||||
agent = Agent(False, conf_file_path=os.path.join(data_dir, "test_waagent.conf"))
|
||||
|
||||
with patch("sys.exit", side_effect=raise_on_sys_exit) as mock_exit:
|
||||
|
|
|
@ -7,7 +7,7 @@ from assertpy import assert_that, fail
|
|||
from azurelinuxagent.common.osutil import systemd
|
||||
from azurelinuxagent.common.utils import shellutil
|
||||
from azurelinuxagent.common.version import DISTRO_NAME, DISTRO_VERSION
|
||||
from azurelinuxagent.ga.cgroupapi import get_cgroup_api
|
||||
from azurelinuxagent.ga.cgroupapi import get_cgroup_api, SystemdCgroupApiv1
|
||||
from tests_e2e.tests.lib.agent_log import AgentLog
|
||||
from tests_e2e.tests.lib.logging import log
|
||||
from tests_e2e.tests.lib.retry import retry_if_false
|
||||
|
@ -164,9 +164,14 @@ def check_log_message(message, after_timestamp=datetime.datetime.min):
|
|||
return False
|
||||
|
||||
|
||||
def get_unit_cgroup_paths(unit_name):
|
||||
def get_unit_cgroup_proc_path(unit_name, controller):
|
||||
"""
|
||||
Returns the cgroup paths for the given unit
|
||||
Returns the cgroup.procs path for the given unit and controller.
|
||||
"""
|
||||
cgroups_api = get_cgroup_api()
|
||||
return cgroups_api.get_unit_cgroup_paths(unit_name)
|
||||
unit_cgroup = cgroups_api.get_unit_cgroup(unit_name=unit_name, cgroup_name="test cgroup")
|
||||
if isinstance(cgroups_api, SystemdCgroupApiv1):
|
||||
return unit_cgroup.get_controller_procs_path(controller=controller)
|
||||
else:
|
||||
return unit_cgroup.get_procs_path()
|
||||
|
||||
|
|
|
@ -18,14 +18,13 @@
|
|||
|
||||
# This script forces the process check by putting unknown process in the agent's cgroup
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import datetime
|
||||
|
||||
from assertpy import fail
|
||||
|
||||
from azurelinuxagent.common.utils import shellutil
|
||||
from tests_e2e.tests.lib.cgroup_helpers import check_agent_quota_disabled, check_log_message, get_unit_cgroup_paths, AGENT_SERVICE_NAME
|
||||
from tests_e2e.tests.lib.cgroup_helpers import check_agent_quota_disabled, check_log_message, get_unit_cgroup_proc_path, AGENT_SERVICE_NAME
|
||||
from tests_e2e.tests.lib.logging import log
|
||||
from tests_e2e.tests.lib.retry import retry_if_false
|
||||
|
||||
|
@ -62,8 +61,8 @@ def disable_agent_cgroups_with_unknown_process(pid):
|
|||
Note: System may kick the added process out of the cgroups, keeps adding until agent detect that process
|
||||
"""
|
||||
|
||||
def unknown_process_found(cpu_cgroup):
|
||||
cgroup_procs_path = os.path.join(cpu_cgroup, "cgroup.procs")
|
||||
def unknown_process_found():
|
||||
cgroup_procs_path = get_unit_cgroup_proc_path(AGENT_SERVICE_NAME, 'cpu,cpuacct')
|
||||
log.info("Adding dummy process %s to cgroup.procs file %s", pid, cgroup_procs_path)
|
||||
try:
|
||||
with open(cgroup_procs_path, 'a') as f:
|
||||
|
@ -81,9 +80,7 @@ def disable_agent_cgroups_with_unknown_process(pid):
|
|||
pid)), attempts=3)
|
||||
return found and retry_if_false(check_agent_quota_disabled, attempts=3)
|
||||
|
||||
cpu_cgroup, _ = get_unit_cgroup_paths(AGENT_SERVICE_NAME)
|
||||
|
||||
found: bool = retry_if_false(lambda: unknown_process_found(cpu_cgroup), attempts=3)
|
||||
found: bool = retry_if_false(unknown_process_found, attempts=3)
|
||||
if not found:
|
||||
fail("The agent did not detect unknown process: {0}".format(pid))
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче