зеркало из https://github.com/Azure/WALinuxAgent.git
Monitor RAM usage for VMAgent (#2597)
* Monitor RAM usage for VMAgent * address comments * fix Unit test * address new comments * refactor metricvalue * fix tests * fix monitor UTS * fix pylint warning * update periodic report * addressed few more comments * fix ut errors * pylint warnings * pylint error * fix tests * address private members * log counter found error * fix test errors
This commit is contained in:
Родитель
48158a8b56
Коммит
8320fee03c
|
@ -13,21 +13,60 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
# Requires Python 2.6+ and Openssl 1.0+
|
||||
from collections import namedtuple
|
||||
|
||||
import errno
|
||||
import os
|
||||
import re
|
||||
from datetime import timedelta
|
||||
|
||||
from azurelinuxagent.common import logger
|
||||
from azurelinuxagent.common import logger, conf
|
||||
from azurelinuxagent.common.exception import CGroupsException
|
||||
from azurelinuxagent.common.future import ustr
|
||||
from azurelinuxagent.common.osutil import get_osutil
|
||||
from azurelinuxagent.common.utils import fileutil
|
||||
|
||||
_REPORT_EVERY_HOUR = timedelta(hours=1)
|
||||
_DEFAULT_REPORT_PERIOD = timedelta(seconds=conf.get_cgroup_check_period())
|
||||
|
||||
AGENT_NAME_TELEMETRY = "walinuxagent.service" # Name used for telemetry; it needs to be consistent even if the name of the service changes
|
||||
|
||||
MetricValue = namedtuple('Metric', ['category', 'counter', 'instance', 'value'])
|
||||
|
||||
class CounterNotFound(Exception):
    """Raised when a requested counter cannot be found."""
|
||||
|
||||
|
||||
class MetricValue(object):
    """
    Class for defining all the required metric fields to send telemetry.

    Every field is assigned once in the constructor and exposed through a
    read-only property.
    """

    def __init__(self, category, counter, instance, value, report_period=_DEFAULT_REPORT_PERIOD):
        """
        :param category: metric category
        :param counter: counter name within the category
        :param instance: name of the instance the value was collected for
        :param value: the collected value
        :param report_period: timedelta associated with reporting this metric;
            defaults to _DEFAULT_REPORT_PERIOD
        """
        self._category = category
        self._counter = counter
        self._instance = instance
        self._value = value
        self._report_period = report_period

    def __repr__(self):
        # Without this, log and assertion messages only show the object's address.
        return "MetricValue(category={0!r}, counter={1!r}, instance={2!r}, value={3!r}, report_period={4!r})".format(
            self._category, self._counter, self._instance, self._value, self._report_period)

    @property
    def category(self):
        """Metric category passed to the constructor."""
        return self._category

    @property
    def counter(self):
        """Counter name passed to the constructor."""
        return self._counter

    @property
    def instance(self):
        """Instance name passed to the constructor."""
        return self._instance

    @property
    def value(self):
        """Collected value passed to the constructor."""
        return self._value

    @property
    def report_period(self):
        """Report period (timedelta) passed to the constructor."""
        return self._report_period
|
||||
|
||||
|
||||
class MetricsCategory(object):
|
||||
|
@ -40,6 +79,7 @@ class MetricsCounter(object):
|
|||
TOTAL_MEM_USAGE = "Total Memory Usage"
|
||||
MAX_MEM_USAGE = "Max Memory Usage"
|
||||
THROTTLED_TIME = "Throttled Time"
|
||||
SWAP_MEM_USAGE = "Swap Memory Usage"
|
||||
|
||||
|
||||
re_user_system_times = re.compile(r'user (\d+)\nsystem (\d+)\n')
|
||||
|
@ -166,7 +206,8 @@ class CpuCgroup(CGroup):
|
|||
#
|
||||
match = re_user_system_times.match(cpuacct_stat)
|
||||
if not match:
|
||||
raise CGroupsException("The contents of {0} are invalid: {1}".format(self._get_cgroup_file('cpuacct.stat'), cpuacct_stat))
|
||||
raise CGroupsException(
|
||||
"The contents of {0} are invalid: {1}".format(self._get_cgroup_file('cpuacct.stat'), cpuacct_stat))
|
||||
cpu_ticks = int(match.groups()[0]) + int(match.groups()[1])
|
||||
|
||||
return cpu_ticks
|
||||
|
@ -239,7 +280,8 @@ class CpuCgroup(CGroup):
|
|||
return float(self.get_throttled_time() / 1E9)
|
||||
|
||||
if not self._cpu_usage_initialized():
|
||||
raise CGroupsException("initialize_cpu_usage() must be invoked before the first call to get_throttled_time()")
|
||||
raise CGroupsException(
|
||||
"initialize_cpu_usage() must be invoked before the first call to get_throttled_time()")
|
||||
|
||||
self._previous_throttled_time = self._current_throttled_time
|
||||
self._current_throttled_time = self.get_throttled_time()
|
||||
|
@ -250,53 +292,99 @@ class CpuCgroup(CGroup):
|
|||
tracked = []
|
||||
cpu_usage = self.get_cpu_usage()
|
||||
if cpu_usage >= float(0):
|
||||
tracked.append(MetricValue(MetricsCategory.CPU_CATEGORY, MetricsCounter.PROCESSOR_PERCENT_TIME, self.name, cpu_usage))
|
||||
tracked.append(
|
||||
MetricValue(MetricsCategory.CPU_CATEGORY, MetricsCounter.PROCESSOR_PERCENT_TIME, self.name, cpu_usage))
|
||||
|
||||
if 'track_throttled_time' in kwargs and kwargs['track_throttled_time']:
|
||||
throttled_time = self.get_cpu_throttled_time()
|
||||
if cpu_usage >= float(0) and throttled_time >= float(0):
|
||||
tracked.append(MetricValue(MetricsCategory.CPU_CATEGORY, MetricsCounter.THROTTLED_TIME, self.name, throttled_time))
|
||||
tracked.append(
|
||||
MetricValue(MetricsCategory.CPU_CATEGORY, MetricsCounter.THROTTLED_TIME, self.name, throttled_time))
|
||||
|
||||
return tracked
|
||||
|
||||
|
||||
class MemoryCgroup(CGroup):
    """
    Cgroup that reads memory counters from the files under its path.
    """

    def __init__(self, name, cgroup_path):
        super(MemoryCgroup, self).__init__(name, cgroup_path)

        # Counts failed swap-counter lookups so that the warning is only issued for the first one.
        self._counter_not_found_error_count = 0

    def _get_memory_stat_counter(self, counter_name):
        """
        Return the value of the given counter from the cgroup's memory.stat file.

        :param counter_name: counter name as it appears at the start of a line (e.g. "rss")
        :return: value of the first line matching the counter
        :rtype: int
        :raises IOError, OSError: re-raised unchanged when errno is ENOENT
        :raises CGroupsException: any other failure while reading or parsing the file
        :raises CounterNotFound: no line in the file matches the counter
        """
        try:
            # The pattern does not depend on the line being scanned; build it once instead of per line.
            re_memory_counter = re.compile(r'{0}\s+(\d+)'.format(counter_name))

            with open(os.path.join(self.path, 'memory.stat')) as memory_stat:
                # cat /sys/fs/cgroup/memory/azure.slice/memory.stat
                # cache 67178496
                # rss 42340352
                # rss_huge 6291456
                # swap 0
                for line in memory_stat:
                    match = re_memory_counter.match(line)
                    if match is not None:
                        return int(match.group(1))
        except (IOError, OSError) as e:
            if e.errno == errno.ENOENT:
                raise
            raise CGroupsException("Failed to read memory.stat: {0}".format(ustr(e)))
        except Exception as e:
            raise CGroupsException("Failed to read memory.stat: {0}".format(ustr(e)))

        # Reached only when the file was read completely without finding the counter.
        raise CounterNotFound("Cannot find counter: {0}".format(counter_name))

    def get_memory_usage(self):
        """
        Collect RSS+CACHE from memory.stat cgroup.

        :return: Memory usage in bytes
        :rtype: int
        """
        cache = self._get_memory_stat_counter("cache")
        rss = self._get_memory_stat_counter("rss")
        return cache + rss

    def try_swap_memory_usage(self):
        """
        Collect SWAP from memory.stat cgroup.

        :return: Memory usage in bytes; 0 when the swap counter is not present in memory.stat
        :rtype: int
        Note: stat file is the only place to get the SWAP since other swap related file memory.memsw.usage_in_bytes is for total Memory+SWAP.
        """
        try:
            return self._get_memory_stat_counter("swap")
        except CounterNotFound as e:
            # Only the first missing-counter occurrence is logged; later ones just return 0.
            if self._counter_not_found_error_count < 1:
                logger.periodic_warn(logger.EVERY_HALF_HOUR,
                                     'Could not find swap counter from "memory.stat" file in the cgroup: {0}.'
                                     ' Internal error: {1}'.format(self.path, ustr(e)))
                self._counter_not_found_error_count += 1
            return 0

    def get_max_memory_usage(self):
        """
        Collect memory.max_usage_in_bytes from the cgroup.

        :return: Memory usage in bytes
        :rtype: int
        :raises IOError, OSError: re-raised unchanged when errno is ENOENT
        :raises CGroupsException: any other failure while reading or converting the value
        """
        usage = 0
        try:
            usage = int(self._get_parameters('memory.max_usage_in_bytes', first_line_only=True))
        except Exception as e:
            if isinstance(e, (IOError, OSError)) and e.errno == errno.ENOENT:  # pylint: disable=E1101
                raise
            raise CGroupsException("Exception while attempting to read {0}".format("memory.max_usage_in_bytes"), e)

        return usage

    def get_tracked_metrics(self, **_):
        """
        Build the memory metrics for this cgroup: total, max and swap usage.

        Max and swap usage carry the hourly report period; total usage uses
        the MetricValue default.

        :return: list of MetricValue
        """
        return [
            MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.TOTAL_MEM_USAGE, self.name,
                        self.get_memory_usage()),
            MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.MAX_MEM_USAGE, self.name,
                        self.get_max_memory_usage(), _REPORT_EVERY_HOUR),
            MetricValue(MetricsCategory.MEMORY_CATEGORY, MetricsCounter.SWAP_MEM_USAGE, self.name,
                        self.try_swap_memory_usage(), _REPORT_EVERY_HOUR)
        ]
|
||||
|
|
|
@ -23,7 +23,7 @@ import threading
|
|||
|
||||
from azurelinuxagent.common import conf
|
||||
from azurelinuxagent.common import logger
|
||||
from azurelinuxagent.common.cgroup import CpuCgroup, AGENT_NAME_TELEMETRY, MetricsCounter
|
||||
from azurelinuxagent.common.cgroup import CpuCgroup, AGENT_NAME_TELEMETRY, MetricsCounter, MemoryCgroup
|
||||
from azurelinuxagent.common.cgroupapi import CGroupsApi, SystemdCgroupsApi, SystemdRunError, EXTENSION_SLICE_PREFIX
|
||||
from azurelinuxagent.common.cgroupstelemetry import CGroupsTelemetry
|
||||
from azurelinuxagent.common.exception import ExtensionErrorCodes, CGroupsException
|
||||
|
@ -98,6 +98,13 @@ _DROP_IN_FILE_CPU_QUOTA_CONTENTS_FORMAT = """
|
|||
[Service]
|
||||
CPUQuota={0}
|
||||
"""
|
||||
_DROP_IN_FILE_MEMORY_ACCOUNTING = "13-MemoryAccounting.conf"
|
||||
_DROP_IN_FILE_MEMORY_ACCOUNTING_CONTENTS = """
|
||||
# This drop-in unit file was created by the Azure VM Agent.
|
||||
# Do not edit.
|
||||
[Service]
|
||||
MemoryAccounting=yes
|
||||
"""
|
||||
|
||||
|
||||
class DisableCgroups(object):
|
||||
|
@ -176,11 +183,18 @@ class CGroupConfigurator(object):
|
|||
cpu_controller_root,
|
||||
memory_controller_root)
|
||||
|
||||
if self._agent_cpu_cgroup_path is not None or self._agent_memory_cgroup_path is not None:
|
||||
self.enable()
|
||||
|
||||
if self._agent_cpu_cgroup_path is not None:
|
||||
_log_cgroup_info("Agent CPU cgroup: {0}", self._agent_cpu_cgroup_path)
|
||||
self.enable()
|
||||
self.__set_cpu_quota(conf.get_agent_cpu_quota())
|
||||
CGroupsTelemetry.track_cgroup(CpuCgroup(AGENT_NAME_TELEMETRY, self._agent_cpu_cgroup_path))
|
||||
|
||||
if self._agent_memory_cgroup_path is not None:
|
||||
_log_cgroup_info("Agent Memory cgroup: {0}", self._agent_memory_cgroup_path)
|
||||
CGroupsTelemetry.track_cgroup(MemoryCgroup(AGENT_NAME_TELEMETRY, self._agent_memory_cgroup_path))
|
||||
|
||||
_log_cgroup_info('Agent cgroups enabled: {0}', self._agent_cgroups_enabled)
|
||||
|
||||
except Exception as exception:
|
||||
|
@ -322,6 +336,7 @@ class CGroupConfigurator(object):
|
|||
agent_drop_in_path = systemd.get_agent_drop_in_path()
|
||||
agent_drop_in_file_slice = os.path.join(agent_drop_in_path, _AGENT_DROP_IN_FILE_SLICE)
|
||||
agent_drop_in_file_cpu_accounting = os.path.join(agent_drop_in_path, _DROP_IN_FILE_CPU_ACCOUNTING)
|
||||
agent_drop_in_file_memory_accounting = os.path.join(agent_drop_in_path, _DROP_IN_FILE_MEMORY_ACCOUNTING)
|
||||
|
||||
files_to_create = []
|
||||
|
||||
|
@ -349,6 +364,12 @@ class CGroupConfigurator(object):
|
|||
if not os.path.exists(agent_drop_in_file_cpu_accounting):
|
||||
files_to_create.append((agent_drop_in_file_cpu_accounting, _DROP_IN_FILE_CPU_ACCOUNTING_CONTENTS))
|
||||
|
||||
if fileutil.findre_in_file(agent_unit_file, r"MemoryAccounting=") is not None:
|
||||
CGroupConfigurator._Impl.__cleanup_unit_file(agent_drop_in_file_memory_accounting)
|
||||
else:
|
||||
if not os.path.exists(agent_drop_in_file_memory_accounting):
|
||||
files_to_create.append((agent_drop_in_file_memory_accounting, _DROP_IN_FILE_MEMORY_ACCOUNTING_CONTENTS))
|
||||
|
||||
if len(files_to_create) > 0:
|
||||
# create the unit files, but if 1 fails remove all and return
|
||||
try:
|
||||
|
@ -482,7 +503,6 @@ class CGroupConfigurator(object):
|
|||
"Attempted to enable cgroups, but they are not supported on the current platform")
|
||||
self._agent_cgroups_enabled = True
|
||||
self._extensions_cgroups_enabled = True
|
||||
self.__set_cpu_quota(conf.get_agent_cpu_quota())
|
||||
|
||||
def disable(self, reason, disable_cgroups):
|
||||
if disable_cgroups == DisableCgroups.ALL: # disable all
|
||||
|
|
|
@ -47,16 +47,21 @@ class PollResourceUsage(PeriodicOperation):
|
|||
Periodic operation to poll the tracked cgroups for resource usage data.
|
||||
|
||||
It also checks whether there are processes in the agent's cgroup that should not be there.
|
||||
|
||||
"""
|
||||
def __init__(self):
    """Initialize the operation with the configured cgroup check period and empty reporting state."""
    check_period = conf.get_cgroup_check_period()
    super(PollResourceUsage, self).__init__(check_period)
    self.__log_metrics = conf.get_cgroup_log_metrics()
    # Maps a metric key to the time that metric was last reported.
    self.__periodic_metrics = {}
|
||||
|
||||
def _operation(self):
    """
    Poll all tracked cgroups and report each metric at most once per its report period,
    then pass the polled metrics to the configurator's cgroup check.
    """
    tracked_metrics = CGroupsTelemetry.poll_all_tracked()

    for metric in tracked_metrics:
        key = metric.category + metric.counter + metric.instance
        # Take the timestamp once so the comparison and the stored value agree.
        now = datetime.datetime.now()
        last_reported = self.__periodic_metrics.get(key)
        # Report on first sight of the metric, or once its report period has elapsed.
        if last_reported is None or (last_reported + metric.report_period) <= now:
            report_metric(metric.category, metric.counter, metric.instance, metric.value, log_event=self.__log_metrics)
            self.__periodic_metrics[key] = now

    CGroupConfigurator.get_instance().check_cgroups(tracked_metrics)
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@ Restart=always
|
|||
RestartSec=5
|
||||
Slice=azure.slice
|
||||
CPUAccounting=yes
|
||||
MemoryAccounting=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
|
|
@ -13,6 +13,7 @@ Restart=always
|
|||
RestartSec=5
|
||||
Slice=azure.slice
|
||||
CPUAccounting=yes
|
||||
MemoryAccounting=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
|
|
@ -19,6 +19,7 @@ ExecStart=/usr/bin/python3 -u /usr/sbin/waagent -daemon
|
|||
Restart=always
|
||||
Slice=azure.slice
|
||||
CPUAccounting=yes
|
||||
MemoryAccounting=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
|
|
@ -97,6 +97,7 @@ class UnitFilePaths:
|
|||
slice = "/lib/systemd/system/walinuxagent.service.d/10-Slice.conf"
|
||||
cpu_accounting = "/lib/systemd/system/walinuxagent.service.d/11-CPUAccounting.conf"
|
||||
cpu_quota = "/lib/systemd/system/walinuxagent.service.d/12-CPUQuota.conf"
|
||||
memory_accounting = "/lib/systemd/system/walinuxagent.service.d/13-MemoryAccounting.conf"
|
||||
extension_service_cpu_accounting = '/lib/systemd/system/extension.service.d/11-CPUAccounting.conf'
|
||||
extension_service_cpu_quota = '/lib/systemd/system/extension.service.d/12-CPUQuota.conf'
|
||||
extension_service_memory_accounting = '/lib/systemd/system/extension.service.d/13-MemoryAccounting.conf'
|
||||
|
|
|
@ -77,6 +77,8 @@ class CGroupConfiguratorSystemdTestCase(AgentTestCase):
|
|||
self.assertTrue(configurator.enabled(), "Cgroups should be enabled")
|
||||
self.assertTrue(any(cg for cg in tracked.values() if cg.name == AGENT_NAME_TELEMETRY and 'cpu' in cg.path),
|
||||
"The Agent's CPU is not being tracked. Tracked: {0}".format(tracked))
|
||||
self.assertTrue(any(cg for cg in tracked.values() if cg.name == AGENT_NAME_TELEMETRY and 'memory' in cg.path),
|
||||
"The Agent's Memory is not being tracked. Tracked: {0}".format(tracked))
|
||||
|
||||
def test_initialize_should_start_tracking_other_controllers_when_one_is_not_present(self):
|
||||
command_mocks = [MockCommand(r"^mount -t cgroup$",
|
||||
|
@ -147,6 +149,7 @@ cgroup on /sys/fs/cgroup/blkio type cgroup (rw,nosuid,nodev,noexec,relatime,blki
|
|||
extensions_slice_unit_file = configurator.mocks.get_mapped_path(UnitFilePaths.vmextensions)
|
||||
agent_drop_in_file_slice = configurator.mocks.get_mapped_path(UnitFilePaths.slice)
|
||||
agent_drop_in_file_cpu_accounting = configurator.mocks.get_mapped_path(UnitFilePaths.cpu_accounting)
|
||||
agent_drop_in_file_memory_accounting = configurator.mocks.get_mapped_path(UnitFilePaths.memory_accounting)
|
||||
|
||||
# The mock creates the slice unit files; delete them
|
||||
os.remove(azure_slice_unit_file)
|
||||
|
@ -158,6 +161,7 @@ cgroup on /sys/fs/cgroup/blkio type cgroup (rw,nosuid,nodev,noexec,relatime,blki
|
|||
self.assertFalse(os.path.exists(extensions_slice_unit_file), "{0} should not have been created".format(extensions_slice_unit_file))
|
||||
self.assertFalse(os.path.exists(agent_drop_in_file_slice), "{0} should not have been created".format(agent_drop_in_file_slice))
|
||||
self.assertFalse(os.path.exists(agent_drop_in_file_cpu_accounting), "{0} should not have been created".format(agent_drop_in_file_cpu_accounting))
|
||||
self.assertFalse(os.path.exists(agent_drop_in_file_memory_accounting), "{0} should not have been created".format(agent_drop_in_file_memory_accounting))
|
||||
|
||||
def test_initialize_should_create_unit_files_when_the_agent_service_file_is_not_updated(self):
|
||||
with self._get_cgroup_configurator(initialize=False) as configurator:
|
||||
|
@ -166,6 +170,7 @@ cgroup on /sys/fs/cgroup/blkio type cgroup (rw,nosuid,nodev,noexec,relatime,blki
|
|||
extensions_slice_unit_file = configurator.mocks.get_mapped_path(UnitFilePaths.vmextensions)
|
||||
agent_drop_in_file_slice = configurator.mocks.get_mapped_path(UnitFilePaths.slice)
|
||||
agent_drop_in_file_cpu_accounting = configurator.mocks.get_mapped_path(UnitFilePaths.cpu_accounting)
|
||||
agent_drop_in_file_memory_accounting = configurator.mocks.get_mapped_path(UnitFilePaths.memory_accounting)
|
||||
|
||||
# The mock creates the service and slice unit files; replace the former and delete the latter
|
||||
configurator.mocks.add_data_file(os.path.join(data_dir, 'init', "walinuxagent.service.previous"), UnitFilePaths.walinuxagent)
|
||||
|
@ -180,6 +185,7 @@ cgroup on /sys/fs/cgroup/blkio type cgroup (rw,nosuid,nodev,noexec,relatime,blki
|
|||
self.assertTrue(os.path.exists(extensions_slice_unit_file), "{0} was not created".format(extensions_slice_unit_file))
|
||||
self.assertTrue(os.path.exists(agent_drop_in_file_slice), "{0} was not created".format(agent_drop_in_file_slice))
|
||||
self.assertTrue(os.path.exists(agent_drop_in_file_cpu_accounting), "{0} was not created".format(agent_drop_in_file_cpu_accounting))
|
||||
self.assertTrue(os.path.exists(agent_drop_in_file_memory_accounting), "{0} was not created".format(agent_drop_in_file_memory_accounting))
|
||||
|
||||
def test_setup_extension_slice_should_create_unit_files(self):
|
||||
with self._get_cgroup_configurator() as configurator:
|
||||
|
@ -229,12 +235,12 @@ cgroup on /sys/fs/cgroup/blkio type cgroup (rw,nosuid,nodev,noexec,relatime,blki
|
|||
self.assertIn("Attempted to enable cgroups, but they are not supported on the current platform", str(context_manager.exception))
|
||||
|
||||
def test_enable_should_set_agent_cpu_quota_and_track_throttled_time(self):
|
||||
with self._get_cgroup_configurator(enable=False) as configurator:
|
||||
with self._get_cgroup_configurator(initialize=False) as configurator:
|
||||
agent_drop_in_file_cpu_quota = configurator.mocks.get_mapped_path(UnitFilePaths.cpu_quota)
|
||||
if os.path.exists(agent_drop_in_file_cpu_quota):
|
||||
raise Exception("{0} should not have been created during test setup".format(agent_drop_in_file_cpu_quota))
|
||||
|
||||
configurator.enable()
|
||||
configurator.initialize()
|
||||
|
||||
expected_quota = "CPUQuota={0}%".format(conf.get_agent_cpu_quota())
|
||||
self.assertTrue(os.path.exists(agent_drop_in_file_cpu_quota), "{0} was not created".format(agent_drop_in_file_cpu_quota))
|
||||
|
@ -244,13 +250,13 @@ cgroup on /sys/fs/cgroup/blkio type cgroup (rw,nosuid,nodev,noexec,relatime,blki
|
|||
self.assertTrue(CGroupsTelemetry.get_track_throttled_time(), "Throttle time should be tracked")
|
||||
|
||||
def test_enable_should_not_track_throttled_time_when_setting_the_cpu_quota_fails(self):
|
||||
with self._get_cgroup_configurator(enable=False) as configurator:
|
||||
with self._get_cgroup_configurator(initialize=False) as configurator:
|
||||
if CGroupsTelemetry.get_track_throttled_time():
|
||||
raise Exception("Test setup should not start tracking Throttle Time")
|
||||
|
||||
configurator.mocks.add_file(UnitFilePaths.cpu_quota, Exception("A TEST EXCEPTION"))
|
||||
|
||||
configurator.enable()
|
||||
configurator.initialize()
|
||||
|
||||
self.assertFalse(CGroupsTelemetry.get_track_throttled_time(), "Throttle time should not be tracked")
|
||||
|
||||
|
@ -268,7 +274,9 @@ cgroup on /sys/fs/cgroup/blkio type cgroup (rw,nosuid,nodev,noexec,relatime,blki
|
|||
self.assertTrue(
|
||||
fileutil.findre_in_file(agent_drop_in_file_cpu_quota, "^CPUQuota=$"),
|
||||
"CPUQuota was not set correctly. Expected an empty value. Got:\n{0}".format(fileutil.read_file(agent_drop_in_file_cpu_quota)))
|
||||
self.assertEqual(len(CGroupsTelemetry._tracked), 0, "No cgroups should be tracked after disable. Tracking: {0}".format(CGroupsTelemetry._tracked))
|
||||
self.assertEqual(len(CGroupsTelemetry._tracked), 1, "Memory cgroups should be tracked after disable. Tracking: {0}".format(CGroupsTelemetry._tracked))
|
||||
self.assertFalse(any(cg for cg in CGroupsTelemetry._tracked.values() if cg.name == 'walinuxagent.service' and 'cpu' in cg.path),
|
||||
"The Agent's cpu should not be tracked. Tracked: {0}".format(CGroupsTelemetry._tracked))
|
||||
|
||||
def test_disable_should_reset_cpu_quota_for_all_cgroups(self):
|
||||
service_list = [
|
||||
|
|
|
@ -22,7 +22,7 @@ import os
|
|||
import random
|
||||
import shutil
|
||||
|
||||
from azurelinuxagent.common.cgroup import CpuCgroup, MemoryCgroup, MetricsCounter
|
||||
from azurelinuxagent.common.cgroup import CpuCgroup, MemoryCgroup, MetricsCounter, CounterNotFound
|
||||
from azurelinuxagent.common.exception import CGroupsException
|
||||
from azurelinuxagent.common.osutil import get_osutil
|
||||
from azurelinuxagent.common.utils import fileutil
|
||||
|
@ -206,11 +206,14 @@ class TestMemoryCgroup(AgentTestCase):
|
|||
test_mem_cg = MemoryCgroup("test_extension", os.path.join(data_dir, "cgroups", "memory_mount"))
|
||||
|
||||
memory_usage = test_mem_cg.get_memory_usage()
|
||||
self.assertEqual(100000, memory_usage)
|
||||
self.assertEqual(150000, memory_usage)
|
||||
|
||||
max_memory_usage = test_mem_cg.get_max_memory_usage()
|
||||
self.assertEqual(1000000, max_memory_usage)
|
||||
|
||||
swap_memory_usage = test_mem_cg.try_swap_memory_usage()
|
||||
self.assertEqual(20000, swap_memory_usage)
|
||||
|
||||
def test_get_metrics_when_files_not_present(self):
|
||||
test_mem_cg = MemoryCgroup("test_extension", os.path.join(data_dir, "cgroups"))
|
||||
|
||||
|
@ -223,3 +226,17 @@ class TestMemoryCgroup(AgentTestCase):
|
|||
test_mem_cg.get_max_memory_usage()
|
||||
|
||||
self.assertEqual(e.exception.errno, errno.ENOENT)
|
||||
|
||||
with self.assertRaises(IOError) as e:
|
||||
test_mem_cg.try_swap_memory_usage()
|
||||
|
||||
self.assertEqual(e.exception.errno, errno.ENOENT)
|
||||
|
||||
def test_get_memory_usage_counters_not_found(self):
    """get_memory_usage raises CounterNotFound on missing counters, while try_swap_memory_usage returns 0."""
    cgroup_dir = os.path.join(data_dir, "cgroups", "missing_memory_counters")
    test_mem_cg = MemoryCgroup("test_extension", cgroup_dir)

    with self.assertRaises(CounterNotFound):
        test_mem_cg.get_memory_usage()

    self.assertEqual(0, test_mem_cg.try_swap_memory_usage())
|
||||
|
|
|
@ -116,18 +116,20 @@ class TestCGroupsTelemetry(AgentTestCase):
|
|||
self.assertTrue(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(i)))
|
||||
self.assertTrue(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(i)))
|
||||
|
||||
def _assert_polled_metrics_equal(self, metrics, cpu_metric_value, memory_metric_value, max_memory_metric_value, swap_memory_value):
    """
    Assert that every polled metric has a known category/counter and the expected value.

    :param metrics: iterable of metrics exposing category, counter and value
    :param cpu_metric_value: expected value of CPU metrics
    :param memory_metric_value: expected value of "Total Memory Usage"
    :param max_memory_metric_value: expected value of "Max Memory Usage"
    :param swap_memory_value: expected value of "Swap Memory Usage"
    """
    # Expected value for each memory counter; also serves as the set of allowed counter names.
    expected_memory_values = {
        "Total Memory Usage": memory_metric_value,
        "Max Memory Usage": max_memory_metric_value,
        "Swap Memory Usage": swap_memory_value,
    }

    for metric in metrics:
        self.assertIn(metric.category, ["CPU", "Memory"])
        if metric.category == "CPU":
            self.assertEqual(metric.counter, "% Processor Time")
            self.assertEqual(metric.value, cpu_metric_value)
        if metric.category == "Memory":
            self.assertIn(metric.counter, ["Total Memory Usage", "Max Memory Usage", "Swap Memory Usage"])
            self.assertEqual(metric.value, expected_memory_values[metric.counter])
|
||||
|
||||
def test_telemetry_polling_with_active_cgroups(self, *args): # pylint: disable=unused-argument
|
||||
num_extensions = 3
|
||||
|
@ -136,27 +138,30 @@ class TestCGroupsTelemetry(AgentTestCase):
|
|||
|
||||
with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as patch_get_memory_max_usage:
|
||||
with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage:
|
||||
with patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") as patch_get_cpu_usage:
|
||||
with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:
|
||||
patch_is_active.return_value = True
|
||||
with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage:
|
||||
with patch("azurelinuxagent.common.cgroup.MemoryCgroup.try_swap_memory_usage") as patch_try_swap_memory_usage:
|
||||
with patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") as patch_get_cpu_usage:
|
||||
with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:
|
||||
patch_is_active.return_value = True
|
||||
|
||||
current_cpu = 30
|
||||
current_memory = 209715200
|
||||
current_max_memory = 471859200
|
||||
current_cpu = 30
|
||||
current_memory = 209715200
|
||||
current_max_memory = 471859200
|
||||
current_swap_memory = 20971520
|
||||
|
||||
# 1 CPU metric + 1 Current Memory + 1 Max memory
|
||||
num_of_metrics_per_extn_expected = 3
|
||||
patch_get_cpu_usage.return_value = current_cpu
|
||||
patch_get_memory_usage.return_value = current_memory # example 200 MB
|
||||
patch_get_memory_max_usage.return_value = current_max_memory # example 450 MB
|
||||
num_polls = 10
|
||||
# 1 CPU metric + 1 Current Memory + 1 Max memory + 1 swap memory
|
||||
num_of_metrics_per_extn_expected = 4
|
||||
patch_get_cpu_usage.return_value = current_cpu
|
||||
patch_get_memory_usage.return_value = current_memory # example 200 MB
|
||||
patch_get_memory_max_usage.return_value = current_max_memory # example 450 MB
|
||||
patch_try_swap_memory_usage.return_value = current_swap_memory # example 20MB
|
||||
num_polls = 12
|
||||
|
||||
for data_count in range(1, num_polls + 1): # pylint: disable=unused-variable
|
||||
metrics = CGroupsTelemetry.poll_all_tracked()
|
||||
|
||||
self.assertEqual(len(metrics), num_extensions * num_of_metrics_per_extn_expected)
|
||||
self._assert_polled_metrics_equal(metrics, current_cpu, current_memory, current_max_memory)
|
||||
for data_count in range(1, num_polls + 1): # pylint: disable=unused-variable
|
||||
metrics = CGroupsTelemetry.poll_all_tracked()
|
||||
|
||||
self.assertEqual(len(metrics), num_extensions * num_of_metrics_per_extn_expected)
|
||||
self._assert_polled_metrics_equal(metrics, current_cpu, current_memory, current_max_memory, current_swap_memory)
|
||||
|
||||
@patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage", side_effect=raise_ioerror)
|
||||
@patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage", side_effect=raise_ioerror)
|
||||
|
@ -177,7 +182,6 @@ class TestCGroupsTelemetry(AgentTestCase):
|
|||
|
||||
self.assertEqual(len(metrics), 0)
|
||||
|
||||
|
||||
@patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage")
|
||||
@patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage")
|
||||
@patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage")
|
||||
|
@ -216,7 +220,6 @@ class TestCGroupsTelemetry(AgentTestCase):
|
|||
self.assertFalse(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(i)))
|
||||
self.assertFalse(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(i)))
|
||||
|
||||
|
||||
# mocking get_proc_stat to make it run on Mac and other systems. This test does not need to read the values of the
|
||||
# /proc/stat file on the filesystem.
|
||||
@patch("azurelinuxagent.common.logger.periodic_warn")
|
||||
|
@ -238,7 +241,7 @@ class TestCGroupsTelemetry(AgentTestCase):
|
|||
@patch("azurelinuxagent.common.logger.periodic_warn")
|
||||
def test_telemetry_polling_to_generate_transient_logs_ioerror_permission_denied(self, patch_periodic_warn):
|
||||
num_extensions = 1
|
||||
num_controllers = 2
|
||||
num_controllers = 1
|
||||
is_active_check_per_controller = 2
|
||||
self._track_new_extension_cgroups(num_extensions)
|
||||
|
||||
|
@ -251,7 +254,7 @@ class TestCGroupsTelemetry(AgentTestCase):
|
|||
with patch("azurelinuxagent.common.utils.fileutil.read_file", side_effect=io_error_3):
|
||||
poll_count = 1
|
||||
expected_count_per_call = num_controllers + is_active_check_per_controller
|
||||
# each collect per controller would generate a log statement, and each cgroup would invoke a
|
||||
# get_max_memory_usage memory controller would generate a log statement, and each cgroup would invoke a
|
||||
# is active check raising an exception
|
||||
|
||||
for data_count in range(poll_count, 10): # pylint: disable=unused-variable
|
||||
|
@ -266,16 +269,18 @@ class TestCGroupsTelemetry(AgentTestCase):
|
|||
# Trying to invoke IndexError during the getParameter call
|
||||
with patch("azurelinuxagent.common.utils.fileutil.read_file", return_value=''):
|
||||
with patch("azurelinuxagent.common.logger.periodic_warn") as patch_periodic_warn:
|
||||
expected_call_count = 2 # 1 periodic warning for the cpu cgroups, and 1 for memory
|
||||
expected_call_count = 1 # 1 periodic warning for memory
|
||||
for data_count in range(1, 10): # pylint: disable=unused-variable
|
||||
CGroupsTelemetry.poll_all_tracked()
|
||||
self.assertEqual(expected_call_count, patch_periodic_warn.call_count)
|
||||
|
||||
@patch("azurelinuxagent.common.cgroup.MemoryCgroup.try_swap_memory_usage")
|
||||
@patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage")
|
||||
@patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage")
|
||||
@patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage")
|
||||
@patch("azurelinuxagent.common.cgroup.CGroup.is_active")
|
||||
def test_telemetry_calculations(self, patch_is_active, patch_get_cpu_usage, patch_get_memory_usage, patch_get_memory_max_usage, *args): # pylint: disable=unused-argument
|
||||
def test_telemetry_calculations(self, patch_is_active, patch_get_cpu_usage, patch_get_memory_usage, patch_get_memory_max_usage, patch_try_memory_swap_usage,
|
||||
*args): # pylint: disable=unused-argument
|
||||
num_polls = 10
|
||||
num_extensions = 1
|
||||
|
||||
|
@ -284,6 +289,7 @@ class TestCGroupsTelemetry(AgentTestCase):
|
|||
# only verifying calculations and not validity of the values.
|
||||
memory_usage_values = [random.randint(0, 8 * 1024 ** 3) for _ in range(num_polls)]
|
||||
max_memory_usage_values = [random.randint(0, 8 * 1024 ** 3) for _ in range(num_polls)]
|
||||
swap_usage_values = [random.randint(0, 8 * 1024 ** 3) for _ in range(num_polls)]
|
||||
|
||||
self._track_new_extension_cgroups(num_extensions)
|
||||
self.assertEqual(2 * num_extensions, len(CGroupsTelemetry._tracked))
|
||||
|
@ -291,14 +297,15 @@ class TestCGroupsTelemetry(AgentTestCase):
|
|||
for i in range(num_polls):
|
||||
patch_is_active.return_value = True
|
||||
patch_get_cpu_usage.return_value = cpu_percent_values[i]
|
||||
patch_get_memory_usage.return_value = memory_usage_values[i] # example 200 MB
|
||||
patch_get_memory_max_usage.return_value = max_memory_usage_values[i] # example 450 MB
|
||||
patch_get_memory_usage.return_value = memory_usage_values[i]
|
||||
patch_get_memory_max_usage.return_value = max_memory_usage_values[i]
|
||||
patch_try_memory_swap_usage.return_value = swap_usage_values[i]
|
||||
|
||||
metrics = CGroupsTelemetry.poll_all_tracked()
|
||||
|
||||
# 1 CPU metric + 1 Current Memory + 1 Max memory
|
||||
self.assertEqual(len(metrics), 3 * num_extensions)
|
||||
self._assert_polled_metrics_equal(metrics, cpu_percent_values[i], memory_usage_values[i], max_memory_usage_values[i])
|
||||
# 1 CPU metric + 1 Current Memory + 1 Max memory + 1 swap memory
|
||||
self.assertEqual(len(metrics), 4 * num_extensions)
|
||||
self._assert_polled_metrics_equal(metrics, cpu_percent_values[i], memory_usage_values[i], max_memory_usage_values[i], swap_usage_values[i])
|
||||
|
||||
def test_cgroup_tracking(self, *args): # pylint: disable=unused-argument
|
||||
num_extensions = 5
|
||||
|
@ -332,8 +339,7 @@ class TestCGroupsTelemetry(AgentTestCase):
|
|||
metrics = CGroupsTelemetry.poll_all_tracked()
|
||||
|
||||
self.assertEqual(len(metrics), num_extensions * 1) # Only CPU populated
|
||||
self._assert_polled_metrics_equal(metrics, current_cpu, 0, 0)
|
||||
|
||||
self._assert_polled_metrics_equal(metrics, current_cpu, 0, 0, 0)
|
||||
|
||||
@patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage", side_effect=raise_ioerror)
|
||||
def test_process_cgroup_metric_with_no_cpu_cgroup_mounted(self, *args): # pylint: disable=unused-argument
|
||||
|
@ -343,20 +349,23 @@ class TestCGroupsTelemetry(AgentTestCase):
|
|||
|
||||
with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as patch_get_memory_max_usage:
|
||||
with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage:
|
||||
with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:
|
||||
patch_is_active.return_value = True
|
||||
with patch("azurelinuxagent.common.cgroup.MemoryCgroup.try_swap_memory_usage") as patch_try_swap_memory_usage:
|
||||
with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:
|
||||
patch_is_active.return_value = True
|
||||
|
||||
current_memory = 209715200
|
||||
current_max_memory = 471859200
|
||||
current_memory = 209715200
|
||||
current_max_memory = 471859200
|
||||
current_swap_memory = 20971520
|
||||
|
||||
patch_get_memory_usage.return_value = current_memory # example 200 MB
|
||||
patch_get_memory_max_usage.return_value = current_max_memory # example 450 MB
|
||||
num_polls = 10
|
||||
for data_count in range(1, num_polls + 1): # pylint: disable=unused-variable
|
||||
metrics = CGroupsTelemetry.poll_all_tracked()
|
||||
# Memory is only populated, CPU is not. Thus 2 metrics per cgroup.
|
||||
self.assertEqual(len(metrics), num_extensions * 2)
|
||||
self._assert_polled_metrics_equal(metrics, 0, current_memory, current_max_memory)
|
||||
patch_get_memory_usage.return_value = current_memory # example 200 MB
|
||||
patch_get_memory_max_usage.return_value = current_max_memory # example 450 MB
|
||||
patch_try_swap_memory_usage.return_value = current_swap_memory # example 20MB
|
||||
num_polls = 10
|
||||
for data_count in range(1, num_polls + 1): # pylint: disable=unused-variable
|
||||
metrics = CGroupsTelemetry.poll_all_tracked()
|
||||
# Memory is only populated, CPU is not. Thus 3 metrics for memory.
|
||||
self.assertEqual(len(metrics), num_extensions * 3)
|
||||
self._assert_polled_metrics_equal(metrics, 0, current_memory, current_max_memory, current_swap_memory)
|
||||
|
||||
@patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage", side_effect=raise_ioerror)
|
||||
@patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage", side_effect=raise_ioerror)
|
||||
|
@ -408,4 +417,3 @@ class TestCGroupsTelemetry(AgentTestCase):
|
|||
|
||||
for metric in metrics:
|
||||
self.assertGreaterEqual(metric.value, 0, "telemetry should not report negative value")
|
||||
|
||||
|
|
|
@ -31,9 +31,10 @@ from mock import MagicMock
|
|||
from azurelinuxagent.common.utils import textutil, fileutil
|
||||
from azurelinuxagent.common import event, logger
|
||||
from azurelinuxagent.common.AgentGlobals import AgentGlobals
|
||||
from azurelinuxagent.common.event import add_event, add_periodic, add_log_event, elapsed_milliseconds, report_metric, \
|
||||
from azurelinuxagent.common.event import add_event, add_periodic, add_log_event, elapsed_milliseconds, \
|
||||
WALAEventOperation, parse_xml_event, parse_json_event, AGENT_EVENT_FILE_EXTENSION, EVENTS_DIRECTORY, \
|
||||
TELEMETRY_EVENT_EVENT_ID, TELEMETRY_EVENT_PROVIDER_ID, TELEMETRY_LOG_EVENT_ID, TELEMETRY_LOG_PROVIDER_ID
|
||||
TELEMETRY_EVENT_EVENT_ID, TELEMETRY_EVENT_PROVIDER_ID, TELEMETRY_LOG_EVENT_ID, TELEMETRY_LOG_PROVIDER_ID, \
|
||||
report_metric
|
||||
from azurelinuxagent.common.future import ustr
|
||||
from azurelinuxagent.common.osutil import get_osutil
|
||||
from azurelinuxagent.common.telemetryevent import CommonTelemetryEventSchema, GuestAgentGenericLogsSchema, \
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
cache 50000
|
||||
rss 100000
|
||||
rss_huge 4194304
|
||||
shmem 8192
|
||||
mapped_file 540672
|
||||
dirty 0
|
||||
writeback 0
|
||||
swap 20000
|
||||
pgpgin 42584
|
||||
pgpgout 24188
|
||||
pgfault 71983
|
||||
pgmajfault 402
|
||||
inactive_anon 32854016
|
||||
active_anon 12288
|
||||
inactive_file 47472640
|
||||
active_file 1290240
|
||||
unevictable 0
|
||||
hierarchical_memory_limit 9223372036854771712
|
||||
hierarchical_memsw_limit 9223372036854771712
|
||||
total_cache 48771072
|
||||
total_rss 32845824
|
||||
total_rss_huge 4194304
|
||||
total_shmem 8192
|
||||
total_mapped_file 540672
|
||||
total_dirty 0
|
||||
total_writeback 0
|
||||
total_swap 0
|
||||
total_pgpgin 42584
|
||||
total_pgpgout 24188
|
||||
total_pgfault 71983
|
||||
total_pgmajfault 402
|
||||
total_inactive_anon 32854016
|
||||
total_active_anon 12288
|
||||
total_inactive_file 47472640
|
||||
total_active_file 1290240
|
||||
total_unevictable 0
|
|
@ -1 +0,0 @@
|
|||
100000
|
|
@ -0,0 +1,34 @@
|
|||
cache 50000
|
||||
rss_huge 4194304
|
||||
shmem 8192
|
||||
mapped_file 540672
|
||||
dirty 0
|
||||
writeback 0
|
||||
pgpgin 42584
|
||||
pgpgout 24188
|
||||
pgfault 71983
|
||||
pgmajfault 402
|
||||
inactive_anon 32854016
|
||||
active_anon 12288
|
||||
inactive_file 47472640
|
||||
active_file 1290240
|
||||
unevictable 0
|
||||
hierarchical_memory_limit 9223372036854771712
|
||||
hierarchical_memsw_limit 9223372036854771712
|
||||
total_cache 48771072
|
||||
total_rss 32845824
|
||||
total_rss_huge 4194304
|
||||
total_shmem 8192
|
||||
total_mapped_file 540672
|
||||
total_dirty 0
|
||||
total_writeback 0
|
||||
total_swap 0
|
||||
total_pgpgin 42584
|
||||
total_pgpgout 24188
|
||||
total_pgfault 71983
|
||||
total_pgmajfault 402
|
||||
total_inactive_anon 32854016
|
||||
total_active_anon 12288
|
||||
total_inactive_file 47472640
|
||||
total_active_file 1290240
|
||||
total_unevictable 0
|
|
@ -16,6 +16,7 @@ ExecStart=/usr/bin/python3 -u /usr/sbin/waagent -daemon
|
|||
Restart=always
|
||||
Slice=azure.slice
|
||||
CPUAccounting=yes
|
||||
MemoryAccounting=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
|
|
@ -21,7 +21,7 @@ import random
|
|||
import string
|
||||
|
||||
from azurelinuxagent.common import event, logger
|
||||
from azurelinuxagent.common.cgroup import CpuCgroup, MemoryCgroup, MetricValue
|
||||
from azurelinuxagent.common.cgroup import CpuCgroup, MemoryCgroup, MetricValue, _REPORT_EVERY_HOUR
|
||||
from azurelinuxagent.common.cgroupstelemetry import CGroupsTelemetry
|
||||
from azurelinuxagent.common.event import EVENTS_DIRECTORY
|
||||
from azurelinuxagent.common.protocol.healthservice import HealthService
|
||||
|
@ -197,22 +197,23 @@ class TestExtensionMetricsDataTelemetry(AgentTestCase):
|
|||
self.get_protocol.stop()
|
||||
|
||||
@patch('azurelinuxagent.common.event.EventLogger.add_metric')
|
||||
@patch('azurelinuxagent.common.event.EventLogger.add_event')
|
||||
@patch("azurelinuxagent.common.cgroupstelemetry.CGroupsTelemetry.poll_all_tracked")
|
||||
def test_send_extension_metrics_telemetry(self, patch_poll_all_tracked, patch_add_event, # pylint: disable=unused-argument
|
||||
def test_send_extension_metrics_telemetry(self, patch_poll_all_tracked, # pylint: disable=unused-argument
|
||||
patch_add_metric, *args):
|
||||
patch_poll_all_tracked.return_value = [MetricValue("Process", "% Processor Time", 1, 1),
|
||||
MetricValue("Memory", "Total Memory Usage", 1, 1),
|
||||
MetricValue("Memory", "Max Memory Usage", 1, 1)]
|
||||
patch_poll_all_tracked.return_value = [MetricValue("Process", "% Processor Time", "service", 1),
|
||||
MetricValue("Memory", "Total Memory Usage", "service", 1),
|
||||
MetricValue("Memory", "Max Memory Usage", "service", 1, _REPORT_EVERY_HOUR),
|
||||
MetricValue("Memory", "Swap Memory Usage", "service", 1, _REPORT_EVERY_HOUR)
|
||||
]
|
||||
|
||||
PollResourceUsage().run()
|
||||
self.assertEqual(1, patch_poll_all_tracked.call_count)
|
||||
self.assertEqual(3, patch_add_metric.call_count) # Three metrics being sent.
|
||||
self.assertEqual(4, patch_add_metric.call_count) # Four metrics being sent.
|
||||
|
||||
@patch('azurelinuxagent.common.event.EventLogger.add_metric')
|
||||
@patch("azurelinuxagent.common.cgroupstelemetry.CGroupsTelemetry.poll_all_tracked")
|
||||
def test_send_extension_metrics_telemetry_for_empty_cgroup(self, patch_poll_all_tracked, # pylint: disable=unused-argument
|
||||
patch_add_metric,*args):
|
||||
patch_add_metric, *args):
|
||||
patch_poll_all_tracked.return_value = []
|
||||
|
||||
PollResourceUsage().run()
|
||||
|
@ -245,41 +246,9 @@ class TestExtensionMetricsDataTelemetry(AgentTestCase):
|
|||
ioerror.errno = 2
|
||||
patch_cpu_usage.side_effect = ioerror
|
||||
|
||||
CGroupsTelemetry._tracked["/test/path"]= CpuCgroup("cgroup_name", "/test/path")
|
||||
CGroupsTelemetry._tracked["/test/path"] = CpuCgroup("cgroup_name", "/test/path")
|
||||
|
||||
PollResourceUsage().run()
|
||||
self.assertEqual(0, patch_periodic_warn.call_count)
|
||||
self.assertEqual(0, patch_add_metric.call_count) # No metrics should be sent.
|
||||
|
||||
def test_generate_extension_metrics_telemetry_dictionary(self, *args): # pylint: disable=unused-argument
|
||||
num_polls = 10
|
||||
num_extensions = 1
|
||||
|
||||
cpu_percent_values = [random.randint(0, 100) for _ in range(num_polls)]
|
||||
|
||||
# only verifying calculations and not validity of the values.
|
||||
memory_usage_values = [random.randint(0, 8 * 1024 ** 3) for _ in range(num_polls)]
|
||||
max_memory_usage_values = [random.randint(0, 8 * 1024 ** 3) for _ in range(num_polls)]
|
||||
|
||||
# no need to initialize the CPU usage, since we mock get_cpu_usage() below
|
||||
with patch("azurelinuxagent.common.cgroup.CpuCgroup.initialize_cpu_usage"):
|
||||
for i in range(num_extensions):
|
||||
dummy_cpu_cgroup = CpuCgroup("dummy_extension_{0}".format(i), "dummy_cpu_path_{0}".format(i))
|
||||
CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)
|
||||
|
||||
dummy_memory_cgroup = MemoryCgroup("dummy_extension_{0}".format(i), "dummy_memory_path_{0}".format(i))
|
||||
CGroupsTelemetry.track_cgroup(dummy_memory_cgroup)
|
||||
|
||||
self.assertEqual(2 * num_extensions, len(CGroupsTelemetry._tracked))
|
||||
|
||||
with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") as patch_get_memory_max_usage:
|
||||
with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage:
|
||||
with patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") as patch_get_cpu_usage:
|
||||
with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:
|
||||
for i in range(num_polls):
|
||||
patch_is_active.return_value = True
|
||||
patch_get_cpu_usage.return_value = cpu_percent_values[i]
|
||||
patch_get_memory_usage.return_value = memory_usage_values[i] # example 200 MB
|
||||
patch_get_memory_max_usage.return_value = max_memory_usage_values[i] # example 450 MB
|
||||
CGroupsTelemetry.poll_all_tracked()
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче