Use self-update for initial update (#3184)

* use self-update for initial update

* addressing comments

* cleanup files

* state files

* remove comment
This commit is contained in:
Nageswara Nandigam 2024-08-23 14:00:26 -07:00 коммит произвёл GitHub
Родитель 1a0f1b5b17
Коммит 4412778a70
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
11 изменённых файлов: 341 добавлений и 61 удалений

Просмотреть файл

@ -33,20 +33,32 @@ def get_agent_update_handler(protocol):
return AgentUpdateHandler(protocol) return AgentUpdateHandler(protocol)
RSM_UPDATE_STATE_FILE = "waagent_rsm_update"
INITIAL_UPDATE_STATE_FILE = "waagent_initial_update"
class AgentUpdateHandler(object): class AgentUpdateHandler(object):
""" """
This class handles two type of agent updates. Handler initializes the updater to SelfUpdateVersionUpdater and switch to appropriate updater based on below conditions: This class handles two type of agent updates. Handler initializes the updater to SelfUpdateVersionUpdater and switch to appropriate updater based on below conditions:
RSM update: This is the update requested by RSM. The contract between CRP and agent is we get following properties in the goal state: RSM update: This update requested by RSM and contract between CRP and agent is we get following properties in the goal state:
version: it will have what version to update version: it will have what version to update
isVersionFromRSM: True if the version is from RSM deployment. isVersionFromRSM: True if the version is from RSM deployment.
isVMEnabledForRSMUpgrades: True if the VM is enabled for RSM upgrades. isVMEnabledForRSMUpgrades: True if the VM is enabled for RSM upgrades.
if vm enabled for RSM upgrades, we use RSM update path. But if requested update is not by rsm deployment if vm enabled for RSM upgrades, we use RSM update path. But if requested update is not by rsm deployment( if isVersionFromRSM:False)
we ignore the update. we ignore the update.
Self update: We fallback to this if above is condition not met. This update to the largest version available in the manifest Self update: We fallback to this if above condition not met. This update to the largest version available in the manifest.
Also, we use self-update for initial update due to [1][2]
Note: Self-update don't support downgrade. Note: Self-update don't support downgrade.
Handler keeps the rsm state of last update is with RSM or not on every new goal state. Once handler decides which updater to use, then [1] New vms that are enrolled into RSM, they get isVMEnabledForRSMUpgrades as True and isVersionFromRSM as False in first goal state. As per RSM update flow mentioned above,
does following steps: we don't apply the update if isVersionFromRSM is false. Consequently, new vms remain on pre-installed agent until RSM drives a new version update. In the meantime, agent may process the extensions with the baked version.
This can potentially lead to issues due to incompatibility.
[2] If the current version is N and we are deploying N+1, we may find an issue on N+1 and remove N+1 from PIR. If CRP created the initial goal state for a new vm
before the delete, the version in the goal state would be N+1; if the agent starts processing the goal state after the delete, it won't find N+1, the update will fail and
the vm will use the baked version.
Handler updates the state if current update mode is changed from last update mode(RSM or Self-Update) on new goal state. Once handler decides which updater to use, then
updater does following steps:
1. Retrieve the agent version from the goal state. 1. Retrieve the agent version from the goal state.
2. Check if we allowed to update for that version. 2. Check if we allowed to update for that version.
3. Log the update message. 3. Log the update message.
@ -63,8 +75,8 @@ class AgentUpdateHandler(object):
self._daemon_version = self._get_daemon_version_for_update() self._daemon_version = self._get_daemon_version_for_update()
self._last_attempted_update_error_msg = "" self._last_attempted_update_error_msg = ""
# restore the state of rsm update. Default to self-update if last update is not with RSM. # Restore the state of rsm update. Default to self-update if last update is not with RSM or if agent doing initial update
if not self._get_is_last_update_with_rsm(): if not self._get_is_last_update_with_rsm() or self._is_initial_update():
self._updater = SelfUpdateVersionUpdater(self._gs_id) self._updater = SelfUpdateVersionUpdater(self._gs_id)
else: else:
self._updater = RSMVersionUpdater(self._gs_id, self._daemon_version) self._updater = RSMVersionUpdater(self._gs_id, self._daemon_version)
@ -78,14 +90,39 @@ class AgentUpdateHandler(object):
# use the min version as 2.2.53 as we started setting the daemon version starting 2.2.53. # use the min version as 2.2.53 as we started setting the daemon version starting 2.2.53.
return FlexibleVersion("2.2.53") return FlexibleVersion("2.2.53")
@staticmethod
def _get_initial_update_state_file():
    """
    Returns the path, under the agent lib directory, of the file that records whether the initial update was attempted
    """
    lib_dir = conf.get_lib_dir()
    return os.path.join(lib_dir, INITIAL_UPDATE_STATE_FILE)
def _save_initial_update_state_file(self):
"""
Save the file if agent attempted initial update
"""
try:
with open(self._get_initial_update_state_file(), "w"):
pass
except Exception as e:
msg = "Error creating the initial update state file ({0}): {1}".format(self._get_initial_update_state_file(), ustr(e))
logger.warn(msg)
add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False)
def _is_initial_update(self):
"""
Returns True if state file doesn't exit as presence of file consider as initial update already attempted
"""
return not os.path.exists(self._get_initial_update_state_file())
@staticmethod @staticmethod
def _get_rsm_update_state_file(): def _get_rsm_update_state_file():
""" """
This file keeps if last attempted update is rsm or not. This file keeps if last attempted update is rsm or not.
""" """
return os.path.join(conf.get_lib_dir(), "rsm_update.json") return os.path.join(conf.get_lib_dir(), RSM_UPDATE_STATE_FILE)
def _save_rsm_update_state(self): def _save_rsm_update_state_file(self):
""" """
Save the rsm state empty file when we switch to RSM Save the rsm state empty file when we switch to RSM
""" """
@ -93,9 +130,11 @@ class AgentUpdateHandler(object):
with open(self._get_rsm_update_state_file(), "w"): with open(self._get_rsm_update_state_file(), "w"):
pass pass
except Exception as e: except Exception as e:
logger.warn("Error creating the RSM state ({0}): {1}", self._get_rsm_update_state_file(), ustr(e)) msg = "Error creating the RSM state file ({0}): {1}".format(self._get_rsm_update_state_file(), ustr(e))
logger.warn(msg)
add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False)
def _remove_rsm_update_state(self): def _remove_rsm_update_state_file(self):
""" """
Remove the rsm state file when we switch to self-update Remove the rsm state file when we switch to self-update
""" """
@ -103,7 +142,9 @@ class AgentUpdateHandler(object):
if os.path.exists(self._get_rsm_update_state_file()): if os.path.exists(self._get_rsm_update_state_file()):
os.remove(self._get_rsm_update_state_file()) os.remove(self._get_rsm_update_state_file())
except Exception as e: except Exception as e:
logger.warn("Error removing the RSM state ({0}): {1}", self._get_rsm_update_state_file(), ustr(e)) msg = "Error removing the RSM state file ({0}): {1}".format(self._get_rsm_update_state_file(), ustr(e))
logger.warn(msg)
add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False)
def _get_is_last_update_with_rsm(self): def _get_is_last_update_with_rsm(self):
""" """
@ -152,25 +193,29 @@ class AgentUpdateHandler(object):
agent_family = self._get_agent_family_manifest(goal_state) agent_family = self._get_agent_family_manifest(goal_state)
# Updater will return True or False if we need to switch the updater # Always agent uses self-update for initial update regardless vm enrolled into RSM or not
# If self-updater receives RSM update enabled, it will switch to RSM updater # So ignoring the check for updater switch for the initial goal state/update
# If RSM updater receives RSM update disabled, it will switch to self-update if not self._is_initial_update():
# No change in updater if GS not updated
is_rsm_update_enabled = self._updater.is_rsm_update_enabled(agent_family, ext_gs_updated)
if not is_rsm_update_enabled and isinstance(self._updater, RSMVersionUpdater): # Updater will return True or False if we need to switch the updater
msg = "VM not enabled for RSM updates, switching to self-update mode" # If self-updater receives RSM update enabled, it will switch to RSM updater
logger.info(msg) # If RSM updater receives RSM update disabled, it will switch to self-update
add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False) # No change in updater if GS not updated
self._updater = SelfUpdateVersionUpdater(self._gs_id) is_rsm_update_enabled = self._updater.is_rsm_update_enabled(agent_family, ext_gs_updated)
self._remove_rsm_update_state()
if is_rsm_update_enabled and isinstance(self._updater, SelfUpdateVersionUpdater): if not is_rsm_update_enabled and isinstance(self._updater, RSMVersionUpdater):
msg = "VM enabled for RSM updates, switching to RSM update mode" msg = "VM not enabled for RSM updates, switching to self-update mode"
logger.info(msg) logger.info(msg)
add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False) add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False)
self._updater = RSMVersionUpdater(self._gs_id, self._daemon_version) self._updater = SelfUpdateVersionUpdater(self._gs_id)
self._save_rsm_update_state() self._remove_rsm_update_state_file()
if is_rsm_update_enabled and isinstance(self._updater, SelfUpdateVersionUpdater):
msg = "VM enabled for RSM updates, switching to RSM update mode"
logger.info(msg)
add_event(op=WALAEventOperation.AgentUpgrade, message=msg, log_event=False)
self._updater = RSMVersionUpdater(self._gs_id, self._daemon_version)
self._save_rsm_update_state_file()
# If updater is changed in previous step, we allow update as it consider as first attempt. If not, it checks below condition # If updater is changed in previous step, we allow update as it consider as first attempt. If not, it checks below condition
# RSM checks new goal state; self-update checks manifest download interval # RSM checks new goal state; self-update checks manifest download interval
@ -218,6 +263,11 @@ class AgentUpdateHandler(object):
add_event(op=WALAEventOperation.AgentUpgrade, is_success=False, message=error_msg, log_event=False) add_event(op=WALAEventOperation.AgentUpgrade, is_success=False, message=error_msg, log_event=False)
self._last_attempted_update_error_msg = error_msg self._last_attempted_update_error_msg = error_msg
# save initial update state when agent is doing first update
finally:
if self._is_initial_update():
self._save_initial_update_state_file()
def get_vmagent_update_status(self): def get_vmagent_update_status(self):
""" """
This function gets the VMAgent update status as per the last attempted update. This function gets the VMAgent update status as per the last attempted update.

Просмотреть файл

@ -162,7 +162,8 @@ class DeprovisionHandler(object):
'published_hostname', 'published_hostname',
'fast_track.json', 'fast_track.json',
'initial_goal_state', 'initial_goal_state',
'rsm_update.json' 'waagent_rsm_update',
'waagent_initial_update'
] ]
known_files_glob = [ known_files_glob = [
'Extensions.*.xml', 'Extensions.*.xml',

Просмотреть файл

@ -10,7 +10,8 @@ from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatuses
from azurelinuxagent.common.protocol.util import ProtocolUtil from azurelinuxagent.common.protocol.util import ProtocolUtil
from azurelinuxagent.common.version import CURRENT_VERSION, AGENT_NAME from azurelinuxagent.common.version import CURRENT_VERSION, AGENT_NAME
from azurelinuxagent.ga.agent_update_handler import get_agent_update_handler from azurelinuxagent.ga.agent_update_handler import get_agent_update_handler, INITIAL_UPDATE_STATE_FILE, \
RSM_UPDATE_STATE_FILE
from azurelinuxagent.ga.guestagent import GuestAgent from azurelinuxagent.ga.guestagent import GuestAgent
from tests.ga.test_update import UpdateTestCase from tests.ga.test_update import UpdateTestCase
from tests.lib.http_request_predicates import HttpRequestPredicates from tests.lib.http_request_predicates import HttpRequestPredicates
@ -28,7 +29,7 @@ class TestAgentUpdate(UpdateTestCase):
clear_singleton_instances(ProtocolUtil) clear_singleton_instances(ProtocolUtil)
@contextlib.contextmanager @contextlib.contextmanager
def _get_agent_update_handler(self, test_data=None, autoupdate_frequency=0.001, autoupdate_enabled=True, protocol_get_error=False, mock_get_header=None, mock_put_header=None): def _get_agent_update_handler(self, test_data=None, autoupdate_frequency=0.001, autoupdate_enabled=True, initial_update_attempted=True, protocol_get_error=False, mock_get_header=None, mock_put_header=None):
# Default to DATA_FILE of test_data parameter raises the pylint warning # Default to DATA_FILE of test_data parameter raises the pylint warning
# W0102: Dangerous default value DATA_FILE (builtins.dict) as argument (dangerous-default-value) # W0102: Dangerous default value DATA_FILE (builtins.dict) as argument (dangerous-default-value)
test_data = DATA_FILE if test_data is None else test_data test_data = DATA_FILE if test_data is None else test_data
@ -57,6 +58,9 @@ class TestAgentUpdate(UpdateTestCase):
protocol.set_http_handlers(http_get_handler=http_get_handler, http_put_handler=http_put_handler) protocol.set_http_handlers(http_get_handler=http_get_handler, http_put_handler=http_put_handler)
if initial_update_attempted:
open(os.path.join(conf.get_lib_dir(), INITIAL_UPDATE_STATE_FILE), "a").close()
with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=autoupdate_enabled): with patch("azurelinuxagent.common.conf.get_autoupdate_enabled", return_value=autoupdate_enabled):
with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=autoupdate_frequency): with patch("azurelinuxagent.common.conf.get_autoupdate_frequency", return_value=autoupdate_frequency):
with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"): with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"):
@ -452,7 +456,7 @@ class TestAgentUpdate(UpdateTestCase):
with self.assertRaises(AgentUpgradeExitException): with self.assertRaises(AgentUpgradeExitException):
agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True) agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True)
state_file = os.path.join(conf.get_lib_dir(), "rsm_update.json") state_file = os.path.join(conf.get_lib_dir(), RSM_UPDATE_STATE_FILE)
self.assertTrue(os.path.exists(state_file), "The rsm state file was not saved (can't find {0})".format(state_file)) self.assertTrue(os.path.exists(state_file), "The rsm state file was not saved (can't find {0})".format(state_file))
# check if state gets updated if most recent goal state has different values # check if state gets updated if most recent goal state has different values
@ -535,3 +539,36 @@ class TestAgentUpdate(UpdateTestCase):
self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if
"Downloaded agent package: WALinuxAgent-9.9.9.10 is missing agent handler manifest file" in kwarg['message'] and kwarg[ "Downloaded agent package: WALinuxAgent-9.9.9.10 is missing agent handler manifest file" in kwarg['message'] and kwarg[
'op'] == WALAEventOperation.AgentUpgrade]), "Agent update should fail") 'op'] == WALAEventOperation.AgentUpgrade]), "Agent update should fail")
def test_it_should_use_self_update_for_first_update_always(self):
"""
Verifies that the handler uses self-update for the initial update even when the goal state marks the vm as enrolled into RSM,
and that the initial update state file is saved afterwards.
"""
self.prepare_agents(count=1)
# mock the goal state as a vm enrolled into RSM
data_file = DATA_FILE.copy()
data_file['ext_conf'] = "wire/ext_conf_rsm_version.xml"
# initial_update_attempted=False: the helper does not pre-create the initial update state file
with self._get_agent_update_handler(test_data=data_file, initial_update_attempted=False) as (agent_update_handler, mock_telemetry):
with self.assertRaises(AgentUpgradeExitException) as context:
agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True)
# Verify that the agent used self-update for the initial update
self._assert_update_discovered_from_agent_manifest(mock_telemetry, version="99999.0.0.0")
self._assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION), "99999.0.0.0"])
self._assert_agent_exit_process_telemetry_emitted(ustr(context.exception.reason))
# The state file must exist once the initial update was attempted
state_file = os.path.join(conf.get_lib_dir(), INITIAL_UPDATE_STATE_FILE)
self.assertTrue(os.path.exists(state_file),
"The first update state file was not saved (can't find {0})".format(state_file))
def test_it_should_honor_any_update_type_after_first_update(self):
"""
Verifies that, once the initial update was already attempted, the handler honors the RSM update requested in the goal state.
"""
self.prepare_agents(count=1)
# goal state with an RSM requested version
data_file = DATA_FILE.copy()
data_file['ext_conf'] = "wire/ext_conf_rsm_version.xml"
# mock the initial update as already attempted
with self._get_agent_update_handler(test_data=data_file, initial_update_attempted=True) as (agent_update_handler, mock_telemetry):
with self.assertRaises(AgentUpgradeExitException) as context:
agent_update_handler.run(agent_update_handler._protocol.get_goal_state(), True)
# Verify that the agent honored the RSM update
self._assert_agent_rsm_version_in_goal_state(mock_telemetry, version="9.9.9.10")
self._assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10", str(CURRENT_VERSION)])
self._assert_agent_exit_process_telemetry_emitted(ustr(context.exception.reason))

Просмотреть файл

@ -20,6 +20,8 @@ import zipfile
from datetime import datetime, timedelta from datetime import datetime, timedelta
from threading import current_thread from threading import current_thread
from azurelinuxagent.ga.agent_update_handler import INITIAL_UPDATE_STATE_FILE
from azurelinuxagent.ga.guestagent import GuestAgent, GuestAgentError, \ from azurelinuxagent.ga.guestagent import GuestAgent, GuestAgentError, \
AGENT_ERROR_FILE AGENT_ERROR_FILE
from tests.common.osutil.test_default import TestOSUtil from tests.common.osutil.test_default import TestOSUtil
@ -1282,6 +1284,9 @@ class TestUpdate(UpdateTestCase):
protocol.set_http_handlers(http_get_handler=get_handler, http_put_handler=put_handler) protocol.set_http_handlers(http_get_handler=get_handler, http_put_handler=put_handler)
# mocking first agent update attempted
open(os.path.join(conf.get_lib_dir(), INITIAL_UPDATE_STATE_FILE), "a").close()
# Case 1: rsm version missing in GS when vm opt-in for rsm upgrades; report missing rsm version error # Case 1: rsm version missing in GS when vm opt-in for rsm upgrades; report missing rsm version error
protocol.mock_wire_data.set_extension_config("wire/ext_conf_version_missing_in_agent_family.xml") protocol.mock_wire_data.set_extension_config("wire/ext_conf_version_missing_in_agent_family.xml")
update_goal_state_and_run_handler() update_goal_state_and_run_handler()
@ -1481,7 +1486,10 @@ class TestAgentUpgrade(UpdateTestCase):
@contextlib.contextmanager @contextlib.contextmanager
def __get_update_handler(self, iterations=1, test_data=None, def __get_update_handler(self, iterations=1, test_data=None,
reload_conf=None, autoupdate_frequency=0.001, hotfix_frequency=1.0, normal_frequency=2.0): reload_conf=None, autoupdate_frequency=0.001, hotfix_frequency=1.0, normal_frequency=2.0, initial_update_attempted=True):
if initial_update_attempted:
open(os.path.join(conf.get_lib_dir(), INITIAL_UPDATE_STATE_FILE), "a").close()
test_data = DATA_FILE if test_data is None else test_data test_data = DATA_FILE if test_data is None else test_data
# In _get_update_handler() contextmanager, yield is used inside an if-else block and that's creating a false positive pylint warning # In _get_update_handler() contextmanager, yield is used inside an if-else block and that's creating a false positive pylint warning

Просмотреть файл

@ -83,6 +83,28 @@ class VmImageInfo(object):
def __str__(self): def __str__(self):
return self.urn return self.urn
class CustomImage(object):
    """
    Helpers to recognize and parse references to images from a shared gallery.
    """

    # Images from a gallery are given as "<image_gallery>/<image_definition>/<image_version>".
    _IMAGE_FROM_GALLERY = re.compile(r"(?P<gallery>[^/]+)/(?P<image>[^/]+)/(?P<version>[^/]+)")

    @staticmethod
    def _is_image_from_gallery(image: str) -> bool:
        """
        Returns True if the given image reference starts with the "<gallery>/<image>/<version>" pattern
        """
        # A match object is always truthy, so bool() is True exactly when the pattern matched
        return bool(CustomImage._IMAGE_FROM_GALLERY.match(image))

    @staticmethod
    def _get_name_of_image_from_gallery(image: str) -> str:
        """
        Returns the image definition (the middle component) of a shared gallery reference.
        Raises an Exception if the reference does not match the gallery pattern.
        """
        gallery_match = CustomImage._IMAGE_FROM_GALLERY.match(image)
        if gallery_match is not None:
            return gallery_match.group('image')
        raise Exception(f"Invalid image from gallery: {image}")
class AgentTestLoader(object): class AgentTestLoader(object):
""" """
@ -134,6 +156,7 @@ class AgentTestLoader(object):
""" """
Performs some basic validations on the data loaded from the YAML description files Performs some basic validations on the data loaded from the YAML description files
""" """
def _parse_image(image: str) -> str: def _parse_image(image: str) -> str:
""" """
Parses a reference to an image or image set and returns the name of the image or image set Parses a reference to an image or image set and returns the name of the image or image set
@ -147,8 +170,11 @@ class AgentTestLoader(object):
# Validate that the images the suite must run on are in images.yml # Validate that the images the suite must run on are in images.yml
for image in suite.images: for image in suite.images:
image = _parse_image(image) image = _parse_image(image)
# skip validation if the suite image is from a shared gallery (it has no definition in images.yml)
if CustomImage._is_image_from_gallery(image):
continue
if image not in self.images: if image not in self.images:
raise Exception(f"Invalid image reference in test suite {suite.name}: Can't find {image} in images.yml") raise Exception(f"Invalid image reference in test suite {suite.name}: Can't find {image} in images.yml or image from a shared gallery")
# If the suite specifies a cloud and it's location<cloud:location>, validate that location string is start with <cloud:> and then validate that the images it uses are available in that location # If the suite specifies a cloud and it's location<cloud:location>, validate that location string is start with <cloud:> and then validate that the images it uses are available in that location
for suite_location in suite.locations: for suite_location in suite.locations:
@ -158,6 +184,9 @@ class AgentTestLoader(object):
continue continue
for suite_image in suite.images: for suite_image in suite.images:
suite_image = _parse_image(suite_image) suite_image = _parse_image(suite_image)
# skip validation if the suite image is from a shared gallery (it has no definition in images.yml)
if CustomImage._is_image_from_gallery(suite_image):
continue
for image in self.images[suite_image]: for image in self.images[suite_image]:
# If the image has a location restriction, validate that it is available on the location the suite must run on # If the image has a location restriction, validate that it is available on the location the suite must run on
if image.locations: if image.locations:
@ -208,8 +237,8 @@ class AgentTestLoader(object):
rest of the tests in the suite will not be executed). By default, a failure on a test does not stop execution of rest of the tests in the suite will not be executed). By default, a failure on a test does not stop execution of
the test suite. the test suite.
* images - A string, or a list of strings, specifying the images on which the test suite must be executed. Each value * images - A string, or a list of strings, specifying the images on which the test suite must be executed. Each value
can be the name of a single image (e.g."ubuntu_2004"), or the name of an image set (e.g. "endorsed"). The can be the name of a single image (e.g."ubuntu_2004"), or the name of an image set (e.g. "endorsed") or shared gallery image(e.g. "gallery/wait-cloud-init/1.0.2").
names for images and image sets are defined in WALinuxAgent/tests_e2e/tests_suites/images.yml. The names for images and image sets are defined in WALinuxAgent/tests_e2e/tests_suites/images.yml.
* locations - [Optional; string or list of strings] If given, the test suite must be executed on that cloud location(e.g. "AzureCloud:eastus2euap"). * locations - [Optional; string or list of strings] If given, the test suite must be executed on that cloud location(e.g. "AzureCloud:eastus2euap").
If not specified, or set to an empty string, the test suite will be executed in the default location. This is useful If not specified, or set to an empty string, the test suite will be executed in the default location. This is useful
for test suites that exercise a feature that is enabled only in certain regions. for test suites that exercise a feature that is enabled only in certain regions.

Просмотреть файл

@ -22,7 +22,7 @@ from lisa.combinator import Combinator # pylint: disable=E0401
from lisa.messages import TestStatus, TestResultMessage # pylint: disable=E0401 from lisa.messages import TestStatus, TestResultMessage # pylint: disable=E0401
from lisa.util import field_metadata # pylint: disable=E0401 from lisa.util import field_metadata # pylint: disable=E0401
from tests_e2e.orchestrator.lib.agent_test_loader import AgentTestLoader, VmImageInfo, TestSuiteInfo from tests_e2e.orchestrator.lib.agent_test_loader import AgentTestLoader, VmImageInfo, TestSuiteInfo, CustomImage
from tests_e2e.tests.lib.logging import set_thread_name from tests_e2e.tests.lib.logging import set_thread_name
from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient
from tests_e2e.tests.lib.virtual_machine_scale_set_client import VirtualMachineScaleSetClient from tests_e2e.tests.lib.virtual_machine_scale_set_client import VirtualMachineScaleSetClient
@ -171,10 +171,10 @@ class AgentTestSuitesCombinator(Combinator):
vhd = image.urn vhd = image.urn
image_name = urllib.parse.urlparse(vhd).path.split('/')[-1] # take the last fragment of the URL's path (e.g. "RHEL_8_Standard-8.3.202006170423.vhd") image_name = urllib.parse.urlparse(vhd).path.split('/')[-1] # take the last fragment of the URL's path (e.g. "RHEL_8_Standard-8.3.202006170423.vhd")
shared_gallery = "" shared_gallery = ""
elif self._is_image_from_gallery(image.urn): elif CustomImage._is_image_from_gallery(image.urn):
marketplace_image = "" marketplace_image = ""
vhd = "" vhd = ""
image_name = self._get_name_of_image_from_gallery(image.urn) image_name = CustomImage._get_name_of_image_from_gallery(image.urn)
shared_gallery = image.urn shared_gallery = image.urn
else: else:
marketplace_image = image.urn marketplace_image = image.urn
@ -472,7 +472,15 @@ class AgentTestSuitesCombinator(Combinator):
for image in suite.images: for image in suite.images:
match = AgentTestLoader.RANDOM_IMAGES_RE.match(image) match = AgentTestLoader.RANDOM_IMAGES_RE.match(image)
if match is None: if match is None:
image_list = loader.images[image] # Added this condition for gallery images, as they don't have a definition in images.yml
if CustomImage._is_image_from_gallery(image):
i = VmImageInfo()
i.urn = image
i.locations = []
i.vm_sizes = []
image_list = [i]
else:
image_list = loader.images[image]
else: else:
count = match.group('count') count = match.group('count')
if count is None: if count is None:
@ -566,20 +574,6 @@ class AgentTestSuitesCombinator(Combinator):
parsed = urllib.parse.urlparse(vhd) parsed = urllib.parse.urlparse(vhd)
return parsed.scheme == 'https' and parsed.netloc != "" and parsed.path != "" return parsed.scheme == 'https' and parsed.netloc != "" and parsed.path != ""
# Images from a gallery are given as "<image_gallery>/<image_definition>/<image_version>".
_IMAGE_FROM_GALLERY = re.compile(r"(?P<gallery>[^/]+)/(?P<image>[^/]+)/(?P<version>[^/]+)")
@staticmethod
def _is_image_from_gallery(image: str) -> bool:
return AgentTestSuitesCombinator._IMAGE_FROM_GALLERY.match(image) is not None
@staticmethod
def _get_name_of_image_from_gallery(image: str) -> bool:
match = AgentTestSuitesCombinator._IMAGE_FROM_GALLERY.match(image)
if match is None:
raise Exception(f"Invalid image from gallery: {image}")
return match.group('image')
@staticmethod @staticmethod
def _report_test_result( def _report_test_result(
suite_name: str, suite_name: str,

Просмотреть файл

@ -9,5 +9,4 @@ tests:
- "agent_wait_for_cloud_init/agent_wait_for_cloud_init.py" - "agent_wait_for_cloud_init/agent_wait_for_cloud_init.py"
template: "agent_wait_for_cloud_init/add_cloud_init_script.py" template: "agent_wait_for_cloud_init/add_cloud_init_script.py"
install_test_agent: false install_test_agent: false
# Dummy image, since the parameter is required. The actual image needs to be passed as a parameter to the runbook. images: "gallery/wait-cloud-init/1.0.2"
images: "ubuntu_2204"

Просмотреть файл

@ -0,0 +1,13 @@
#
# This test verifies that the Agent does initial update on very first goal state before it starts processing extensions for new vms that are enrolled into RSM.
#
# NOTE: This test suite is not fully automated. It requires a custom image with a pre-installed Agent of version 2.8.9.9; creation of custom images is currently not automated.
# The daily run is automated, though: the test suite passes the shared gallery custom image reference in its images list.
#
#
name: "InitialAgentUpdate"
tests:
- "initial_agent_update/initial_agent_update.py"
install_test_agent: false
images: "gallery/initial-agent-update/1.0.0"
locations: "AzureCloud:eastus2euap"

Просмотреть файл

@ -0,0 +1,82 @@
#!/usr/bin/env python3
# Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from assertpy import fail
from tests_e2e.tests.lib.agent_test import AgentVmTest
from tests_e2e.tests.lib.agent_test_context import AgentVmTestContext
from tests_e2e.tests.lib.logging import log
from tests_e2e.tests.lib.retry import retry_if_false
class InitialAgentUpdate(AgentVmTest):
    """
    Checks that, on new vms enrolled into RSM, the Agent does the initial update on the very first goal state,
    before it starts processing extensions
    """
    def __init__(self, context: AgentVmTestContext):
        super().__init__(context)
        self._ssh_client = self._context.create_ssh_client()
        # Version of the Agent pre-installed on the custom image used by this test
        self._test_version = "2.8.9.9"

    def run(self):
        log.info("Testing initial agent update for new vms that are enrolled into RSM")

        log.info("Retrieving latest version from goal state to verify initial agent update")
        command_output: str = self._ssh_client.run_command(
            "agent_update-self_update_latest_version.py --family_type Prod", use_sudo=True)
        latest_version: str = command_output.rstrip()
        log.info("Latest Version: %s", latest_version)

        self._verify_agent_updated_to_latest_version(latest_version)
        self._verify_agent_updated_before_processing_goal_state(latest_version)

    def _verify_agent_updated_to_latest_version(self, latest_version: str) -> None:
        """
        Checks that the agent moved from the custom image test version to the latest version.
        """
        message = "Verifying agent updated to latest version: {0} from custom image test version: {1}".format(latest_version, self._test_version)
        log.info(message)
        self._verify_guest_agent_update(latest_version)

    def _verify_guest_agent_update(self, latest_version: str) -> None:
        """
        Checks that the agent currently running reports the latest version as its goal state agent; fails the test otherwise
        """
        expected_version = f"Goal state agent: {latest_version}"

        def _is_running_latest_version() -> bool:
            return expected_version in self._ssh_client.run_command("waagent-version", use_sudo=True)

        log.info("Running waagent --version and checking Goal state agent version")
        updated: bool = retry_if_false(_is_running_latest_version, delay=60)
        # Fetched again, regardless of the outcome, to report the version actually running
        current_version_output: str = self._ssh_client.run_command("waagent-version", use_sudo=True)
        if not updated:
            fail("Guest agent didn't update to latest version {0} but found \n {1}".format(
                latest_version, current_version_output))
        log.info(
            f"Successfully verified agent updated to latest version. Current agent version running:\n {current_version_output}")

    def _verify_agent_updated_before_processing_goal_state(self, latest_version) -> None:
        """
        Runs the log check script on the vm with the test version and the latest version, and logs its output
        """
        log.info("Checking agent log if agent does initial update with self-update before processing goal state")
        check_command = "initial_agent_update-agent_update_check_from_log.py --current_version {0} --latest_version {1}".format(self._test_version, latest_version)
        log.info(self._ssh_client.run_command(check_command))

Просмотреть файл

@ -19,20 +19,22 @@
# returns the agent latest version published # returns the agent latest version published
# #
import argparse
from azurelinuxagent.common.protocol.goal_state import GoalStateProperties from azurelinuxagent.common.protocol.goal_state import GoalStateProperties
from azurelinuxagent.common.protocol.util import get_protocol_util from azurelinuxagent.common.protocol.util import get_protocol_util
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
from tests_e2e.tests.lib.retry import retry from tests_e2e.tests.lib.retry import retry
def get_agent_family_manifest(goal_state, family_type="Test"):
    """
    Get the agent family with the given name from the last goal state.

    Only families that list at least one manifest uri are considered; the first one found is returned.

    :param goal_state: goal state object exposing extensions_goal_state.agent_families
    :param family_type: name of the agent family to look for; defaults to 'Test' so that callers which
                        do not pass a family keep working
    :return: the first agent family whose name matches family_type and that has at least one uri
    :raises IndexError: if the goal state has no such family
    """
    for family in goal_state.extensions_goal_state.agent_families:
        if family.name == family_type and len(family.uris) > 0:
            return family
    # Same exception type as indexing an empty list, but the message says which family was missing
    raise IndexError("No agent family named '{0}' with manifest uris was found in the goal state".format(family_type))
@ -53,11 +55,14 @@ def get_largest_version(agent_manifest):
def main(): def main():
try: try:
parser = argparse.ArgumentParser()
parser.add_argument('--family_type', dest="family_type", default="Test")
args = parser.parse_args()
protocol = get_protocol_util().get_protocol(init_goal_state=False) protocol = get_protocol_util().get_protocol(init_goal_state=False)
retry(lambda: protocol.client.reset_goal_state( retry(lambda: protocol.client.reset_goal_state(
goal_state_properties=GoalStateProperties.ExtensionsGoalState)) goal_state_properties=GoalStateProperties.ExtensionsGoalState))
goal_state = protocol.client.get_goal_state() goal_state = protocol.client.get_goal_state()
agent_family = get_agent_family_manifest(goal_state) agent_family = get_agent_family_manifest(goal_state, args.family_type)
agent_manifest = goal_state.fetch_agent_manifest(agent_family.name, agent_family.uris) agent_manifest = goal_state.fetch_agent_manifest(agent_family.name, agent_family.uris)
largest_version = get_largest_version(agent_manifest) largest_version = get_largest_version(agent_manifest)
print(str(largest_version)) print(str(largest_version))

Просмотреть файл

@ -0,0 +1,62 @@
#!/usr/bin/env pypy3
# Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Checks the agent log to verify that the initial agent update (done with self-update) happens before the goal state is processed
import argparse
import datetime
import re
from assertpy import fail
from tests_e2e.tests.lib.agent_log import AgentLog
from tests_e2e.tests.lib.logging import log
def main():
    """
    Verify from the agent log that the initial self-update happened before the goal state was processed.

    Command line arguments:
        --current_version: version of the agent that performs the initial update
        --latest_version: version the agent is expected to update to

    The agent log is scanned for the first occurrence of three messages: the start of goal state processing,
    the self-update announcement, and the current agent exiting to upgrade. The check passes only when both
    update messages were logged before goal state processing started; otherwise the test fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--current_version", dest='current_version', required=True)
    parser.add_argument("--latest_version", dest='latest_version', required=True)
    args = parser.parse_args()

    # The versions are interpolated into regular expressions; escape them so that '.' only matches a literal dot
    current_version = re.escape(args.current_version)
    latest_version = re.escape(args.latest_version)

    patterns = {
        "goal_state": "ProcessExtensionsGoalState started",
        "self_update": f"Self-update is ready to upgrade the new agent: {latest_version} now before processing the goal state",
        "exit_process": f"Current Agent {current_version} completed all update checks, exiting current process to upgrade to the new Agent version {latest_version}"
    }

    # None marks "not found yet". A datetime.time sentinel must not be used here: it would take part in the
    # ordering comparison below and either sort before every real entry or raise on mismatched types.
    first_occurrence_times = {key: None for key in patterns}

    for record in AgentLog().read():
        for key, pattern in patterns.items():
            # Skip if we already found the first occurrence of the pattern
            if first_occurrence_times[key] is not None:
                continue
            if re.search(pattern, record.message, flags=re.DOTALL):
                log.info(f"Found data: {record} in agent log")
                first_occurrence_times[key] = record.when
                # A record is attributed to at most one pattern
                break

    missing = [key for key, when in first_occurrence_times.items() if when is None]
    if missing:
        fail(f"Agent initial update could not be verified; no agent log record was found for: {missing}. first_occurrence_times for patterns: {patterns} are: {first_occurrence_times}")
    # NOTE(review): assumes the record.when values are mutually comparable with '<' in chronological order -- confirm against AgentLog
    elif first_occurrence_times["self_update"] < first_occurrence_times["goal_state"] and first_occurrence_times["exit_process"] < first_occurrence_times["goal_state"]:
        log.info("Verified initial agent update happened before processing goal state")
    else:
        fail(f"Agent initial update didn't happen before processing goal state and first_occurrence_times for patterns: {patterns} are: {first_occurrence_times}")


if __name__ == '__main__':
    main()