зеркало из https://github.com/microsoft/lisa.git
add retry_on_deployment
This commit is contained in:
Родитель
ea0f2f186d
Коммит
a87bba5021
|
@ -161,6 +161,7 @@ class Environment(ContextMixin, InitializableMixin):
|
|||
self,
|
||||
is_predefined: bool,
|
||||
warn_as_error: bool,
|
||||
retry: int,
|
||||
id_: int,
|
||||
runbook: schema.Environment,
|
||||
) -> None:
|
||||
|
@ -173,6 +174,7 @@ class Environment(ContextMixin, InitializableMixin):
|
|||
self.is_new: bool = True
|
||||
self.id: str = str(id_)
|
||||
self.warn_as_error = warn_as_error
|
||||
self.retry = retry
|
||||
self.platform: Optional[Platform] = None
|
||||
self.log = get_logger("env", self.name)
|
||||
self.source_test_result: Optional[TestResult] = None
|
||||
|
@ -185,6 +187,9 @@ class Environment(ContextMixin, InitializableMixin):
|
|||
self._raw_id = id_
|
||||
self._retries: int = 0
|
||||
|
||||
# Number of deployment retries performed so far;
|
||||
# incremented each time a failed deployment is retried (only when retry > 0).
|
||||
self.tried_times: int = 0
|
||||
# cost is used to plan the order of environments.
|
||||
# cheaper env can fit cases earlier to run more cases on it.
|
||||
# 1. smaller is higher priority, it can be index of candidate environment
|
||||
|
@ -380,6 +385,7 @@ class Environment(ContextMixin, InitializableMixin):
|
|||
env = Environment(
|
||||
is_predefined=self.is_predefined,
|
||||
warn_as_error=self.warn_as_error,
|
||||
retry=self.retry,
|
||||
id_=self._raw_id,
|
||||
runbook=runbook,
|
||||
)
|
||||
|
@ -462,9 +468,11 @@ class Environments(EnvironmentsDict):
|
|||
def __init__(
    self,
    warn_as_error: bool = False,
    retry: int = 0,
) -> None:
    """
    Create an empty collection of environments.

    Args:
        warn_as_error: stored on the collection and passed to each
            Environment it creates.
        retry: stored on the collection and passed to each Environment it
            creates as its deployment retry count.
    """
    super().__init__()
    self.warn_as_error = warn_as_error
    self.retry = retry
|
||||
|
||||
def get_or_create(self, requirement: EnvironmentSpace) -> Optional[Environment]:
|
||||
result: Optional[Environment] = None
|
||||
|
@ -507,6 +515,7 @@ class Environments(EnvironmentsDict):
|
|||
env = Environment(
|
||||
is_predefined=is_predefined_runbook,
|
||||
warn_as_error=self.warn_as_error,
|
||||
retry=self.retry,
|
||||
id_=id_,
|
||||
runbook=copied_runbook,
|
||||
)
|
||||
|
@ -523,6 +532,7 @@ def load_environments(
|
|||
if root_runbook:
|
||||
environments = Environments(
|
||||
warn_as_error=root_runbook.warn_as_error,
|
||||
retry=root_runbook.retry,
|
||||
)
|
||||
|
||||
environments_runbook = root_runbook.environments
|
||||
|
|
|
@ -10,6 +10,7 @@ from typing import Any, Callable, Dict, Iterator, List, Optional, Type
|
|||
from lisa import messages, notifier, schema, transformer
|
||||
from lisa.action import Action
|
||||
from lisa.combinator import Combinator
|
||||
from lisa.environment import Environment
|
||||
from lisa.messages import TestResultMessage, TestResultMessageBase, TestStatus
|
||||
from lisa.notifier import register_notifier
|
||||
from lisa.parameter_parser.runbook import RunbookBuilder
|
||||
|
@ -190,6 +191,20 @@ class BaseRunner(BaseClassMixin, InitializableMixin):
|
|||
self._wait_resource_logged = False
|
||||
self._wait_resource_timers[name] = _wait_resource_timer
|
||||
|
||||
def _need_retry(self, environment: Environment) -> bool:
    """
    Decide whether a failed deployment of the environment should be retried.

    Compares ``environment.tried_times`` (retries already granted) with
    ``environment.retry`` (retries allowed). When retries remain, the
    counter on the environment is incremented and the retry is logged.

    Args:
        environment: the environment whose deployment just failed; its
            ``tried_times`` attribute is mutated when a retry is granted.

    Returns:
        True if the caller should retry the deployment, False if the retry
        budget is exhausted or retrying is disabled (``retry == 0``).
    """
    if environment.tried_times >= environment.retry:
        # Stay silent when retrying was never enabled; only report the
        # exhausted budget if at least one retry was configured.
        if environment.retry > 0:
            self._log.info(
                f"Tried {environment.tried_times + 1} times, but failed again."
            )
        return False

    environment.tried_times += 1
    self._log.info(
        f"Retrying... (Attempt {environment.tried_times}/{environment.retry})"
    )
    return True
|
||||
|
||||
|
||||
class RootRunner(Action):
|
||||
"""
|
||||
|
|
|
@ -292,6 +292,7 @@ class LisaRunner(BaseRunner):
|
|||
) -> None:
|
||||
try:
|
||||
try:
|
||||
# Attempt to deploy the environment
|
||||
self.platform.deploy_environment(environment)
|
||||
assert (
|
||||
environment.status == EnvironmentStatus.Deployed
|
||||
|
@ -308,12 +309,16 @@ class LisaRunner(BaseRunner):
|
|||
# rerun prepare to calculate resource again.
|
||||
environment.status = EnvironmentStatus.New
|
||||
except Exception as identifier:
|
||||
self._attach_failed_environment_to_result(
|
||||
environment=environment,
|
||||
result=test_results[0],
|
||||
exception=identifier,
|
||||
)
|
||||
self._delete_environment_task(environment=environment, test_results=[])
|
||||
if self._need_retry(environment):
|
||||
environment.status = EnvironmentStatus.New
|
||||
else:
|
||||
# Final attempt failed; handle the failure
|
||||
self._attach_failed_environment_to_result(
|
||||
environment=environment,
|
||||
result=test_results[0],
|
||||
exception=identifier,
|
||||
)
|
||||
self._delete_environment_task(environment=environment, test_results=[])
|
||||
|
||||
def _initialize_environment_task(
|
||||
self, environment: Environment, test_results: List[TestResult]
|
||||
|
|
|
@ -1320,6 +1320,13 @@ class Environment:
|
|||
class EnvironmentRoot:
|
||||
warn_as_error: bool = field(default=False)
|
||||
environments: List[Environment] = field(default_factory=list)
|
||||
# Number of retry attempts for failed deployments (min=0)
|
||||
retry: int = field(
|
||||
default=0,
|
||||
metadata=field_metadata(
|
||||
field_function=fields.Int, validate=validate.Range(min=0)
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@dataclass_json()
|
||||
|
|
|
@ -377,7 +377,11 @@ class AzurePrepareTestCase(TestCase):
|
|||
_ = node_req.get_extended_runbook(common.AzureNodeSchema, AZURE)
|
||||
runbook._original_nodes_requirement.append(node_req)
|
||||
environment = Environment(
|
||||
is_predefined=True, warn_as_error=False, id_=0, runbook=runbook
|
||||
is_predefined=True,
|
||||
warn_as_error=False,
|
||||
id_=0,
|
||||
runbook=runbook,
|
||||
retry=0,
|
||||
)
|
||||
|
||||
return environment
|
||||
|
|
Загрузка…
Ссылка в новой задаче