Bug 1660506 - [taskgraph] Ensure backstop determination uses time since last backstop, r=marco

Backstop pushes have a time component so that we can guarantee they run at least every N
hours. Previously, that time was measured since the last push. This means that as long as a
new push came in at least once every 4 hours, the backstop kept being postponed.

This means we could theoretically go 80 hours without a backstop push (a backstop is only
forced on every 20th push, so 20 pushes spaced just under 4 hours apart), which makes
finding merge candidates tricky.

This change ensures we measure the 4 hours as time since last backstop, as opposed to time
since last push.

Differential Revision: https://phabricator.services.mozilla.com/D88913
This commit is contained in:
Andrew Halberstadt 2020-09-01 18:42:57 +00:00
Родитель 44293fe4fe
Коммит e7e4e3ee6a
2 изменённых файлов: 43 добавлений и 52 удалений

Просмотреть файл

@ -5,12 +5,19 @@
from __future__ import absolute_import, print_function, unicode_literals
from datetime import datetime
from textwrap import dedent
from time import mktime
import pytest
from mozunit import main
from taskgraph.util.backstop import is_backstop
from taskgraph.util.backstop import (
is_backstop,
BACKSTOP_INDEX,
BACKSTOP_PUSH_INTERVAL,
BACKSTOP_TIME_INTERVAL,
)
from taskgraph.util.taskcluster import get_index_url
@pytest.fixture(scope='module')
@ -20,50 +27,46 @@ def params():
'head_repository': 'https://hg.mozilla.org/integration/autoland',
'head_rev': 'abcdef',
'project': 'autoland',
'pushlog_id': 1,
'pushdate': mktime(datetime.now().timetuple()),
}
def test_is_backstop(responses, params):
url = get_index_url(
BACKSTOP_INDEX.format(project=params["project"])
) + "/artifacts/public/parameters.yml"
responses.add(
responses.GET,
"https://hg.mozilla.org/integration/autoland/json-pushes/?version=2&startID=16&endID=17", # noqa
json={"pushes": {"17": {}}},
status=200,
url,
status=404,
)
# If there's no previous push date, run tasks
params['pushlog_id'] = 18
params["pushlog_id"] = 1
assert is_backstop(params)
responses.add(
responses.replace(
responses.GET,
"https://hg.mozilla.org/integration/autoland/json-pushes/?version=2&startID=17&endID=18", # noqa
json={"pushes": {"18": {"date": params['pushdate']}}},
url,
body=dedent("""
pushdate: {pushdate}
""".format(pushdate=params["pushdate"])),
status=200,
)
# Only multiples of 20 schedule tasks. Pushdate from push 19 was cached.
params['pushlog_id'] = 19
params['pushdate'] += 3599
# Only multiples of push interval schedule tasks.
params['pushlog_id'] = BACKSTOP_PUSH_INTERVAL - 1
params['pushdate'] += 1
assert not is_backstop(params)
params['pushlog_id'] = 20
params['pushlog_id'] = BACKSTOP_PUSH_INTERVAL
params['pushdate'] += 1
assert is_backstop(params)
responses.add(
responses.GET,
"https://hg.mozilla.org/integration/autoland/json-pushes/?version=2&startID=19&endID=20", # noqa
json={"pushes": {"20": {"date": params['pushdate']}}},
status=200,
)
# Tasks are also scheduled if four hours have passed.
params['pushlog_id'] = 21
params['pushdate'] += 4 * 3600
# Tasks are also scheduled if the time interval has passed.
params['pushlog_id'] = BACKSTOP_PUSH_INTERVAL + 1
params['pushdate'] += BACKSTOP_TIME_INTERVAL * 60
assert is_backstop(params)

Просмотреть файл

@ -4,15 +4,14 @@
from __future__ import absolute_import, print_function, unicode_literals
import logging
from requests import HTTPError
from taskgraph.util.hg import get_push_data
from taskgraph.util.taskcluster import get_artifact_from_index
BACKSTOP_PUSH_INTERVAL = 20
BACKSTOP_TIME_INTERVAL = 60 * 4 # minutes
logger = logging.getLogger(__name__)
BACKSTOP_INDEX = "gecko.v2.{project}.latest.taskgraph.backstop"
def is_backstop(
@ -42,31 +41,20 @@ def is_backstop(
if pushid % push_interval == 0:
return True
if time_interval <= 0:
return False
# We also want to ensure we run all tasks at least once per N minutes.
if (
time_interval > 0
and minutes_between_pushes(
time_interval, params["head_repository"], project, pushid, pushdate
)
>= time_interval
):
index = BACKSTOP_INDEX.format(project=project)
try:
last_pushdate = get_artifact_from_index(index, 'public/parameters.yml')["pushdate"]
except HTTPError as e:
if e.response.status_code == 404:
# There hasn't been a backstop push yet.
return True
raise
if (pushdate - last_pushdate) / 60 >= time_interval:
return True
return False
def minutes_between_pushes(time_interval, repository, project, cur_push_id, cur_push_date):
    """Return the minutes elapsed between the current push and the one before it.

    The previous push is looked up through ``get_push_data``. Whenever the
    elapsed time cannot be worked out (no push data came back, or either
    push date is not a positive number), ``time_interval`` itself is
    returned, so that the fallback errs on the side of running the task.
    """
    previous_id = cur_push_id - 1
    push_data = get_push_data(repository, project, previous_id, previous_id)
    if push_data is None:
        return time_interval

    # A missing 'date' entry is treated like an unknown (zero) date.
    previous_date = push_data[previous_id].get('date', 0)
    if not (cur_push_date > 0 and previous_date > 0):
        return time_interval

    # Both dates are known: convert the difference from seconds to minutes.
    return (cur_push_date - previous_date) / 60