Mirror of https://github.com/mozilla/treeherder.git

bug 1077136 - ingest bad pushes as 'onhold' so we don't keep trying to ingest them

Parent: 914ef7c7f1
Commit: 08c04ae79a
@@ -131,18 +131,13 @@ class Builds4hTransformerMixin(object):
             artifact_build = copy.deepcopy(build)
 
             try:
-                branch = revisions_lookup[project]
-                try:
-                    resultset = branch[prop['revision']]
-                except KeyError:
-                    # we don't have the resultset for this build/job yet
-                    # we need to queue fetching that resultset
-                    if prop['revision'] not in ["Unknown", None]:
-                        missing_resultsets[project].add(prop['revision'])
-
-                    continue
+                resultset = common.get_resultset(project,
+                                                 revisions_lookup,
+                                                 prop['revision'],
+                                                 missing_resultsets,
+                                                 logger)
             except KeyError:
                 # this branch is not one of those we care about
                 # skip this job, at least at this point
                 continue
 
             treeherder_data = {
@@ -303,18 +298,13 @@ class PendingTransformerMixin(object):
         for revision, jobs in revisions.items():
 
             try:
-                branch = revisions_lookup[project]
-                try:
-                    resultset = branch[revision]
-                except KeyError:
-                    # we don't have the resultset for this build/job yet
-                    # we need to queue fetching that resultset
-                    if revision not in ["Unknown", None]:
-                        missing_resultsets[project].add(revision)
-
-                    continue
+                resultset = common.get_resultset(project,
+                                                 revisions_lookup,
+                                                 revision,
+                                                 missing_resultsets,
+                                                 logger)
             except KeyError:
                 # this branch is not one of those we care about
                 # skip this job, at least at this point
                 continue
 
             # using project and revision form the revision lookups
@@ -433,18 +423,13 @@ class RunningTransformerMixin(object):
         for revision, jobs in revisions.items():
 
             try:
-                branch = revisions_lookup[project]
-                try:
-                    resultset = branch[revision]
-                except KeyError:
-                    # we don't have the resultset for this build/job yet
-                    # we need to queue fetching that resultset
-                    if revision not in ["Unknown", None]:
-                        missing_resultsets[project].add(revision)
-
-                    continue
+                resultset = common.get_resultset(project,
+                                                 revisions_lookup,
+                                                 revision,
+                                                 missing_resultsets,
+                                                 logger)
             except KeyError:
                 # this branch is not one of those we care about
                 # skip this job, at least at this point
                 continue
 
             # using project and revision form the revision lookups
@@ -163,3 +163,40 @@ def fetch_missing_resultsets(source, missing_resultsets, logger):
             missing_resultsets,
             ex
         ))
+
+
+def get_resultset(project, revisions_lookup, revision, missing_resultsets, logger):
+    """
+    Get the resultset out of the revisions_lookup for the given revision.
+
+    This is a little complex due to our attempts to get missing resultsets
+    in case we see jobs that, for one reason or another, we didn't get the
+    resultset from json-pushes.
+
+    This may raise a KeyError if the project or revision isn't found in the
+    lookup.. This signals that the job should be skipped
+    """
+
+    branch = revisions_lookup[project]
+    # we can ingest resultsets that are not active for various
+    # reasons. One would be that the data from pending/running/
+    # builds4hr may have a bad revision (from the wrong repo).
+    # in this case, we ingest the resultset as inactive so we
+    # don't keep re-trying to find it when we hit jobs like this.
+    # Or, the resultset could be inactive for other reasons.
+    # Either way, we don't want to ingest jobs for it.
+    if branch.get("active_status", "active") != "active":
+        logger.warn(("Skipping job for non-active "
+                     "resultset/revision: {0}").format(
+            revision))
+    try:
+        resultset = branch[revision]
+    except KeyError as ex:
+        # we don't have the resultset for this build/job yet
+        # we need to queue fetching that resultset
+        if revision not in ["Unknown", None]:
+            missing_resultsets[project].add(revision)
+        raise ex
+
+    return resultset
+
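For orientation, here is a minimal sketch of how the transformer mixins above call this new helper. The import path and the example field names in the lookup are assumptions for illustration; only the argument order and the KeyError contract come from the diff.

from collections import defaultdict
import logging

from treeherder.etl import common  # import path assumed

logger = logging.getLogger(__name__)

# project -> {revision: resultset}, as built by the ETL layer (values illustrative)
revisions_lookup = {
    "mozilla-central": {
        "abc123def456": {"revision_hash": "deadbeef", "active_status": "onhold"},
    },
}
# project -> set of revisions whose resultsets still need to be fetched
missing_resultsets = defaultdict(set)

try:
    resultset = common.get_resultset("mozilla-central", revisions_lookup,
                                     "abc123def456", missing_resultsets, logger)
except KeyError:
    # unknown project, or a revision that was just queued into missing_resultsets;
    # the transformers skip the job in that case
    resultset = None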
@@ -110,13 +110,14 @@ class MissingHgPushlogProcess(HgPushlogTransformerMixin,
                               OAuthLoaderMixin):
 
     def extract(self, url, resultset):
-        # we will sometimes get here because builds4hr/pending/running have a
-        # job with a resultset that json-pushes doesn't know about. Seems
-        # odd, but it happens. So we just ingest
-
         logger.info("extracting missing resultsets: {0}".format(url))
         response = requests.get(url, timeout=settings.TREEHERDER_REQUESTS_TIMEOUT)
         if response.status_code == 404:
+            # we will sometimes get here because builds4hr/pending/running have a
+            # job with a resultset that json-pushes doesn't know about. Seems
+            # odd, but it happens. So we just ingest
+            logger.error("no pushlog in json-pushes. generating a placeholder: {0}".format(url))
+
             # we want to make a "fake" resultset, because json-pushes doesn't
             # know about it. This is what TBPL does
             return {
@@ -132,7 +133,8 @@ class MissingHgPushlogProcess(HgPushlogTransformerMixin,
                             "desc": "Pushlog not found at {0}".format(url)
                         }
                     ],
-                    "user": "Unknown"
+                    "user": "Unknown",
+                    "active_status": "onhold"
                 }
             }
         else:
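Taken together, the two pushlog hunks make the 404 branch return a skeleton pushlog whose resultset is ingested as 'onhold'. A rough sketch of that placeholder follows, assuming the json-pushes shape TBPL consumes; only the "desc", "user", and "active_status" fields are visible in this diff, while the outer key and the "date"/"changesets"/"node" fields are assumptions.

import time

def placeholder_push(url, revision):
    # hypothetical helper mirroring the 404 branch above
    return {
        revision: {
            "date": int(time.time()),
            "changesets": [
                {
                    "node": revision,
                    "desc": "Pushlog not found at {0}".format(url)
                }
            ],
            "user": "Unknown",
            "active_status": "onhold"
        }
    }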
@@ -967,6 +967,16 @@ class JobsModel(TreeherderModelBase):
     def get_revision_resultset_lookup(self, revision_list):
         """
         Create a list of revision->resultset lookups from a list of revision
+
+        This will retrieve non-active resultsets as well. Some of the data
+        ingested has mixed up revisions that show for jobs, but are not in
+        the right repository in builds4hr/running/pending. So we ingest those
+        bad resultsets/revisions as non-active so that we don't keep trying
+        to re-ingest them. Allowing this query to retrieve non ``active``
+        resultsets means we will avoid re-doing that work by detacting that
+        we've already ingested it.
+
+        But we skip ingesting the job, because the resultset is not active.
         """
 
         replacement = ",".join(["%s"] * len(revision_list))
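The reason for letting this query return non-active resultsets is deduplication: once a bad revision has been stored as 'onhold', a later lookup finds it and the ETL layer does not try to fetch it again. A small sketch of that check; the caller names (jobs_model, missing_resultsets) are illustrative, not from this diff.

project = "mozilla-central"
lookup = jobs_model.get_revision_resultset_lookup(["abc123def456"])

if "abc123def456" in lookup:
    # already ingested, possibly as 'onhold'; no need to fetch the pushlog again,
    # though jobs tied to a non-active resultset are still skipped
    resultset = lookup["abc123def456"]
else:
    # genuinely unknown revision: queue it for json-pushes ingestion
    missing_resultsets[project].add("abc123def456")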
@@ -2405,6 +2415,7 @@ class JobsModel(TreeherderModelBase):
                 result.get('author', 'unknown@somewhere.com'),
                 result['revision_hash'],
                 result['push_timestamp'],
+                result.get('active_status', 'active'),
                 result['revision_hash']
             ]
         )
@@ -125,8 +125,8 @@
     },
     "set_result_set":{
 
-      "sql":"INSERT INTO `result_set` (`author`, `revision_hash`,`push_timestamp`)
-             SELECT ?,?,?
+      "sql":"INSERT INTO `result_set` (`author`, `revision_hash`,`push_timestamp`, `active_status`)
+             SELECT ?,?,?,?
              FROM DUAL
              WHERE NOT EXISTS (
                SELECT `revision_hash`, `push_timestamp`
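The JobsModel hunk above (at -2405) supplies the bind values for this statement. A sketch of how the five placeholders line up; pairing the trailing revision_hash with the NOT EXISTS guard is an inference from the visible SQL rather than something shown in full here.

# parameters passed for `set_result_set`, in order (see the JobsModel hunk above)
placeholders = [
    result.get('author', 'unknown@somewhere.com'),  # `author`
    result['revision_hash'],                        # `revision_hash`
    result['push_timestamp'],                       # `push_timestamp`
    result.get('active_status', 'active'),          # `active_status` ('onhold' for bad pushes)
    result['revision_hash'],                        # WHERE NOT EXISTS guard keeps the insert idempotent
]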
@@ -760,6 +760,7 @@
             rs.id,
             rs.revision_hash,
             rs.push_timestamp,
+            rs.active_status,
             revision.id as revision_id,
             revision.revision
             FROM result_set AS rs
@@ -767,7 +768,7 @@
             ON rs.id = revision_map.result_set_id
             INNER JOIN revision
             ON revision_map.revision_id = revision.id
-            WHERE rs.active_status = 'active'
+            WHERE 1
             REP0
             ORDER BY rs.push_timestamp DESC
             LIMIT ?,?
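Replacing the hard-coded filter with WHERE 1 plus the REP0 token lets callers decide at query time whether to restrict to active resultsets, so get_revision_resultset_lookup can now see 'onhold' rows while other callers keep the old behaviour. A rough sketch of the substitution idea; the exact replace mechanism is not shown in this diff and the helper below is illustrative only.

WHERE_CLAUSE = "WHERE 1 REP0"

def build_where(active_only=True):
    # callers that still want only active resultsets substitute the filter back in;
    # get_revision_resultset_lookup() would substitute an empty string instead
    condition = "AND rs.active_status = 'active'" if active_only else ""
    return WHERE_CLAUSE.replace("REP0", condition).strip()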