359 строки
15 KiB
Python
359 строки
15 KiB
Python
# This Source Code Form is subject to the terms of the Mozilla Public
|
|
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
# A script to mirror bugzilla issues into github, where we can see
|
|
# them as part of our github-and-jira-based planning procedure.
|
|
#
|
|
# For every relevant bug we find in bugzilla, we create a corresponding
|
|
# github issue that:
|
|
#
|
|
# * has matching summary and description text
|
|
# * has the "bugzilla" label
|
|
# * may have additional metadata in the issue description
|
|
#
|
|
# If such an issue already exists, we update it to match the bugzilla
|
|
# bug rather than creating a new one.
|
|
#
|
|
# Note that the mirroring is (for now) entirely one-way. Changes to bug summary,
|
|
# description or status in bugzilla will be pushed to github, but any changes
|
|
# in github will not be reflected back in bugzilla.
|
|
|
|
import re
|
|
import os
|
|
import urllib.parse
|
|
|
|
import requests
|
|
from github import Github
|
|
|
|
# Behaviour switches: when DRY_RUN is true no issue is created, edited or
# commented on in github; VERBOSE_DEBUG enables extra "nothing to do" logging.
DRY_RUN = False
VERBOSE_DEBUG = False

# Where mirror issues live: the current repo, older repos that may already
# hold mirror issues, and the label that marks an issue as a mirror.
GH_REPO = 'mozilla/application-services-bug-mirror'
GH_OLD_REPOS = ['mozilla/application-services']
GH_LABEL = 'bugzilla'

# Base URL of the bugzilla REST API.
BZ_URL = 'https://bugzilla.mozilla.org/rest'

# Footer appended to every mirror issue body, linking back to the bug.
SYNCED_ISSUE_TEXT = (
    '\n\n---\n'
    '\N{LADY BEETLE} Issue is synchronized with Bugzilla '
    '[Bug {id}](https://bugzilla.mozilla.org/show_bug.cgi?id={id})\n'
)
# Recovers the bug id from that footer.
# N.B. we can't use a r'raw string' literal here because of the \N escape.
SYNCED_ISSUE_BUGID_REGEX = re.compile(
    '\N{LADY BEETLE} Issue is synchronized with Bugzilla '
    '\\[Bug (\\d+)\\]'
)
# Formatted with a repo name to recognise "see also" links to issues in it.
SEE_ALSO_ISSUE_REGEX_TEMPLATE = r'^https://github.com/{}/issues/\d+$'
# Comment posted on a mirror issue when it is closed to match its bug.
SYNCED_ISSUE_CLOSE_COMMENT = (
    'Upstream bug has been closed with the following resolution: '
    '{resolution}.'
)

# Jira adds some metadata to issue descriptions, indicated by this separator.
# We want to preserve any lines like this from the github issue description.
JIRA_ISSUE_MARKER = '\N{BOX DRAWINGS LIGHT TRIPLE DASH VERTICAL}'

# For now, only look at recent bugs in order to preserve sanity.
MIN_CREATION_TIME = '20190801'
|
|
|
|
|
|
def log(msg, *args, **kwds):
    """Print `msg` after filling in its `str.format` placeholders.

    `msg` is converted to a string first, then formatted with the given
    positional and keyword arguments.
    """
    rendered = str(msg).format(*args, **kwds)
    print(rendered)
|
|
|
|
|
|
def get_json(url, timeout=60):
    """Fetch a URL and return the result parsed as JSON.

    `timeout` is passed straight to `requests.get` (seconds). Without one,
    `requests` waits indefinitely on a stalled server, which would hang the
    whole sync run.

    Raises whatever `requests` raises for a failed request, including the
    HTTP error raised by `raise_for_status()` for a 4xx/5xx response.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()
|
|
|
|
|
|
class BugSet(object):
    """A set of bugzilla bugs, which we might like to mirror into GitHub.

    This class knows how to query the bugzilla API to find bugs, and how to
    fetch appropriate metadata for mirroring into github.

    Importantly, it knows how to use a bugzilla API key to find confidential
    or security-sensitive bugs, and can report their existence without leaking
    potentially confidential details: only `id`, `is_open` and `see_also` are
    ever fetched with the API key.

    Use `update_from_bugzilla()` to query for bugs and add them to an in-memory
    store, then access them as if this were a dict keyed by bug number (keys
    are normalised with `str()`, so ints and strings both work). Each bug
    will be a dict with the following fields:

        * `id`:        The bug id, as returned by bugzilla
        * `is_open`:   Whether the bug is open
        * `see_also`:  The bug's "see also" field

    and, only for bugs visible to unauthenticated queries:

        * `summary`:    The one-line bug summary
        * `status`:     The bug's status field
        * `resolution`: The bug's resolution field
        * `comment0`:   The text of the bug's first comment, which is
                        typically a longer description
    """

    def __init__(self, api_key=None):
        self.api_key = api_key
        # Bug dicts, keyed by bug id as a string.
        self.bugs = {}

    def __getitem__(self, key):
        return self.bugs[str(key)]

    def __delitem__(self, key):
        del self.bugs[str(key)]

    def __contains__(self, key):
        # Without this, `in` falls back to iterating every key, and would not
        # apply the same str() normalisation as __getitem__/__delitem__.
        return str(key) in self.bugs

    def __iter__(self):
        return iter(self.bugs)

    def __len__(self):
        return len(self.bugs)

    def update_from_bugzilla(self, **kwds):
        """Slurp in bugs from bugzilla that match the given query keywords.

        The keywords are those accepted by `_make_query_string`. Matching bugs
        are merged into `self.bugs`; nothing is returned.
        """
        # First, fetch a minimal set of "safe" metadata that we're happy to put in
        # a public github issue, even for confidential bugzilla bugs.
        # This is the only query that's allowed to use a BZ API token to access
        # confidential bug info.
        url = BZ_URL + '/bug?include_fields=id,is_open,see_also'
        url += '&' + self._make_query_string(**kwds)
        if self.api_key is not None:
            url += '&api_key=' + urllib.parse.quote(self.api_key, safe='')
        found_bugs = set()
        for bug in get_json(url)['bugs']:
            bugid = str(bug['id'])
            found_bugs.add(bugid)
            if bugid not in self.bugs:
                self.bugs[bugid] = bug
            else:
                self.bugs[bugid].update(bug)
        if not found_bugs:
            return

        # Now make *unauthenticated* public API queries to fetch additional metadata
        # which we know is safe to make public. Any security-sensitive bugs will be
        # silently omitted from this query.
        public_bugs = set()
        url = BZ_URL + '/bug?include_fields=id,is_open,see_also,summary,status,resolution'
        # Sorted only so that the generated URL is reproducible between runs.
        url += '&id=' + '&id='.join(sorted(found_bugs))
        for bug in get_json(url)['bugs']:
            bugid = str(bug['id'])
            public_bugs.add(bugid)
            self.bugs[bugid].update(bug)
        if not public_bugs:
            return

        # Unlike with fetching bug metadata, trying to fetch comments for a confidential bug
        # will error the entire request rather than silently omitting it. So we only ask
        # for the bugs the public query returned. Note that the resulting URL is a bit
        # weird, it's:
        #
        #   /bug/<bug1>/comment?ids=bug2&ids=bug3...
        #
        # This allows us to fetch comments from multiple bugs in a single query.
        first_bug, *other_bugs = sorted(public_bugs)
        url = BZ_URL + '/bug/' + first_bug + '/comment'
        if other_bugs:
            url += '?ids=' + '&ids='.join(other_bugs)
        for bugnum, bug in get_json(url)['bugs'].items():
            comments = bug.get('comments')
            # Skip rather than crash if a bug comes back with no comments.
            if comments:
                self.bugs[str(bugnum)]['comment0'] = comments[0]['text']

    def _make_query_string(self, product=None, component=None, id=None, resolved=None,
                           whiteboard=None, creation_time=None, last_change_time=None):
        """Build the URL query string for a bugzilla bug search.

        `product`, `component` and `id` may each be a single value or a
        list/tuple/set of values; every value is URL-encoded. `resolved=False`
        restricts the search to unresolved bugs.

        Raises ValueError if `resolved` is true (not supported) or if no
        filter at all was given.
        """
        def listify(x):
            return x if isinstance(x, (list, tuple, set)) else (x,)

        def encode(x):
            # str() so that integer bug ids can be passed as well as strings.
            return urllib.parse.quote(str(x), safe='')

        qs = []
        if product is not None:
            qs.extend('product=' + encode(p) for p in listify(product))
        if component is not None:
            qs.extend('component=' + encode(c) for c in listify(component))
        if id is not None:
            qs.extend('id=' + encode(i) for i in listify(id))
        if whiteboard is not None:
            qs.append('whiteboard=' + encode(whiteboard))
        if creation_time is not None:
            qs.append('creation_time=' + encode(creation_time))
        if last_change_time is not None:
            qs.append('last_change_time=' + encode(last_change_time))
        if resolved is not None:
            if resolved:
                raise ValueError(
                    "Sorry, I haven't looked up how to query for only resolved bugs...")
            qs.append('resolution=---')
        if not qs:
            raise ValueError(
                "Cowardly refusing to query every bug in existence; please specify some filters")
        return '&'.join(qs)
|
|
|
|
|
|
class MirrorIssueSet(object):
    """A set of mirror issues from GitHub, which can be synced from bugzilla bugs.

    Given a `BugSet` containing the bugs that you want to appear in github, use
    like so:

        issues = MirrorIssueSet(GH_REPO, GH_LABEL, GITHUB_TOKEN)
        issues.sync_from_bugset(bugs)

    This will ensure that every bug in the bugset has a corresponding mirror issue,
    creating or updating issues as appropriate. Mirror issues whose bug is not in
    the bugset are looked up in bugzilla and updated as well, so issues for bugs
    that have since been closed in bugzilla get closed in github.
    """

    def __init__(self, repo, label, api_key=None):
        self._gh = Github(api_key)
        self._repo = self._gh.get_repo(repo)
        self._repo_name = repo
        self._label = self._repo.get_label(label)
        # Escape the repo name so that characters such as '.' in it are matched
        # literally rather than as regex metacharacters.
        self._see_also_regex = re.compile(
            SEE_ALSO_ISSUE_REGEX_TEMPLATE.format(re.escape(repo)))
        # The mirror issues, indexed by bugzilla bugid (as a string).
        self.mirror_issues = {}

    def sync_from_bugset(self, bugs, updates_only=False):
        """Sync the mirrored issues with the given BugSet (which might be modified in-place).

        When `updates_only` is true, existing mirror issues are updated but no
        new issues are created for bugs that have none in this repo.
        """
        self.update_from_github()
        log('Found {} mirror issues in github', len(self.mirror_issues))
        # Fetch details for any mirror issues that are not in the set.
        # They might be e.g. closed, or have been moved to a different component,
        # but we still want to update them in github.
        missing_bugs = [
            bugid for bugid in self.mirror_issues if bugid not in bugs]
        if missing_bugs:
            log('Fetching info for {} missing bugs from bugzilla', len(missing_bugs))
            bugs.update_from_bugzilla(id=missing_bugs)
        num_updated = 0
        for bugid in bugs:
            if updates_only and bugid not in self.mirror_issues:
                if VERBOSE_DEBUG:
                    log('Not creating new bug {} in old repo', bugid)
                continue
            if self.sync_issue_from_bug_info(bugid, bugs[bugid]):
                num_updated += 1
        if num_updated > 0:
            log('Updated {} issues from bugzilla to github', num_updated)
        else:
            log('Looks like everything is up-to-date in {}', self._repo_name)

    def update_from_github(self):
        """Find mirror issues in the github repo.

        We assume they have a special label for easy searching, and some text in the issue
        description that tells us what bug it links to. Labelled issues without
        that text, and second or later issues for the same bug, are logged
        and skipped.
        """
        for issue in self._repo.get_issues(state='open', labels=[self._label]):
            # GitHub reports an empty description as None rather than ''.
            match = SYNCED_ISSUE_BUGID_REGEX.search(issue.body or '')
            if not match:
                log("WARNING: Mirror issue #{} does not have a bugzilla bugid", issue.number)
                continue
            bugid = match.group(1)
            if bugid in self.mirror_issues:
                log("WARNING: Duplicate mirror issue #{} for Bug {}",
                    issue.number, bugid)
                continue
            self.mirror_issues[bugid] = issue

    def sync_issue_from_bug_info(self, bugid, bug_info):
        """Create or update the mirror issue for one bug.

        Returns True if an issue was created or updated (or would have been,
        under DRY_RUN), and False if there was nothing to do.
        """
        issue = self.mirror_issues.get(bugid)
        issue_info = self._format_issue_info(bug_info, issue)
        if issue is None:
            return self._create_issue_if_needed(bugid, bug_info, issue_info)
        return self._update_issue_if_changed(issue, bug_info, issue_info)

    def _create_issue_if_needed(self, bugid, bug_info, issue_info):
        """Create a mirror issue for an open bug that has none; return whether one was created."""
        if not bug_info['is_open']:
            return False
        # As a light hack, if the bugzilla bug has a "see also" link to an issue in our repo,
        # we assume that's an existing mirror issue and avoid creating a new one. This lets us
        # keep the bug open in bugzilla but close it in github without constantly creating new
        # mirror issues.
        for see_also in bug_info.get('see_also', ()):
            if self._see_also_regex.match(see_also) is not None:
                log('Ignoring bz{id}, which links to {} via see-also',
                    see_also, **bug_info)
                return False
        # `state` is only used when editing an existing issue; it is not
        # passed to create_issue.
        issue_info.pop('state')
        log('Creating mirror issue for bz{id}', **bug_info)
        if DRY_RUN:
            issue = {}
        else:
            issue = self._repo.create_issue(**issue_info)
        self.mirror_issues[bugid] = issue
        return True

    def _update_issue_if_changed(self, issue, bug_info, issue_info):
        """Edit an existing mirror issue if any field differs; return whether it changed."""
        changed_fields = [
            field for field in issue_info
            if issue_info[field] != getattr(issue, field)]
        if not changed_fields:
            if VERBOSE_DEBUG:
                log('No change for issue #{}', issue.number)
            return False
        # Note that this will close issues that are not open in bugzilla.
        log('Updating mirror issue #{} for bz{id} (changed: {})',
            issue.number, changed_fields, **bug_info)
        # Weird API thing where `issue.edit` accepts strings rather than label refs...
        issue_info['labels'] = [label.name for label in issue_info['labels']]
        if not DRY_RUN:
            # Explain why we are closing this issue.
            closing_with_resolution = (
                not bug_info['is_open']
                and 'state' in changed_fields
                and 'resolution' in bug_info)
            if closing_with_resolution:
                issue.create_comment(
                    SYNCED_ISSUE_CLOSE_COMMENT.format(resolution=bug_info['resolution']))
            issue.edit(**issue_info)
        return True

    def _format_issue_info(self, bug_info, issue):
        """Build the desired `state`/`title`/`body`/`labels` for a bug's mirror issue.

        `issue` is the existing mirror issue, or None if there is none yet.
        Bugs without `summary`/`comment0` (i.e. confidential ones) get
        placeholder text instead.
        """
        issue_info = {
            'state': 'open' if bug_info['is_open'] else 'closed',
            'title': bug_info.get('summary', 'Confidential Bugzilla issue'),
            'body': bug_info.get(
                'comment0',
                'No description is available for this confidential bugzilla issue.'),
        }

        if issue is None:
            issue_info['labels'] = [self._label]
        else:
            # Work on a copy: appending to `issue.labels` itself would also
            # change the value we later compare against, so a missing mirror
            # label would never be detected as a change.
            labels = list(issue.labels)
            if self._label not in labels:
                labels.append(self._label)
            issue_info['labels'] = labels

        # Ensure we include a link to the bugzilla bug for reference.
        issue_info['body'] += SYNCED_ISSUE_TEXT.format(**bug_info)

        if issue is not None:
            # An empty github description comes back as None.
            existing_body = issue.body or ''
            # Preserve any Jira sync lines in the issue body.
            for ln in existing_body.split("\n"):
                if ln.startswith(JIRA_ISSUE_MARKER):
                    issue_info['body'] += '\n' + ln
            # Jira seems to sometimes add a trailing newline, try to match it to avoid spurious updates.
            if existing_body.endswith('\n') and not issue_info['body'].endswith('\n'):
                issue_info['body'] += '\n'

        return issue_info
|
|
|
|
|
|
def sync_bugzilla_to_github():
    """Run one full bugzilla-to-github mirroring pass.

    Reads the optional `BZ_API_KEY` and `GITHUB_TOKEN` environment variables,
    collects the relevant unresolved bugs, updates mirror issues that already
    exist in the old repos, then mirrors whatever is left into `GH_REPO`.
    """
    # Find the sets of bugs in bugzilla that we want to mirror.
    log('Finding relevant bugs in bugzilla...')
    bugs = BugSet(os.environ.get('BZ_API_KEY'))
    bug_queries = (
        {'product': 'Firefox', 'component': 'Firefox Accounts'},
        {'product': 'Firefox', 'component': 'Sync'},
        {'whiteboard': 'SACI'},
    )
    for query in bug_queries:
        bugs.update_from_bugzilla(
            resolved=False, creation_time=MIN_CREATION_TIME, **query)
    log('Found {} bugzilla bugs', len(bugs))

    gh_token = os.environ.get('GITHUB_TOKEN')

    # Find any that are already represented in old github repos.
    # We don't want to make duplicates of them in the current repo!
    for old_repo in GH_OLD_REPOS:
        log('Syncing to old github repo at {}', old_repo)
        old_issues = MirrorIssueSet(old_repo, GH_LABEL, gh_token)
        old_issues.sync_from_bugset(bugs, updates_only=True)
        already_mirrored = [
            bugid for bugid in old_issues.mirror_issues if bugid in bugs]
        for bugid in already_mirrored:
            del bugs[bugid]
        log('Synced {} bugs, now {} left to sync', len(already_mirrored), len(bugs))

    log('Syncing to github repo at {}', GH_REPO)
    issues = MirrorIssueSet(GH_REPO, GH_LABEL, gh_token)
    issues.sync_from_bugset(bugs)
    log('Done!')
|
|
|
|
|
|
# Only run the sync when this file is executed as a script, not on import.
if __name__ == "__main__":
    sync_bugzilla_to_github()
|