Add a script for syncing bugzilla bugs into github issues.

Co-Authored-By: Edouard Oger <eoger@fastmail.com>
This commit is contained in:
Ryan Kelly 2019-08-06 13:29:41 +10:00
Родитель e5bafb05c5
Коммит 17e60ee459
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: FB70C973A037D258
3 изменённых файлов: 348 добавлений и 1 удалений

Просмотреть файл

@ -157,7 +157,7 @@ commands:
- run:
name: Check for any unrecorded changes in our dependency trees
command: |
pip3 install requests
pip3 install --require-hashes -r ./tools/requirements.txt
rustup install nightly
cargo metadata --locked > /dev/null
python3 ./tools/dependency_summary.py --check ./DEPENDENCIES.md
@ -165,6 +165,14 @@ commands:
python3 ./tools/dependency_summary.py --all-ios-targets --package megazord_ios --check megazords/ios/DEPENDENCIES.md
python3 ./tools/dependency_summary.py --all-android-targets --package fenix --check megazords/fenix/DEPENDENCIES.md
python3 ./tools/dependency_summary.py --all-android-targets --package lockbox --check megazords/lockbox/DEPENDENCIES.md
sync-bugzilla-to-github:
steps:
- checkout
- run:
name: Mirror Bugzilla issues into GitHub
command: |
sudo pip3 install --require-hashes -r ./tools/requirements.txt
python3 ./tools/sync_bugzilla_to_github.py
sync-tests:
steps:
- test-setup
@ -250,6 +258,11 @@ jobs:
- install-rust
- setup-rust-toolchain
- dependency-checks
Mirror Bugzilla issues into GitHub:
docker:
- image: circleci/python:latest
steps:
- sync-bugzilla-to-github
Rust tests - stable:
docker:
- image: circleci/rust:latest
@ -432,6 +445,16 @@ workflows:
branches:
only:
- master
mirror-bugzilla-to-github-periodically:
jobs:
- Mirror Bugzilla issues into GitHub
triggers:
- schedule:
cron: "0 * * * *"
filters:
branches:
only:
- master
run-tests:
jobs:
- Rust tests - stable

9
tools/requirements.txt Normal file
Просмотреть файл

@ -0,0 +1,9 @@
certifi==2019.6.16 --hash=sha256:046832c04d4e752f37383b628bc601a7ea7211496b4638f6514d0e5b9acc4939
chardet==3.0.4 --hash=sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691
Deprecated==1.2.6 --hash=sha256:b07b414c8aac88f60c1d837d21def7e83ba711052e03b3cbaff27972567a8f8d
idna==2.8 --hash=sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c
PyGithub==1.43.8 --hash=sha256:db415a5aeb5ab1e4a3263b1a091b4f9ffbd85a12a06a0303d5bf083ce7c1b2c8
PyJWT==1.7.1 --hash=sha256:5c6eca3c2940464d106b99ba83b00c6add741c9becaec087fb7ccdefea71350e
requests==2.22.0 --hash=sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31
urllib3==1.25.3 --hash=sha256:b246607a25ac80bedac05c6f282e3cdaf3afb65420fd024ac94435cabe6e18d1
wrapt==1.11.2 --hash=sha256:565a021fd19419476b9362b05eeaa094178de64f8361e44468f9e9d7843901e1

Просмотреть файл

@ -0,0 +1,315 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/. */
# A script to mirror bugzilla issues into github, where we can see
# them as part of our github-and-jira-based planning procedure.
#
# For every relevant bug we find in bugzilla, we create a corresponding
# github issue that:
#
# * has matching summary and description text
# * has the "bugzilla" label
# * may have additional metadata in the issue description
#
# If such an issue already exists, we update it to match the bugzilla
# bug rather than creating a new one.
#
# Note that the mirroring is (for now) entirely one-way. Changes to bug summary,
# description or status in bugzilla will be pushed to github, but any changes
# in github will not be reflected back in bugzilla.
import re
import os
import urllib.parse
import requests
from github import Github
GH_REPO = 'mozilla/application-services'
GH_LABEL = 'bugzilla'
BZ_URL = 'https://bugzilla.mozilla.org/rest'
SYNCED_ISSUE_TEXT = '\n\n---\n\N{LADY BEETLE} Issue is synchronized with Bugzilla [Bug {id}](https://bugzilla.mozilla.org/show_bug.cgi?id={id})\n'
SYNCED_ISSUE_BUGID_REGEX = re.compile(
# N.B. we can't use a r'raw string' literal here because of the \N escape.
'\N{LADY BEETLE} Issue is synchronized with Bugzilla \\[Bug (\\d+)\\]')
SEE_ALSO_ISSUE_REGEX_TEMPLATE = r'^https://github.com/{}/issues/\d+$'
# Jira adds some metadata to issue descriptions, indicated by this separator.
# We want to preserve any lines like this from the github issue description.
JIRA_ISSUE_MARKER = '\N{BOX DRAWINGS LIGHT TRIPLE DASH VERTICAL}'
# For now, only look at recent bugs in order to preserve sanity.
LAST_CHANGE_TIME = '20190801'
def log(msg, *args, **kwds):
    """Print a message, using `str.format` to fill in the given arguments.

    `msg` is converted to a string first, so any object may be passed. The
    positional and keyword arguments are substituted into its `{}` fields.
    """
    template = str(msg)
    rendered = template.format(*args, **kwds)
    print(rendered)
def get_json(url, timeout=60):
    """Fetch a URL and return the result parsed as JSON.

    `timeout` is passed through to `requests.get` (in seconds). Without it a
    stalled connection would block forever, hanging the periodic sync job.

    Raises `requests.HTTPError` (via `raise_for_status`) if the server
    responds with an error status.
    """
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    return r.json()
class BugSet(object):
    """A set of bugzilla bugs, which we might like to mirror into GitHub.

    This class knows how to query the bugzilla API to find bugs, and how to
    fetch appropriate metadata for mirroring into github.

    Importantly, it knows how to use a bugzilla API key to find confidential
    or security-sensitive bugs, and can report their existence without leaking
    potentially confidential details (the consumer can then report e.g. a
    placeholder summary rather than the actual bug summary).

    Use `update_from_bugzilla()` to query for bugs and add them to an in-memory
    store, then access them as if this were a dict keyed by bug number. Each bug
    will be a dict with the following fields:

        * `id`: The bug id, as returned by bugzilla
        * `is_open`: Whether the bug is open
        * `see_also`: The bug's "see also" links

    Bugs that are visible to unauthenticated queries additionally have:

        * `summary`: The one-line bug summary
        * `status`: The bug's status field
        * `comment0`: The text of the bug's first comment, which is typically
                      a longer description
    """

    def __init__(self, api_key=None):
        """Create an empty set; `api_key` is an optional bugzilla API key."""
        self.api_key = api_key
        # Bug info dicts, keyed by bug id *as a string*.
        self.bugs = {}

    def __getitem__(self, key):
        return self.bugs[str(key)]

    def __contains__(self, key):
        # Normalize the key the same way as `__getitem__`, so that `x in bugs`
        # and `bugs[x]` always agree (and membership is a dict lookup rather
        # than a scan through `__iter__`).
        return str(key) in self.bugs

    def __iter__(self):
        return iter(self.bugs)

    def __len__(self):
        return len(self.bugs)

    def update_from_bugzilla(self, **kwds):
        """Slurp in bugs from bugzilla that match the given query keywords.

        The keywords are those accepted by `_make_query_string`. Matching bugs
        are merged into `self.bugs`.
        """
        # First, fetch a minimal set of "safe" metadata that we're happy to put in
        # a public github issue, even for confidential bugzilla bugs.
        # This is the only query that's allowed to use a BZ API token to access
        # confidential bug info.
        url = BZ_URL + '/bug?include_fields=id,is_open,see_also'
        url += '&' + self._make_query_string(**kwds)
        if self.api_key is not None:
            url += '&api_key=' + urllib.parse.quote(str(self.api_key), safe='')
        found_bugs = set()
        for bug in get_json(url)['bugs']:
            bugid = str(bug['id'])
            found_bugs.add(bugid)
            self.bugs.setdefault(bugid, {}).update(bug)
        if not found_bugs:
            return
        # Now make *unauthenticated* public API queries to fetch additional metadata
        # which we know is safe to make public. Any security-sensitive bugs will be
        # silently omitted from this query.
        public_bugs = set()
        url = BZ_URL + '/bug?include_fields=id,is_open,see_also,summary,status'
        url += '&id=' + '&id='.join(sorted(found_bugs))
        for bug in get_json(url)['bugs']:
            bugid = str(bug['id'])
            public_bugs.add(bugid)
            self.bugs[bugid].update(bug)
        if not public_bugs:
            return
        # Unlike with fetching bug metadata, trying to fetch comments for a confidential bug
        # will error the entire request rather than silently omitting it. So we only ask for
        # the bugs that the public query above returned. Note that the resulting URL is a bit
        # weird, it's:
        #
        #   /bug/<bug1>/comment?ids=bug2&ids=bug3...
        #
        # This allows us to fetch comments from multiple bugs in a single query.
        first_bug, *other_bugs = sorted(public_bugs)
        url = BZ_URL + '/bug/' + first_bug + '/comment'
        if other_bugs:
            url += '?ids=' + '&ids='.join(other_bugs)
        for bugnum, bug in get_json(url)['bugs'].items():
            comments = bug.get('comments') or []
            # Don't crash on a bug with no comments; treat it as an empty description.
            self.bugs[str(bugnum)]['comment0'] = comments[0]['text'] if comments else ''

    def _make_query_string(self, product=None, component=None, id=None, resolved=None, last_change_time=None):
        """Build the URL query string for a bugzilla bug search.

        `product`, `component` and `id` may each be a single value or a
        list/tuple/set of values. Ids may be given as strings or integers.
        `resolved=False` restricts the search to unresolved bugs.

        Raises `ValueError` if `resolved` is true (not supported), or if no
        filters at all were given.
        """
        def listify(x):
            return x if isinstance(x, (list, tuple, set, frozenset)) else (x,)

        def encode(x):
            # `quote` only accepts str/bytes, so coerce first (ids may be ints).
            return urllib.parse.quote(str(x), safe='')

        qs = []
        if product is not None:
            qs.extend('product=' + encode(p) for p in listify(product))
        if component is not None:
            qs.extend('component=' + encode(c) for c in listify(component))
        if id is not None:
            qs.extend('id=' + encode(i) for i in listify(id))
        if last_change_time is not None:
            qs.append('last_change_time=' + encode(last_change_time))
        if resolved is not None:
            if resolved:
                raise ValueError(
                    "Sorry, I haven't looked up how to query for only resolved bugs...")
            qs.append('resolution=---')
        if not qs:
            raise ValueError(
                "Cowardly refusing to query every bug in existence; please specify some filters")
        return '&'.join(qs)
class MirrorIssueSet(object):
    """A set of mirror issues from GitHub, which can be synced to bugzilla bugs.

    Given a `BugSet` containing the bugs that you want to appear in github, use
    like so:

        issues = MirrorIssueSet(GH_REPO, GH_LABEL, GITHUB_TOKEN)
        issues.sync_from_bugset(bugs)

    This will ensure that every bug in the bugset has a corresponding mirror issue,
    creating or updating issues as appropriate. It will also close out any mirror issues
    that do not appear in the bugset, on the assumption that they've been closed in
    bugzilla.
    """

    def __init__(self, repo, label, api_key=None):
        """Connect to github.

        `repo` is the "owner/name" of the repository holding the mirror issues,
        `label` is the name of the label that marks them, and `api_key` is the
        github token to authenticate with.
        """
        self._gh = Github(api_key)
        self._repo = self._gh.get_repo(repo)
        self._label = self._repo.get_label(label)
        # Escape the repo name so that characters such as "." in it are matched literally.
        self._see_also_regex = re.compile(
            SEE_ALSO_ISSUE_REGEX_TEMPLATE.format(re.escape(repo)))
        # The mirror issues, indexed by bugzilla bugid.
        self.mirror_issues = {}

    def sync_from_bugset(self, bugs):
        """Sync the mirrored issues with the given BugSet (which might be modified in-place)."""
        self.update_from_github()
        log('Found {} mirror issues in github', len(self.mirror_issues))
        # Fetch details for any mirror issues that are not in the set.
        # They might be e.g. closed, or have been moved to a different component,
        # but we still want to update them in github.
        missing_bugs = [
            bugid for bugid in self.mirror_issues if bugid not in bugs]
        if missing_bugs:
            log('Fetching info for {} missing bugs from bugzilla', len(missing_bugs))
            bugs.update_from_bugzilla(id=missing_bugs)
        num_updated = 0
        for bugid in bugs:
            if self.sync_issue_from_bug_info(bugid, bugs[bugid]):
                num_updated += 1
        if num_updated > 0:
            log('Updated {} issues from bugzilla to github', num_updated)
        else:
            log('Looks like everything is up-to-date')

    def update_from_github(self):
        """Find mirror issues in the github repo.

        We assume they have a special label for easy searching, and some text in the issue
        description that tells us what bug it links to.
        """
        for issue in self._repo.get_issues(state='open', labels=[self._label]):
            # An issue with an empty description may report its body as None.
            match = SYNCED_ISSUE_BUGID_REGEX.search(issue.body or '')
            if not match:
                log("WARNING: Mirror issue #{} does not have a bugzilla bugid", issue.number)
                continue
            bugid = match.group(1)
            if bugid in self.mirror_issues:
                log("WARNING: Duplicate mirror issue #{} for Bug {}",
                    issue.number, bugid)
                continue
            self.mirror_issues[bugid] = issue

    def sync_issue_from_bug_info(self, bugid, bug_info):
        """Create or update the mirror issue for a single bug.

        `bugid` is the bug id as a string and `bug_info` is the corresponding
        dict from the `BugSet`. Returns True if an issue was created or
        edited in github, and False if nothing needed to change.
        """
        issue = self.mirror_issues.get(bugid, None)
        issue_info = self._format_issue_info(bug_info, issue)
        if issue is None:
            # We never create mirror issues for bugs that are already closed.
            if not bug_info['is_open']:
                return False
            # As a light hack, if the bugzilla bug has a "see also" link to an issue in our repo,
            # we assume that's an existing mirror issue and avoid creating a new one. This lets us
            # keep the bug open in bugzilla but close it in github without constantly creating new
            # mirror issues.
            for see_also in bug_info.get('see_also', ()):
                if self._see_also_regex.match(see_also) is not None:
                    log('Ignoring bz{id}, which links to {} via see-also',
                        see_also, **bug_info)
                    return False
            # The state is only passed when editing an existing issue, not when creating one.
            issue_info.pop('state')
            log('Creating mirror issue for bz{id}', **bug_info)
            self.mirror_issues[bugid] = self._repo.create_issue(**issue_info)
            return True
        changed_fields = [
            field for field in issue_info if issue_info[field] != getattr(issue, field)]
        if not changed_fields:
            return False
        # Note that this will close issues that are not open in bugzilla.
        log('Updating mirror issue #{} for bz{id} (changed: {})',
            issue.number, changed_fields, **bug_info)
        # Weird API thing where `issue.edit` accepts strings rather than label refs...
        issue_info['labels'] = [label.name for label in issue_info['labels']]
        issue.edit(**issue_info)
        return True

    def _format_issue_info(self, bug_info, issue):
        """Build the fields that the mirror issue for `bug_info` should have.

        `issue` is the existing mirror issue, or None if there isn't one yet.
        Returns a dict with `state`, `title`, `body` and `labels` keys.
        """
        issue_info = {
            'state': 'open' if bug_info['is_open'] else 'closed'
        }
        if 'summary' in bug_info:
            issue_info['title'] = bug_info['summary']
        else:
            issue_info['title'] = 'Confidential Bugzilla issue'
        if 'comment0' in bug_info:
            issue_info['body'] = bug_info['comment0']
        else:
            issue_info['body'] = 'No description is available for this confidential bugzilla issue.'
        if issue is None:
            issue_info['labels'] = [self._label]
        else:
            # Work on a copy, so that we never modify the issue's own label list.
            # If `issue.labels` hands back the same list object on every access,
            # appending to it in place would make the "did the labels change?"
            # comparison in `sync_issue_from_bug_info` always come out equal.
            issue_info['labels'] = list(issue.labels)
            if self._label not in issue_info['labels']:
                issue_info['labels'].append(self._label)
        # Ensure we include a link to the bugzilla bug for reference.
        issue_info['body'] += SYNCED_ISSUE_TEXT.format(**bug_info)
        # Preserve any Jira sync lines in the issue body.
        if issue is not None:
            # An issue with an empty description may report its body as None.
            existing_body = issue.body or ''
            for ln in existing_body.split("\n"):
                if ln.startswith(JIRA_ISSUE_MARKER):
                    issue_info['body'] += '\n' + ln
            # Jira seems to sometimes add a trailing newline, try to match it to avoid spurious updates.
            if existing_body.endswith('\n') and not issue_info['body'].endswith('\n'):
                issue_info['body'] += '\n'
        return issue_info
def sync_bugzilla_to_github():
    """Mirror the relevant bugzilla bugs into github issues.

    Credentials are read from the `BZ_API_KEY` and `GITHUB_TOKEN` environment
    variables; either may be unset, in which case None is passed along.
    """
    # Collect the unresolved, recently-changed bugs from each component we track.
    log('Finding relevant bugs in bugzilla...')
    bz_api_key = os.environ.get('BZ_API_KEY')
    bugs = BugSet(bz_api_key)
    for component in ('Firefox Accounts', 'Sync'):
        bugs.update_from_bugzilla(
            product='Firefox',
            component=component,
            resolved=False,
            last_change_time=LAST_CHANGE_TIME,
        )
    log('Found {} bugzilla bugs', len(bugs))

    # Bring the github mirror issues in line with what we found.
    log('Syncing to github')
    gh_token = os.environ.get('GITHUB_TOKEN')
    issues = MirrorIssueSet(GH_REPO, GH_LABEL, gh_token)
    issues.sync_from_bugset(bugs)
    log('Done!')


# Only run the sync when executed as a script, not when imported.
if __name__ == "__main__":
    sync_bugzilla_to_github()