зеркало из https://github.com/mozilla/bugbug.git
Download data from Bugzilla without using the search API.
At least until https://bugzilla.mozilla.org/show_bug.cgi?id=1508695 is fixed.
This commit is contained in:
Родитель
2d555e2fa4
Коммит
368c7cb3d8
|
@ -6,6 +6,8 @@ install:
|
||||||
- pip install -r test-requirements.txt
|
- pip install -r test-requirements.txt
|
||||||
script:
|
script:
|
||||||
- flake8
|
- flake8
|
||||||
|
- python run.py --download --train --goal bug
|
||||||
|
- python run.py --goal bug
|
||||||
- python -m pytest tests/test_*.py
|
- python -m pytest tests/test_*.py
|
||||||
- python setup.py sdist
|
- python setup.py sdist
|
||||||
- pip install dist/bugbug-$(cat VERSION).tar.gz
|
- pip install dist/bugbug-$(cat VERSION).tar.gz
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
|
||||||
import itertools
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
@ -86,7 +85,7 @@ def _download(ids_or_query):
|
||||||
|
|
||||||
|
|
||||||
def download_bugs_between(date_from, date_to, security=False):
|
def download_bugs_between(date_from, date_to, security=False):
|
||||||
products = [
|
products = set([
|
||||||
'Add-on SDK',
|
'Add-on SDK',
|
||||||
'Android Background Services',
|
'Android Background Services',
|
||||||
'Core',
|
'Core',
|
||||||
|
@ -101,41 +100,22 @@ def download_bugs_between(date_from, date_to, security=False):
|
||||||
'NSPR',
|
'NSPR',
|
||||||
'NSS',
|
'NSS',
|
||||||
'Toolkit',
|
'Toolkit',
|
||||||
]
|
])
|
||||||
|
|
||||||
query = {
|
r = requests.get('https://bugzilla.mozilla.org/rest/bug?include_fields=id&f1=creation_ts&o1=greaterthan&v1={}&limit=1&order=bug_id'.format(date_from.strftime('%Y-%m-%d')))
|
||||||
'limit': 500,
|
first_id = r.json()['bugs'][0]['id']
|
||||||
'order': 'bug_id',
|
|
||||||
'product': products,
|
|
||||||
'f1': 'bug_id', 'o1': 'greaterthan', 'v1': '',
|
|
||||||
'f2': 'creation_ts', 'o2': 'greaterthan', 'v2': date_from.strftime('%Y-%m-%d'),
|
|
||||||
'f3': 'creation_ts', 'o3': 'lessthan', 'v3': date_to.strftime('%Y-%m-%d'),
|
|
||||||
'f4': 'cf_last_resolved', 'o4': 'lessthan', 'v4': date_to.strftime('%Y-%m-%d'),
|
|
||||||
}
|
|
||||||
|
|
||||||
if not security:
|
r = requests.get('https://bugzilla.mozilla.org/rest/bug?include_fields=id&f1=creation_ts&o1=lessthan&v1={}&limit=1&order=bug_id%20desc'.format(date_to.strftime('%Y-%m-%d')))
|
||||||
query['f5'] = 'bug_group'
|
last_id = r.json()['bugs'][0]['id']
|
||||||
query['o5'] = 'isempty'
|
|
||||||
|
|
||||||
last_id = 0
|
assert first_id < last_id
|
||||||
total_downloaded = 0
|
|
||||||
while True:
|
|
||||||
query['v1'] = last_id
|
|
||||||
bugs = _download(query)
|
|
||||||
|
|
||||||
last_id = max([last_id] + [bug for bug in bugs.keys()])
|
all_ids = range(first_id, last_id + 1)
|
||||||
|
|
||||||
total_downloaded += len(bugs)
|
download_bugs(all_ids, security=security, products=products)
|
||||||
|
|
||||||
print('Downloaded {} bugs, up to ID {}'.format(total_downloaded, last_id))
|
|
||||||
|
|
||||||
db.append(BUGS_DB, bugs.values())
|
|
||||||
|
|
||||||
if len(bugs) < 500:
|
|
||||||
break
|
|
||||||
|
|
||||||
|
|
||||||
def download_bugs(bug_ids, security=False):
|
def download_bugs(bug_ids, products=None, security=False):
|
||||||
old_bug_count = 0
|
old_bug_count = 0
|
||||||
old_bugs = []
|
old_bugs = []
|
||||||
new_bug_ids = set([int(bug_id) for bug_id in bug_ids])
|
new_bug_ids = set([int(bug_id) for bug_id in bug_ids])
|
||||||
|
@ -147,16 +127,27 @@ def download_bugs(bug_ids, security=False):
|
||||||
|
|
||||||
print('Loaded {} bugs.'.format(old_bug_count))
|
print('Loaded {} bugs.'.format(old_bug_count))
|
||||||
|
|
||||||
|
yield from old_bugs
|
||||||
|
|
||||||
print('To download {} bugs.'.format(len(new_bug_ids)))
|
print('To download {} bugs.'.format(len(new_bug_ids)))
|
||||||
|
|
||||||
new_bugs = _download(new_bug_ids)
|
new_bug_ids = sorted(list(new_bug_ids))
|
||||||
|
|
||||||
if not security:
|
total_downloaded = 0
|
||||||
new_bugs = {bug_id: bug for bug_id, bug in new_bugs.items() if len(bug['groups']) == 0}
|
chunks = (new_bug_ids[i:(i + 500)] for i in range(0, len(new_bug_ids), 500))
|
||||||
|
for chunk in chunks:
|
||||||
|
new_bugs = _download(chunk)
|
||||||
|
|
||||||
print('Total number of bugs: {}'.format(old_bug_count + len(new_bugs)))
|
total_downloaded += len(new_bugs)
|
||||||
|
|
||||||
|
print('Downloaded {} bugs'.format(total_downloaded))
|
||||||
|
|
||||||
|
if not security:
|
||||||
|
new_bugs = {bug_id: bug for bug_id, bug in new_bugs.items() if len(bug['groups']) == 0}
|
||||||
|
|
||||||
|
if products is not None:
|
||||||
|
new_bugs = {bug_id: bug for bug_id, bug in new_bugs.items() if bug['product'] in products}
|
||||||
|
|
||||||
if len(new_bugs):
|
|
||||||
db.append(BUGS_DB, new_bugs.values())
|
db.append(BUGS_DB, new_bugs.values())
|
||||||
|
|
||||||
return itertools.chain(old_bugs, new_bugs.items())
|
yield from new_bugs.items()
|
||||||
|
|
13
bugbug/db.py
13
bugbug/db.py
|
@ -21,10 +21,11 @@ def register(path, url):
|
||||||
os.makedirs(parent_dir, exist_ok=True)
|
os.makedirs(parent_dir, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
def read(path):
|
def download():
|
||||||
assert path in DATABASES
|
for path, url in DATABASES.items():
|
||||||
|
if os.path.exists(path):
|
||||||
|
continue
|
||||||
|
|
||||||
if not os.path.exists(path):
|
|
||||||
# Download and extract database.
|
# Download and extract database.
|
||||||
|
|
||||||
xz_path = '{}.xz'.format(path)
|
xz_path = '{}.xz'.format(path)
|
||||||
|
@ -37,8 +38,12 @@ def read(path):
|
||||||
with lzma.open(xz_path) as input_f:
|
with lzma.open(xz_path) as input_f:
|
||||||
shutil.copyfileobj(input_f, output_f)
|
shutil.copyfileobj(input_f, output_f)
|
||||||
|
|
||||||
|
|
||||||
|
def read(path):
|
||||||
|
assert path in DATABASES
|
||||||
|
|
||||||
if not os.path.exists(path):
|
if not os.path.exists(path):
|
||||||
raise Exception('Database {} does not exist.'.format(path))
|
return ()
|
||||||
|
|
||||||
with open(path, 'r') as f:
|
with open(path, 'r') as f:
|
||||||
for line in f:
|
for line in f:
|
||||||
|
|
3
run.py
3
run.py
|
@ -6,7 +6,9 @@
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
from bugbug import bugzilla
|
from bugbug import bugzilla
|
||||||
|
from bugbug import db
|
||||||
from bugbug import labels
|
from bugbug import labels
|
||||||
|
from bugbug import repository # noqa
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
|
@ -17,6 +19,7 @@ if __name__ == '__main__':
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.download:
|
if args.download:
|
||||||
|
db.download()
|
||||||
bug_ids = labels.get_all_bug_ids()
|
bug_ids = labels.get_all_bug_ids()
|
||||||
bugzilla.download_bugs(bug_ids)
|
bugzilla.download_bugs(bug_ids)
|
||||||
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче